Skip to content

Instantly share code, notes, and snippets.

@lohxx
Last active September 12, 2021 03:26
Show Gist options
  • Save lohxx/69ae3e7038913c2c08fd3990b3365b36 to your computer and use it in GitHub Desktop.
Save lohxx/69ae3e7038913c2c08fd3990b3365b36 to your computer and use it in GitHub Desktop.
Port do scraper de classificações do Brasileirão para Rust
# Cargo manifest for the Rust port of the Brasileirão standings scraper.
[package]
name = "brasileirao-rust-port"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# HTTP client; the program uses the synchronous `reqwest::blocking::Client`.
# NOTE(review): the `json` feature does not appear to be used by the visible code — confirm before removing.
reqwest = { version = "0.11", features = ["blocking", "json"] }
# HTML parsing and CSS selectors (`Html`, `Selector`).
scraper = "0.12.0"
# Terminal text styling (`console::Style`) used to colour the position numbers.
console = "0.14.1"
#[allow(dead_code)]
#[allow(unused_variables)]
use std::fmt;
use std::cmp::Reverse;
use std::{io::Read, vec};
use reqwest;
use console::Style;
use scraper::{Html, Selector, html::Select};
/// Numeric standings columns of a single team.
///
/// Every field is an `i8` and is filled by `Time::from` from a fixed cell
/// index of the scraped table row; the index is noted on each field.
/// The meanings given for the abbreviated fields are presumed from the usual
/// Brazilian league table headers — TODO confirm against the scraped page.
#[derive(Debug)]
struct Classificacao {
/// Points (row cell 3).
pontos: i8,
/// Games played (row cell 4).
jogos: i8,
/// Wins (row cell 5).
vitorias: i8,
/// Draws (row cell 6).
empates: i8,
/// Losses (row cell 7).
derrotas: i8,
/// Presumably goals scored, "gols pró" (row cell 8).
gp: i8,
/// Presumably goals conceded, "gols contra" (row cell 9).
gc: i8,
/// Presumably goal difference, "saldo de gols" (row cell 10).
sg: i8,
/// Presumably yellow cards, "cartões amarelos" (row cell 11).
ca: i8,
/// Presumably red cards, "cartões vermelhos" (row cell 12).
cv: i8
}
/// A team ("time") together with its standings numbers.
///
/// Built from one scraped table row through `From<Vec<String>>`.
#[derive(Debug)]
struct Time {
/// Team name: the text before the first `-` of row cell 2, trimmed.
nome: String,
/// The numeric columns of the row.
classificacao: Classificacao
}
/// Builds a [`Time`] from the non-empty text cells of one standings row.
///
/// Cell 2 holds the team label; everything before its first `-` (trimmed)
/// becomes the name. Cells 3 through 12 are parsed as `i8` statistics.
///
/// # Panics
///
/// Panics if the row has fewer than 13 cells or if any of cells 3..=12 is not
/// a valid `i8`.
impl From<Vec<String>> for Time {
    fn from(time: Vec<String>) -> Self {
        let nome = time[2].split("-").next().unwrap().trim().to_string();

        // Reads one numeric cell; panics on a missing or malformed cell.
        let coluna = |indice: usize| -> i8 { time[indice].parse().unwrap() };

        let pontos = coluna(3);
        let jogos = coluna(4);
        let vitorias = coluna(5);
        let empates = coluna(6);
        let derrotas = coluna(7);
        // Cell 9 is read before cell 8, same as the field order used so far.
        let gc = coluna(9);
        let gp = coluna(8);
        let sg = coluna(10);
        let ca = coluna(11);
        let cv = coluna(12);

        Time {
            nome,
            classificacao: Classificacao {
                pontos,
                jogos,
                vitorias,
                empates,
                derrotas,
                gp,
                gc,
                sg,
                ca,
                cv,
            },
        }
    }
}
impl fmt::Display for Time {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f, "{:<30} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}",
self.nome,
self.classificacao.pontos,
self.classificacao.jogos,
self.classificacao.vitorias,
self.classificacao.derrotas,
self.classificacao.gp,
self.classificacao.gc,
self.classificacao.sg)
}
}
/// Downloads `url` with a blocking HTTP GET and returns the response body.
///
/// A response with a 4xx/5xx status is treated as a failure instead of being
/// returned as if it were the expected page, so a server error surfaces here
/// rather than as an unrelated parsing panic later on.
///
/// # Panics
///
/// Panics (with the URL and the underlying error in the message) if the
/// request cannot be sent, if the server answers with an error status, or if
/// the body cannot be read as UTF-8 text.
fn download_page(url: &str) -> String {
    let client = reqwest::blocking::Client::new();
    let mut res = client
        .get(url)
        .send()
        .unwrap_or_else(|err| panic!("request to {} failed: {}", url, err))
        .error_for_status()
        .unwrap_or_else(|err| panic!("{} answered with an error status: {}", url, err));

    let mut body = String::new();
    res.read_to_string(&mut body)
        .unwrap_or_else(|err| panic!("could not read the body of {}: {}", url, err));
    body
}
/// Extracts the standings rows from the downloaded HTML.
///
/// Locates the first `tbody` matched by the table selector, then every
/// `tr.expand-trigger` row inside it. Each row becomes the list of its text
/// nodes, trimmed, with empty strings dropped.
///
/// # Panics
///
/// Panics if either selector fails to parse or if no element matches the
/// table selector.
fn parse_times(body: &mut String) -> Vec<Vec<String>> {
    let pagina = Html::parse_document(body);
    let seletor_tabela = Selector::parse(
        "#menu-panel > article > div.container > div > div > section.m-b-50.p-t-10.row > div.col-md-8.col-lg-9 > table > tbody",
    )
    .unwrap();
    let seletor_linha = Selector::parse("tr.expand-trigger").unwrap();

    // Only the first matching table body is used.
    let mut corpos: Select = pagina.select(&seletor_tabela);
    let corpo = corpos.next().unwrap();

    corpo
        .select(&seletor_linha)
        .map(|linha| {
            linha
                .text()
                .map(str::trim)
                .filter(|texto| !texto.is_empty())
                .map(String::from)
                .collect()
        })
        .collect()
}
/// Prints one line per team, each prefixed by its position number.
///
/// Positions start at `posicao` and grow by one per team; only the position
/// number is rendered with `style`, the team itself uses its `Display` output.
fn print_classificacao(classificacoes: &[Time], style: Style, posicao: u8) {
    let mut numero = posicao;
    classificacoes.iter().for_each(|time| {
        println!("{} {}", style.apply_to(numero), time);
        numero += 1;
    });
}
/// Downloads the Série A standings page, ranks the teams and prints the table.
///
/// Teams are ordered by points (highest first); ties are broken by name in
/// ascending order. The ranked list is printed in five consecutive bands, each
/// with its own colour for the position number: positions 1-4 blue, 5-6
/// yellow, 7-12 green, 13-16 white and 17-20 red.
///
/// When fewer than 20 teams were scraped the bands are simply cut short
/// instead of panicking on an out-of-range slice; teams beyond the 20th are
/// not printed.
fn main() {
    let url = "https://www.cbf.com.br/futebol-brasileiro/competicoes/campeonato-brasileiro-serie-a";
    let mut body = download_page(url);

    let mut times: Vec<Time> = parse_times(&mut body)
        .into_iter()
        .map(Time::from)
        .collect();

    // Same ordering as a (Reverse(points), name) key, but without cloning the
    // name on every key evaluation. `sort_by` is stable, like `sort_by_key`.
    times.sort_by(|a, b| {
        Reverse(a.classificacao.pontos)
            .cmp(&Reverse(b.classificacao.pontos))
            .then_with(|| a.nome.cmp(&b.nome))
    });

    // (first position, number of teams, colour) for each band of the table.
    let faixas = vec![
        (1u8, 4usize, Style::new().blue()),
        (5, 2, Style::new().yellow()),
        (7, 6, Style::new().green()),
        (13, 4, Style::new().white()),
        (17, 4, Style::new().red()),
    ];

    let mut restantes: &[Time] = &times;
    for (posicao, quantidade, estilo) in faixas {
        // Clamp the band to what is left so a short table cannot panic.
        let (faixa, resto) = restantes.split_at(quantidade.min(restantes.len()));
        print_classificacao(faixa, estilo, posicao);
        restantes = resto;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment