2022-08-15 15:46:58 +02:00
|
|
|
use regex::Regex;
|
2022-08-15 14:53:09 +02:00
|
|
|
use scraper::{Html, Selector};
|
2022-08-15 12:20:16 +02:00
|
|
|
|
|
|
|
pub async fn info() {
|
2022-08-15 14:53:09 +02:00
|
|
|
let document = get_webpage().await.expect("Can't reach info website.");
|
|
|
|
|
|
|
|
// Selectors
|
|
|
|
let sel_ul = Selector::parse("ul").unwrap();
|
2022-08-15 15:46:58 +02:00
|
|
|
let sel_li = Selector::parse("li").unwrap();
|
2022-08-15 14:53:09 +02:00
|
|
|
|
|
|
|
// Find the raw infos in html page
|
2022-08-15 15:46:58 +02:00
|
|
|
let mut raw_data = Vec::new();
|
2022-08-15 14:53:09 +02:00
|
|
|
for (i, data) in document.select(&sel_ul).enumerate() {
|
|
|
|
if [1, 2].contains(&i) {
|
2022-08-15 15:46:58 +02:00
|
|
|
raw_data.push(data);
|
2022-08-15 14:53:09 +02:00
|
|
|
}
|
|
|
|
}
|
2022-08-15 15:46:58 +02:00
|
|
|
|
|
|
|
let mut data = std::collections::HashMap::new();
|
|
|
|
// d => date
|
|
|
|
// r => repetition
|
|
|
|
let re = Regex::new(r"(?P<d>\d{1,2} \w+ \d{4}).+(?P<r>\d)").unwrap();
|
|
|
|
for (i, ul) in raw_data.into_iter().enumerate() {
|
|
|
|
for element in ul.select(&sel_li) {
|
|
|
|
match element.inner_html() {
|
|
|
|
e if e.starts_with("Début") => {
|
|
|
|
let captures = re.captures(&e).unwrap();
|
|
|
|
data.insert(
|
|
|
|
i,
|
|
|
|
format!(
|
|
|
|
"{} pendant {}s",
|
|
|
|
captures.name("d").unwrap().as_str(),
|
|
|
|
captures.name("r").unwrap().as_str()
|
|
|
|
),
|
|
|
|
);
|
|
|
|
}
|
|
|
|
e if e.starts_with("Reprise") => {
|
|
|
|
let captures = re.captures(&e).unwrap();
|
|
|
|
captures.name("g");
|
|
|
|
data.insert(
|
|
|
|
i,
|
|
|
|
format!(
|
|
|
|
"{} puis reprise {} pendant {}s",
|
|
|
|
data.get(&i).unwrap(),
|
|
|
|
captures.name("d").unwrap().as_str(),
|
|
|
|
captures.name("r").unwrap().as_str()
|
|
|
|
),
|
|
|
|
);
|
|
|
|
}
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
println!("{:#?}", data);
|
2022-08-15 12:20:16 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
async fn get_webpage() -> Result<Html, Box<dyn std::error::Error>> {
|
|
|
|
/* let html = reqwest::get("https://informatique.up8.edu/licence-iv/edt").await?.text().await?;
|
|
|
|
|
|
|
|
Ok(Html::parse_document(&html)) */
|
|
|
|
|
|
|
|
let html = include_str!("../target/debug2.html");
|
|
|
|
Ok(Html::parse_document(html))
|
|
|
|
}
|