Archived
1
0
Fork 0
forked from Anri/cal8tor

wip: parsing school range

This commit is contained in:
Mylloon 2022-08-15 15:46:58 +02:00
parent 449b200629
commit 1e09947b68
Signed by: Anri
GPG key ID: A82D63DFF8D1317F

View file

@ -1,3 +1,4 @@
use regex::Regex;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
pub async fn info() { pub async fn info() {
@ -5,13 +6,53 @@ pub async fn info() {
// Selectors // Selectors
let sel_ul = Selector::parse("ul").unwrap(); let sel_ul = Selector::parse("ul").unwrap();
let sel_li = Selector::parse("li").unwrap();
// Find the raw infos in html page // Find the raw infos in html page
let mut raw_data = Vec::new();
for (i, data) in document.select(&sel_ul).enumerate() { for (i, data) in document.select(&sel_ul).enumerate() {
if [1, 2].contains(&i) { if [1, 2].contains(&i) {
println!("\n{} - {:#?}", data.value().name(), data.inner_html()); raw_data.push(data);
} }
} }
let mut data = std::collections::HashMap::new();
// d => date
// r => repetition
let re = Regex::new(r"(?P<d>\d{1,2} \w+ \d{4}).+(?P<r>\d)").unwrap();
// Build one summary string per selected list, keyed by the list's position in `raw_data`.
// A "Début" item creates the entry; a "Reprise" item in the same list extends it.
for (i, ul) in raw_data.into_iter().enumerate() {
    for element in ul.select(&sel_li) {
        let text = element.inner_html();
        let is_start = text.starts_with("Début");
        let is_resume = text.starts_with("Reprise");
        if !is_start && !is_resume {
            // Any other list item is ignored by this loop.
            continue;
        }

        // The text comes from the parsed HTML page, so an item that does not match the
        // pattern is reported and skipped instead of aborting the whole run with a panic.
        let captures = match re.captures(&text) {
            Some(captures) => captures,
            None => {
                eprintln!("Unrecognised school range entry: {}", text);
                continue;
            }
        };
        // `d` and `r` are non-optional groups of `re`, so both exist whenever it matched.
        let date = &captures["d"];
        let repetition = &captures["r"];

        if is_start {
            data.insert(i, format!("{} pendant {}s", date, repetition));
        } else {
            // A "Reprise" extends the summary already stored for this list; without a
            // preceding "Début" there is nothing to extend, so report it and move on.
            let extended = data.get(&i).map(|first_period| {
                format!(
                    "{} puis reprise {} pendant {}s",
                    first_period, date, repetition
                )
            });
            match extended {
                Some(summary) => {
                    data.insert(i, summary);
                }
                None => eprintln!("\"Reprise\" entry without a preceding \"Début\": {}", text),
            }
        }
    }
}
println!("{:#?}", data);
} }
async fn get_webpage() -> Result<Html, Box<dyn std::error::Error>> { async fn get_webpage() -> Result<Html, Box<dyn std::error::Error>> {