From 1e09947b68b8a4b351260a076cac66e04ac995be Mon Sep 17 00:00:00 2001
From: Mylloon
Date: Mon, 15 Aug 2022 15:46:58 +0200
Subject: [PATCH] wip: parsing school range

---
 src/info.rs | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/info.rs b/src/info.rs
index a1cc581..6b4a2ad 100644
--- a/src/info.rs
+++ b/src/info.rs
@@ -1,3 +1,4 @@
+use regex::Regex;
 use scraper::{Html, Selector};
 
 pub async fn info() {
@@ -5,13 +6,53 @@ pub async fn info() {
 
     // Selectors
     let sel_ul = Selector::parse("ul").unwrap();
+    let sel_li = Selector::parse("li").unwrap();
 
     // Find the raw infos in html page
+    let mut raw_data = Vec::new();
     for (i, data) in document.select(&sel_ul).enumerate() {
         if [1, 2].contains(&i) {
-            println!("\n{} - {:#?}", data.value().name(), data.inner_html());
+            raw_data.push(data);
         }
     }
+
+    let mut data = std::collections::HashMap::new();
+    // d => date
+    // r => repetition
+    let re = Regex::new(r"(?P<d>\d{1,2} \w+ \d{4}).+(?P<r>\d)").unwrap();
+    for (i, ul) in raw_data.into_iter().enumerate() {
+        for element in ul.select(&sel_li) {
+            match element.inner_html() {
+                e if e.starts_with("Début") => {
+                    let captures = re.captures(&e).unwrap();
+                    data.insert(
+                        i,
+                        format!(
+                            "{} pendant {}s",
+                            captures.name("d").unwrap().as_str(),
+                            captures.name("r").unwrap().as_str()
+                        ),
+                    );
+                }
+                e if e.starts_with("Reprise") => {
+                    let captures = re.captures(&e).unwrap();
+                    captures.name("g");
+                    data.insert(
+                        i,
+                        format!(
+                            "{} puis reprise {} pendant {}s",
+                            data.get(&i).unwrap(),
+                            captures.name("d").unwrap().as_str(),
+                            captures.name("r").unwrap().as_str()
+                        ),
+                    );
+                }
+                _ => (),
+            }
+        }
+    }
+
+    println!("{:#?}", data);
 }
 
 async fn get_webpage() -> Result> {