forked from Anri/cal8tor
wip: parsing school range
This commit is contained in:
parent
449b200629
commit
1e09947b68
1 changed file with 42 additions and 1 deletion
43
src/info.rs
43
src/info.rs
|
@ -1,3 +1,4 @@
|
|||
use regex::Regex;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
pub async fn info() {
|
||||
|
@ -5,13 +6,53 @@ pub async fn info() {
|
|||
|
||||
// Selectors
|
||||
let sel_ul = Selector::parse("ul").unwrap();
|
||||
let sel_li = Selector::parse("li").unwrap();
|
||||
|
||||
// Find the raw info in the HTML page
|
||||
let mut raw_data = Vec::new();
|
||||
for (i, data) in document.select(&sel_ul).enumerate() {
|
||||
if [1, 2].contains(&i) {
|
||||
println!("\n{} - {:#?}", data.value().name(), data.inner_html());
|
||||
raw_data.push(data);
|
||||
}
|
||||
}
|
||||
|
||||
let mut data = std::collections::HashMap::new();
|
||||
// d => date
|
||||
// r => repetition
|
||||
let re = Regex::new(r"(?P<d>\d{1,2} \w+ \d{4}).+(?P<r>\d)").unwrap();
|
||||
for (i, ul) in raw_data.into_iter().enumerate() {
|
||||
for element in ul.select(&sel_li) {
|
||||
match element.inner_html() {
|
||||
e if e.starts_with("Début") => {
|
||||
let captures = re.captures(&e).unwrap();
|
||||
data.insert(
|
||||
i,
|
||||
format!(
|
||||
"{} pendant {}s",
|
||||
captures.name("d").unwrap().as_str(),
|
||||
captures.name("r").unwrap().as_str()
|
||||
),
|
||||
);
|
||||
}
|
||||
e if e.starts_with("Reprise") => {
|
||||
let captures = re.captures(&e).unwrap();
|
||||
captures.name("g");
|
||||
data.insert(
|
||||
i,
|
||||
format!(
|
||||
"{} puis reprise {} pendant {}s",
|
||||
data.get(&i).unwrap(),
|
||||
captures.name("d").unwrap().as_str(),
|
||||
captures.name("r").unwrap().as_str()
|
||||
),
|
||||
);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!("{:#?}", data);
|
||||
}
|
||||
|
||||
async fn get_webpage() -> Result<Html, Box<dyn std::error::Error>> {
|
||||
|
|
Reference in a new issue