This repository has been archived on 2024-05-23. You can view files and clone it, but cannot push or open issues or pull requests.
cal8tor/src/info.rs

110 lines
3.5 KiB
Rust
Raw Normal View History

2022-08-15 19:06:36 +02:00
use chrono::{DateTime, Duration, TimeZone, Utc};
2022-08-15 17:25:36 +02:00
use regex::{Captures, Regex};
2022-08-15 14:53:09 +02:00
use scraper::{Html, Selector};
2022-08-16 11:55:51 +02:00
use std::collections::HashMap;
2022-08-15 12:20:16 +02:00
2022-08-15 19:06:36 +02:00
pub async fn info() -> HashMap<usize, Vec<(DateTime<Utc>, DateTime<Utc>)>> {
2022-08-15 14:53:09 +02:00
let document = get_webpage().await.expect("Can't reach info website.");
// Selectors
let sel_ul = Selector::parse("ul").unwrap();
2022-08-15 15:46:58 +02:00
let sel_li = Selector::parse("li").unwrap();
2022-08-15 14:53:09 +02:00
// Find the raw infos in html page
2022-08-15 15:46:58 +02:00
let mut raw_data = Vec::new();
2022-08-15 14:53:09 +02:00
for (i, data) in document.select(&sel_ul).enumerate() {
if [1, 2].contains(&i) {
2022-08-15 15:46:58 +02:00
raw_data.push(data);
2022-08-15 14:53:09 +02:00
}
}
2022-08-15 15:46:58 +02:00
2022-08-15 17:25:36 +02:00
let mut data = HashMap::new();
2022-08-15 15:46:58 +02:00
// d => date
// r => repetition
let re = Regex::new(r"(?P<d>\d{1,2} \w+ \d{4}).+(?P<r>\d)").unwrap();
for (i, ul) in raw_data.into_iter().enumerate() {
for element in ul.select(&sel_li) {
match element.inner_html() {
e if e.starts_with("Début") => {
let captures = re.captures(&e).unwrap();
2022-08-15 17:25:36 +02:00
let start_date = get_date(captures.name("d").unwrap().as_str());
2022-08-15 17:25:36 +02:00
2022-08-15 17:58:40 +02:00
let rep: i64 = captures.name("r").unwrap().as_str().parse().unwrap();
// -1 car la première semaine est déjà compté
let end_date = start_date + Duration::weeks(rep - 1);
2022-08-15 17:25:36 +02:00
2022-08-15 20:09:16 +02:00
data.insert(i + 1, vec![(start_date, end_date)]);
2022-08-15 15:46:58 +02:00
}
e if e.starts_with("Reprise") => {
let captures = re.captures(&e).unwrap();
captures.name("g");
let start_date = get_date(captures.name("d").unwrap().as_str());
2022-08-15 17:58:40 +02:00
let rep: i64 = captures.name("r").unwrap().as_str().parse().unwrap();
// -1 car la première semaine est déjà compté
let end_date = start_date + Duration::weeks(rep - 1);
2022-08-15 20:12:22 +02:00
let it = i + 1;
let mut vec = data.get(&it).unwrap().to_owned();
2022-08-15 17:58:40 +02:00
vec.push((start_date, end_date));
2022-08-15 20:12:22 +02:00
data.insert(it, vec);
2022-08-15 15:46:58 +02:00
}
_ => (),
}
}
}
2022-08-15 19:06:36 +02:00
data
2022-08-15 12:20:16 +02:00
}
2022-08-15 17:25:36 +02:00
/// Get info webpage
2022-08-15 12:20:16 +02:00
async fn get_webpage() -> Result<Html, Box<dyn std::error::Error>> {
/* let html = reqwest::get("https://informatique.up8.edu/licence-iv/edt").await?.text().await?;
Ok(Html::parse_document(&html)) */
let html = include_str!("../target/debug2.html");
Ok(Html::parse_document(html))
}
2022-08-15 17:25:36 +02:00
/// Turn a french date to an english one
///
/// Every French month name found in `date` as a whole word is replaced by its
/// English name, and ` 12:00` is appended so that the result carries a time
/// for the chrono parser. Text that is not a known month name is left as is.
fn anglophonization(date: &str) -> String {
    let dico = HashMap::from([
        ("janvier", "january"),
        ("février", "february"),
        ("mars", "march"),
        ("avril", "april"),
        ("mai", "may"),
        ("juin", "june"),
        ("juillet", "july"),
        ("août", "august"),
        ("septembre", "september"),
        ("octobre", "october"),
        ("novembre", "november"),
        ("décembre", "december"),
    ]);

    // New regex of all the french months; `\b` keeps short names such as
    // "mai" from matching inside a longer word.
    let re = Regex::new(&format!(
        r"\b({})\b",
        dico.keys().copied().collect::<Vec<_>>().join("|")
    ))
    .unwrap();

    format!(
        // Use 12:00 for chrono parser
        "{} 12:00",
        // Replace french by english month. The lookup cannot fail: the regex
        // only matches strings that are keys of `dico`.
        re.replace_all(date, |cap: &Captures| dico[&cap[0]])
    )
}
/// Turn a string to a DateTime
2022-08-15 19:06:36 +02:00
fn get_date(date: &str) -> DateTime<Utc> {
// Use and keep UTC time, we have the hour set to 12h and
// Paris 8 is in France so there is no problems
Utc.datetime_from_str(&anglophonization(date), "%e %B %Y %H:%M")
.unwrap()
}