2022-08-16 14:41:07 +02:00
|
|
|
use chrono::{DateTime, TimeZone, Utc};
|
2022-08-15 17:25:36 +02:00
|
|
|
use regex::{Captures, Regex};
|
2022-08-15 14:53:09 +02:00
|
|
|
use scraper::{Html, Selector};
|
2022-08-16 11:55:51 +02:00
|
|
|
use std::collections::HashMap;
|
2022-08-15 12:20:16 +02:00
|
|
|
|
2022-08-16 14:41:07 +02:00
|
|
|
pub async fn info() -> HashMap<usize, Vec<(DateTime<Utc>, i64)>> {
|
2022-08-15 14:53:09 +02:00
|
|
|
let document = get_webpage().await.expect("Can't reach info website.");
|
|
|
|
|
|
|
|
// Selectors
|
|
|
|
let sel_ul = Selector::parse("ul").unwrap();
|
2022-08-15 15:46:58 +02:00
|
|
|
let sel_li = Selector::parse("li").unwrap();
|
2022-08-15 14:53:09 +02:00
|
|
|
|
|
|
|
// Find the raw infos in html page
|
2022-08-15 15:46:58 +02:00
|
|
|
let mut raw_data = Vec::new();
|
2022-08-15 14:53:09 +02:00
|
|
|
for (i, data) in document.select(&sel_ul).enumerate() {
|
|
|
|
if [1, 2].contains(&i) {
|
2022-08-15 15:46:58 +02:00
|
|
|
raw_data.push(data);
|
2022-08-15 14:53:09 +02:00
|
|
|
}
|
|
|
|
}
|
2022-08-15 15:46:58 +02:00
|
|
|
|
2022-08-15 17:25:36 +02:00
|
|
|
let mut data = HashMap::new();
|
2022-08-15 15:46:58 +02:00
|
|
|
// d => date
|
|
|
|
// r => repetition
|
|
|
|
let re = Regex::new(r"(?P<d>\d{1,2} \w+ \d{4}).+(?P<r>\d)").unwrap();
|
|
|
|
for (i, ul) in raw_data.into_iter().enumerate() {
|
|
|
|
for element in ul.select(&sel_li) {
|
|
|
|
match element.inner_html() {
|
|
|
|
e if e.starts_with("Début") => {
|
|
|
|
let captures = re.captures(&e).unwrap();
|
2022-08-15 17:25:36 +02:00
|
|
|
|
2022-08-15 17:50:16 +02:00
|
|
|
let start_date = get_date(captures.name("d").unwrap().as_str());
|
2022-08-15 17:25:36 +02:00
|
|
|
|
2022-08-15 17:58:40 +02:00
|
|
|
let rep: i64 = captures.name("r").unwrap().as_str().parse().unwrap();
|
2022-08-15 17:25:36 +02:00
|
|
|
|
2022-08-16 14:41:07 +02:00
|
|
|
data.insert(i + 1, vec![(start_date, rep)]);
|
2022-08-15 15:46:58 +02:00
|
|
|
}
|
|
|
|
e if e.starts_with("Reprise") => {
|
|
|
|
let captures = re.captures(&e).unwrap();
|
|
|
|
captures.name("g");
|
2022-08-15 17:50:16 +02:00
|
|
|
|
|
|
|
let start_date = get_date(captures.name("d").unwrap().as_str());
|
|
|
|
|
2022-08-15 17:58:40 +02:00
|
|
|
let rep: i64 = captures.name("r").unwrap().as_str().parse().unwrap();
|
2022-08-15 17:50:16 +02:00
|
|
|
|
2022-08-15 20:12:22 +02:00
|
|
|
let it = i + 1;
|
|
|
|
|
|
|
|
let mut vec = data.get(&it).unwrap().to_owned();
|
2022-08-16 14:41:07 +02:00
|
|
|
vec.push((start_date, rep));
|
2022-08-15 17:58:40 +02:00
|
|
|
|
2022-08-15 20:12:22 +02:00
|
|
|
data.insert(it, vec);
|
2022-08-15 15:46:58 +02:00
|
|
|
}
|
|
|
|
_ => (),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-08-15 19:06:36 +02:00
|
|
|
data
|
2022-08-15 12:20:16 +02:00
|
|
|
}
|
|
|
|
|
2022-08-15 17:25:36 +02:00
|
|
|
/// Get info webpage
|
2022-08-15 12:20:16 +02:00
|
|
|
async fn get_webpage() -> Result<Html, Box<dyn std::error::Error>> {
|
2022-08-23 16:51:02 +02:00
|
|
|
/* let url = "https://informatique.up8.edu/licence-iv/edt";
|
2022-08-16 15:56:41 +02:00
|
|
|
|
2022-08-16 16:07:58 +02:00
|
|
|
// We don't use reqwest::get() but a client with a custom user-agent
|
|
|
|
// in order to avoid getting rate limit
|
|
|
|
let client = reqwest::Client::builder()
|
|
|
|
.user_agent("bypass-rate_limit")
|
|
|
|
.build()?;
|
2022-08-16 15:56:41 +02:00
|
|
|
let html = client.get(url).send().await?.text().await?;
|
2022-08-15 12:20:16 +02:00
|
|
|
|
2022-08-16 15:48:13 +02:00
|
|
|
// Panic on error
|
2022-08-23 16:51:02 +02:00
|
|
|
crate::utils::check_errors(&html, url); */
|
|
|
|
|
|
|
|
let html = std::fs::read_to_string("target/debug-sch.htm").unwrap();
|
2022-08-15 12:20:16 +02:00
|
|
|
|
2022-08-16 15:48:13 +02:00
|
|
|
Ok(Html::parse_document(&html))
|
2022-08-15 12:20:16 +02:00
|
|
|
}
|
2022-08-15 17:25:36 +02:00
|
|
|
|
|
|
|
/// Turn a french date to an english one
|
|
|
|
fn anglophonization(date: &str) -> String {
|
|
|
|
let dico = HashMap::from([
|
|
|
|
("janvier", "january"),
|
|
|
|
("mars", "march"),
|
|
|
|
("septembre", "september"),
|
|
|
|
("novembre", "november"),
|
|
|
|
]);
|
|
|
|
|
|
|
|
// New regex of all the french month
|
|
|
|
let re = Regex::new(&format!(
|
|
|
|
"({})",
|
|
|
|
dico.keys().cloned().collect::<Vec<_>>().join("|")
|
|
|
|
))
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
format!(
|
|
|
|
// Use 12:00 for chrono parser
|
|
|
|
"{} 12:00",
|
|
|
|
// Replace french by english month
|
|
|
|
re.replace_all(date, |cap: &Captures| {
|
|
|
|
match &cap[0] {
|
|
|
|
month if dico.contains_key(month) => dico.get(month).unwrap(),
|
|
|
|
month => panic!("Unknown month: {}", month),
|
|
|
|
}
|
|
|
|
})
|
|
|
|
)
|
|
|
|
}
|
2022-08-15 17:50:16 +02:00
|
|
|
|
|
|
|
/// Turn a string to a DateTime
|
2022-08-15 19:06:36 +02:00
|
|
|
fn get_date(date: &str) -> DateTime<Utc> {
|
2022-08-15 17:50:16 +02:00
|
|
|
// Use and keep UTC time, we have the hour set to 12h and
|
|
|
|
// Paris 8 is in France so there is no problems
|
|
|
|
Utc.datetime_from_str(&anglophonization(date), "%e %B %Y %H:%M")
|
|
|
|
.unwrap()
|
|
|
|
}
|