get posts names

This commit is contained in:
Mylloon 2022-08-20 20:05:53 +02:00
parent d56210ccb9
commit 86c9f50f01
Signed by: Anri
GPG key ID: A82D63DFF8D1317F
4 changed files with 1436 additions and 2 deletions

1400
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -5,3 +5,6 @@ edition = "2021"
[dependencies] [dependencies]
clap = { version = "3.2", features = ["derive"] } clap = { version = "3.2", features = ["derive"] }
reqwest = { version = "0.11" }
tokio = { version = "1", features = ["full"] }
scraper = "0.13"

View file

@ -27,14 +27,16 @@ struct Cli {
scheme: String, scheme: String,
} }
fn main() { #[tokio::main]
async fn main() {
let cli = Cli::parse(); let cli = Cli::parse();
let posts = parse::get_posts( let posts = parse::get_posts(
cli.scheme.to_lowercase(), cli.scheme.to_lowercase(),
cli.username.to_lowercase(), cli.username.to_lowercase(),
cli.domain.to_lowercase(), cli.domain.to_lowercase(),
); )
.await;
println!("{:#?}", posts); println!("{:#?}", posts);
} }

29
src/parse.rs Normal file
View file

@ -0,0 +1,29 @@
use scraper::{Html, Selector};
/// Return the names of all the posts of the user.
///
/// Fetches the index page at `scheme://username.domain`, looks up the first
/// `section.posts` element of that page and collects the inner HTML of every
/// `<a>` element found inside it, in the order the selector yields them.
///
/// # Panics
///
/// Panics, with a message naming the URL, if the request fails, if the
/// response body cannot be read as text, or if the page contains no
/// `section.posts` element.
pub async fn get_posts(scheme: String, username: String, domain: String) -> Vec<String> {
    // Index page: scheme://username.domain
    let url = format!("{}://{}.{}", scheme, username, domain);

    // Download the page; the return type leaves no room for an error value,
    // so failures still panic, but with enough context to diagnose them.
    let body = reqwest::get(url.as_str())
        .await
        .unwrap_or_else(|err| panic!("failed to fetch {}: {}", url, err))
        .text()
        .await
        .unwrap_or_else(|err| panic!("failed to read the body of {}: {}", url, err));
    let document = Html::parse_document(&body);

    // Both selectors are constant, valid CSS, so parsing them cannot fail.
    let posts_selector =
        Selector::parse("section.posts").expect("`section.posts` is a valid CSS selector");
    let link_selector = Selector::parse("a").expect("`a` is a valid CSS selector");

    // Look at the posts: only the first matching section is considered
    let posts_section = document
        .select(&posts_selector)
        .next()
        .unwrap_or_else(|| panic!("no `section.posts` element found at {}", url));

    // Get the name of each post (the inner HTML of its link)
    let posts: Vec<String> = posts_section
        .select(&link_selector)
        .map(|link| link.inner_html())
        .collect();

    posts
}