Compare commits
4 commits
7661f85220
...
2423f080a1
Author | SHA1 | Date | |
---|---|---|---|
2423f080a1 | |||
cd7b5434e6 | |||
86c9f50f01 | |||
d56210ccb9 |
5 changed files with 1733 additions and 2 deletions
1631
Cargo.lock
generated
1631
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
@@ -4,3 +4,7 @@ version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
clap = { version = "3.2", features = ["derive"] }
|
||||||
|
reqwest = { version = "0.11" }
|
||||||
|
tokio = { version = "1", features = ["full"] }
|
||||||
|
scraper = "0.13"
|
||||||
|
|
23
src/download.rs
Normal file
23
src/download.rs
Normal file
|
@@ -0,0 +1,23 @@
|
||||||
|
/// Download all the posts from the raw endpoint
|
||||||
|
pub async fn download_posts(posts: (String, Vec<String>), dir: String) {
|
||||||
|
// Create folder, silently ignore if already exists
|
||||||
|
std::fs::create_dir(&dir).unwrap_or_default();
|
||||||
|
|
||||||
|
// Endpoint name
|
||||||
|
let endpoint = "raw";
|
||||||
|
|
||||||
|
for post in posts.1 {
|
||||||
|
let mut file = std::fs::File::create(format!("{}/{}.md", dir, post)).unwrap();
|
||||||
|
std::io::Write::write_all(
|
||||||
|
&mut file,
|
||||||
|
reqwest::get(format!("{}/{}/{}", posts.0, endpoint, post))
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.text()
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.as_bytes(),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
}
|
52
src/main.rs
52
src/main.rs
|
@@ -1,3 +1,51 @@
|
||||||
fn main() {
|
use clap::Parser;
|
||||||
println!("Hello, prose!");
|
|
||||||
|
mod download;
|
||||||
|
mod parse;
|
||||||
|
|
||||||
|
#[derive(Parser)]
|
||||||
|
#[clap(version, about, long_about = None)]
|
||||||
|
struct Cli {
|
||||||
|
/// Your username
|
||||||
|
#[clap(value_parser)]
|
||||||
|
username: String,
|
||||||
|
|
||||||
|
/// Directory output [default: the username]
|
||||||
|
#[clap(short, long, value_parser, value_name = "DIRECTORY")]
|
||||||
|
directory: Option<String>,
|
||||||
|
|
||||||
|
/// Domain name
|
||||||
|
#[clap(
|
||||||
|
long,
|
||||||
|
value_parser,
|
||||||
|
value_name = "DOMAIN NAME",
|
||||||
|
default_value = "prose.sh"
|
||||||
|
)]
|
||||||
|
domain: String,
|
||||||
|
|
||||||
|
/// Scheme: HTTP/HTTPS
|
||||||
|
#[clap(long, value_parser, default_value = "https")]
|
||||||
|
scheme: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() {
|
||||||
|
let cli = Cli::parse();
|
||||||
|
|
||||||
|
// Retrieve user's posts
|
||||||
|
let posts = parse::get_posts(
|
||||||
|
cli.scheme.to_lowercase(),
|
||||||
|
cli.username.to_lowercase(),
|
||||||
|
cli.domain.to_lowercase(),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
// Defines the output folder name
|
||||||
|
let directory = match cli.directory {
|
||||||
|
Some(loc) => loc,
|
||||||
|
None => cli.username,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Download the posts
|
||||||
|
download::download_posts(posts, directory).await;
|
||||||
}
|
}
|
||||||
|
|
25
src/parse.rs
Normal file
25
src/parse.rs
Normal file
|
@@ -0,0 +1,25 @@
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
|
/// Return vector of all the posts of the user
|
||||||
|
pub async fn get_posts(scheme: String, username: String, domain: String) -> (String, Vec<String>) {
|
||||||
|
// Defines the address
|
||||||
|
let url = format!("{}://{}.{}", scheme, username, domain);
|
||||||
|
|
||||||
|
// Parse index page: sheme://username.domain
|
||||||
|
let document = Html::parse_document(&reqwest::get(&url).await.unwrap().text().await.unwrap());
|
||||||
|
|
||||||
|
// Look at the posts
|
||||||
|
let raw_posts = document
|
||||||
|
.select(&Selector::parse("section.posts").unwrap())
|
||||||
|
.next()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Get the name of them and push them into the vector
|
||||||
|
let mut posts = Vec::new();
|
||||||
|
for link in raw_posts.select(&Selector::parse("a").unwrap()) {
|
||||||
|
posts.push(link.inner_html());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the vector
|
||||||
|
(url, posts)
|
||||||
|
}
|
Reference in a new issue