Compare commits

..

4 commits

Author SHA1 Message Date
2423f080a1
download the posts 2022-08-20 20:24:29 +02:00
cd7b5434e6
return a tuple 2022-08-20 20:24:19 +02:00
86c9f50f01
get posts names 2022-08-20 20:05:53 +02:00
d56210ccb9
add cli 2022-08-20 19:44:56 +02:00
5 changed files with 1733 additions and 2 deletions

1631
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -4,3 +4,7 @@ version = "0.1.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
# Command-line argument parsing; the `derive` feature provides `#[derive(Parser)]` used by `Cli`
clap = { version = "3.2", features = ["derive"] }
# HTTP client used to fetch the index page and each raw post
reqwest = { version = "0.11" }
# Async runtime behind `#[tokio::main]`
tokio = { version = "1", features = ["full"] }
# HTML parsing and CSS selectors used to list the posts of the index page
scraper = "0.13"

23
src/download.rs Normal file
View file

@ -0,0 +1,23 @@
/// Download all the posts from the raw endpoint.
///
/// `posts` is the `(base_url, post_names)` pair: every post is fetched from
/// `{base_url}/raw/{name}` and saved as `{dir}/{name}.md`. The output
/// directory (including missing parents) is created when needed.
///
/// # Panics
///
/// Panics with a message naming the offending path or URL when the output
/// directory cannot be created, when a request fails or answers with an HTTP
/// error status, or when a file cannot be written.
pub async fn download_posts(posts: (String, Vec<String>), dir: String) {
    // Endpoint name
    const ENDPOINT: &str = "raw";

    let (base_url, names) = posts;
    let dir = std::path::Path::new(&dir);

    // `create_dir_all` already succeeds when the folder exists, so any error
    // left here is a real one (permissions, a file in the way, ...) and must
    // not be silently ignored.
    std::fs::create_dir_all(dir)
        .unwrap_or_else(|e| panic!("cannot create directory {}: {}", dir.display(), e));

    for name in names {
        let url = format!("{}/{}/{}", base_url, ENDPOINT, name);

        // Download first and write afterwards: a failed request must not
        // leave an empty file behind, and an HTTP error page (404, 500, ...)
        // must not be saved as if it were the post.
        let body = reqwest::get(&url)
            .await
            .and_then(|response| response.error_for_status())
            .unwrap_or_else(|e| panic!("cannot download {}: {}", url, e))
            .text()
            .await
            .unwrap_or_else(|e| panic!("cannot read the body of {}: {}", url, e));

        // NOTE(review): `name` comes from the remote index page and is used
        // as a file name unchanged — confirm it can never contain path
        // separators.
        let path = dir.join(format!("{}.md", name));
        std::fs::write(&path, body)
            .unwrap_or_else(|e| panic!("cannot write {}: {}", path.display(), e));
    }
}

View file

@ -1,3 +1,51 @@
fn main() { use clap::Parser;
println!("Hello, prose!");
mod download;
mod parse;
// Command-line interface, parsed in `main` through `Cli::parse()`.
//
// NOTE: the `///` comments on the fields below are picked up by clap's derive
// as the help text of each argument, so they are user-facing strings; keep
// maintainer notes in plain `//` comments like this one.
#[derive(Parser)]
#[clap(version, about, long_about = None)]
struct Cli {
// Positional argument (no `short`/`long`); `main` lowercases it to build the
// URL and uses it as typed for the default output directory.
/// Your username
#[clap(value_parser)]
username: String,
// Optional `-d` / `--directory`; when absent `main` falls back to `username`.
/// Directory output [default: the username]
#[clap(short, long, value_parser, value_name = "DIRECTORY")]
directory: Option<String>,
// `--domain`, defaults to "prose.sh".
/// Domain name
#[clap(
long,
value_parser,
value_name = "DOMAIN NAME",
default_value = "prose.sh"
)]
domain: String,
// `--scheme`, defaults to "https". Declared as a free-form `String`: nothing
// here restricts the value to http/https.
/// Scheme: HTTP/HTTPS
#[clap(long, value_parser, default_value = "https")]
scheme: String,
}
#[tokio::main]
async fn main() {
let cli = Cli::parse();
// Retrieve user's posts
let posts = parse::get_posts(
cli.scheme.to_lowercase(),
cli.username.to_lowercase(),
cli.domain.to_lowercase(),
)
.await;
// Defines the output folder name
let directory = match cli.directory {
Some(loc) => loc,
None => cli.username,
};
// Download the posts
download::download_posts(posts, directory).await;
} }

25
src/parse.rs Normal file
View file

@ -0,0 +1,25 @@
use scraper::{Html, Selector};
/// Return the address of the user's index page and the names of all the
/// posts listed on it.
///
/// The index page lives at `{scheme}://{username}.{domain}`. The result is
/// the `(index_url, post_names)` tuple, where each name is the inner HTML of
/// an `<a>` element found inside the first `section.posts` element.
///
/// # Panics
///
/// Panics with a message naming the URL when the request fails or answers
/// with an HTTP error status, when the body cannot be read, or when the page
/// has no `section.posts` element.
pub async fn get_posts(scheme: String, username: String, domain: String) -> (String, Vec<String>) {
    // Defines the address: scheme://username.domain
    let url = format!("{}://{}.{}", scheme, username, domain);

    // Fetch the index page; reject HTTP error statuses here instead of
    // parsing an error page and failing later on a missing element.
    let body = reqwest::get(&url)
        .await
        .and_then(|response| response.error_for_status())
        .unwrap_or_else(|e| panic!("cannot fetch {}: {}", url, e))
        .text()
        .await
        .unwrap_or_else(|e| panic!("cannot read the body of {}: {}", url, e));
    let document = Html::parse_document(&body);

    // Both selectors are constants, so failing to parse them is a bug
    let section_selector =
        Selector::parse("section.posts").expect("`section.posts` is a valid CSS selector");
    let link_selector = Selector::parse("a").expect("`a` is a valid CSS selector");

    // Look at the posts section
    let section = document
        .select(&section_selector)
        .next()
        .unwrap_or_else(|| panic!("no `section.posts` element found at {}", url));

    // Collect the name of every linked post
    let posts: Vec<String> = section
        .select(&link_selector)
        .map(|link| link.inner_html())
        .collect();

    (url, posts)
}