This commit is contained in:
Love 2024-08-06 16:13:51 +02:00
commit fe7497f390
7 changed files with 2179 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

1989
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

18
Cargo.toml Normal file
View File

@ -0,0 +1,18 @@
# svtl — download entire series from SVT via yt-dlp.
[package]
name = "svtl"
version = "0.1.0"
edition = "2021"
# Async runtime (tokio), HTTP (reqwest), HTML scraping (scraper),
# CLI parsing (clap), plus logging and error-handling helpers.
[dependencies]
anyhow = "1.0.86"
clap = { version = "4.5.13", features = ["derive"] }
env_logger = "0.11.5"
log = "0.4.22"
regex = "1.10.6"
reqwest = { version = "0.12.5", features = ["json"] }
scraper = "0.20.0"
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.122"
tokio = { version = "1.39.2", features = ["full"] }
tokio-stream = "0.1.15"
url = { version = "2.5.2", features = ["serde"] }

62
src/base_page.rs Normal file
View File

@ -0,0 +1,62 @@
use log::info;
use regex::Regex;
use scraper::{Html, Selector};
use std::io;
use std::path::Path;
use tokio::fs::File as TokioFile;
use tokio::io::AsyncReadExt;
use url::Url;
/// Fetch the HTML body of `link`.
///
/// Returns the full response body as a `String`, or the underlying
/// `reqwest` error if the request or body read fails.
pub async fn get_content(link: &str) -> Result<String, reqwest::Error> {
    info!("Requesting page from SVT: {}", link);
    let body = reqwest::get(link).await?.text().await?;
    info!("Page retrieved successfully");
    Ok(body)
}
/// Extract and sort episode links from an SVT page.
///
/// Collects every anchor `href` ending in `"info=visa"` and sorts the links
/// by the first run of digits in their final path segment; links without a
/// parseable numeric segment sort last.
pub fn get_relative_links(content: &str) -> Vec<String> {
    info!("Parsing content to find links");
    let fragment = Html::parse_document(content);
    // Static selector/pattern: parse failures here are programmer errors.
    let selector = Selector::parse("a[href]").unwrap();
    // Compile once — the original rebuilt this regex inside the sort-key
    // closure, recompiling it O(n log n) times.
    let re = Regex::new(r"(\d+)").unwrap();
    let mut hrefs: Vec<String> = fragment
        .select(&selector)
        .filter_map(|element| element.value().attr("href"))
        .filter(|href| href.ends_with("info=visa"))
        .map(|href| href.to_string())
        .collect();
    // `sort_by_cached_key` computes each key exactly once per element.
    hrefs.sort_by_cached_key(|url| {
        url.split('/')
            .last()
            .and_then(|segment| re.captures(segment))
            .and_then(|cap| cap.get(1))
            // `.ok()` instead of `.unwrap()`: a digit run too large for
            // usize now sorts last instead of panicking.
            .and_then(|m| m.as_str().parse::<usize>().ok())
            .unwrap_or(usize::MAX)
    });
    info!("Found and sorted {} links", hrefs.len());
    hrefs
}
/// Extract the `scheme://host` portion of `url`.
///
/// # Errors
/// Returns `url::ParseError` when the URL cannot be parsed, or
/// `ParseError::EmptyHost` when it has no host component (the original
/// code panicked on host-less URLs such as `mailto:` links).
pub fn get_base_url(url: &str) -> Result<String, url::ParseError> {
    info!("Extracting base URL from: {}", url);
    let parsed_url = Url::parse(url)?;
    // `host_str()` is None for host-less schemes; surface that as an
    // error in the existing error type instead of panicking.
    let host = parsed_url.host_str().ok_or(url::ParseError::EmptyHost)?;
    let base_url = format!("{}://{}", parsed_url.scheme(), host);
    info!("Base URL is: {}", base_url);
    Ok(base_url)
}
async fn get_content_tmp() -> Result<String, io::Error> {
info!("Reading content from temp.html");
let path = Path::new("./temp.html");
let mut file = TokioFile::open(path).await?;
let mut content = String::new();
file.read_to_string(&mut content).await?;
Ok(content)
}

4
src/lib.rs Normal file
View File

@ -0,0 +1,4 @@
//! svtl: download entire series from SVT by scraping a series page for
//! episode links and handing each one to an external `yt-dlp` process.
mod base_page;
mod yt_dlp_wrapper;
// Public surface consumed by the binary in main.rs.
pub use base_page::{get_base_url, get_content, get_relative_links};
pub use yt_dlp_wrapper::YtDlpWrapper;

57
src/main.rs Normal file
View File

@ -0,0 +1,57 @@
use clap;
use log::{error, info};
use svtl::{get_base_url, get_content, get_relative_links, YtDlpWrapper};
/// Entry point: parse CLI arguments, scrape the series page for episode
/// links, and download each one with yt-dlp. Exits early (after logging)
/// on any failure.
#[tokio::main]
async fn main() {
    env_logger::init();
    let matches = clap::Command::new("Svt Links")
        .version("1.0")
        .about("Download entire series from SVT")
        .arg(
            clap::Arg::new("link")
                .short('l')
                .long("link")
                .required(true)
                .help("Link to series"),
        )
        .get_matches();
    // Bail out before doing any network work if yt-dlp is missing.
    let yt_dlp = match YtDlpWrapper::new().await {
        Some(yt_dlp) => yt_dlp,
        None => {
            error!("yt-dlp not found in PATH. Please install yt-dlp to proceed.");
            return;
        }
    };
    // BUG FIX: clap's default value parser stores a `String`; requesting
    // `Box<str>` (as the original did) panics at runtime with a
    // definition/access type mismatch.
    let master_link: &str = matches
        .get_one::<String>("link")
        .expect("`link` is a required argument")
        .as_str();
    let base_url = match get_base_url(master_link) {
        Ok(url) => url,
        Err(e) => {
            error!("Failed to parse url with error: {:?}", &e);
            return;
        }
    };
    let content = match get_content(master_link).await {
        Ok(content) => content,
        Err(e) => {
            error!("Failed to get base page: {}", e);
            return;
        }
    };
    let links = get_relative_links(&content);
    info!("Found the following links: {}", links.join(", "));
    // Download sequentially; abort the whole run on the first failure.
    for link_part in links {
        let link = format!("{}{}", base_url, link_part);
        info!("Downloading: {}", link);
        if let Err(e) = yt_dlp.spawn_yt_dlp_task(&link).await {
            error!("Failed to download with error: {:?}", &e);
            return;
        }
    }
}

48
src/yt_dlp_wrapper.rs Normal file
View File

@ -0,0 +1,48 @@
use anyhow::{anyhow, Result};
use log::{error, info};
use std::ffi::OsStr;
use tokio::process;
/// Thin wrapper around the external `yt-dlp` binary.
pub struct YtDlpWrapper {
    // Use aria2c as yt-dlp's downloader when it is available on PATH.
    use_aria2c: bool,
}
impl YtDlpWrapper {
    /// Build a wrapper, returning `None` when `yt-dlp` is not on PATH.
    /// aria2c support is enabled opportunistically when found.
    pub async fn new() -> Option<Self> {
        if command_exists("yt-dlp").await {
            let use_aria2c = command_exists("aria2c").await;
            Some(Self { use_aria2c })
        } else {
            None
        }
    }
    /// Run `yt-dlp <url>` to completion, inheriting stdio.
    ///
    /// # Errors
    /// Fails if the process cannot be spawned/waited on, or exits non-zero.
    pub async fn spawn_yt_dlp_task(&self, url: &str) -> Result<()> {
        info!("Spawning yt-dlp task for URL: {}", url);
        let mut cmd = process::Command::new("yt-dlp");
        if self.use_aria2c {
            cmd.arg("--downloader=aria2c");
        }
        let status = cmd.arg(url).spawn()?.wait().await?;
        if !status.success() {
            error!("yt-dlp task failed with status: {}", status);
            return Err(anyhow!("yt-dlp task failed"));
        }
        info!("yt-dlp task completed successfully");
        Ok(())
    }
}
/// True when `name --version` can be executed and exits successfully —
/// a cheap proxy for "the command exists on PATH".
async fn command_exists<S: AsRef<OsStr>>(name: S) -> bool {
    let result = process::Command::new(name).arg("--version").output().await;
    matches!(result, Ok(output) if output.status.success())
}