//! # Select HTML
//!
//! > **Extract HTML using CSS selectors from the command line.**

use std::{
    fs::File,
    io::{self, stdin, Read, Write},
    path::PathBuf,
};

use {
    clap::Parser,
    color_eyre::{eyre::eyre, install, Result},
    scraper::{Html, Selector},
};

/// CLI arguments struct using [`clap`]'s Derive API.
#[derive(Debug, Parser)]
#[clap(about, author, version)]
pub struct Args {
/// Output the attribute's value from the selected element, can be used
/// multiple times.
#[clap(short, long, group = "output")]
pub attribute: Vec<String>,
/// A HTML file to read, if not specified stdin will be used instead.
2024-01-25 17:52:13 +00:00
#[clap(long)]
2022-09-05 12:24:49 +00:00
pub file: Option<PathBuf>,
/// The CSS selector to use.
pub selector: String,
/// Output inner text of the selected elements.
#[clap(short, long, group = "output")]
pub text: bool,
/// Trim whitespace from selected items.
#[clap(long)]
pub trim: bool,
}
/// The main CLI function.
fn main() -> Result<()> {
install()?;
let args = Args::parse();
let selector = Selector::parse(&args.selector)
.map_err(|_| eyre!("Failed to parse selector"))?;
let document = {
let mut html = String::new();
if let Some(path) = args.file {
File::open(path)?.read_to_string(&mut html)?;
} else {
stdin().read_to_string(&mut html)?;
};
Html::parse_document(&html)
};
let mut to_print = vec![];
for element in document.select(&selector) {
if args.text {
to_print.push(element.text().collect::<String>());
} else if !args.attribute.is_empty() {
let element = element.value();
for attribute in &args.attribute {
if let Some(value) = element.attr(attribute) {
to_print.push(value.to_string());
}
}
} else {
to_print.push(element.html());
}
}
for value in to_print {
if args.trim {
println!("{}", value.trim());
} else {
println!("{}", value);
}
}
Ok(())
}