1
Fork 0
select-html/source/main.rs

108 lines
2.7 KiB
Rust

// Copyright (C) 2022 Bauke <me@bauke.xyz>
//
// This program is free software: you can redistribute it and/or modify it under
// the terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option) any
// later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! # Select HTML
//!
//! > **Extract HTML using CSS selectors in the command-line.**
#![forbid(unsafe_code)]
#![warn(missing_docs, clippy::missing_docs_in_private_items)]
use std::{
    fs::File,
    io::{stdin, stdout, BufWriter, Read, Write},
    path::PathBuf,
};
use {
clap::Parser,
color_eyre::{eyre::eyre, install, Result},
scraper::{Html, Selector},
};
/// CLI arguments struct using [`clap`]'s Derive API.
// NOTE: clap's derive macro turns the `///` comments on the fields below into
// the `--help` text, so rewording them changes user-facing output. Maintainer
// notes are therefore written as plain `//` comments.
#[derive(Debug, Parser)]
#[clap(about, author, version)]
pub struct Args {
/// Output the attribute's value from the selected element, can be used
/// multiple times.
// Every occurrence is collected into the `Vec`. For each selected element,
// `main` prints the value of each listed attribute that is present and
// silently skips the ones that are missing.
// Shares the "output" group with `text` (presumably making the two mutually
// exclusive; confirm against clap's `ArgGroup` documentation).
#[clap(short, long, group = "output")]
pub attribute: Vec<String>,
/// A HTML file to read, if not specified stdin will be used instead.
// `parse(from_os_str)` builds the `PathBuf` from the raw OS string
// (presumably so that non-UTF-8 paths are accepted; confirm against clap).
#[clap(long, parse(from_os_str))]
pub file: Option<PathBuf>,
/// The CSS selector to use.
// Declared without `short`/`long`. `main` passes it to `Selector::parse` and
// aborts with "Failed to parse selector" when that fails.
pub selector: String,
/// Output inner text of the selected elements.
// `main` checks this flag before `attribute`; when set, the text pieces of
// each selected element are concatenated into one string per element.
#[clap(short, long, group = "output")]
pub text: bool,
/// Trim whitespace from selected items.
// Applied with `str::trim` to each value right before it is printed, so it
// affects text, attribute and HTML output alike.
#[clap(long)]
pub trim: bool,
}
/// The main CLI function.
fn main() -> Result<()> {
install()?;
let args = Args::parse();
let selector = Selector::parse(&args.selector)
.map_err(|_| eyre!("Failed to parse selector"))?;
let document = {
let mut html = String::new();
if let Some(path) = args.file {
File::open(path)?.read_to_string(&mut html)?;
} else {
stdin().read_to_string(&mut html)?;
};
Html::parse_document(&html)
};
let mut to_print = vec![];
for element in document.select(&selector) {
if args.text {
to_print.push(element.text().collect::<String>());
} else if !args.attribute.is_empty() {
let element = element.value();
for attribute in &args.attribute {
if let Some(value) = element.attr(attribute) {
to_print.push(value.to_string());
}
}
} else {
to_print.push(element.html());
}
}
for value in to_print {
if args.trim {
println!("{}", value.trim());
} else {
println!("{}", value);
}
}
Ok(())
}