// Copyright (C) 2022 Bauke // // This program is free software: you can redistribute it and/or modify it under // the terms of the GNU Affero General Public License as published by the Free // Software Foundation, either version 3 of the License, or (at your option) any // later version. // // This program is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more // details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . //! # Select HTML //! //! > **Extract HTML using CSS selectors in the command-line.** #![forbid(unsafe_code)] #![warn(missing_docs, clippy::missing_docs_in_private_items)] use std::{ fs::File, io::{stdin, Read}, path::PathBuf, }; use { clap::Parser, color_eyre::{eyre::eyre, install, Result}, scraper::{Html, Selector}, }; /// CLI arguments struct using [`clap`]'s Derive API. #[derive(Debug, Parser)] #[clap(about, author, version)] pub struct Args { /// Output the attribute's value from the selected element, can be used /// multiple times. #[clap(short, long, group = "output")] pub attribute: Vec, /// A HTML file to read, if not specified stdin will be used instead. #[clap(long, parse(from_os_str))] pub file: Option, /// The CSS selector to use. pub selector: String, /// Output inner text of the selected elements. #[clap(short, long, group = "output")] pub text: bool, /// Trim whitespace from selected items. #[clap(long)] pub trim: bool, } /// The main CLI function. 
/// Reads the HTML input, runs the CSS selector against it and prints every
/// match on its own line.
///
/// The HTML is read from `args.file` when it is set, otherwise from stdin.
/// What is printed for each matched element depends on the output flags:
///
/// * `text`: the element's text pieces concatenated into one string;
/// * `attribute`: the value of each requested attribute, in the order given
///   (attributes the element does not have are skipped);
/// * neither: the element's HTML.
///
/// With `trim` set, every printed value has its surrounding whitespace
/// removed.
///
/// # Errors
///
/// Returns an error when `install` fails, when the selector cannot be parsed,
/// or when the file cannot be opened or the input cannot be read into a
/// string.
fn main() -> Result<()> {
    install()?;

    let args = Args::parse();

    // The parser's own error value is dropped in favour of a fixed message.
    let selector = Selector::parse(&args.selector)
        .map_err(|_| eyre!("Failed to parse selector"))?;

    // Read the whole input up front; the document is parsed from the full
    // string in one go.
    let document = {
        let mut html = String::new();
        if let Some(path) = args.file {
            File::open(path)?.read_to_string(&mut html)?;
        } else {
            stdin().read_to_string(&mut html)?;
        }

        Html::parse_document(&html)
    };

    // Gather every value first, then print, so output order follows document
    // order and, within an element, the order of the requested attributes.
    let mut to_print = vec![];
    for element in document.select(&selector) {
        if args.text {
            to_print.push(element.text().collect::<String>());
        } else if !args.attribute.is_empty() {
            let element = element.value();
            to_print.extend(
                args.attribute
                    .iter()
                    .filter_map(|attribute| element.attr(attribute))
                    .map(str::to_string),
            );
        } else {
            to_print.push(element.html());
        }
    }

    for value in &to_print {
        let value = if args.trim { value.trim() } else { value.as_str() };
        println!("{}", value);
    }

    Ok(())
}