//! Miscellaneous parsing utilities. use std::str::FromStr; use { regex::Match, scraper::{ElementRef, Selector}, }; use crate::{regexes::DUPLICATE_WHITESPACE_RE, ParseError}; /// Shorthand to extract the text and `href` values from an anchor element. pub fn extract_anchor_values( anchor: ElementRef, ) -> Result<(String, String), ParseError> { let name = DUPLICATE_WHITESPACE_RE .replace_all(&anchor.text().collect::(), " ") .trim() .to_string(); let href = anchor .value() .attr("href") .ok_or(ParseError::MissingExpectedHtml)? .to_string(); Ok((name, href)) } /// Shorthand to parse a [`regex::Match`] with [`std::str::FromStr`]. pub fn parse_regex_match(regex_match: Option) -> Option { regex_match.and_then(|regex_match| regex_match.as_str().parse::().ok()) } /// Returns the text of the first found element inside the given `parent` /// element. pub fn select_first_element_text( parent: ElementRef, selector: &Selector, ) -> Option { parent .select(selector) .next() .map(|element| element.text().collect::()) .map(|text| text.trim().to_string()) } /// Shorthand for creating a [`scraper::Selector`]. pub fn selector(selector: &str) -> Selector { Selector::parse(selector).unwrap() }