tildes-parser/source/utilities.rs

//! Miscellaneous parsing utilities.

use std::str::FromStr;

use {
  regex::Match,
  scraper::{ElementRef, Selector},
};

use crate::{regexes::DUPLICATE_WHITESPACE_RE, ParseError};

/// Pulls the display text and the `href` attribute out of an anchor element.
///
/// The text of the element is concatenated into one string, every match of
/// `DUPLICATE_WHITESPACE_RE` in it is replaced by a single space, and the
/// result is trimmed. The returned tuple is `(text, href)`.
///
/// # Errors
///
/// Returns [`ParseError::MissingExpectedHtml`] when the element has no `href`
/// attribute.
pub fn extract_anchor_values(
  anchor: ElementRef,
) -> Result<(String, String), ParseError> {
  let raw_text: String = anchor.text().collect();
  let collapsed = DUPLICATE_WHITESPACE_RE.replace_all(&raw_text, " ");
  let name = collapsed.trim().to_owned();

  let href = match anchor.value().attr("href") {
    Some(target) => target.to_owned(),
    None => return Err(ParseError::MissingExpectedHtml),
  };

  Ok((name, href))
}

/// Converts an optional [`regex::Match`] into a `T` via [`std::str::FromStr`].
///
/// Returns `None` when there is no match or when the matched text fails to
/// parse as `T`; the parse error itself is discarded.
pub fn parse_regex_match<T: FromStr>(regex_match: Option<Match>) -> Option<T> {
  let matched_text = regex_match?.as_str();
  matched_text.parse().ok()
}

/// Returns the trimmed text of the first element under `parent` that matches
/// `selector`.
///
/// Returns `None` when no element matches.
pub fn select_first_element_text(
  parent: ElementRef,
  selector: &Selector,
) -> Option<String> {
  let first_match = parent.select(selector).next()?;
  let combined: String = first_match.text().collect();

  Some(combined.trim().to_owned())
}

/// Shorthand for creating a [`scraper::Selector`].
///
/// # Panics
///
/// Panics when `selector` cannot be parsed by [`Selector::parse`]. The panic
/// message includes the offending selector string and the parse error so the
/// faulty call site can be identified.
pub fn selector(selector: &str) -> Selector {
  match Selector::parse(selector) {
    Ok(parsed) => parsed,
    // A bare `unwrap()` only prints the parse error; naming the selector
    // string makes the failing call easy to locate.
    Err(error) => panic!("invalid CSS selector {selector:?}: {error:?}"),
  }
}
Initial commit. 2022-09-29 23:29:46 +00:00			`//! Miscellaneous parsing utilities.`

			`use std::str::FromStr;`

			`use {`
			`regex::Match,`
			`scraper::{ElementRef, Selector},`
			`};`

Replace color-eyre usage with our own error types using thiserror. 2023-06-09 19:03:38 +00:00			`use crate::{regexes::DUPLICATE_WHITESPACE_RE, ParseError};`
Add group parsing. 2022-10-03 14:25:28 +00:00
			/// Shorthand to extract the text and `href` values from an anchor element.
Replace color-eyre usage with our own error types using thiserror. 2023-06-09 19:03:38 +00:00			`pub fn extract_anchor_values(`
			`anchor: ElementRef,`
			`) -> Result<(String, String), ParseError> {`
Add group parsing. 2022-10-03 14:25:28 +00:00			`let name = DUPLICATE_WHITESPACE_RE`
			`.replace_all(&anchor.text().collect::<String>(), " ")`
			`.trim()`
			`.to_string();`
Replace color-eyre usage with our own error types using thiserror. 2023-06-09 19:03:38 +00:00			`let href = anchor`
			`.value()`
			`.attr("href")`
			`.ok_or(ParseError::MissingExpectedHtml)?`
			`.to_string();`
Add group parsing. 2022-10-03 14:25:28 +00:00
Replace color-eyre usage with our own error types using thiserror. 2023-06-09 19:03:38 +00:00			`Ok((name, href))`
Add group parsing. 2022-10-03 14:25:28 +00:00			`}`

Initial commit. 2022-09-29 23:29:46 +00:00			/// Shorthand to parse a [`regex::Match`] with [`std::str::FromStr`].
			`pub fn parse_regex_match<T: FromStr>(regex_match: Option<Match>) -> Option<T> {`
			`regex_match.and_then(\|regex_match\| regex_match.as_str().parse::<T>().ok())`
			`}`

			/// Returns the text of the first found element inside the given `parent`
			`/// element.`
			`pub fn select_first_element_text(`
			`parent: ElementRef,`
			`selector: &Selector,`
			`) -> Option<String> {`
			`parent`
			`.select(selector)`
			`.next()`
Trim text. 2022-10-03 13:31:26 +00:00			`.map(\|element\| element.text().collect::<String>())`
			`.map(\|text\| text.trim().to_string())`
Initial commit. 2022-09-29 23:29:46 +00:00			`}`

			/// Shorthand for creating a [`scraper::Selector`].
			`pub fn selector(selector: &str) -> Selector {`
			`Selector::parse(selector).unwrap()`
			`}`