diff --git a/source/error.rs b/source/error.rs new file mode 100644 index 0000000..a03d10e --- /dev/null +++ b/source/error.rs @@ -0,0 +1,13 @@ +//! All error types. + +use thiserror::Error; + +/// Errors that can happen while parsing. +#[derive(Debug, Error)] +pub enum ParseError { + /// The error for when HTML text or attributes are missing and they should be + /// present. This could be user error by having an edge case not covered, or + /// Tildes could have been updated and the HTML changed in the meantime. + #[error("Missing expected HTML values")] + MissingExpectedHtml, +} diff --git a/source/from_str.rs b/source/from_str.rs index c17fcf8..bdb25a7 100644 --- a/source/from_str.rs +++ b/source/from_str.rs @@ -2,9 +2,9 @@ use std::str::FromStr; -use {color_eyre::eyre::Error, duplicate::duplicate_item, scraper::Html}; +use {duplicate::duplicate_item, scraper::Html}; -use crate::{Group, GroupList}; +use crate::{Group, GroupList, ParseError}; #[duplicate_item( _Struct; @@ -12,7 +12,7 @@ use crate::{Group, GroupList}; [GroupList]; )] impl FromStr for _Struct { - type Err = Error; + type Err = ParseError; fn from_str(s: &str) -> Result { let html = Html::parse_document(s); diff --git a/source/group.rs b/source/group.rs index 144c1a8..5692571 100644 --- a/source/group.rs +++ b/source/group.rs @@ -1,6 +1,6 @@ //! Parsing for `/~`. -use {color_eyre::Result, scraper::Html}; +use scraper::Html; use crate::{ regexes::GROUP_SUBSCRIBERS_RE, @@ -11,6 +11,7 @@ use crate::{ utilities::{ extract_anchor_values, parse_regex_match, select_first_element_text, }, + ParseError, }; /// A group's information. @@ -44,37 +45,37 @@ pub struct GroupWikiLink { impl Group { /// Parses a [`Group`] from a [`scraper::Html`] tree. - pub fn from_html(html: &Html) -> Result { + pub fn from_html(html: &Html) -> Result { let description = select_first_element_text(html.root_element(), &GROUP_DESCRIPTION); - let name = - select_first_element_text(html.root_element(), &GROUP_NAME).unwrap(); + let name = select_first_element_text(html.root_element(), &GROUP_NAME) + .ok_or(ParseError::MissingExpectedHtml)?; let subscribers = parse_regex_match( GROUP_SUBSCRIBERS_RE .captures_iter( &select_first_element_text(html.root_element(), &GROUP_SUBSCRIBERS) - .unwrap(), + .ok_or(ParseError::MissingExpectedHtml)?, ) .next() - .unwrap() + .ok_or(ParseError::MissingExpectedHtml)? .name("count"), ) - .unwrap(); + .ok_or(ParseError::MissingExpectedHtml)?; let sub_groups = html .select(&GROUP_SUB_GROUP_LINKS) - .map(|element| extract_anchor_values(element).0) - .collect(); + .map(|element| Ok(extract_anchor_values(element)?.0)) + .collect::>()?; let wiki_links = html .select(&GROUP_WIKI_LINKS) .map(|element| { - let (name, url) = extract_anchor_values(element); - GroupWikiLink { name, url } + let (name, url) = extract_anchor_values(element)?; + Ok(GroupWikiLink { name, url }) }) - .collect(); + .collect::>()?; Ok(Self { description, diff --git a/source/group_list.rs b/source/group_list.rs index 6890b61..a913985 100644 --- a/source/group_list.rs +++ b/source/group_list.rs @@ -1,11 +1,12 @@ //! Parsing for [`/groups`](https://tildes.net/groups). -use {color_eyre::Result, scraper::Html}; +use scraper::Html; use crate::{ regexes::{DUPLICATE_WHITESPACE_RE, GROUP_LIST_ACTIVITY_RE}, selectors::{GROUP_LINK, GROUP_LIST_ACTIVITY, GROUP_LIST_DESCRIPTION}, utilities::{parse_regex_match, select_first_element_text, selector}, + ParseError, }; /// The group list from the [`/groups`](https://tildes.net/groups) page. @@ -34,7 +35,7 @@ pub struct GroupListSummary { impl GroupList { /// Parses a [`GroupList`] from a [`scraper::Html`] tree. - pub fn from_html(html: &Html) -> Result { + pub fn from_html(html: &Html) -> Result { let summaries = html .select(&selector(".group-list li")) .map(|parent| { @@ -54,17 +55,18 @@ impl GroupList { .unwrap_or_default() }; - GroupListSummary { + Ok(GroupListSummary { comment_activity: activity_counts.0, description: select_first_element_text( parent, &GROUP_LIST_DESCRIPTION, ), - name: select_first_element_text(parent, &GROUP_LINK).unwrap(), + name: select_first_element_text(parent, &GROUP_LINK) + .ok_or(ParseError::MissingExpectedHtml)?, topic_activity: activity_counts.1, - } + }) }) - .collect(); + .collect::>()?; Ok(Self { summaries }) } diff --git a/source/lib.rs b/source/lib.rs index 676e1bd..d023fc9 100644 --- a/source/lib.rs +++ b/source/lib.rs @@ -24,11 +24,13 @@ pub mod regexes; pub mod selectors; pub mod utilities; +pub(crate) mod error; pub(crate) mod from_str; pub(crate) mod group; pub(crate) mod group_list; pub use { + error::*, group::{Group, GroupWikiLink}, group_list::{GroupList, GroupListSummary}, }; diff --git a/source/utilities.rs b/source/utilities.rs index 30210f4..ac6317a 100644 --- a/source/utilities.rs +++ b/source/utilities.rs @@ -7,17 +7,23 @@ use { scraper::{ElementRef, Selector}, }; -use crate::regexes::DUPLICATE_WHITESPACE_RE; +use crate::{regexes::DUPLICATE_WHITESPACE_RE, ParseError}; /// Shorthand to extract the text and `href` values from an anchor element. -pub fn extract_anchor_values(anchor: ElementRef) -> (String, String) { +pub fn extract_anchor_values( + anchor: ElementRef, +) -> Result<(String, String), ParseError> { let name = DUPLICATE_WHITESPACE_RE .replace_all(&anchor.text().collect::(), " ") .trim() .to_string(); - let href = anchor.value().attr("href").unwrap().to_string(); + let href = anchor + .value() + .attr("href") + .ok_or(ParseError::MissingExpectedHtml)? + .to_string(); - (name, href) + Ok((name, href)) } /// Shorthand to parse a [`regex::Match`] with [`std::str::FromStr`].