Replace color-eyre usage with our own error types using thiserror.

This commit is contained in:
Bauke 2023-06-09 21:03:38 +02:00
parent 9e36676a7f
commit a2b9efb94b
Signed by: Bauke
GPG Key ID: C1C0F29952BCF558
6 changed files with 49 additions and 25 deletions

13
source/error.rs Normal file
View File

@ -0,0 +1,13 @@
//! All error types.
use thiserror::Error;
/// Errors that can happen while parsing.
#[derive(Debug, Error)]
pub enum ParseError {
/// The error for when HTML text or attributes are missing and they should be
/// present. This could be user error by having an edge case not covered, or
/// Tildes could have been updated and the HTML changed in the meantime.
#[error("Missing expected HTML values")]
MissingExpectedHtml,
}

View File

@ -2,9 +2,9 @@
use std::str::FromStr; use std::str::FromStr;
use {color_eyre::eyre::Error, duplicate::duplicate_item, scraper::Html}; use {duplicate::duplicate_item, scraper::Html};
use crate::{Group, GroupList}; use crate::{Group, GroupList, ParseError};
#[duplicate_item( #[duplicate_item(
_Struct; _Struct;
@ -12,7 +12,7 @@ use crate::{Group, GroupList};
[GroupList]; [GroupList];
)] )]
impl FromStr for _Struct { impl FromStr for _Struct {
type Err = Error; type Err = ParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, Self::Err> {
let html = Html::parse_document(s); let html = Html::parse_document(s);

View File

@ -1,6 +1,6 @@
//! Parsing for `/~<group>`. //! Parsing for `/~<group>`.
use {color_eyre::Result, scraper::Html}; use scraper::Html;
use crate::{ use crate::{
regexes::GROUP_SUBSCRIBERS_RE, regexes::GROUP_SUBSCRIBERS_RE,
@ -11,6 +11,7 @@ use crate::{
utilities::{ utilities::{
extract_anchor_values, parse_regex_match, select_first_element_text, extract_anchor_values, parse_regex_match, select_first_element_text,
}, },
ParseError,
}; };
/// A group's information. /// A group's information.
@ -44,37 +45,37 @@ pub struct GroupWikiLink {
impl Group { impl Group {
/// Parses a [`Group`] from a [`scraper::Html`] tree. /// Parses a [`Group`] from a [`scraper::Html`] tree.
pub fn from_html(html: &Html) -> Result<Self> { pub fn from_html(html: &Html) -> Result<Self, ParseError> {
let description = let description =
select_first_element_text(html.root_element(), &GROUP_DESCRIPTION); select_first_element_text(html.root_element(), &GROUP_DESCRIPTION);
let name = let name = select_first_element_text(html.root_element(), &GROUP_NAME)
select_first_element_text(html.root_element(), &GROUP_NAME).unwrap(); .ok_or(ParseError::MissingExpectedHtml)?;
let subscribers = parse_regex_match( let subscribers = parse_regex_match(
GROUP_SUBSCRIBERS_RE GROUP_SUBSCRIBERS_RE
.captures_iter( .captures_iter(
&select_first_element_text(html.root_element(), &GROUP_SUBSCRIBERS) &select_first_element_text(html.root_element(), &GROUP_SUBSCRIBERS)
.unwrap(), .ok_or(ParseError::MissingExpectedHtml)?,
) )
.next() .next()
.unwrap() .ok_or(ParseError::MissingExpectedHtml)?
.name("count"), .name("count"),
) )
.unwrap(); .ok_or(ParseError::MissingExpectedHtml)?;
let sub_groups = html let sub_groups = html
.select(&GROUP_SUB_GROUP_LINKS) .select(&GROUP_SUB_GROUP_LINKS)
.map(|element| extract_anchor_values(element).0) .map(|element| Ok(extract_anchor_values(element)?.0))
.collect(); .collect::<Result<_, _>>()?;
let wiki_links = html let wiki_links = html
.select(&GROUP_WIKI_LINKS) .select(&GROUP_WIKI_LINKS)
.map(|element| { .map(|element| {
let (name, url) = extract_anchor_values(element); let (name, url) = extract_anchor_values(element)?;
GroupWikiLink { name, url } Ok(GroupWikiLink { name, url })
}) })
.collect(); .collect::<Result<_, _>>()?;
Ok(Self { Ok(Self {
description, description,

View File

@ -1,11 +1,12 @@
//! Parsing for [`/groups`](https://tildes.net/groups). //! Parsing for [`/groups`](https://tildes.net/groups).
use {color_eyre::Result, scraper::Html}; use scraper::Html;
use crate::{ use crate::{
regexes::{DUPLICATE_WHITESPACE_RE, GROUP_LIST_ACTIVITY_RE}, regexes::{DUPLICATE_WHITESPACE_RE, GROUP_LIST_ACTIVITY_RE},
selectors::{GROUP_LINK, GROUP_LIST_ACTIVITY, GROUP_LIST_DESCRIPTION}, selectors::{GROUP_LINK, GROUP_LIST_ACTIVITY, GROUP_LIST_DESCRIPTION},
utilities::{parse_regex_match, select_first_element_text, selector}, utilities::{parse_regex_match, select_first_element_text, selector},
ParseError,
}; };
/// The group list from the [`/groups`](https://tildes.net/groups) page. /// The group list from the [`/groups`](https://tildes.net/groups) page.
@ -34,7 +35,7 @@ pub struct GroupListSummary {
impl GroupList { impl GroupList {
/// Parses a [`GroupList`] from a [`scraper::Html`] tree. /// Parses a [`GroupList`] from a [`scraper::Html`] tree.
pub fn from_html(html: &Html) -> Result<Self> { pub fn from_html(html: &Html) -> Result<Self, ParseError> {
let summaries = html let summaries = html
.select(&selector(".group-list li")) .select(&selector(".group-list li"))
.map(|parent| { .map(|parent| {
@ -54,17 +55,18 @@ impl GroupList {
.unwrap_or_default() .unwrap_or_default()
}; };
GroupListSummary { Ok(GroupListSummary {
comment_activity: activity_counts.0, comment_activity: activity_counts.0,
description: select_first_element_text( description: select_first_element_text(
parent, parent,
&GROUP_LIST_DESCRIPTION, &GROUP_LIST_DESCRIPTION,
), ),
name: select_first_element_text(parent, &GROUP_LINK).unwrap(), name: select_first_element_text(parent, &GROUP_LINK)
.ok_or(ParseError::MissingExpectedHtml)?,
topic_activity: activity_counts.1, topic_activity: activity_counts.1,
}
}) })
.collect(); })
.collect::<Result<_, _>>()?;
Ok(Self { summaries }) Ok(Self { summaries })
} }

View File

@ -24,11 +24,13 @@ pub mod regexes;
pub mod selectors; pub mod selectors;
pub mod utilities; pub mod utilities;
pub(crate) mod error;
pub(crate) mod from_str; pub(crate) mod from_str;
pub(crate) mod group; pub(crate) mod group;
pub(crate) mod group_list; pub(crate) mod group_list;
pub use { pub use {
error::*,
group::{Group, GroupWikiLink}, group::{Group, GroupWikiLink},
group_list::{GroupList, GroupListSummary}, group_list::{GroupList, GroupListSummary},
}; };

View File

@ -7,17 +7,23 @@ use {
scraper::{ElementRef, Selector}, scraper::{ElementRef, Selector},
}; };
use crate::regexes::DUPLICATE_WHITESPACE_RE; use crate::{regexes::DUPLICATE_WHITESPACE_RE, ParseError};
/// Shorthand to extract the text and `href` values from an anchor element. /// Shorthand to extract the text and `href` values from an anchor element.
pub fn extract_anchor_values(anchor: ElementRef) -> (String, String) { pub fn extract_anchor_values(
anchor: ElementRef,
) -> Result<(String, String), ParseError> {
let name = DUPLICATE_WHITESPACE_RE let name = DUPLICATE_WHITESPACE_RE
.replace_all(&anchor.text().collect::<String>(), " ") .replace_all(&anchor.text().collect::<String>(), " ")
.trim() .trim()
.to_string(); .to_string();
let href = anchor.value().attr("href").unwrap().to_string(); let href = anchor
.value()
.attr("href")
.ok_or(ParseError::MissingExpectedHtml)?
.to_string();
(name, href) Ok((name, href))
} }
/// Shorthand to parse a [`regex::Match`] with [`std::str::FromStr`]. /// Shorthand to parse a [`regex::Match`] with [`std::str::FromStr`].