Replace color-eyre usage with our own error types using thiserror.

This commit is contained in:
Bauke 2023-06-09 21:03:38 +02:00
parent 9e36676a7f
commit a2b9efb94b
Signed by: Bauke
GPG Key ID: C1C0F29952BCF558
6 changed files with 49 additions and 25 deletions

13
source/error.rs Normal file
View File

@ -0,0 +1,13 @@
//! All error types.
use thiserror::Error;
/// Errors that can happen while parsing.
///
/// The [`std::error::Error`] and [`std::fmt::Display`] implementations are
/// derived through `thiserror`; each variant's `#[error(...)]` attribute holds
/// the message that is displayed for it.
#[derive(Debug, Error)]
pub enum ParseError {
/// The error for when HTML text or attributes are missing and they should be
/// present. This could be user error by having an edge case not covered, or
/// Tildes could have been updated and the HTML changed in the meantime.
///
/// Displayed as `Missing expected HTML values`.
#[error("Missing expected HTML values")]
MissingExpectedHtml,
}

View File

@ -2,9 +2,9 @@
use std::str::FromStr;
use {color_eyre::eyre::Error, duplicate::duplicate_item, scraper::Html};
use {duplicate::duplicate_item, scraper::Html};
use crate::{Group, GroupList};
use crate::{Group, GroupList, ParseError};
#[duplicate_item(
_Struct;
@ -12,7 +12,7 @@ use crate::{Group, GroupList};
[GroupList];
)]
impl FromStr for _Struct {
type Err = Error;
type Err = ParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let html = Html::parse_document(s);

View File

@ -1,6 +1,6 @@
//! Parsing for `/~<group>`.
use {color_eyre::Result, scraper::Html};
use scraper::Html;
use crate::{
regexes::GROUP_SUBSCRIBERS_RE,
@ -11,6 +11,7 @@ use crate::{
utilities::{
extract_anchor_values, parse_regex_match, select_first_element_text,
},
ParseError,
};
/// A group's information.
@ -44,37 +45,37 @@ pub struct GroupWikiLink {
impl Group {
/// Parses a [`Group`] from a [`scraper::Html`] tree.
pub fn from_html(html: &Html) -> Result<Self> {
pub fn from_html(html: &Html) -> Result<Self, ParseError> {
let description =
select_first_element_text(html.root_element(), &GROUP_DESCRIPTION);
let name =
select_first_element_text(html.root_element(), &GROUP_NAME).unwrap();
let name = select_first_element_text(html.root_element(), &GROUP_NAME)
.ok_or(ParseError::MissingExpectedHtml)?;
let subscribers = parse_regex_match(
GROUP_SUBSCRIBERS_RE
.captures_iter(
&select_first_element_text(html.root_element(), &GROUP_SUBSCRIBERS)
.unwrap(),
.ok_or(ParseError::MissingExpectedHtml)?,
)
.next()
.unwrap()
.ok_or(ParseError::MissingExpectedHtml)?
.name("count"),
)
.unwrap();
.ok_or(ParseError::MissingExpectedHtml)?;
let sub_groups = html
.select(&GROUP_SUB_GROUP_LINKS)
.map(|element| extract_anchor_values(element).0)
.collect();
.map(|element| Ok(extract_anchor_values(element)?.0))
.collect::<Result<_, _>>()?;
let wiki_links = html
.select(&GROUP_WIKI_LINKS)
.map(|element| {
let (name, url) = extract_anchor_values(element);
GroupWikiLink { name, url }
let (name, url) = extract_anchor_values(element)?;
Ok(GroupWikiLink { name, url })
})
.collect();
.collect::<Result<_, _>>()?;
Ok(Self {
description,

View File

@ -1,11 +1,12 @@
//! Parsing for [`/groups`](https://tildes.net/groups).
use {color_eyre::Result, scraper::Html};
use scraper::Html;
use crate::{
regexes::{DUPLICATE_WHITESPACE_RE, GROUP_LIST_ACTIVITY_RE},
selectors::{GROUP_LINK, GROUP_LIST_ACTIVITY, GROUP_LIST_DESCRIPTION},
utilities::{parse_regex_match, select_first_element_text, selector},
ParseError,
};
/// The group list from the [`/groups`](https://tildes.net/groups) page.
@ -34,7 +35,7 @@ pub struct GroupListSummary {
impl GroupList {
/// Parses a [`GroupList`] from a [`scraper::Html`] tree.
pub fn from_html(html: &Html) -> Result<Self> {
pub fn from_html(html: &Html) -> Result<Self, ParseError> {
let summaries = html
.select(&selector(".group-list li"))
.map(|parent| {
@ -54,17 +55,18 @@ impl GroupList {
.unwrap_or_default()
};
GroupListSummary {
Ok(GroupListSummary {
comment_activity: activity_counts.0,
description: select_first_element_text(
parent,
&GROUP_LIST_DESCRIPTION,
),
name: select_first_element_text(parent, &GROUP_LINK).unwrap(),
name: select_first_element_text(parent, &GROUP_LINK)
.ok_or(ParseError::MissingExpectedHtml)?,
topic_activity: activity_counts.1,
}
})
.collect();
})
.collect::<Result<_, _>>()?;
Ok(Self { summaries })
}

View File

@ -24,11 +24,13 @@ pub mod regexes;
pub mod selectors;
pub mod utilities;
pub(crate) mod error;
pub(crate) mod from_str;
pub(crate) mod group;
pub(crate) mod group_list;
pub use {
error::*,
group::{Group, GroupWikiLink},
group_list::{GroupList, GroupListSummary},
};

View File

@ -7,17 +7,23 @@ use {
scraper::{ElementRef, Selector},
};
use crate::regexes::DUPLICATE_WHITESPACE_RE;
use crate::{regexes::DUPLICATE_WHITESPACE_RE, ParseError};
/// Shorthand to extract the text and `href` values from an anchor element.
///
/// The returned tuple is `(name, href)`. The name is the anchor's collected
/// text with every match of `DUPLICATE_WHITESPACE_RE` replaced by a single
/// space and surrounding whitespace trimmed.
///
/// # Errors
///
/// Returns [`ParseError::MissingExpectedHtml`] when the anchor has no `href`
/// attribute.
pub fn extract_anchor_values(anchor: ElementRef) -> (String, String) {
pub fn extract_anchor_values(
anchor: ElementRef,
) -> Result<(String, String), ParseError> {
let name = DUPLICATE_WHITESPACE_RE
.replace_all(&anchor.text().collect::<String>(), " ")
.trim()
.to_string();
let href = anchor.value().attr("href").unwrap().to_string();
// A missing `href` is surfaced to the caller as an error rather than a panic.
let href = anchor
.value()
.attr("href")
.ok_or(ParseError::MissingExpectedHtml)?
.to_string();
(name, href)
Ok((name, href))
}
/// Shorthand to parse a [`regex::Match`] with [`std::str::FromStr`].