Replace color-eyre usage with our own error types using thiserror.
commit a2b9efb94b (parent 9e36676a7f)
@@ -0,0 +1,13 @@
+//! All error types.
+
+use thiserror::Error;
+
+/// Errors that can happen while parsing.
+#[derive(Debug, Error)]
+pub enum ParseError {
+  /// The error for when HTML text or attributes are missing and they should be
+  /// present. This could be user error by having an edge case not covered, or
+  /// Tildes could have been updated and the HTML changed in the meantime.
+  #[error("Missing expected HTML values")]
+  MissingExpectedHtml,
+}
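Not part of the commit, just a minimal, self-contained sketch of what the thiserror derive in the new error module provides: #[derive(Error)] implements std::error::Error and #[error("...")] supplies the Display message, so ParseError can be printed and propagated like any other error type. The enum is repeated here only to make the snippet compile on its own.

use thiserror::Error;

#[derive(Debug, Error)]
pub enum ParseError {
  #[error("Missing expected HTML values")]
  MissingExpectedHtml,
}

fn main() {
  let error = ParseError::MissingExpectedHtml;

  // Display comes from the `#[error("...")]` attribute.
  assert_eq!(error.to_string(), "Missing expected HTML values");

  // The derive also implements std::error::Error; this variant has no source.
  assert!(std::error::Error::source(&error).is_none());
}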
@@ -2,9 +2,9 @@

 use std::str::FromStr;

-use {color_eyre::eyre::Error, duplicate::duplicate_item, scraper::Html};
+use {duplicate::duplicate_item, scraper::Html};

-use crate::{Group, GroupList};
+use crate::{Group, GroupList, ParseError};

 #[duplicate_item(
   _Struct;
@@ -12,7 +12,7 @@ use crate::{Group, GroupList};
   [GroupList];
 )]
 impl FromStr for _Struct {
-  type Err = Error;
+  type Err = ParseError;

   fn from_str(s: &str) -> Result<Self, Self::Err> {
     let html = Html::parse_document(s);
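Not part of the commit: a minimal sketch of what this change means for callers of str::parse. With type Err = ParseError, parsing returns the crate's own error type instead of color_eyre's eyre::Error. The Group stand-in and its empty-input check are hypothetical, only there to make the snippet compile on its own; the real impls parse a scraper::Html document.

use std::str::FromStr;

#[derive(Debug)]
enum ParseError {
  MissingExpectedHtml,
}

struct Group {
  name: String,
}

impl FromStr for Group {
  type Err = ParseError;

  fn from_str(s: &str) -> Result<Self, Self::Err> {
    // Stand-in body: reject empty input just to exercise the error path.
    if s.is_empty() {
      return Err(ParseError::MissingExpectedHtml);
    }

    Ok(Group { name: s.to_string() })
  }
}

fn main() {
  let group: Result<Group, ParseError> = "~music".parse();
  assert_eq!(group.unwrap().name, "~music");
  assert!("".parse::<Group>().is_err());
}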
@@ -1,6 +1,6 @@
 //! Parsing for `/~<group>`.

-use {color_eyre::Result, scraper::Html};
+use scraper::Html;

 use crate::{
   regexes::GROUP_SUBSCRIBERS_RE,
@@ -11,6 +11,7 @@ use crate::{
   utilities::{
     extract_anchor_values, parse_regex_match, select_first_element_text,
   },
+  ParseError,
 };

 /// A group's information.
@@ -44,37 +45,37 @@ pub struct GroupWikiLink {

 impl Group {
   /// Parses a [`Group`] from a [`scraper::Html`] tree.
-  pub fn from_html(html: &Html) -> Result<Self> {
+  pub fn from_html(html: &Html) -> Result<Self, ParseError> {
     let description =
       select_first_element_text(html.root_element(), &GROUP_DESCRIPTION);

-    let name =
-      select_first_element_text(html.root_element(), &GROUP_NAME).unwrap();
+    let name = select_first_element_text(html.root_element(), &GROUP_NAME)
+      .ok_or(ParseError::MissingExpectedHtml)?;

     let subscribers = parse_regex_match(
       GROUP_SUBSCRIBERS_RE
         .captures_iter(
           &select_first_element_text(html.root_element(), &GROUP_SUBSCRIBERS)
-            .unwrap(),
+            .ok_or(ParseError::MissingExpectedHtml)?,
         )
         .next()
-        .unwrap()
+        .ok_or(ParseError::MissingExpectedHtml)?
        .name("count"),
     )
-    .unwrap();
+    .ok_or(ParseError::MissingExpectedHtml)?;

     let sub_groups = html
       .select(&GROUP_SUB_GROUP_LINKS)
-      .map(|element| extract_anchor_values(element).0)
-      .collect();
+      .map(|element| Ok(extract_anchor_values(element)?.0))
+      .collect::<Result<_, _>>()?;

     let wiki_links = html
       .select(&GROUP_WIKI_LINKS)
       .map(|element| {
-        let (name, url) = extract_anchor_values(element);
-        GroupWikiLink { name, url }
+        let (name, url) = extract_anchor_values(element)?;
+        Ok(GroupWikiLink { name, url })
       })
-      .collect();
+      .collect::<Result<_, _>>()?;

     Ok(Self {
       description,
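Not part of the commit: a minimal, self-contained sketch of the recurring .unwrap() to .ok_or(...)? change above. Option::ok_or turns the missing-HTML case into a Result, so ? propagates a ParseError to the caller instead of panicking. The first_text helper is hypothetical, standing in for an Option-returning helper like select_first_element_text.

#[derive(Debug)]
enum ParseError {
  MissingExpectedHtml,
}

// Stand-in for a selector helper that may find no matching element.
fn first_text(input: Option<&str>) -> Result<String, ParseError> {
  let text = input.ok_or(ParseError::MissingExpectedHtml)?;

  Ok(text.to_string())
}

fn main() {
  assert_eq!(first_text(Some("~music")).unwrap(), "~music");
  assert!(first_text(None).is_err());
}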
@@ -1,11 +1,12 @@
 //! Parsing for [`/groups`](https://tildes.net/groups).

-use {color_eyre::Result, scraper::Html};
+use scraper::Html;

 use crate::{
   regexes::{DUPLICATE_WHITESPACE_RE, GROUP_LIST_ACTIVITY_RE},
   selectors::{GROUP_LINK, GROUP_LIST_ACTIVITY, GROUP_LIST_DESCRIPTION},
   utilities::{parse_regex_match, select_first_element_text, selector},
+  ParseError,
 };

 /// The group list from the [`/groups`](https://tildes.net/groups) page.
@@ -34,7 +35,7 @@ pub struct GroupListSummary {

 impl GroupList {
   /// Parses a [`GroupList`] from a [`scraper::Html`] tree.
-  pub fn from_html(html: &Html) -> Result<Self> {
+  pub fn from_html(html: &Html) -> Result<Self, ParseError> {
     let summaries = html
       .select(&selector(".group-list li"))
       .map(|parent| {
@@ -54,17 +55,18 @@ impl GroupList {
           .unwrap_or_default()
         };

-        GroupListSummary {
+        Ok(GroupListSummary {
           comment_activity: activity_counts.0,
           description: select_first_element_text(
             parent,
             &GROUP_LIST_DESCRIPTION,
           ),
-          name: select_first_element_text(parent, &GROUP_LINK).unwrap(),
+          name: select_first_element_text(parent, &GROUP_LINK)
+            .ok_or(ParseError::MissingExpectedHtml)?,
           topic_activity: activity_counts.1,
-        }
+        })
       })
-      .collect();
+      .collect::<Result<_, _>>()?;

     Ok(Self { summaries })
   }
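Not part of the commit: a minimal sketch of the .collect() change used in both parsers above. An iterator of Result items can be collected into a Result<Vec<_>, _> that stops at the first Err, which is what lets the closures use ? instead of .unwrap(). The names helper and its input type are made up for the example.

#[derive(Debug)]
enum ParseError {
  MissingExpectedHtml,
}

fn names(anchors: &[Option<&str>]) -> Result<Vec<String>, ParseError> {
  anchors
    .iter()
    .copied()
    .map(|anchor| {
      let name = anchor.ok_or(ParseError::MissingExpectedHtml)?;

      Ok(name.to_string())
    })
    // Collecting into Result short-circuits on the first Err.
    .collect::<Result<_, _>>()
}

fn main() {
  assert_eq!(names(&[Some("~music"), Some("~tech")]).unwrap().len(), 2);
  assert!(names(&[Some("~music"), None]).is_err());
}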
@@ -24,11 +24,13 @@ pub mod regexes;
 pub mod selectors;
 pub mod utilities;

+pub(crate) mod error;
 pub(crate) mod from_str;
 pub(crate) mod group;
 pub(crate) mod group_list;

 pub use {
+  error::*,
   group::{Group, GroupWikiLink},
   group_list::{GroupList, GroupListSummary},
 };
@@ -7,17 +7,23 @@ use {
   scraper::{ElementRef, Selector},
 };

-use crate::regexes::DUPLICATE_WHITESPACE_RE;
+use crate::{regexes::DUPLICATE_WHITESPACE_RE, ParseError};

 /// Shorthand to extract the text and `href` values from an anchor element.
-pub fn extract_anchor_values(anchor: ElementRef) -> (String, String) {
+pub fn extract_anchor_values(
+  anchor: ElementRef,
+) -> Result<(String, String), ParseError> {
   let name = DUPLICATE_WHITESPACE_RE
     .replace_all(&anchor.text().collect::<String>(), " ")
     .trim()
     .to_string();
-  let href = anchor.value().attr("href").unwrap().to_string();
+  let href = anchor
+    .value()
+    .attr("href")
+    .ok_or(ParseError::MissingExpectedHtml)?
+    .to_string();

-  (name, href)
+  Ok((name, href))
 }

 /// Shorthand to parse a [`regex::Match`] with [`std::str::FromStr`].
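Not part of the commit: a minimal, runnable sketch of why the attr("href") call needs ok_or. scraper's element attr returns Option<&str>, so an anchor without an href now surfaces as a ParseError instead of a panic. The first_href helper and the sample HTML fragments are made up for the example.

use scraper::{Html, Selector};

#[derive(Debug)]
enum ParseError {
  MissingExpectedHtml,
}

fn first_href(fragment: &str) -> Result<String, ParseError> {
  let html = Html::parse_fragment(fragment);
  let anchors = Selector::parse("a").unwrap();

  let anchor = html
    .select(&anchors)
    .next()
    .ok_or(ParseError::MissingExpectedHtml)?;

  // attr() returns Option<&str>; a missing href becomes a ParseError.
  let href = anchor
    .value()
    .attr("href")
    .ok_or(ParseError::MissingExpectedHtml)?;

  Ok(href.to_string())
}

fn main() {
  assert_eq!(
    first_href(r#"<a href="/~music">Music</a>"#).unwrap(),
    "/~music"
  );
  assert!(first_href("<a>No href</a>").is_err());
}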