Add topic parsing.
This commit is contained in:
		
							parent
							
								
									a2b9efb94b
								
							
						
					
					
						commit
						773a075edf
					
				| 
						 | 
					@ -4,12 +4,13 @@ use std::str::FromStr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use {duplicate::duplicate_item, scraper::Html};
 | 
					use {duplicate::duplicate_item, scraper::Html};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::{Group, GroupList, ParseError};
 | 
					use crate::{Group, GroupList, ParseError, Topic};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[duplicate_item(
 | 
					#[duplicate_item(
 | 
				
			||||||
  _Struct;
 | 
					  _Struct;
 | 
				
			||||||
  [Group];
 | 
					  [Group];
 | 
				
			||||||
  [GroupList];
 | 
					  [GroupList];
 | 
				
			||||||
 | 
					  [Topic];
 | 
				
			||||||
)]
 | 
					)]
 | 
				
			||||||
impl FromStr for _Struct {
 | 
					impl FromStr for _Struct {
 | 
				
			||||||
  type Err = ParseError;
 | 
					  type Err = ParseError;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -28,9 +28,11 @@ pub(crate) mod error;
 | 
				
			||||||
pub(crate) mod from_str;
 | 
					pub(crate) mod from_str;
 | 
				
			||||||
pub(crate) mod group;
 | 
					pub(crate) mod group;
 | 
				
			||||||
pub(crate) mod group_list;
 | 
					pub(crate) mod group_list;
 | 
				
			||||||
 | 
					pub(crate) mod topic;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub use {
 | 
					pub use {
 | 
				
			||||||
  error::*,
 | 
					  error::*,
 | 
				
			||||||
  group::{Group, GroupWikiLink},
 | 
					  group::{Group, GroupWikiLink},
 | 
				
			||||||
  group_list::{GroupList, GroupListSummary},
 | 
					  group_list::{GroupList, GroupListSummary},
 | 
				
			||||||
 | 
					  topic::*,
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -28,4 +28,28 @@ lazy_static! {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /// Selector for group wiki links.
 | 
					  /// Selector for group wiki links.
 | 
				
			||||||
  pub static ref GROUP_WIKI_LINKS: Selector = selector(r#"#sidebar [href*="/wiki/"]"#);
 | 
					  pub static ref GROUP_WIKI_LINKS: Selector = selector(r#"#sidebar [href*="/wiki/"]"#);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// Selector for the topic comment count.
 | 
				
			||||||
 | 
					  pub static ref TOPIC_COMMENT_COUNT: Selector = selector(".topic-comments-header h2");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// Selector for the topic full byline.
 | 
				
			||||||
 | 
					  pub static ref TOPIC_FULL_BYLINE: Selector = selector(".topic-full-byline");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// Selector for a link topic's content.
 | 
				
			||||||
 | 
					  pub static ref TOPIC_FULL_LINK: Selector = selector(".topic-full-link a");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// Selector for the topic tag elements.
 | 
				
			||||||
 | 
					  pub static ref TOPIC_FULL_TAGS: Selector = selector(".topic-full-tags a");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// Selector for a text topic's content.
 | 
				
			||||||
 | 
					  pub static ref TOPIC_FULL_TEXT: Selector = selector(".topic-full-text");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// Selector for the main topic `<article>`.
 | 
				
			||||||
 | 
					  pub static ref TOPIC_MAIN_ARTICLE: Selector = selector("main > .topic-full");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// Selector for a topic toast warning.
 | 
				
			||||||
 | 
					  pub static ref TOPIC_TOAST_WARNING: Selector = selector(".toast.toast-warning");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// Selector for the topic vote count.
 | 
				
			||||||
 | 
					  pub static ref TOPIC_VOTE_COUNT: Selector = selector(".topic-voting-votes");
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,170 @@
 | 
				
			||||||
 | 
					//! Parsing for `/~<group>/<topic-id>`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use scraper::Html;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use crate::{
 | 
				
			||||||
 | 
					  regexes::DUPLICATE_WHITESPACE_RE,
 | 
				
			||||||
 | 
					  selectors::{
 | 
				
			||||||
 | 
					    TOPIC_COMMENT_COUNT, TOPIC_FULL_BYLINE, TOPIC_FULL_LINK, TOPIC_FULL_TAGS,
 | 
				
			||||||
 | 
					    TOPIC_FULL_TEXT, TOPIC_MAIN_ARTICLE, TOPIC_TOAST_WARNING, TOPIC_VOTE_COUNT,
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  utilities::select_first_element_text,
 | 
				
			||||||
 | 
					  ParseError,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// A Tildes topic.
 | 
				
			||||||
 | 
					#[derive(Debug)]
 | 
				
			||||||
 | 
					pub struct Topic {
 | 
				
			||||||
 | 
					  /// The name of the author.
 | 
				
			||||||
 | 
					  pub author: TopicAuthor,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// The amount of comments the topic has. Comments themselves have to be
 | 
				
			||||||
 | 
					  /// parsed separately.
 | 
				
			||||||
 | 
					  pub comment_total: i32,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// The content of the topic.
 | 
				
			||||||
 | 
					  pub content: TopicContent,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// The unique ID of the topic.
 | 
				
			||||||
 | 
					  pub id: String,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// Whether the topic is locked.
 | 
				
			||||||
 | 
					  pub is_locked: bool,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// Whether the topic is official (not yet implemented, is always false).
 | 
				
			||||||
 | 
					  ///
 | 
				
			||||||
 | 
					  /// TODO: Add is_official. This isn't possible right now because topics don't
 | 
				
			||||||
 | 
					  /// have any indicator of being marked as official. The only place it's shown
 | 
				
			||||||
 | 
					  /// is in the topic listing. See #787 in the Tildes issue tracker.
 | 
				
			||||||
 | 
					  pub is_official: bool,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// All tags applied to the topic.
 | 
				
			||||||
 | 
					  pub tags: Vec<String>,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /// The amount of votes the topic has received.
 | 
				
			||||||
 | 
					  pub vote_count: i32,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// All the different ways a topic author can be represented.
					///
					/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so parsed
					/// authors can be copied and compared directly (for example in tests).
					#[derive(Debug, Clone, PartialEq, Eq)]
					pub enum TopicAuthor {
					  /// The normal case, where the topic author is available.
					  Name(String),

					  /// The topic was posted by Tildes itself.
					  ///
					  /// Technically the user for this is [Tildes](https://tildes.net/user/tildes)
					  /// but in the topic it says "Automatically posted `<date>`" where the
					  /// username normally goes, so may as well special-case it here too.
					  Scheduled,

					  /// The user was banned, deleted their account or disassociated the topic from
					  /// their account.
					  Unknown,
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// The different types of content a topic can have.
					///
					/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so parsed
					/// content can be copied and compared directly (for example in tests).
					#[derive(Debug, Clone, PartialEq, Eq)]
					pub enum TopicContent {
					  /// The topic is a link topic pointing to an external site. Holds the text of
					  /// the link element.
					  Link(String),

					  /// The topic is a text topic with an HTML body. Holds the trimmed inner HTML
					  /// of the text element.
					  Text(String),

					  /// The topic's content is no longer available.
					  Unknown,
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Topic {
 | 
				
			||||||
 | 
					  /// Parses a [`Topic`] from a [`scraper::Html`] tree.
 | 
				
			||||||
 | 
					  pub fn from_html(html: &Html) -> Result<Self, ParseError> {
 | 
				
			||||||
 | 
					    let topic_article_element = html
 | 
				
			||||||
 | 
					      .select(&TOPIC_MAIN_ARTICLE)
 | 
				
			||||||
 | 
					      .next()
 | 
				
			||||||
 | 
					      .ok_or(ParseError::MissingExpectedHtml)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let topic_byline =
 | 
				
			||||||
 | 
					      select_first_element_text(topic_article_element, &TOPIC_FULL_BYLINE)
 | 
				
			||||||
 | 
					        .map(|byline| {
 | 
				
			||||||
 | 
					          DUPLICATE_WHITESPACE_RE
 | 
				
			||||||
 | 
					            .replace_all(&byline, " ")
 | 
				
			||||||
 | 
					            .to_string()
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					        .ok_or(ParseError::MissingExpectedHtml)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let author = if topic_byline.starts_with("Automatically posted") {
 | 
				
			||||||
 | 
					      TopicAuthor::Scheduled
 | 
				
			||||||
 | 
					    } else if topic_byline.ends_with("unknown user") {
 | 
				
			||||||
 | 
					      TopicAuthor::Unknown
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      TopicAuthor::Name(
 | 
				
			||||||
 | 
					        topic_byline
 | 
				
			||||||
 | 
					          .split(" ")
 | 
				
			||||||
 | 
					          .last()
 | 
				
			||||||
 | 
					          .ok_or(ParseError::MissingExpectedHtml)?
 | 
				
			||||||
 | 
					          .to_string(),
 | 
				
			||||||
 | 
					      )
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let comment_total = if let Some(comment_total) =
 | 
				
			||||||
 | 
					      select_first_element_text(topic_article_element, &TOPIC_COMMENT_COUNT)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      comment_total
 | 
				
			||||||
 | 
					        .split(" ")
 | 
				
			||||||
 | 
					        .next()
 | 
				
			||||||
 | 
					        .map(|count| count.parse::<i32>())
 | 
				
			||||||
 | 
					        .ok_or(ParseError::MissingExpectedHtml)?
 | 
				
			||||||
 | 
					        .map_err(|_| ParseError::MissingExpectedHtml)?
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      0
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let content = if let Some(link_content) =
 | 
				
			||||||
 | 
					      topic_article_element.select(&TOPIC_FULL_LINK).next()
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      TopicContent::Link(link_content.text().collect::<String>())
 | 
				
			||||||
 | 
					    } else if let Some(text_content) =
 | 
				
			||||||
 | 
					      topic_article_element.select(&TOPIC_FULL_TEXT).next()
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      TopicContent::Text(text_content.inner_html().trim().to_string())
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      TopicContent::Unknown
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let id = topic_article_element
 | 
				
			||||||
 | 
					      .value()
 | 
				
			||||||
 | 
					      .id()
 | 
				
			||||||
 | 
					      .ok_or(ParseError::MissingExpectedHtml)?[6..]
 | 
				
			||||||
 | 
					      .to_string();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let is_locked =
 | 
				
			||||||
 | 
					      select_first_element_text(topic_article_element, &TOPIC_TOAST_WARNING)
 | 
				
			||||||
 | 
					        .map(|toast| toast.contains("This topic is locked."))
 | 
				
			||||||
 | 
					        .unwrap_or_default();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let tags = topic_article_element
 | 
				
			||||||
 | 
					      .select(&TOPIC_FULL_TAGS)
 | 
				
			||||||
 | 
					      .map(|tag| tag.text().collect::<String>())
 | 
				
			||||||
 | 
					      .collect::<Vec<_>>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let vote_count =
 | 
				
			||||||
 | 
					      select_first_element_text(topic_article_element, &TOPIC_VOTE_COUNT)
 | 
				
			||||||
 | 
					        .map(|vote_count| vote_count.parse::<i32>())
 | 
				
			||||||
 | 
					        .ok_or(ParseError::MissingExpectedHtml)?
 | 
				
			||||||
 | 
					        .map_err(|_| ParseError::MissingExpectedHtml)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let topic = Topic {
 | 
				
			||||||
 | 
					      author,
 | 
				
			||||||
 | 
					      comment_total,
 | 
				
			||||||
 | 
					      content,
 | 
				
			||||||
 | 
					      id,
 | 
				
			||||||
 | 
					      is_locked,
 | 
				
			||||||
 | 
					      is_official: false, // TODO: Implement this once it can be done.
 | 
				
			||||||
 | 
					      tags,
 | 
				
			||||||
 | 
					      vote_count,
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Ok(topic)
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Reference in New Issue