Add group parsing to Topic.

This commit is contained in:
Bauke 2023-06-22 12:55:35 +02:00
parent fdc9fb8b98
commit 6f0d4d394d
Signed by: Bauke
GPG Key ID: C1C0F29952BCF558
10 changed files with 55 additions and 3 deletions

View File

@ -29,6 +29,9 @@ lazy_static! {
/// Selector for group wiki links.
pub static ref GROUP_WIKI_LINKS: Selector = selector(r#"#sidebar [href*="/wiki/"]"#);
/// Selector for the site header context.
pub static ref SITE_HEADER_CONTEXT: Selector = selector(".site-header-context");
/// Selector for the topic comment count.
pub static ref TOPIC_COMMENT_COUNT: Selector = selector(".topic-comments-header h2");

View File

@ -5,8 +5,9 @@ use scraper::Html;
use crate::{
regexes::DUPLICATE_WHITESPACE_RE,
selectors::{
TOPIC_COMMENT_COUNT, TOPIC_FULL_BYLINE, TOPIC_FULL_LINK, TOPIC_FULL_TAGS,
TOPIC_FULL_TEXT, TOPIC_MAIN_ARTICLE, TOPIC_TOAST_WARNING, TOPIC_VOTE_COUNT,
SITE_HEADER_CONTEXT, TOPIC_COMMENT_COUNT, TOPIC_FULL_BYLINE,
TOPIC_FULL_LINK, TOPIC_FULL_TAGS, TOPIC_FULL_TEXT, TOPIC_MAIN_ARTICLE,
TOPIC_TOAST_WARNING, TOPIC_VOTE_COUNT,
},
utilities::select_first_element_text,
ParseError,
@ -25,6 +26,9 @@ pub struct Topic {
/// The content of the topic.
pub content: TopicContent,
/// The group the topic was posted in, with a leading tilde character.
pub group: String,
/// The unique ID of the topic.
pub id: String,
@ -132,6 +136,20 @@ impl Topic {
TopicContent::Unknown
};
let group = DUPLICATE_WHITESPACE_RE
.replace_all(
html
.select(&SITE_HEADER_CONTEXT)
.next()
.ok_or(ParseError::MissingExpectedHtml)?
.text()
.collect::<String>()
.trim(),
"",
)
.to_string();
assert!(group.starts_with("~"));
let id = topic_article_element
.value()
.id()
@ -158,6 +176,7 @@ impl Topic {
author,
comment_total,
content,
group,
id,
is_locked,
is_official: false, // TODO: Implement this once it can be done.

View File

@ -5,7 +5,13 @@
<title>Deleted topic sample for topic.rs</title>
</head>
<body class="theme-white">
<body>
<header>
<div class="site-header-context">
<a href="/~group">~group</a>
</div>
</header>
<main>
<article id="topic-666" class="topic-full">
<header>

View File

@ -6,6 +6,12 @@
</head>
<body>
<header>
<div class="site-header-context">
<a href="/~group">~group</a>
</div>
</header>
<main>
<article id="topic-456" class="topic-full">
<header>

View File

@ -6,6 +6,14 @@
</head>
<body>
<header>
<div class="site-header-context">
<a href="/~group">~group</a>
.
<a href="/~group.recurring">recurring</a>
</div>
</header>
<main>
<article id="topic-789" class="topic-full">
<header>

View File

@ -6,6 +6,12 @@
</head>
<body>
<header>
<div class="site-header-context">
<a href="/~group">~group</a>
</div>
</header>
<main>
<article id="topic-123" class="topic-full">
<header>

View File

@ -6,6 +6,7 @@ Topic {
author: Unknown,
comment_total: 2,
content: Unknown,
group: "~group",
id: "666",
is_locked: false,
is_official: false,

View File

@ -10,6 +10,7 @@ Topic {
content: Link(
"https://tildes.net/~test",
),
group: "~group",
id: "456",
is_locked: false,
is_official: false,

View File

@ -8,6 +8,7 @@ Topic {
content: Text(
"<p>Scheduled Topic Text</p>",
),
group: "~group.recurring",
id: "789",
is_locked: false,
is_official: false,

View File

@ -10,6 +10,7 @@ Topic {
content: Text(
"<p>Topic Text</p>",
),
group: "~group",
id: "123",
is_locked: true,
is_official: false,