Compare commits
No commits in common. "ba008a3ccaf5d1e4f4288429632c9d389e236675" and "a2b9efb94be83d5490bf260f36db3d0624199cfe" have entirely different histories.
ba008a3cca
...
a2b9efb94b
|
@ -4,13 +4,12 @@ use std::str::FromStr;
|
||||||
|
|
||||||
use {duplicate::duplicate_item, scraper::Html};
|
use {duplicate::duplicate_item, scraper::Html};
|
||||||
|
|
||||||
use crate::{Group, GroupList, ParseError, Topic};
|
use crate::{Group, GroupList, ParseError};
|
||||||
|
|
||||||
#[duplicate_item(
|
#[duplicate_item(
|
||||||
_Struct;
|
_Struct;
|
||||||
[Group];
|
[Group];
|
||||||
[GroupList];
|
[GroupList];
|
||||||
[Topic];
|
|
||||||
)]
|
)]
|
||||||
impl FromStr for _Struct {
|
impl FromStr for _Struct {
|
||||||
type Err = ParseError;
|
type Err = ParseError;
|
||||||
|
|
|
@ -28,11 +28,9 @@ pub(crate) mod error;
|
||||||
pub(crate) mod from_str;
|
pub(crate) mod from_str;
|
||||||
pub(crate) mod group;
|
pub(crate) mod group;
|
||||||
pub(crate) mod group_list;
|
pub(crate) mod group_list;
|
||||||
pub(crate) mod topic;
|
|
||||||
|
|
||||||
pub use {
|
pub use {
|
||||||
error::*,
|
error::*,
|
||||||
group::{Group, GroupWikiLink},
|
group::{Group, GroupWikiLink},
|
||||||
group_list::{GroupList, GroupListSummary},
|
group_list::{GroupList, GroupListSummary},
|
||||||
topic::*,
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -28,28 +28,4 @@ lazy_static! {
|
||||||
|
|
||||||
/// Selector for group wiki links.
|
/// Selector for group wiki links.
|
||||||
pub static ref GROUP_WIKI_LINKS: Selector = selector(r#"#sidebar [href*="/wiki/"]"#);
|
pub static ref GROUP_WIKI_LINKS: Selector = selector(r#"#sidebar [href*="/wiki/"]"#);
|
||||||
|
|
||||||
/// Selector for the topic comment count.
|
|
||||||
pub static ref TOPIC_COMMENT_COUNT: Selector = selector(".topic-comments-header h2");
|
|
||||||
|
|
||||||
/// Selector for the topic full byline.
|
|
||||||
pub static ref TOPIC_FULL_BYLINE: Selector = selector(".topic-full-byline");
|
|
||||||
|
|
||||||
/// Selector for a link topic's content.
|
|
||||||
pub static ref TOPIC_FULL_LINK: Selector = selector(".topic-full-link a");
|
|
||||||
|
|
||||||
/// Selector for the topic tag elements.
|
|
||||||
pub static ref TOPIC_FULL_TAGS: Selector = selector(".topic-full-tags a");
|
|
||||||
|
|
||||||
/// Selector for a text topic's content.
|
|
||||||
pub static ref TOPIC_FULL_TEXT: Selector = selector(".topic-full-text");
|
|
||||||
|
|
||||||
/// Selector for the main topic `<article>`.
|
|
||||||
pub static ref TOPIC_MAIN_ARTICLE: Selector = selector("main > .topic-full");
|
|
||||||
|
|
||||||
/// Selector for a topic toast warning.
|
|
||||||
pub static ref TOPIC_TOAST_WARNING: Selector = selector(".toast.toast-warning");
|
|
||||||
|
|
||||||
/// Selector for the topic vote count.
|
|
||||||
pub static ref TOPIC_VOTE_COUNT: Selector = selector(".topic-voting-votes");
|
|
||||||
}
|
}
|
||||||
|
|
170
source/topic.rs
170
source/topic.rs
|
@ -1,170 +0,0 @@
|
||||||
//! Parsing for `/~<group>/<topic-id>`.
|
|
||||||
|
|
||||||
use scraper::Html;
|
|
||||||
|
|
||||||
use crate::{
|
|
||||||
regexes::DUPLICATE_WHITESPACE_RE,
|
|
||||||
selectors::{
|
|
||||||
TOPIC_COMMENT_COUNT, TOPIC_FULL_BYLINE, TOPIC_FULL_LINK, TOPIC_FULL_TAGS,
|
|
||||||
TOPIC_FULL_TEXT, TOPIC_MAIN_ARTICLE, TOPIC_TOAST_WARNING, TOPIC_VOTE_COUNT,
|
|
||||||
},
|
|
||||||
utilities::select_first_element_text,
|
|
||||||
ParseError,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// A Tildes topic.
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Topic {
|
|
||||||
/// The name of the author.
|
|
||||||
pub author: TopicAuthor,
|
|
||||||
|
|
||||||
/// The amount of comments the topic has. Comments themselves have to be
|
|
||||||
/// parsed separately.
|
|
||||||
pub comment_total: i32,
|
|
||||||
|
|
||||||
/// The content of the topic.
|
|
||||||
pub content: TopicContent,
|
|
||||||
|
|
||||||
/// The unique ID of the topic.
|
|
||||||
pub id: String,
|
|
||||||
|
|
||||||
/// Whether the topic is locked.
|
|
||||||
pub is_locked: bool,
|
|
||||||
|
|
||||||
/// Whether the topic is official (not yet implemented, is always false).
|
|
||||||
///
|
|
||||||
/// TODO: Add is_official. This isn't possible right now because topics don't
|
|
||||||
/// have any indicator of being marked as official. The only place it's shown
|
|
||||||
/// is in the topic listing. See #787 in the Tildes issue tracker.
|
|
||||||
pub is_official: bool,
|
|
||||||
|
|
||||||
/// All tags applied to the topic.
|
|
||||||
pub tags: Vec<String>,
|
|
||||||
|
|
||||||
/// The amount of votes the topic has received.
|
|
||||||
pub vote_count: i32,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// All the different ways a topic author can be represented.
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum TopicAuthor {
|
|
||||||
/// The normal case, where the topic author is available.
|
|
||||||
Name(String),
|
|
||||||
|
|
||||||
/// The topic was posted by Tildes itself.
|
|
||||||
///
|
|
||||||
/// Technically the user for this is [Tildes](https://tildes.net/user/tildes)
|
|
||||||
/// but in the topic it says "Automatically posted <date>" where the username
|
|
||||||
/// normally goes, so may as well special-case it here too.
|
|
||||||
Scheduled,
|
|
||||||
|
|
||||||
/// The user was banned, deleted their account or disassociated the topic from
|
|
||||||
/// their account.
|
|
||||||
Unknown,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The different types of content a topic can have.
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum TopicContent {
|
|
||||||
/// The topic is a link topic pointing to an external site.
|
|
||||||
Link(String),
|
|
||||||
|
|
||||||
/// The topic is a text topic with a HTML body.
|
|
||||||
Text(String),
|
|
||||||
|
|
||||||
/// The topic's content is no longer available.
|
|
||||||
Unknown,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Topic {
|
|
||||||
/// Parses a [`Topic`] from a [`scraper::Html`] tree.
|
|
||||||
pub fn from_html(html: &Html) -> Result<Self, ParseError> {
|
|
||||||
let topic_article_element = html
|
|
||||||
.select(&TOPIC_MAIN_ARTICLE)
|
|
||||||
.next()
|
|
||||||
.ok_or(ParseError::MissingExpectedHtml)?;
|
|
||||||
|
|
||||||
let topic_byline =
|
|
||||||
select_first_element_text(topic_article_element, &TOPIC_FULL_BYLINE)
|
|
||||||
.map(|byline| {
|
|
||||||
DUPLICATE_WHITESPACE_RE
|
|
||||||
.replace_all(&byline, " ")
|
|
||||||
.to_string()
|
|
||||||
})
|
|
||||||
.ok_or(ParseError::MissingExpectedHtml)?;
|
|
||||||
|
|
||||||
let author = if topic_byline.starts_with("Automatically posted") {
|
|
||||||
TopicAuthor::Scheduled
|
|
||||||
} else if topic_byline.ends_with("unknown user") {
|
|
||||||
TopicAuthor::Unknown
|
|
||||||
} else {
|
|
||||||
TopicAuthor::Name(
|
|
||||||
topic_byline
|
|
||||||
.split(" ")
|
|
||||||
.last()
|
|
||||||
.ok_or(ParseError::MissingExpectedHtml)?
|
|
||||||
.to_string(),
|
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
let comment_total = if let Some(comment_total) =
|
|
||||||
select_first_element_text(topic_article_element, &TOPIC_COMMENT_COUNT)
|
|
||||||
{
|
|
||||||
comment_total
|
|
||||||
.split(" ")
|
|
||||||
.next()
|
|
||||||
.map(|count| count.parse::<i32>())
|
|
||||||
.ok_or(ParseError::MissingExpectedHtml)?
|
|
||||||
.map_err(|_| ParseError::MissingExpectedHtml)?
|
|
||||||
} else {
|
|
||||||
0
|
|
||||||
};
|
|
||||||
|
|
||||||
let content = if let Some(link_content) =
|
|
||||||
topic_article_element.select(&TOPIC_FULL_LINK).next()
|
|
||||||
{
|
|
||||||
TopicContent::Link(link_content.text().collect::<String>())
|
|
||||||
} else if let Some(text_content) =
|
|
||||||
topic_article_element.select(&TOPIC_FULL_TEXT).next()
|
|
||||||
{
|
|
||||||
TopicContent::Text(text_content.inner_html().trim().to_string())
|
|
||||||
} else {
|
|
||||||
TopicContent::Unknown
|
|
||||||
};
|
|
||||||
|
|
||||||
let id = topic_article_element
|
|
||||||
.value()
|
|
||||||
.id()
|
|
||||||
.ok_or(ParseError::MissingExpectedHtml)?[6..]
|
|
||||||
.to_string();
|
|
||||||
|
|
||||||
let is_locked =
|
|
||||||
select_first_element_text(topic_article_element, &TOPIC_TOAST_WARNING)
|
|
||||||
.map(|toast| toast.contains("This topic is locked."))
|
|
||||||
.unwrap_or_default();
|
|
||||||
|
|
||||||
let tags = topic_article_element
|
|
||||||
.select(&TOPIC_FULL_TAGS)
|
|
||||||
.map(|tag| tag.text().collect::<String>())
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let vote_count =
|
|
||||||
select_first_element_text(topic_article_element, &TOPIC_VOTE_COUNT)
|
|
||||||
.map(|vote_count| vote_count.parse::<i32>())
|
|
||||||
.ok_or(ParseError::MissingExpectedHtml)?
|
|
||||||
.map_err(|_| ParseError::MissingExpectedHtml)?;
|
|
||||||
|
|
||||||
let topic = Topic {
|
|
||||||
author,
|
|
||||||
comment_total,
|
|
||||||
content,
|
|
||||||
id,
|
|
||||||
is_locked,
|
|
||||||
is_official: false, // TODO: Implement this once it can be done.
|
|
||||||
tags,
|
|
||||||
vote_count,
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(topic)
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,41 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="en">
|
|
||||||
|
|
||||||
<head>
|
|
||||||
<title>Deleted topic sample for topic.rs</title>
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="theme-white">
|
|
||||||
<main>
|
|
||||||
<article id="topic-666" class="topic-full">
|
|
||||||
<header>
|
|
||||||
<div class="topic-voting">
|
|
||||||
<span class="topic-voting-votes">0</span>
|
|
||||||
<span class="topic-voting-label">votes</span>
|
|
||||||
</div>
|
|
||||||
<h1></h1>
|
|
||||||
|
|
||||||
<div class="topic-full-byline">
|
|
||||||
Posted
|
|
||||||
<time class="time-responsive" datetime="2018-04-26T21:22:47Z"
|
|
||||||
title="2018-04-26 21:22:47 UTC" data-abbreviated="Apr 26, 2018">
|
|
||||||
<span class="time-responsive-full">April 26, 2018</span>
|
|
||||||
</time> by unknown user
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="topic-full-tags"></div>
|
|
||||||
</header>
|
|
||||||
|
|
||||||
<div class="text-error">Topic deleted by author</div>
|
|
||||||
|
|
||||||
<section class="topic-comments">
|
|
||||||
<div class="divider"></div>
|
|
||||||
<header class="topic-comments-header">
|
|
||||||
<h2>2 comments</h2>
|
|
||||||
</header>
|
|
||||||
</section>
|
|
||||||
</article>
|
|
||||||
</main>
|
|
||||||
</body>
|
|
||||||
|
|
||||||
</html>
|
|
|
@ -1,50 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="en">
|
|
||||||
|
|
||||||
<head>
|
|
||||||
<title>Link topic sample for topic.rs</title>
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body>
|
|
||||||
<main>
|
|
||||||
<article id="topic-456" class="topic-full">
|
|
||||||
<header>
|
|
||||||
<div class="topic-voting">
|
|
||||||
<span class="topic-voting-votes">10</span>
|
|
||||||
<span class="topic-voting-label">votes</span>
|
|
||||||
</div>
|
|
||||||
<h1>Link Topic Title</h1>
|
|
||||||
|
|
||||||
<div class="topic-full-byline">
|
|
||||||
Posted
|
|
||||||
<time class="time-responsive" datetime="2021-12-16T17:26:03Z"
|
|
||||||
title="2021-12-16 17:26:03 UTC" data-abbreviated="Dec 16, 2021">
|
|
||||||
<span class="time-responsive-full">December 16, 2021</span>
|
|
||||||
</time>
|
|
||||||
by
|
|
||||||
<a href="/user/AnotherUser" class="link-user">AnotherUser</a>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="topic-full-tags">
|
|
||||||
Tags:
|
|
||||||
<a href="/~group?tag=four">four</a>,
|
|
||||||
<a href="/~group?tag=five">five</a>,
|
|
||||||
<a href="/~group?tag=six">six</a>
|
|
||||||
</div>
|
|
||||||
</header>
|
|
||||||
|
|
||||||
<div class="topic-full-link">
|
|
||||||
<a href="https://tildes.net/~test">https://tildes.net/~test</a>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<section class="topic-comments">
|
|
||||||
<div class="divider"></div>
|
|
||||||
<header class="topic-comments-header">
|
|
||||||
<h2>30 comments</h2>
|
|
||||||
</header>
|
|
||||||
</section>
|
|
||||||
</article>
|
|
||||||
</main>
|
|
||||||
</body>
|
|
||||||
|
|
||||||
</html>
|
|
|
@ -1,46 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="en">
|
|
||||||
|
|
||||||
<head>
|
|
||||||
<title>Text topic sample for topic.rs</title>
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body>
|
|
||||||
<main>
|
|
||||||
<article id="topic-789" class="topic-full">
|
|
||||||
<header>
|
|
||||||
<div class="topic-voting">
|
|
||||||
<span class="topic-voting-votes">50</span>
|
|
||||||
<span class="topic-voting-label">votes</span>
|
|
||||||
</div>
|
|
||||||
<h1>Scheduled Topic Title</h1>
|
|
||||||
|
|
||||||
<div class="topic-full-byline">
|
|
||||||
Automatically posted
|
|
||||||
<time class="time-responsive" datetime="2022-12-16T17:26:03Z"
|
|
||||||
title="2022-12-16 17:26:03 UTC" data-abbreviated="Dec 16, 2022">
|
|
||||||
<span class="time-responsive-full">December 16, 2022</span>
|
|
||||||
</time>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="topic-full-tags">
|
|
||||||
Tags:
|
|
||||||
<a href="/~group?tag=recurring">recurring.weekly</a>
|
|
||||||
</div>
|
|
||||||
</header>
|
|
||||||
|
|
||||||
<div class="topic-full-text">
|
|
||||||
<p>Scheduled Topic Text</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<section class="topic-comments">
|
|
||||||
<div class="divider"></div>
|
|
||||||
<header class="topic-comments-header">
|
|
||||||
<h2>100 comments</h2>
|
|
||||||
</header>
|
|
||||||
</section>
|
|
||||||
</article>
|
|
||||||
</main>
|
|
||||||
</body>
|
|
||||||
|
|
||||||
</html>
|
|
|
@ -1,54 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="en">
|
|
||||||
|
|
||||||
<head>
|
|
||||||
<title>Text topic sample for topic.rs</title>
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body>
|
|
||||||
<main>
|
|
||||||
<article id="topic-123" class="topic-full">
|
|
||||||
<header>
|
|
||||||
<div class="topic-voting">
|
|
||||||
<span class="topic-voting-votes">5</span>
|
|
||||||
<span class="topic-voting-label">votes</span>
|
|
||||||
</div>
|
|
||||||
<h1>Text Topic Title</h1>
|
|
||||||
|
|
||||||
<div class="topic-full-byline">
|
|
||||||
Posted
|
|
||||||
<time class="time-responsive" datetime="2022-12-16T17:26:03Z"
|
|
||||||
title="2022-12-16 17:26:03 UTC" data-abbreviated="Dec 16, 2022">
|
|
||||||
<span class="time-responsive-full">December 16, 2022</span>
|
|
||||||
</time>
|
|
||||||
by
|
|
||||||
<a href="/user/User" class="link-user">User</a>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="topic-full-tags">
|
|
||||||
Tags:
|
|
||||||
<a href="/~group?tag=one">one</a>,
|
|
||||||
<a href="/~group?tag=two">two</a>,
|
|
||||||
<a href="/~group?tag=three">three</a>
|
|
||||||
</div>
|
|
||||||
</header>
|
|
||||||
|
|
||||||
<div class="topic-full-text">
|
|
||||||
<p>Topic Text</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="toast toast-warning">
|
|
||||||
This topic is locked. New comments can not be posted.
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<section class="topic-comments">
|
|
||||||
<div class="divider"></div>
|
|
||||||
<header class="topic-comments-header">
|
|
||||||
<h2>15 comments</h2>
|
|
||||||
</header>
|
|
||||||
</section>
|
|
||||||
</article>
|
|
||||||
</main>
|
|
||||||
</body>
|
|
||||||
|
|
||||||
</html>
|
|
|
@ -1,14 +0,0 @@
|
||||||
---
|
|
||||||
source: tests/topic.rs
|
|
||||||
expression: topic
|
|
||||||
---
|
|
||||||
Topic {
|
|
||||||
author: Unknown,
|
|
||||||
comment_total: 2,
|
|
||||||
content: Unknown,
|
|
||||||
id: "666",
|
|
||||||
is_locked: false,
|
|
||||||
is_official: false,
|
|
||||||
tags: [],
|
|
||||||
vote_count: 0,
|
|
||||||
}
|
|
|
@ -1,22 +0,0 @@
|
||||||
---
|
|
||||||
source: tests/topic.rs
|
|
||||||
expression: topic
|
|
||||||
---
|
|
||||||
Topic {
|
|
||||||
author: Name(
|
|
||||||
"AnotherUser",
|
|
||||||
),
|
|
||||||
comment_total: 30,
|
|
||||||
content: Link(
|
|
||||||
"https://tildes.net/~test",
|
|
||||||
),
|
|
||||||
id: "456",
|
|
||||||
is_locked: false,
|
|
||||||
is_official: false,
|
|
||||||
tags: [
|
|
||||||
"four",
|
|
||||||
"five",
|
|
||||||
"six",
|
|
||||||
],
|
|
||||||
vote_count: 10,
|
|
||||||
}
|
|
|
@ -1,18 +0,0 @@
|
||||||
---
|
|
||||||
source: tests/topic.rs
|
|
||||||
expression: topic
|
|
||||||
---
|
|
||||||
Topic {
|
|
||||||
author: Scheduled,
|
|
||||||
comment_total: 100,
|
|
||||||
content: Text(
|
|
||||||
"<p>Scheduled Topic Text</p>",
|
|
||||||
),
|
|
||||||
id: "789",
|
|
||||||
is_locked: false,
|
|
||||||
is_official: false,
|
|
||||||
tags: [
|
|
||||||
"recurring.weekly",
|
|
||||||
],
|
|
||||||
vote_count: 50,
|
|
||||||
}
|
|
|
@ -1,22 +0,0 @@
|
||||||
---
|
|
||||||
source: tests/topic.rs
|
|
||||||
expression: topic
|
|
||||||
---
|
|
||||||
Topic {
|
|
||||||
author: Name(
|
|
||||||
"User",
|
|
||||||
),
|
|
||||||
comment_total: 15,
|
|
||||||
content: Text(
|
|
||||||
"<p>Topic Text</p>",
|
|
||||||
),
|
|
||||||
id: "123",
|
|
||||||
is_locked: true,
|
|
||||||
is_official: false,
|
|
||||||
tags: [
|
|
||||||
"one",
|
|
||||||
"two",
|
|
||||||
"three",
|
|
||||||
],
|
|
||||||
vote_count: 5,
|
|
||||||
}
|
|
|
@ -1,14 +0,0 @@
|
||||||
use std::fs::read_to_string;
|
|
||||||
|
|
||||||
use {insta::assert_debug_snapshot, tildes_parser::Topic};
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_topic_parsing() {
|
|
||||||
let samples = ["link", "text", "deleted", "scheduled"];
|
|
||||||
for sample in samples {
|
|
||||||
let html =
|
|
||||||
read_to_string(format!("tests/samples/topic_{sample}.html")).unwrap();
|
|
||||||
let topic = &html.parse::<Topic>().unwrap();
|
|
||||||
assert_debug_snapshot!(sample, topic);
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue