Add posted date parsing to Topic.

This commit is contained in:
Bauke 2023-06-23 11:00:02 +02:00
parent 6f0d4d394d
commit bf8df3bd39
Signed by: Bauke
GPG Key ID: C1C0F29952BCF558
8 changed files with 219 additions and 4 deletions

192
Cargo.lock generated
View File

@ -23,6 +23,21 @@ dependencies = [
"memchr",
]
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@ -35,18 +50,45 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bumpalo"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"time",
"wasm-bindgen",
"winapi",
]
[[package]]
name = "console"
version = "0.15.7"
@ -65,6 +107,12 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
[[package]]
name = "core-foundation-sys"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
[[package]]
name = "cssparser"
version = "0.29.6"
@ -212,6 +260,29 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "iana-time-zone"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"windows",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "insta"
version = "1.29.0"
@ -231,6 +302,15 @@ version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
[[package]]
name = "js-sys"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -309,6 +389,15 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.18.0"
@ -776,6 +865,7 @@ dependencies = [
name = "tildes-parser"
version = "0.1.0"
dependencies = [
"chrono",
"duplicate",
"insta",
"lazy_static",
@ -784,6 +874,17 @@ dependencies = [
"thiserror",
]
[[package]]
name = "time"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "unicode-ident"
version = "1.0.9"
@ -814,12 +915,103 @@ version = "0.9.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.18",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
dependencies = [
"windows-targets 0.48.0",
]
[[package]]
name = "windows-sys"
version = "0.45.0"

View File

@ -11,6 +11,7 @@ edition = "2021"
path = "source/lib.rs"
[dependencies]
chrono = "0.4.26"
duplicate = "^1.0.0"
lazy_static = "^1.4.0"
regex = "^1.8.4"

View File

@ -32,6 +32,9 @@ lazy_static! {
/// Selector for the site header context.
pub static ref SITE_HEADER_CONTEXT: Selector = selector(".site-header-context");
/// Selector for `<time>` elements that have a `datetime` attribute.
pub static ref TIME_WITH_DATETIME: Selector = selector("time[datetime]");
/// Selector for the topic comment count.
pub static ref TOPIC_COMMENT_COUNT: Selector = selector(".topic-comments-header h2");

View File

@ -1,13 +1,16 @@
//! Parsing for `/~<group>/<topic-id>`.
use scraper::Html;
use {
chrono::{DateTime, FixedOffset},
scraper::Html,
};
use crate::{
regexes::DUPLICATE_WHITESPACE_RE,
selectors::{
SITE_HEADER_CONTEXT, TOPIC_COMMENT_COUNT, TOPIC_FULL_BYLINE,
TOPIC_FULL_LINK, TOPIC_FULL_TAGS, TOPIC_FULL_TEXT, TOPIC_MAIN_ARTICLE,
TOPIC_TOAST_WARNING, TOPIC_VOTE_COUNT,
SITE_HEADER_CONTEXT, TIME_WITH_DATETIME, TOPIC_COMMENT_COUNT,
TOPIC_FULL_BYLINE, TOPIC_FULL_LINK, TOPIC_FULL_TAGS, TOPIC_FULL_TEXT,
TOPIC_MAIN_ARTICLE, TOPIC_TOAST_WARNING, TOPIC_VOTE_COUNT,
},
utilities::select_first_element_text,
ParseError,
@ -42,6 +45,9 @@ pub struct Topic {
/// is in the topic listing. See #787 in the Tildes issue tracker.
pub is_official: bool,
/// The date the topic was posted.
pub posted_date: DateTime<FixedOffset>,
/// All tags applied to the topic.
pub tags: Vec<String>,
@ -161,6 +167,14 @@ impl Topic {
.map(|toast| toast.contains("This topic is locked."))
.unwrap_or_default();
let posted_date = topic_article_element
.select(&TOPIC_FULL_BYLINE)
.next()
.and_then(|byline| byline.select(&TIME_WITH_DATETIME).next())
.and_then(|time| time.value().attr("datetime"))
.and_then(|datetime| DateTime::parse_from_rfc3339(datetime).ok())
.ok_or(ParseError::MissingExpectedHtml)?;
let tags = topic_article_element
.select(&TOPIC_FULL_TAGS)
.map(|tag| tag.text().collect::<String>())
@ -180,6 +194,7 @@ impl Topic {
id,
is_locked,
is_official: false, // TODO: Implement this once it can be done.
posted_date,
tags,
vote_count,
};

View File

@ -10,6 +10,7 @@ Topic {
id: "666",
is_locked: false,
is_official: false,
posted_date: 2018-04-26T21:22:47+00:00,
tags: [],
vote_count: 0,
}

View File

@ -14,6 +14,7 @@ Topic {
id: "456",
is_locked: false,
is_official: false,
posted_date: 2021-12-16T17:26:03+00:00,
tags: [
"four",
"five",

View File

@ -12,6 +12,7 @@ Topic {
id: "789",
is_locked: false,
is_official: false,
posted_date: 2022-12-16T17:26:03+00:00,
tags: [
"recurring.weekly",
],

View File

@ -14,6 +14,7 @@ Topic {
id: "123",
is_locked: true,
is_official: false,
posted_date: 2022-12-16T17:26:03+00:00,
tags: [
"one",
"two",