76 lines
1.7 KiB
Rust
76 lines
1.7 KiB
Rust
//! # Tildes Wiki Sitemap
|
|
//!
|
|
//! > **Tildes Wiki sitemap generator.**
|
|
|
|
#![forbid(unsafe_code)]
|
|
#![warn(missing_docs, clippy::missing_docs_in_private_items)]
|
|
|
|
use {
|
|
askama::Template,
|
|
color_eyre::Result,
|
|
indicatif::{ProgressIterator, ProgressStyle},
|
|
regex::Regex,
|
|
tildes_parser::{Group, GroupList, Html},
|
|
};
|
|
|
|
mod templates;
|
|
|
|
fn main() -> Result<()> {
|
|
color_eyre::install()?;
|
|
|
|
let http = ureq::AgentBuilder::new()
|
|
.user_agent("Tildes Wiki Sitemap")
|
|
.build();
|
|
|
|
// Shorthand to download a URL and parse it to `Html`.
|
|
let download_html = |url: &str| -> Result<Html> {
|
|
Ok(Html::parse_document(&http.get(url).call()?.into_string()?))
|
|
};
|
|
|
|
let group_list =
|
|
GroupList::from_html(&download_html("https://tildes.net/groups")?)?;
|
|
|
|
let groups = group_list
|
|
.summaries
|
|
.into_iter()
|
|
.progress_with_style(ProgressStyle::with_template(
|
|
"{spinner} {pos}/{len} {bar}",
|
|
)?)
|
|
.map(|summary| {
|
|
// Sleep 500 milliseconds between HTTP requests.
|
|
std::thread::sleep(std::time::Duration::from_millis(500));
|
|
|
|
Group::from_html(&download_html(&format!(
|
|
"https://tildes.net/{}",
|
|
summary.name
|
|
))?)
|
|
})
|
|
.collect::<Result<Vec<_>>>()?;
|
|
|
|
let wiki_link_count = groups
|
|
.iter()
|
|
.map(|group| group.wiki_links.len())
|
|
.sum::<usize>();
|
|
|
|
println!(
|
|
"Collected {} groups and {} wiki links.",
|
|
groups.len(),
|
|
wiki_link_count,
|
|
);
|
|
|
|
std::fs::create_dir_all("output")?;
|
|
std::fs::write("output/sitemap.md", render_sitemap(groups)?)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn render_sitemap(groups: Vec<Group>) -> Result<String> {
|
|
let duplicate_newline_re = Regex::new("\n\n\n+").unwrap();
|
|
|
|
Ok(
|
|
duplicate_newline_re
|
|
.replace_all(&templates::SitemapTemplate { groups }.render()?, "\n\n")
|
|
.to_string(),
|
|
)
|
|
}
|