Rewrite using askama and tildes-parser.
This commit is contained in:
parent
e0361b0631
commit
4808ed8244
|
@ -1,8 +1,9 @@
|
|||
# Compiled files and executables.
|
||||
/target/
|
||||
# Generated by Cargo
|
||||
debug/
|
||||
target/
|
||||
|
||||
# Backup files generated by rustfmt.
|
||||
**/*.rs.bk
|
||||
# Code coverage results
|
||||
coverage/
|
||||
|
||||
# The actual Sitemap, to be copied to https://tildes.net/~tildes/wiki/sitemap.
|
||||
sitemap.md
|
||||
# Markdown output directory
|
||||
output/
|
||||
|
|
File diff suppressed because it is too large
Load Diff
17
Cargo.toml
17
Cargo.toml
|
@ -2,17 +2,24 @@
|
|||
|
||||
[package]
|
||||
name = "tildes-wiki-sitemap"
|
||||
description = "Tildes Wiki sitemap generator."
|
||||
repository = "https://git.bauke.xyz/Bauke/tildes-wiki-sitemap"
|
||||
license = "AGPL-3.0-or-later"
|
||||
version = "0.1.0"
|
||||
authors = ["Bauke <me@bauke.xyz>"]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "tildes-wiki-sitemap"
|
||||
path = "source/main.rs"
|
||||
|
||||
[dependencies]
|
||||
scraper = "0.12.0"
|
||||
askama = "0.11.1"
|
||||
color-eyre = "0.6.2"
|
||||
indicatif = "0.17.1"
|
||||
regex = "1.6.0"
|
||||
ureq = "2.5.0"
|
||||
|
||||
[dependencies.reqwest]
|
||||
version = "0.10.7"
|
||||
features = ["blocking"]
|
||||
[dependencies.tildes-parser]
|
||||
git = "https://git.bauke.xyz/Bauke/tildes-parser.git"
|
||||
rev = "08bf7ed"
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
[tasks.fmt]
|
||||
command = "cargo"
|
||||
args = ["fmt", "${@}"]
|
||||
|
||||
[tasks.check]
|
||||
command = "cargo"
|
||||
args = ["check", "${@}"]
|
||||
|
||||
[tasks.clippy]
|
||||
command = "cargo"
|
||||
args = ["clippy", "${@}"]
|
||||
|
||||
[tasks.test]
|
||||
command = "cargo"
|
||||
args = ["test", "${@}"]
|
||||
|
||||
[tasks.doc]
|
||||
command = "cargo"
|
||||
args = ["doc", "${@}"]
|
||||
|
||||
[tasks.build]
|
||||
command = "cargo"
|
||||
args = ["build", "${@}"]
|
||||
|
||||
[tasks.complete-check]
|
||||
dependencies = ["fmt", "check", "clippy", "test", "doc", "build"]
|
||||
|
||||
[tasks.code-coverage]
|
||||
workspace = false
|
||||
install_crate = "cargo-tarpaulin"
|
||||
command = "cargo"
|
||||
args = [
|
||||
"tarpaulin",
|
||||
"--exclude-files=target/*",
|
||||
"--out=html",
|
||||
"--output-dir=coverage",
|
||||
"--skip-clean",
|
||||
"--target-dir=target/tarpaulin"
|
||||
]
|
|
@ -0,0 +1,2 @@
|
|||
[general]
|
||||
dirs = ["source/templates"]
|
146
source/main.rs
146
source/main.rs
|
@ -1,97 +1,75 @@
|
|||
use std::{error::Error, fs, thread, time::Duration};
|
||||
//! # Tildes Wiki Sitemap
|
||||
//!
|
||||
//! > **Tildes Wiki sitemap generator.**
|
||||
|
||||
use reqwest::blocking::Client;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
#![forbid(unsafe_code)]
|
||||
#![warn(missing_docs, clippy::missing_docs_in_private_items)]
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
let client = Client::builder()
|
||||
use {
|
||||
askama::Template,
|
||||
color_eyre::Result,
|
||||
indicatif::{ProgressIterator, ProgressStyle},
|
||||
regex::Regex,
|
||||
tildes_parser::{Group, GroupList, Html},
|
||||
};
|
||||
|
||||
mod templates;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
color_eyre::install()?;
|
||||
|
||||
let http = ureq::AgentBuilder::new()
|
||||
.user_agent("Tildes Wiki Sitemap")
|
||||
.build()?;
|
||||
.build();
|
||||
|
||||
// Get the HTML from the groups list.
|
||||
let response = client.get("https://tildes.net/groups").send()?;
|
||||
let body = response.text()?;
|
||||
// Shorthand to download a URL and parse it to `Html`.
|
||||
let download_html = |url: &str| -> Result<Html> {
|
||||
Ok(Html::parse_document(&http.get(url).call()?.into_string()?))
|
||||
};
|
||||
|
||||
// Parse the HTML.
|
||||
let html = Html::parse_document(&body);
|
||||
let group_list =
|
||||
GroupList::from_html(&download_html("https://tildes.net/groups")?)?;
|
||||
|
||||
// Create a selector to grab all anchors that link to a group.
|
||||
let selector = Selector::parse(".group-list .link-group").unwrap();
|
||||
let groups = group_list
|
||||
.summaries
|
||||
.into_iter()
|
||||
.progress_with_style(ProgressStyle::with_template(
|
||||
"{spinner} {pos}/{len} {bar}",
|
||||
)?)
|
||||
.map(|summary| {
|
||||
// Sleep 500 milliseconds between HTTP requests.
|
||||
std::thread::sleep(std::time::Duration::from_millis(500));
|
||||
|
||||
// Get all the group link elements from the HTML.
|
||||
let group_links = html.select(&selector).collect::<Vec<ElementRef>>();
|
||||
Group::from_html(&download_html(&format!(
|
||||
"https://tildes.net/{}",
|
||||
summary.name
|
||||
))?)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
// Create the sitemap with the info.
|
||||
let mut sitemap = "# Tildes Wiki Sitemap\n\n".to_string();
|
||||
sitemap += "Automatically generated by \
|
||||
[this program](https://git.holllo.cc/Bauke/tildes-wiki-sitemap). \
|
||||
[message @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap\
|
||||
&message=Update%20the%20sitemap%20you%20doofus!) if this page is outdated and \
|
||||
you can't run the program yourself.\n\n\
|
||||
This page is a temporary placeholder to help wiki contributors navigate. \
|
||||
Find this page easily by bookmarking it!\n\n";
|
||||
let wiki_link_count = groups
|
||||
.iter()
|
||||
.map(|group| group.wiki_links.len())
|
||||
.sum::<usize>();
|
||||
|
||||
for group_link in group_links {
|
||||
// Get the group name without the tilde.
|
||||
let group_name = group_link.inner_html()[1..].to_string();
|
||||
println!("┌ Processing ~{}!", group_name);
|
||||
println!(
|
||||
"Collected {} groups and {} wiki links.",
|
||||
groups.len(),
|
||||
wiki_link_count,
|
||||
);
|
||||
|
||||
// Get the HTML from the group page.
|
||||
let response = client
|
||||
.get(&format!("https://tildes.net/~{}", group_name))
|
||||
.send()?;
|
||||
let body = response.text()?;
|
||||
std::fs::create_dir_all("output")?;
|
||||
std::fs::write("output/sitemap.md", render_sitemap(groups)?)?;
|
||||
|
||||
// Parse the HTML.
|
||||
let html = Html::parse_document(&body);
|
||||
|
||||
// Create a selector to grab all the anchors in the sidebar that lead to a wiki page.
|
||||
let selector =
|
||||
Selector::parse("#sidebar .nav a[href*=\"/wiki/\"]").unwrap();
|
||||
|
||||
// Get all the wiki URL elements from the HTML.
|
||||
let wiki_links = html.select(&selector).collect::<Vec<ElementRef>>();
|
||||
let wiki_links_amount = wiki_links.len();
|
||||
|
||||
// Create a selector to grab the group description.
|
||||
let selector =
|
||||
Selector::parse("#sidebar .group-short-description").unwrap();
|
||||
|
||||
// Get the group description from the HTML.
|
||||
let group_description = html.select(&selector).collect::<Vec<ElementRef>>();
|
||||
|
||||
// Add the group as a new header.
|
||||
sitemap += format!("## ~{}\n\n", group_name).as_str();
|
||||
|
||||
// If a group description is found, add it to the Markdown.
|
||||
if !group_description.is_empty() {
|
||||
let description = group_description.first().unwrap().inner_html();
|
||||
sitemap += format!("> {}\n\n", description).as_str();
|
||||
}
|
||||
|
||||
// If there's no wiki pages, add a little blurb with a link to create one.
|
||||
if wiki_links.is_empty() {
|
||||
sitemap += format!("There are no wiki pages for ~{} yet, \
|
||||
[click here and be the first to create one](https://tildes.net/~{}/wiki/new_page), \
|
||||
if you were granted the necessary permission to do so!\n", group_name, group_name).as_str();
|
||||
}
|
||||
|
||||
// Loop over the links and add them in a list.
|
||||
for wiki_link in wiki_links {
|
||||
let wiki_page_title = wiki_link.inner_html();
|
||||
let wiki_page_link = wiki_link.value().attr("href").unwrap_or("");
|
||||
sitemap += &format!("* [{}]({})\n", wiki_page_title, wiki_page_link);
|
||||
}
|
||||
|
||||
sitemap += "\n";
|
||||
println!("└ Processed {} wiki links.", wiki_links_amount);
|
||||
|
||||
// Sleep 500ms between HTTP requests.
|
||||
thread::sleep(Duration::from_millis(500));
|
||||
}
|
||||
|
||||
sitemap = sitemap.trim_end().to_string() + "\n";
|
||||
fs::write("./sitemap.md", sitemap)?;
|
||||
println!("✓ Done!");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn render_sitemap(groups: Vec<Group>) -> Result<String> {
|
||||
let duplicate_newline_re = Regex::new("\n\n\n+").unwrap();
|
||||
|
||||
Ok(
|
||||
duplicate_newline_re
|
||||
.replace_all(&templates::SitemapTemplate { groups }.render()?, "\n\n")
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
//! Askama templates.
|
||||
|
||||
use {askama::Template, tildes_parser::Group};
|
||||
|
||||
/// The template for `sitemap.md`.
|
||||
#[derive(Debug, Template)]
|
||||
#[template(path = "sitemap.md")]
|
||||
pub struct SitemapTemplate {
|
||||
/// All groups to render in the sitemap.
|
||||
pub groups: Vec<Group>,
|
||||
}
|
||||
|
||||
/// Renders the sitemap for a fixed set of example groups and writes the result
/// to `output/sitemap-test.md`. The test only fails when rendering or writing
/// the file returns an error; the output itself is not asserted on.
#[test]
fn test_sitemap_template() -> color_eyre::Result<()> {
  // Every example wiki link is identical, so build them in one place.
  let example_link = || tildes_parser::GroupWikiLink {
    name: "Example Page".to_string(),
    url: "https://example.org/~example/wiki/example_page".to_string(),
  };

  // The example groups only differ in whether they carry a description and
  // whether they have wiki links.
  let example_group = |with_description: bool, with_links: bool| Group {
    description: with_description
      .then(|| "Example group description.".to_string()),
    name: "~example".to_string(),
    sub_groups: vec![],
    subscribers: 12345,
    wiki_links: if with_links {
      vec![example_link(), example_link()]
    } else {
      vec![]
    },
  };

  let groups = vec![
    example_group(true, true),
    example_group(true, true),
    example_group(false, false),
    example_group(false, true),
  ];

  let rendered = crate::render_sitemap(groups)?;

  std::fs::create_dir_all("output")?;
  std::fs::write("output/sitemap-test.md", rendered)?;

  Ok(())
}
|
|
@ -0,0 +1,27 @@
|
|||
# Tildes Wiki Sitemap
|
||||
|
||||
This page is a temporary placeholder to help wiki contributors navigate. Find this page easily by bookmarking it!
|
||||
|
||||
<details>
|
||||
<summary>Generating this page</summary>
|
||||
|
||||
The sitemap can be automatically generated by [tildes-wiki-sitemap](https://git.bauke.xyz/Bauke/tildes-wiki-sitemap).
|
||||
|
||||
If this page is outdated and you can't update it yourself, feel free to [message @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap&message=Update%20the%20sitemap%20you%20doofus!).
|
||||
</details>
|
||||
|
||||
{% for group in groups %}
|
||||
## {{ group.name }}
|
||||
|
||||
{% if let Some(description) = group.description -%}
|
||||
> **{{ description }}**
|
||||
{%- endif %}
|
||||
|
||||
{% if group.wiki_links.is_empty() -%}
|
||||
There are no wiki pages for {{ group.name }} yet, [click here](https://tildes.net/{{ group.name }}/wiki/new_page) if you want to create one.
|
||||
{%- endif %}
|
||||
|
||||
{% for link in group.wiki_links -%}
|
||||
* [{{ link.name }}]({{ link.url }})
|
||||
{% endfor -%}
|
||||
{% endfor %}
|
Loading…
Reference in New Issue