Rewrite using askama and tildes-parser.
This commit is contained in:
parent
e0361b0631
commit
4808ed8244
|
@ -1,8 +1,9 @@
|
||||||
# Compiled files and executables.
|
# Generated by Cargo
|
||||||
/target/
|
debug/
|
||||||
|
target/
|
||||||
|
|
||||||
# Backup files generated by rustfmt.
|
# Code coverage results
|
||||||
**/*.rs.bk
|
coverage/
|
||||||
|
|
||||||
# The actual Sitemap, to be copied to https://tildes.net/~tildes/wiki/sitemap.
|
# Markdown output directory
|
||||||
sitemap.md
|
output/
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
17
Cargo.toml
17
Cargo.toml
|
@ -2,17 +2,24 @@
|
||||||
|
|
||||||
[package]
|
[package]
|
||||||
name = "tildes-wiki-sitemap"
|
name = "tildes-wiki-sitemap"
|
||||||
|
description = "Tildes Wiki sitemap generator."
|
||||||
|
repository = "https://git.bauke.xyz/Bauke/tildes-wiki-sitemap"
|
||||||
|
license = "AGPL-3.0-or-later"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
authors = ["Bauke <me@bauke.xyz>"]
|
authors = ["Bauke <me@bauke.xyz>"]
|
||||||
edition = "2018"
|
edition = "2021"
|
||||||
|
|
||||||
[[bin]]
|
[[bin]]
|
||||||
name = "tildes-wiki-sitemap"
|
name = "tildes-wiki-sitemap"
|
||||||
path = "source/main.rs"
|
path = "source/main.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
scraper = "0.12.0"
|
askama = "0.11.1"
|
||||||
|
color-eyre = "0.6.2"
|
||||||
|
indicatif = "0.17.1"
|
||||||
|
regex = "1.6.0"
|
||||||
|
ureq = "2.5.0"
|
||||||
|
|
||||||
[dependencies.reqwest]
|
[dependencies.tildes-parser]
|
||||||
version = "0.10.7"
|
git = "https://git.bauke.xyz/Bauke/tildes-parser.git"
|
||||||
features = ["blocking"]
|
rev = "08bf7ed"
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
[tasks.fmt]
|
||||||
|
command = "cargo"
|
||||||
|
args = ["fmt", "${@}"]
|
||||||
|
|
||||||
|
[tasks.check]
|
||||||
|
command = "cargo"
|
||||||
|
args = ["check", "${@}"]
|
||||||
|
|
||||||
|
[tasks.clippy]
|
||||||
|
command = "cargo"
|
||||||
|
args = ["clippy", "${@}"]
|
||||||
|
|
||||||
|
[tasks.test]
|
||||||
|
command = "cargo"
|
||||||
|
args = ["test", "${@}"]
|
||||||
|
|
||||||
|
[tasks.doc]
|
||||||
|
command = "cargo"
|
||||||
|
args = ["doc", "${@}"]
|
||||||
|
|
||||||
|
[tasks.build]
|
||||||
|
command = "cargo"
|
||||||
|
args = ["build", "${@}"]
|
||||||
|
|
||||||
|
[tasks.complete-check]
|
||||||
|
dependencies = ["fmt", "check", "clippy", "test", "doc", "build"]
|
||||||
|
|
||||||
|
[tasks.code-coverage]
|
||||||
|
workspace = false
|
||||||
|
install_crate = "cargo-tarpaulin"
|
||||||
|
command = "cargo"
|
||||||
|
args = [
|
||||||
|
"tarpaulin",
|
||||||
|
"--exclude-files=target/*",
|
||||||
|
"--out=html",
|
||||||
|
"--output-dir=coverage",
|
||||||
|
"--skip-clean",
|
||||||
|
"--target-dir=target/tarpaulin"
|
||||||
|
]
|
|
@ -0,0 +1,2 @@
|
||||||
|
[general]
|
||||||
|
dirs = ["source/templates"]
|
146
source/main.rs
146
source/main.rs
|
@ -1,97 +1,75 @@
|
||||||
use std::{error::Error, fs, thread, time::Duration};
|
//! # Tildes Wiki Sitemap
|
||||||
|
//!
|
||||||
|
//! > **Tildes Wiki sitemap generator.**
|
||||||
|
|
||||||
use reqwest::blocking::Client;
|
#![forbid(unsafe_code)]
|
||||||
use scraper::{ElementRef, Html, Selector};
|
#![warn(missing_docs, clippy::missing_docs_in_private_items)]
|
||||||
|
|
||||||
fn main() -> Result<(), Box<dyn Error>> {
|
use {
|
||||||
let client = Client::builder()
|
askama::Template,
|
||||||
|
color_eyre::Result,
|
||||||
|
indicatif::{ProgressIterator, ProgressStyle},
|
||||||
|
regex::Regex,
|
||||||
|
tildes_parser::{Group, GroupList, Html},
|
||||||
|
};
|
||||||
|
|
||||||
|
mod templates;
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
color_eyre::install()?;
|
||||||
|
|
||||||
|
let http = ureq::AgentBuilder::new()
|
||||||
.user_agent("Tildes Wiki Sitemap")
|
.user_agent("Tildes Wiki Sitemap")
|
||||||
.build()?;
|
.build();
|
||||||
|
|
||||||
// Get the HTML from the groups list.
|
// Shorthand to download a URL and parse it to `Html`.
|
||||||
let response = client.get("https://tildes.net/groups").send()?;
|
let download_html = |url: &str| -> Result<Html> {
|
||||||
let body = response.text()?;
|
Ok(Html::parse_document(&http.get(url).call()?.into_string()?))
|
||||||
|
};
|
||||||
|
|
||||||
// Parse the HTML.
|
let group_list =
|
||||||
let html = Html::parse_document(&body);
|
GroupList::from_html(&download_html("https://tildes.net/groups")?)?;
|
||||||
|
|
||||||
// Create a selector to grab all anchors that link to a group.
|
let groups = group_list
|
||||||
let selector = Selector::parse(".group-list .link-group").unwrap();
|
.summaries
|
||||||
|
.into_iter()
|
||||||
|
.progress_with_style(ProgressStyle::with_template(
|
||||||
|
"{spinner} {pos}/{len} {bar}",
|
||||||
|
)?)
|
||||||
|
.map(|summary| {
|
||||||
|
// Sleep 500 milliseconds between HTTP requests.
|
||||||
|
std::thread::sleep(std::time::Duration::from_millis(500));
|
||||||
|
|
||||||
// Get all the group link elements from the HTML.
|
Group::from_html(&download_html(&format!(
|
||||||
let group_links = html.select(&selector).collect::<Vec<ElementRef>>();
|
"https://tildes.net/{}",
|
||||||
|
summary.name
|
||||||
|
))?)
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>>>()?;
|
||||||
|
|
||||||
// Create the sitemap with the info.
|
let wiki_link_count = groups
|
||||||
let mut sitemap = "# Tildes Wiki Sitemap\n\n".to_string();
|
.iter()
|
||||||
sitemap += "Automatically generated by \
|
.map(|group| group.wiki_links.len())
|
||||||
[this program](https://git.holllo.cc/Bauke/tildes-wiki-sitemap). \
|
.sum::<usize>();
|
||||||
[message @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap\
|
|
||||||
&message=Update%20the%20sitemap%20you%20doofus!) if this page is outdated and \
|
|
||||||
you can't run the program yourself.\n\n\
|
|
||||||
This page is a temporary placeholder to help wiki contributors navigate. \
|
|
||||||
Find this page easily by bookmarking it!\n\n";
|
|
||||||
|
|
||||||
for group_link in group_links {
|
println!(
|
||||||
// Get the group name without the tilde.
|
"Collected {} groups and {} wiki links.",
|
||||||
let group_name = group_link.inner_html()[1..].to_string();
|
groups.len(),
|
||||||
println!("┌ Processing ~{}!", group_name);
|
wiki_link_count,
|
||||||
|
);
|
||||||
|
|
||||||
// Get the HTML from the group page.
|
std::fs::create_dir_all("output")?;
|
||||||
let response = client
|
std::fs::write("output/sitemap.md", render_sitemap(groups)?)?;
|
||||||
.get(&format!("https://tildes.net/~{}", group_name))
|
|
||||||
.send()?;
|
|
||||||
let body = response.text()?;
|
|
||||||
|
|
||||||
// Parse the HTML.
|
|
||||||
let html = Html::parse_document(&body);
|
|
||||||
|
|
||||||
// Create a selector to grab all the anchors in the sidebar that lead to a wiki page.
|
|
||||||
let selector =
|
|
||||||
Selector::parse("#sidebar .nav a[href*=\"/wiki/\"]").unwrap();
|
|
||||||
|
|
||||||
// Get all the wiki URL elements from the HTML.
|
|
||||||
let wiki_links = html.select(&selector).collect::<Vec<ElementRef>>();
|
|
||||||
let wiki_links_amount = wiki_links.len();
|
|
||||||
|
|
||||||
// Create a selector to grab the group description.
|
|
||||||
let selector =
|
|
||||||
Selector::parse("#sidebar .group-short-description").unwrap();
|
|
||||||
|
|
||||||
// Get the group description from the HTML.
|
|
||||||
let group_description = html.select(&selector).collect::<Vec<ElementRef>>();
|
|
||||||
|
|
||||||
// Add the group as a new header.
|
|
||||||
sitemap += format!("## ~{}\n\n", group_name).as_str();
|
|
||||||
|
|
||||||
// If a group description is found, add it to the Markdown.
|
|
||||||
if !group_description.is_empty() {
|
|
||||||
let description = group_description.first().unwrap().inner_html();
|
|
||||||
sitemap += format!("> {}\n\n", description).as_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If there's no wiki pages, add a little blurb with a link to create one.
|
|
||||||
if wiki_links.is_empty() {
|
|
||||||
sitemap += format!("There are no wiki pages for ~{} yet, \
|
|
||||||
[click here and be the first to create one](https://tildes.net/~{}/wiki/new_page), \
|
|
||||||
if you were granted the necessary permission to do so!\n", group_name, group_name).as_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Loop over the links and add them in a list.
|
|
||||||
for wiki_link in wiki_links {
|
|
||||||
let wiki_page_title = wiki_link.inner_html();
|
|
||||||
let wiki_page_link = wiki_link.value().attr("href").unwrap_or("");
|
|
||||||
sitemap += &format!("* [{}]({})\n", wiki_page_title, wiki_page_link);
|
|
||||||
}
|
|
||||||
|
|
||||||
sitemap += "\n";
|
|
||||||
println!("└ Processed {} wiki links.", wiki_links_amount);
|
|
||||||
|
|
||||||
// Sleep 500ms between HTTP requests.
|
|
||||||
thread::sleep(Duration::from_millis(500));
|
|
||||||
}
|
|
||||||
|
|
||||||
sitemap = sitemap.trim_end().to_string() + "\n";
|
|
||||||
fs::write("./sitemap.md", sitemap)?;
|
|
||||||
println!("✓ Done!");
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn render_sitemap(groups: Vec<Group>) -> Result<String> {
|
||||||
|
let duplicate_newline_re = Regex::new("\n\n\n+").unwrap();
|
||||||
|
|
||||||
|
Ok(
|
||||||
|
duplicate_newline_re
|
||||||
|
.replace_all(&templates::SitemapTemplate { groups }.render()?, "\n\n")
|
||||||
|
.to_string(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
//! Askama templates.
|
||||||
|
|
||||||
|
use {askama::Template, tildes_parser::Group};
|
||||||
|
|
||||||
|
/// The template for `sitemap.md`.
|
||||||
|
#[derive(Debug, Template)]
|
||||||
|
#[template(path = "sitemap.md")]
|
||||||
|
pub struct SitemapTemplate {
|
||||||
|
/// All groups to render in the sitemap.
|
||||||
|
pub groups: Vec<Group>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Renders a fixed set of example groups and writes the result to
/// `output/sitemap-test.md`, covering groups with and without a description
/// and with and without wiki links.
#[test]
fn test_sitemap_template() -> color_eyre::Result<()> {
  // Builds the pair of identical wiki links shared by the example groups.
  let example_links = || -> Vec<tildes_parser::GroupWikiLink> {
    (0..2)
      .map(|_| tildes_parser::GroupWikiLink {
        name: "Example Page".to_string(),
        url: "https://example.org/~example/wiki/example_page".to_string(),
      })
      .collect()
  };

  // Builds an example group that only varies in description and wiki links.
  let example_group =
    |description: Option<&str>,
     wiki_links: Vec<tildes_parser::GroupWikiLink>| Group {
      description: description.map(|text| text.to_string()),
      name: "~example".to_string(),
      sub_groups: vec![],
      subscribers: 12345,
      wiki_links,
    };

  let groups = vec![
    example_group(Some("Example group description."), example_links()),
    example_group(Some("Example group description."), example_links()),
    example_group(None, vec![]),
    example_group(None, example_links()),
  ];

  let rendered = crate::render_sitemap(groups)?;
  std::fs::create_dir_all("output")?;
  std::fs::write("output/sitemap-test.md", rendered)?;

  Ok(())
}
|
|
@ -0,0 +1,27 @@
|
||||||
|
# Tildes Wiki Sitemap
|
||||||
|
|
||||||
|
This page is a temporary placeholder to help wiki contributors navigate. Find this page easily by bookmarking it!
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>Generating this page</summary>
|
||||||
|
|
||||||
|
The sitemap can be automatically generated by [tildes-wiki-sitemap](https://git.bauke.xyz/Bauke/tildes-wiki-sitemap).
|
||||||
|
|
||||||
|
If this page is outdated and you can't update it yourself, feel free to [message @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap&message=Update%20the%20sitemap%20you%20doofus!).
|
||||||
|
</details>
|
||||||
|
|
||||||
|
{% for group in groups %}
|
||||||
|
## {{ group.name }}
|
||||||
|
|
||||||
|
{% if let Some(description) = group.description -%}
|
||||||
|
> **{{ description }}**
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
{% if group.wiki_links.is_empty() -%}
|
||||||
|
There are no wiki pages for {{ group.name }} yet, [click here](https://tildes.net/{{ group.name }}/wiki/new_page) if you want to create one.
|
||||||
|
{%- endif %}
|
||||||
|
|
||||||
|
{% for link in group.wiki_links -%}
|
||||||
|
* [{{ link.name }}]({{ link.url }})
|
||||||
|
{% endfor -%}
|
||||||
|
{% endfor %}
|
Loading…
Reference in New Issue