1
Fork 0

Rewrite using askama and tildes-parser.

This commit is contained in:
Bauke 2022-10-03 23:38:25 +02:00
parent e0361b0631
commit 4808ed8244
Signed by: Bauke
GPG Key ID: C1C0F29952BCF558
8 changed files with 958 additions and 771 deletions

13
.gitignore vendored
View File

@ -1,8 +1,9 @@
# Compiled files and executables.
/target/
# Generated by Cargo
debug/
target/
# Backup files generated by rustfmt.
**/*.rs.bk
# Code coverage results
coverage/
# The actual Sitemap, to be copied to https://tildes.net/~tildes/wiki/sitemap.
sitemap.md
# Markdown output directory
output/

1408
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -2,17 +2,24 @@
[package]
name = "tildes-wiki-sitemap"
description = "Tildes Wiki sitemap generator."
repository = "https://git.bauke.xyz/Bauke/tildes-wiki-sitemap"
license = "AGPL-3.0-or-later"
version = "0.1.0"
authors = ["Bauke <me@bauke.xyz>"]
edition = "2018"
edition = "2021"
[[bin]]
name = "tildes-wiki-sitemap"
path = "source/main.rs"
[dependencies]
scraper = "0.12.0"
askama = "0.11.1"
color-eyre = "0.6.2"
indicatif = "0.17.1"
regex = "1.6.0"
ureq = "2.5.0"
[dependencies.reqwest]
version = "0.10.7"
features = ["blocking"]
[dependencies.tildes-parser]
git = "https://git.bauke.xyz/Bauke/tildes-parser.git"
rev = "08bf7ed"

39
Makefile.toml Normal file
View File

@ -0,0 +1,39 @@
# cargo-make task definitions. Each basic task wraps one Cargo subcommand;
# "${@}" passes along any extra command-line arguments given to the task.

# Format the code with `cargo fmt`.
[tasks.fmt]
command = "cargo"
args = ["fmt", "${@}"]
# Type-check the code with `cargo check`.
[tasks.check]
command = "cargo"
args = ["check", "${@}"]
# Lint the code with `cargo clippy`.
[tasks.clippy]
command = "cargo"
args = ["clippy", "${@}"]
# Run the tests with `cargo test`.
[tasks.test]
command = "cargo"
args = ["test", "${@}"]
# Build the documentation with `cargo doc`.
[tasks.doc]
command = "cargo"
args = ["doc", "${@}"]
# Compile the project with `cargo build`.
[tasks.build]
command = "cargo"
args = ["build", "${@}"]
# Aggregate task with no command of its own: it only lists the tasks above as
# dependencies so that they all run.
[tasks.complete-check]
dependencies = ["fmt", "check", "clippy", "test", "doc", "build"]
# Generate an HTML code coverage report with cargo-tarpaulin.
[tasks.code-coverage]
workspace = false
# Names the crate that provides the `cargo tarpaulin` subcommand.
install_crate = "cargo-tarpaulin"
command = "cargo"
args = [
"tarpaulin",
# Leave build artifacts out of the coverage figures.
"--exclude-files=target/*",
# Write the report as HTML into the `coverage` directory.
"--out=html",
"--output-dir=coverage",
"--skip-clean",
# Use a dedicated target directory for tarpaulin builds.
"--target-dir=target/tarpaulin"
]

2
askama.toml Normal file
View File

@ -0,0 +1,2 @@
# Askama configuration: directories searched for template files.
[general]
dirs = ["source/templates"]

View File

@ -1,97 +1,75 @@
use std::{error::Error, fs, thread, time::Duration};
//! # Tildes Wiki Sitemap
//!
//! > **Tildes Wiki sitemap generator.**
use reqwest::blocking::Client;
use scraper::{ElementRef, Html, Selector};
#![forbid(unsafe_code)]
#![warn(missing_docs, clippy::missing_docs_in_private_items)]
fn main() -> Result<(), Box<dyn Error>> {
let client = Client::builder()
use {
askama::Template,
color_eyre::Result,
indicatif::{ProgressIterator, ProgressStyle},
regex::Regex,
tildes_parser::{Group, GroupList, Html},
};
mod templates;
fn main() -> Result<()> {
color_eyre::install()?;
let http = ureq::AgentBuilder::new()
.user_agent("Tildes Wiki Sitemap")
.build()?;
.build();
// Get the HTML from the groups list.
let response = client.get("https://tildes.net/groups").send()?;
let body = response.text()?;
// Shorthand to download a URL and parse it to `Html`.
let download_html = |url: &str| -> Result<Html> {
Ok(Html::parse_document(&http.get(url).call()?.into_string()?))
};
// Parse the HTML.
let html = Html::parse_document(&body);
let group_list =
GroupList::from_html(&download_html("https://tildes.net/groups")?)?;
// Create a selector to grab all anchors that link to a group.
let selector = Selector::parse(".group-list .link-group").unwrap();
let groups = group_list
.summaries
.into_iter()
.progress_with_style(ProgressStyle::with_template(
"{spinner} {pos}/{len} {bar}",
)?)
.map(|summary| {
// Sleep 500 milliseconds between HTTP requests.
std::thread::sleep(std::time::Duration::from_millis(500));
// Get all the group link elements from the HTML.
let group_links = html.select(&selector).collect::<Vec<ElementRef>>();
Group::from_html(&download_html(&format!(
"https://tildes.net/{}",
summary.name
))?)
})
.collect::<Result<Vec<_>>>()?;
// Create the sitemap with the info.
let mut sitemap = "# Tildes Wiki Sitemap\n\n".to_string();
sitemap += "Automatically generated by \
[this program](https://git.holllo.cc/Bauke/tildes-wiki-sitemap). \
[message @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap\
&message=Update%20the%20sitemap%20you%20doofus!) if this page is outdated and \
you can't run the program yourself.\n\n\
This page is a temporary placeholder to help wiki contributors navigate. \
Find this page easily by bookmarking it!\n\n";
let wiki_link_count = groups
.iter()
.map(|group| group.wiki_links.len())
.sum::<usize>();
for group_link in group_links {
// Get the group name without the tilde.
let group_name = group_link.inner_html()[1..].to_string();
println!("┌ Processing ~{}!", group_name);
println!(
"Collected {} groups and {} wiki links.",
groups.len(),
wiki_link_count,
);
// Get the HTML from the group page.
let response = client
.get(&format!("https://tildes.net/~{}", group_name))
.send()?;
let body = response.text()?;
std::fs::create_dir_all("output")?;
std::fs::write("output/sitemap.md", render_sitemap(groups)?)?;
// Parse the HTML.
let html = Html::parse_document(&body);
// Create a selector to grab all the anchors in the sidebar that lead to a wiki page.
let selector =
Selector::parse("#sidebar .nav a[href*=\"/wiki/\"]").unwrap();
// Get all the wiki URL elements from the HTML.
let wiki_links = html.select(&selector).collect::<Vec<ElementRef>>();
let wiki_links_amount = wiki_links.len();
// Create a selector to grab the group description.
let selector =
Selector::parse("#sidebar .group-short-description").unwrap();
// Get the group description from the HTML.
let group_description = html.select(&selector).collect::<Vec<ElementRef>>();
// Add the group as a new header.
sitemap += format!("## ~{}\n\n", group_name).as_str();
// If a group description is found, add it to the Markdown.
if !group_description.is_empty() {
let description = group_description.first().unwrap().inner_html();
sitemap += format!("> {}\n\n", description).as_str();
}
// If there's no wiki pages, add a little blurb with a link to create one.
if wiki_links.is_empty() {
sitemap += format!("There are no wiki pages for ~{} yet, \
[click here and be the first to create one](https://tildes.net/~{}/wiki/new_page), \
if you were granted the necessary permission to do so!\n", group_name, group_name).as_str();
}
// Loop over the links and add them in a list.
for wiki_link in wiki_links {
let wiki_page_title = wiki_link.inner_html();
let wiki_page_link = wiki_link.value().attr("href").unwrap_or("");
sitemap += &format!("* [{}]({})\n", wiki_page_title, wiki_page_link);
}
sitemap += "\n";
println!("└ Processed {} wiki links.", wiki_links_amount);
// Sleep 500ms between HTTP requests.
thread::sleep(Duration::from_millis(500));
}
sitemap = sitemap.trim_end().to_string() + "\n";
fs::write("./sitemap.md", sitemap)?;
println!("✓ Done!");
Ok(())
}
/// Renders the sitemap Markdown for the given groups.
///
/// The groups are passed to [`templates::SitemapTemplate`] and the rendered
/// text is then tidied up: every run of three or more consecutive newlines is
/// replaced by exactly two newlines.
///
/// Returns an error when rendering the template fails.
fn render_sitemap(groups: Vec<Group>) -> Result<String> {
  // The pattern is a constant, valid expression, so compiling it cannot fail.
  let excess_newlines = Regex::new("\n\n\n+").unwrap();

  let rendered = templates::SitemapTemplate { groups }.render()?;
  let collapsed = excess_newlines.replace_all(&rendered, "\n\n");

  Ok(collapsed.into_owned())
}

77
source/templates/mod.rs Normal file
View File

@ -0,0 +1,77 @@
//! Askama templates.
use {askama::Template, tildes_parser::Group};
/// The Askama template for `sitemap.md`.
///
/// Deriving [`Template`] with `path = "sitemap.md"` ties this struct to that
/// template file; the struct's fields are the data available to the template.
#[derive(Debug, Template)]
#[template(path = "sitemap.md")]
pub struct SitemapTemplate {
/// All groups to render in the sitemap.
pub groups: Vec<Group>,
}
/// Renders the sitemap for a fixed set of example groups and writes the
/// result to `output/sitemap-test.md`.
///
/// The fixture covers groups with and without a description as well as with
/// and without wiki links. The test only fails when rendering or writing the
/// file returns an error; the written output is not asserted on.
#[test]
fn test_sitemap_template() -> color_eyre::Result<()> {
  // Creates the wiki link that every example group reuses.
  let example_link = || tildes_parser::GroupWikiLink {
    name: "Example Page".to_string(),
    url: "https://example.org/~example/wiki/example_page".to_string(),
  };

  // Creates an example group with an optional description and the requested
  // number of (identical) wiki links.
  let example_group = |description: Option<&str>, link_count: usize| Group {
    description: description.map(|text| text.to_string()),
    name: "~example".to_string(),
    sub_groups: vec![],
    subscribers: 12345,
    wiki_links: (0..link_count).map(|_| example_link()).collect(),
  };

  let groups = vec![
    example_group(Some("Example group description."), 2),
    example_group(Some("Example group description."), 2),
    example_group(None, 0),
    example_group(None, 2),
  ];

  let rendered = crate::render_sitemap(groups)?;

  std::fs::create_dir_all("output")?;
  std::fs::write("output/sitemap-test.md", rendered)?;

  Ok(())
}

View File

@ -0,0 +1,27 @@
# Tildes Wiki Sitemap
This page is a temporary placeholder to help wiki contributors navigate. Find this page easily by bookmarking it!
<details>
<summary>Generating this page</summary>
The sitemap can be automatically generated by [tildes-wiki-sitemap](https://git.bauke.xyz/Bauke/tildes-wiki-sitemap).
If this page is outdated and you can't update it yourself, feel free to [message @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap&message=Update%20the%20sitemap%20you%20doofus!).
</details>
{% for group in groups %}
## {{ group.name }}
{% if let Some(description) = group.description -%}
> **{{ description }}**
{%- endif %}
{% if group.wiki_links.is_empty() -%}
There are no wiki pages for {{ group.name }} yet, [click here](https://tildes.net/{{ group.name }}/wiki/new_page) if you want to create one.
{%- endif %}
{% for link in group.wiki_links -%}
* [{{ link.name }}]({{ link.url }})
{% endfor -%}
{% endfor %}