1
Fork 0
tildes-wiki-sitemap/source/main.rs

98 lines
3.6 KiB
Rust

use std::{error::Error, fs, thread, time::Duration};
use reqwest::blocking::Client;
use scraper::{ElementRef, Html, Selector};
/// Builds a Markdown sitemap of the wiki pages of every Tildes group and writes it to
/// `./sitemap.md`.
///
/// The list of groups is scraped from `https://tildes.net/groups`. Each group's own page is then
/// fetched to read its short description and the wiki links shown in its sidebar. Progress is
/// printed to stdout, and the program pauses 500ms after each group request.
///
/// # Errors
///
/// Returns an error if the HTTP client cannot be built, if a request fails or is answered with a
/// 4xx/5xx status, if a response body cannot be read, or if the sitemap file cannot be written.
///
/// # Panics
///
/// Panics only if one of the hard-coded CSS selectors fails to parse, which would be a
/// programming error.
fn main() -> Result<(), Box<dyn Error>> {
    let client = Client::builder()
        .user_agent("Tildes Wiki Sitemap")
        .build()?;

    // All selectors are constant, so parse them once instead of on every loop iteration.
    // Anchors in the group list that link to a group.
    let group_link_selector = Selector::parse(".group-list .link-group")
        .expect("group link selector should be valid CSS");
    // Anchors in a group's sidebar that lead to a wiki page.
    let wiki_link_selector = Selector::parse("#sidebar .nav a[href*=\"/wiki/\"]")
        .expect("wiki link selector should be valid CSS");
    // The group's short description in the sidebar.
    let description_selector = Selector::parse("#sidebar .group-short-description")
        .expect("group description selector should be valid CSS");

    // Get the HTML from the groups list. Fail on a 4xx/5xx response instead of parsing an
    // error page as if it were the group list.
    let body = client
        .get("https://tildes.net/groups")
        .send()?
        .error_for_status()?
        .text()?;
    let groups_page = Html::parse_document(&body);

    // Get all the group link elements from the HTML.
    let group_links = groups_page
        .select(&group_link_selector)
        .collect::<Vec<ElementRef>>();

    // Create the sitemap with the introductory text.
    let mut sitemap = "# Tildes Wiki Sitemap\n\n".to_string();
    sitemap += "Automatically generated by \
        [this program](https://git.holllo.cc/Bauke/tildes-wiki-sitemap). \
        [message @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap\
        &message=Update%20the%20sitemap%20you%20doofus!) if this page is outdated and \
        you can't run the program yourself.\n\n\
        This page is a temporary placeholder to help wiki contributors navigate. \
        Find this page easily by bookmarking it!\n\n";

    for group_link in group_links {
        // Get the group name without the tilde. `strip_prefix` is used rather than slicing
        // at byte 1, which would panic on an empty label or a multi-byte first character.
        let label = group_link.inner_html();
        let label = label.trim();
        let group_name = label.strip_prefix('~').unwrap_or(label);

        // Without a name there is no group page to request.
        if group_name.is_empty() {
            eprintln!("! Skipping a group link without a name.");
            continue;
        }

        println!("┌ Processing ~{}!", group_name);

        // Get the HTML from the group page. An error status aborts the run: parsing the
        // error page would silently record the group as having no wiki pages.
        let group_url = format!("https://tildes.net/~{}", group_name);
        let body = client.get(&group_url).send()?.error_for_status()?.text()?;
        let group_page = Html::parse_document(&body);

        // Get all the wiki URL elements from the HTML.
        let wiki_links = group_page
            .select(&wiki_link_selector)
            .collect::<Vec<ElementRef>>();

        // Add the group as a new header.
        sitemap += &format!("## ~{}\n\n", group_name);

        // If a group description is found, add it to the Markdown as a quote.
        if let Some(description) = group_page.select(&description_selector).next() {
            sitemap += &format!("> {}\n\n", description.inner_html());
        }

        // If there's no wiki pages, add a little blurb with a link to create one.
        if wiki_links.is_empty() {
            sitemap += &format!(
                "There are no wiki pages for ~{} yet, \
                [click here and be the first to create one](https://tildes.net/~{}/wiki/new_page), \
                if you were granted the necessary permission to do so!\n",
                group_name, group_name
            );
        }

        // Add every wiki link as a list item.
        for wiki_link in &wiki_links {
            let wiki_page_title = wiki_link.inner_html();
            let wiki_page_link = wiki_link.value().attr("href").unwrap_or("");
            sitemap += &format!("* [{}]({})\n", wiki_page_title, wiki_page_link);
        }

        sitemap += "\n";
        println!("└ Processed {} wiki links.", wiki_links.len());

        // Sleep 500ms between HTTP requests.
        thread::sleep(Duration::from_millis(500));
    }

    // Collapse any trailing whitespace into exactly one final newline.
    sitemap.truncate(sitemap.trim_end().len());
    sitemap.push('\n');

    fs::write("./sitemap.md", sitemap)?;
    println!("✓ Done!");

    Ok(())
}