102 lines
3.7 KiB
Rust
102 lines
3.7 KiB
Rust
use std::error::Error;
|
|
use std::fs;
|
|
use std::thread;
|
|
use std::time::Duration;
|
|
|
|
use reqwest;
|
|
use scraper::{ElementRef, Html, Selector};
|
|
|
|
fn main() -> Result<(), Box<dyn Error>> {
|
|
let client = reqwest::blocking::Client::new();
|
|
|
|
// Get the HTML from the groups list.
|
|
let response = client
|
|
.get("https://tildes.net/groups")
|
|
.header(reqwest::header::USER_AGENT, "Tildes Wiki Sitemap")
|
|
.send()?;
|
|
let body = response.text()?;
|
|
|
|
// Parse the HTML.
|
|
let html = Html::parse_document(body.as_str());
|
|
|
|
// Create a selector to grab all anchors that link to a group.
|
|
let selector = Selector::parse(".group-list .link-group").unwrap();
|
|
|
|
// Get all the group link elements from the HTML.
|
|
let group_links = html.select(&selector).collect::<Vec<ElementRef>>();
|
|
|
|
// Create the sitemap with the info.
|
|
let mut sitemap = String::from("# Tildes Wiki Sitemap\n\n");
|
|
sitemap += "Automatically generated by \
|
|
[this program](https://gitlab.com/bauke/tildes-wiki-sitemap). \
|
|
[PM @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap\
|
|
&message=Update%20the%20sitemap%20you%20doofus!) if this page is outdated and \
|
|
you can't run the program yourself.\n\n\
|
|
This page is a temporary placeholder to help wiki contributors navigate. \
|
|
Find this page easily by bookmarking it!\n\n";
|
|
|
|
for group_link in group_links {
|
|
// Get the group name without the tilde.
|
|
let group_name = String::from(&group_link.inner_html()[1..]);
|
|
println!("┌ Processing ~{}!", group_name);
|
|
|
|
// Get the HTML from the group page.
|
|
let response = client
|
|
.get(format!("https://tildes.net/~{}", group_name).as_str())
|
|
.header(reqwest::header::USER_AGENT, "Tildes Wiki Sitemap")
|
|
.send()?;
|
|
let body = response.text()?;
|
|
|
|
// Parse the HTML.
|
|
let html = Html::parse_document(body.as_str());
|
|
|
|
// Create a selector to grab all the anchors in the sidebar that lead to a wiki page.
|
|
let selector =
|
|
Selector::parse("#sidebar .nav a[href*=\"/wiki/\"]").unwrap();
|
|
|
|
// Get all the wiki URL elements from the HTML.
|
|
let wiki_links = html.select(&selector).collect::<Vec<ElementRef>>();
|
|
let wiki_links_amount = wiki_links.len();
|
|
|
|
// Create a selector to grab the group description.
|
|
let selector =
|
|
Selector::parse("#sidebar .group-short-description").unwrap();
|
|
|
|
// Get all the wiki URL elements from the HTML.
|
|
let group_description = html.select(&selector).collect::<Vec<ElementRef>>();
|
|
|
|
// Add the group as a new header.
|
|
sitemap += format!("## ~{}\n\n", group_name).as_str();
|
|
|
|
// If a group description is found, add it to the Markdown.
|
|
if !group_description.is_empty() {
|
|
let description = group_description.first().unwrap().inner_html();
|
|
sitemap += format!("> {}\n\n", description).as_str();
|
|
}
|
|
|
|
// If there's no wiki pages, add a little blurb with a link to create one.
|
|
if wiki_links.is_empty() {
|
|
sitemap += format!("There are no wiki pages for ~{} yet, \
|
|
[click here and be the first to create one](https://tildes.net/~{}/wiki/new_page), \
|
|
if you have the necessary permission!\n", group_name, group_name).as_str();
|
|
}
|
|
|
|
// Loop over the links and add them in a list.
|
|
for wiki_link in wiki_links {
|
|
let wiki_page_title = wiki_link.inner_html();
|
|
let wiki_page_link = wiki_link.value().attr("href").unwrap_or("");
|
|
sitemap +=
|
|
format!("* [{}]({})\n", wiki_page_title, wiki_page_link).as_str();
|
|
}
|
|
|
|
sitemap += "\n";
|
|
println!("└ Processed {} wiki links.", wiki_links_amount);
|
|
thread::sleep(Duration::from_millis(500));
|
|
}
|
|
|
|
sitemap = sitemap.trim_end().to_string() + "\n";
|
|
fs::write("./sitemap.md", sitemap)?;
|
|
println!("✓ Done!");
|
|
Ok(())
|
|
}
|