use std::{error::Error, fs, thread, time::Duration}; use reqwest::blocking::Client; use scraper::{ElementRef, Html, Selector}; fn main() -> Result<(), Box> { let client = Client::builder() .user_agent("Tildes Wiki Sitemap") .build()?; // Get the HTML from the groups list. let response = client.get("https://tildes.net/groups").send()?; let body = response.text()?; // Parse the HTML. let html = Html::parse_document(&body); // Create a selector to grab all anchors that link to a group. let selector = Selector::parse(".group-list .link-group").unwrap(); // Get all the group link elements from the HTML. let group_links = html.select(&selector).collect::>(); // Create the sitemap with the info. let mut sitemap = "# Tildes Wiki Sitemap\n\n".to_string(); sitemap += "Automatically generated by \ [this program](https://git.holllo.cc/Bauke/tildes-wiki-sitemap). \ [message @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap\ &message=Update%20the%20sitemap%20you%20doofus!) if this page is outdated and \ you can't run the program yourself.\n\n\ This page is a temporary placeholder to help wiki contributors navigate. \ Find this page easily by bookmarking it!\n\n"; for group_link in group_links { // Get the group name without the tilde. let group_name = group_link.inner_html()[1..].to_string(); println!("┌ Processing ~{}!", group_name); // Get the HTML from the group page. let response = client .get(&format!("https://tildes.net/~{}", group_name)) .send()?; let body = response.text()?; // Parse the HTML. let html = Html::parse_document(&body); // Create a selector to grab all the anchors in the sidebar that lead to a wiki page. let selector = Selector::parse("#sidebar .nav a[href*=\"/wiki/\"]").unwrap(); // Get all the wiki URL elements from the HTML. let wiki_links = html.select(&selector).collect::>(); let wiki_links_amount = wiki_links.len(); // Create a selector to grab the group description. let selector = Selector::parse("#sidebar .group-short-description").unwrap(); // Get the group description from the HTML. let group_description = html.select(&selector).collect::>(); // Add the group as a new header. sitemap += format!("## ~{}\n\n", group_name).as_str(); // If a group description is found, add it to the Markdown. if !group_description.is_empty() { let description = group_description.first().unwrap().inner_html(); sitemap += format!("> {}\n\n", description).as_str(); } // If there's no wiki pages, add a little blurb with a link to create one. if wiki_links.is_empty() { sitemap += format!("There are no wiki pages for ~{} yet, \ [click here and be the first to create one](https://tildes.net/~{}/wiki/new_page), \ if you were granted the necessary permission to do so!\n", group_name, group_name).as_str(); } // Loop over the links and add them in a list. for wiki_link in wiki_links { let wiki_page_title = wiki_link.inner_html(); let wiki_page_link = wiki_link.value().attr("href").unwrap_or(""); sitemap += &format!("* [{}]({})\n", wiki_page_title, wiki_page_link); } sitemap += "\n"; println!("└ Processed {} wiki links.", wiki_links_amount); // Sleep 500ms between HTTP requests. thread::sleep(Duration::from_millis(500)); } sitemap = sitemap.trim_end().to_string() + "\n"; fs::write("./sitemap.md", sitemap)?; println!("✓ Done!"); Ok(()) }