use std::error::Error; use std::fs; use std::thread; use std::time::Duration; use reqwest; use scraper::{ElementRef, Html, Selector}; fn main() -> Result<(), Box> { let client = reqwest::blocking::Client::new(); // Get the HTML from the groups list. let response = client .get("https://tildes.net/groups") .header(reqwest::header::USER_AGENT, "Tildes Wiki Sitemap") .send()?; let body = response.text()?; // Parse the HTML. let html = Html::parse_document(body.as_str()); // Create a selector to grab all anchors that link to a group. let selector = Selector::parse(".group-list .link-group").unwrap(); // Get all the group link elements from the HTML. let group_links = html.select(&selector).collect::>(); // Create the sitemap with the info. let mut sitemap = String::from("# Tildes Wiki Sitemap\n\n"); sitemap += "Automatically generated by \ [this program](https://gitlab.com/bauke/tildes-wiki-sitemap). \ [PM @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap\ &message=Update%20the%20sitemap%20you%20doofus!) if this page is outdated and \ you can't run the program yourself.\n\n\ This page is a temporary placeholder to help wiki contributors navigate. \ Find this page easily by bookmarking it!\n\n"; for group_link in group_links { // Get the group name without the tilde. let group_name = String::from(&group_link.inner_html()[1..]); println!("┌ Processing ~{}!", group_name); // Get the HTML from the group page. let response = client .get(format!("https://tildes.net/~{}", group_name).as_str()) .header(reqwest::header::USER_AGENT, "Tildes Wiki Sitemap") .send()?; let body = response.text()?; // Parse the HTML. let html = Html::parse_document(body.as_str()); // Create a selector to grab all the anchors in the sidebar that lead to a wiki page. let selector = Selector::parse("#sidebar .nav a[href*=\"/wiki/\"]").unwrap(); // Get all the wiki URL elements from the HTML. let wiki_links = html.select(&selector).collect::>(); let wiki_links_amount = wiki_links.len(); // Create a selector to grab the group description. let selector = Selector::parse("#sidebar .group-short-description").unwrap(); // Get all the wiki URL elements from the HTML. let group_description = html.select(&selector).collect::>(); // Add the group as a new header. sitemap += format!("## ~{}\n\n", group_name).as_str(); // If a group description is found, add it to the Markdown. if !group_description.is_empty() { let description = group_description.first().unwrap().inner_html(); sitemap += format!("> {}\n\n", description).as_str(); } // If there's no wiki pages, add a little blurb with a link to create one. if wiki_links.is_empty() { sitemap += format!("There are no wiki pages for ~{} yet, \ [click here and be the first to create one](https://tildes.net/~{}/wiki/new_page), \ if you have the necessary permission!\n", group_name, group_name).as_str(); } // Loop over the links and add them in a list. for wiki_link in wiki_links { let wiki_page_title = wiki_link.inner_html(); let wiki_page_link = wiki_link.value().attr("href").unwrap_or(""); sitemap += format!("* [{}]({})\n", wiki_page_title, wiki_page_link).as_str(); } sitemap += "\n"; println!("└ Processed {} wiki links.", wiki_links_amount); thread::sleep(Duration::from_millis(500)); } sitemap = sitemap.trim_end().to_string() + "\n"; fs::write("./sitemap.md", sitemap)?; println!("✓ Done!"); Ok(()) }