package main import ( "fmt" "io/ioutil" "os" "sort" "strings" "time" "github.com/gocolly/colly" log "github.com/sirupsen/logrus" ) type wikiPage struct { name string url string } func main() { log.Infof("Tildes Wiki Sitemap\n") // Create a variable we'll use to check if a current sitemap already exists _, sitemapExists := os.Stat("sitemap.md") if sitemapExists == nil { // If it does exist, rename it to "previous" os.Rename("sitemap.md", "previous-sitemap.md") } // Create a groups array, which will be used to sort the output // (couldn't figure out how to sort a map by keys) groups := make([]string, 0) // Create a map for the pages where the key will be the group name pages := make(map[string][]wikiPage) // Create a new collector that's only allowed to visit Tildes.net collector := colly.NewCollector(colly.AllowedDomains("tildes.net")) // When receiving HTML: collector.OnHTML("html", func(page *colly.HTMLElement) { time.Sleep(time.Second) // Define the URL for brevity url := page.Request.URL.String() if strings.HasSuffix(url, "/groups") { // If the URL ends with /groups we want to visit each group found in the table page.ForEach("td>.link-group", func(_ int, element *colly.HTMLElement) { log.Printf("Visiting group %s", element.Text) groups = append(groups, element.Text) // Make the pages for all groups start out as an empty array // This makes it so groups without any wiki pages also get added to the output, instead of being skipped pages[element.Text] = make([]wikiPage, 0) collector.Visit(fmt.Sprintf("https://tildes.net/%s", element.Text)) }) } else if strings.Contains(url, "~") { // Else if the URL has a tilde in it, we want to extract the wiki pages group := url[strings.LastIndex(url, "/")+1:] page.ForEach(".nav>.nav-item>a", func(_ int, element *colly.HTMLElement) { // Append the new page to the array and set its name and URL pages[group] = append(pages[group], wikiPage{ name: element.Text, url: element.Attr("href"), }) log.Printf("Found wiki page: %s/%s", group, element.Text) }) } }) // After defining the OnHTML callback, visit the group listing // This won't finish until all groups have been visited inside and the entire callback is done collector.Visit("https://tildes.net/groups") // Create the sitemap.md file file, _ := os.Create("sitemap.md") defer file.Close() // Write the boilerplate stuff first file.WriteString("# Tildes Wiki Sitemap\n\n") file.WriteString("Automatically generated by [this program](https://gitlab.com/bauke/tildes-wiki-sitemap). [PM @Bauke](https://tildes.net/user/Bauke/new_message) if this page is outdated and you can't run the program yourself.\n\n") file.WriteString("This page is a temporary placeholder to help wiki contributors navigate. Find this page easily by bookmarking it!\n\n") // Sort the groups sort.Strings(groups) // Define a variable that will keep track of the pages count pagesTotal := 0 // Write a . to indicate the start of the tree file.WriteString(".\n") // Iterate over the group names for index, group := range groups { prefix := "" if index == len(groups)-1 { // If we're at the last group, write the "L" piece instead file.WriteString(fmt.Sprintf("└── [%s](https://tildes.net/%s/wiki)\n", group[1:], group)) // And because there doesn't have to be an extra vertical piece here, we need to preserve the leading space // So we set the prefix to 2 non-breaking spaces, Markdown won't remove these prefix = "  " } else { // Else we're not at the last group yet, so we wanna use the rotated "T" piece file.WriteString(fmt.Sprintf("├── [%s](https://tildes.net/%s/wiki)\n", group[1:], group)) prefix = "│ " } for pageIndex, page := range pages[group] { // Increment the total pages pagesTotal++ // Write the prefix determined earlier file.WriteString(prefix) if pageIndex == len(pages[group])-1 { // Same reasoning as with the groups, if we're on the last page use the "L" piece file.WriteString("└") } else { // Otherwise the rotated "T" file.WriteString("├") } // And then write the Markdown link file.WriteString(fmt.Sprintf("── [%s](%s)\n", page.name, page.url)) } } // And finally write how many groups and pages there are, like `tree` writes directories and files file.WriteString(fmt.Sprintf("\n%v groups, %v pages\n", len(groups), pagesTotal)) // If the sitemap exists we want to read both sitemaps and check if they're the same if sitemapExists == nil { file, err := os.Open("sitemap.md") if err != nil { log.Fatal(err) } current, _ := ioutil.ReadAll(file) file.Close() file, err = os.Open("previous-sitemap.md") if err != nil { log.Fatal(err) } previous, _ := ioutil.ReadAll(file) file.Close() // If they're the same just log that all is good, if not warn that we need to update if string(current) == string(previous) { log.Infof("Current and previous sitemaps are the same, no need to update.\n") } else { log.Warnf("Current and previous sitemaps are not the same, you should update it.\n") } } log.Printf("Done! Found %v groups and %v pages, see sitemap.md for the output", len(groups), pagesTotal) }