1
Fork 0
tildes-wiki-sitemap/main.go

145 lines
5.2 KiB
Go

package main
import (
"fmt"
"io/ioutil"
"os"
"sort"
"strings"
"time"
"github.com/gocolly/colly"
log "github.com/sirupsen/logrus"
)
// wikiPage is one wiki link collected from a group's page.
type wikiPage struct {
	// name holds the link text and url the link's href attribute, both taken
	// verbatim from the scraped anchor element.
	name, url string
}
func main() {
log.Infof("Tildes Wiki Sitemap\n")
// Create a variable we'll use to check if a current sitemap already exists
_, sitemapExists := os.Stat("sitemap.md")
if sitemapExists == nil {
// If it does exist, rename it to "previous"
os.Rename("sitemap.md", "previous-sitemap.md")
}
// Create a groups array, which will be used to sort the output
// (couldn't figure out how to sort a map by keys)
groups := make([]string, 0)
// Create a map for the pages where the key will be the group name
pages := make(map[string][]wikiPage)
// Create a new collector that's only allowed to visit Tildes.net
collector := colly.NewCollector(colly.AllowedDomains("tildes.net"))
// When receiving HTML:
collector.OnHTML("html", func(page *colly.HTMLElement) {
time.Sleep(time.Second)
// Define the URL for brevity
url := page.Request.URL.String()
if strings.HasSuffix(url, "/groups") {
// If the URL ends with /groups we want to visit each group found in the table
page.ForEach("td>.link-group", func(_ int, element *colly.HTMLElement) {
log.Printf("Visiting group %s", element.Text)
groups = append(groups, element.Text)
// Make the pages for all groups start out as an empty array
// This makes it so groups without any wiki pages also get added to the output, instead of being skipped
pages[element.Text] = make([]wikiPage, 0)
collector.Visit(fmt.Sprintf("https://tildes.net/%s", element.Text))
})
} else if strings.Contains(url, "~") {
// Else if the URL has a tilde in it, we want to extract the wiki pages
group := url[strings.LastIndex(url, "/")+1:]
page.ForEach(".nav>.nav-item>a", func(_ int, element *colly.HTMLElement) {
// Append the new page to the array and set its name and URL
pages[group] = append(pages[group], wikiPage{
name: element.Text,
url: element.Attr("href"),
})
log.Printf("Found wiki page: %s/%s", group, element.Text)
})
}
})
// After defining the OnHTML callback, visit the group listing
// This won't finish until all groups have been visited inside and the entire callback is done
collector.Visit("https://tildes.net/groups")
// Create the sitemap.md file
file, _ := os.Create("sitemap.md")
defer file.Close()
// Write the boilerplate stuff first
file.WriteString("# Tildes Wiki Sitemap\n\n")
file.WriteString("Automatically generated by [this program](https://gitlab.com/bauke/tildes-wiki-sitemap). [PM @Bauke](https://tildes.net/user/Bauke/new_message) if this page is outdated and you can't run the program yourself.\n\n")
file.WriteString("This page is a temporary placeholder to help wiki contributors navigate. Find this page easily by bookmarking it!\n\n")
// Sort the groups
sort.Strings(groups)
// Define a variable that will keep track of the pages count
pagesTotal := 0
// Write a . to indicate the start of the tree
file.WriteString(".\n")
// Iterate over the group names
for index, group := range groups {
prefix := ""
if index == len(groups)-1 {
// If we're at the last group, write the "L" piece instead
file.WriteString(fmt.Sprintf("└── [%s](https://tildes.net/%s/wiki)\n", group[1:], group))
// And because there doesn't have to be an extra vertical piece here, we need to preserve the leading space
// So we set the prefix to 2 non-breaking spaces, Markdown won't remove these
prefix = "  "
} else {
// Else we're not at the last group yet, so we wanna use the rotated "T" piece
file.WriteString(fmt.Sprintf("├── [%s](https://tildes.net/%s/wiki)\n", group[1:], group))
prefix = "│ "
}
for pageIndex, page := range pages[group] {
// Increment the total pages
pagesTotal++
// Write the prefix determined earlier
file.WriteString(prefix)
if pageIndex == len(pages[group])-1 {
// Same reasoning as with the groups, if we're on the last page use the "L" piece
file.WriteString("└")
} else {
// Otherwise the rotated "T"
file.WriteString("├")
}
// And then write the Markdown link
file.WriteString(fmt.Sprintf("── [%s](%s)\n", page.name, page.url))
}
}
// And finally write how many groups and pages there are, like `tree` writes directories and files
file.WriteString(fmt.Sprintf("\n%v groups, %v pages\n", len(groups), pagesTotal))
// If the sitemap exists we want to read both sitemaps and check if they're the same
if sitemapExists == nil {
file, err := os.Open("sitemap.md")
if err != nil {
log.Fatal(err)
}
current, _ := ioutil.ReadAll(file)
file.Close()
file, err = os.Open("previous-sitemap.md")
if err != nil {
log.Fatal(err)
}
previous, _ := ioutil.ReadAll(file)
file.Close()
// If they're the same just log that all is good, if not warn that we need to update
if string(current) == string(previous) {
log.Infof("Current and previous sitemaps are the same, no need to update.\n")
} else {
log.Warnf("Current and previous sitemaps are not the same, you should update it.\n")
}
}
log.Printf("Done! Found %v groups and %v pages, see sitemap.md for the output", len(groups), pagesTotal)
}