109 lines
4.2 KiB
Go
109 lines
4.2 KiB
Go
|
package main
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"os"
|
||
|
"sort"
|
||
|
"strings"
|
||
|
|
||
|
"github.com/gocolly/colly"
|
||
|
log "github.com/sirupsen/logrus"
|
||
|
)
|
||
|
|
||
|
// wikiPage is a single wiki page link collected from a group's page.
type wikiPage struct {
	// name is the text of the link; url is the value of its href attribute.
	name, url string
}
|
||
|
|
||
|
func main() {
|
||
|
log.Infof("Tildes Wiki Sitemap\n")
|
||
|
// Create a groups array, which will be used to sort the output
|
||
|
// (couldn't figure out how to sort a map by keys)
|
||
|
groups := make([]string, 0)
|
||
|
// Create a map for the pages where the key will be the group name
|
||
|
pages := make(map[string][]wikiPage)
|
||
|
|
||
|
// Create a new collector that's only allowed to visit Tildes.net
|
||
|
collector := colly.NewCollector(colly.AllowedDomains("tildes.net"))
|
||
|
|
||
|
// When receiving HTML:
|
||
|
collector.OnHTML("html", func(page *colly.HTMLElement) {
|
||
|
// Define the URL for brevity
|
||
|
url := page.Request.URL.String()
|
||
|
if strings.HasSuffix(url, "/groups") {
|
||
|
// If the URL ends with /groups we want to visit each group found in the table
|
||
|
page.ForEach("td>.link-group", func(_ int, element *colly.HTMLElement) {
|
||
|
log.Printf("Visiting group %s", element.Text)
|
||
|
groups = append(groups, element.Text)
|
||
|
// Make the pages for all groups start out as an empty array
|
||
|
// This makes it so groups without any wiki pages also get added to the output, instead of being skipped
|
||
|
pages[element.Text] = make([]wikiPage, 0)
|
||
|
collector.Visit(fmt.Sprintf("https://tildes.net/%s", element.Text))
|
||
|
})
|
||
|
} else if strings.Contains(url, "~") {
|
||
|
// Else if the URL has a tilde in it, we want to extract the wiki pages
|
||
|
group := url[strings.LastIndex(url, "/")+1:]
|
||
|
page.ForEach(".nav>.nav-item>a", func(_ int, element *colly.HTMLElement) {
|
||
|
// Append the new page to the array and set its name and URL
|
||
|
pages[group] = append(pages[group], wikiPage{
|
||
|
name: element.Text,
|
||
|
url: element.Attr("href"),
|
||
|
})
|
||
|
log.Printf("Found wiki page: %s/%s", group, element.Text)
|
||
|
})
|
||
|
}
|
||
|
})
|
||
|
|
||
|
// After defining the OnHTML callback, visit the group listing
|
||
|
// This won't finish until all groups have been visited inside and the entire callback is done
|
||
|
collector.Visit("https://tildes.net/groups")
|
||
|
|
||
|
// Create the sitemap.md file
|
||
|
file, _ := os.Create("sitemap.md")
|
||
|
defer file.Close()
|
||
|
// Write the boilerplate stuff first
|
||
|
file.WriteString("# Tildes Wiki Sitemap\n\n")
|
||
|
file.WriteString("Automatically generated by [this program](https://gitlab.com/bauke/tildes-wiki-sitemap). [PM @Bauke](https://tildes.net/user/Bauke/new_message) if this page is outdated and you can't run the program yourself.\n\n")
|
||
|
file.WriteString("This page is a temporary placeholder to help wiki contributors navigate. Find this page easily by bookmarking it!\n\n")
|
||
|
// Sort the groups
|
||
|
sort.Strings(groups)
|
||
|
// Define a variable that will keep track of the pages count
|
||
|
pagesTotal := 0
|
||
|
// Write a . to indicate the start of the tree
|
||
|
file.WriteString(".\n")
|
||
|
// Iterate over the group names
|
||
|
for index, group := range groups {
|
||
|
prefix := ""
|
||
|
if index == len(groups)-1 {
|
||
|
// If we're at the last group, write the "L" piece instead
|
||
|
file.WriteString(fmt.Sprintf("└── [%s](https://tildes.net/%s/wiki)\n", group[1:], group))
|
||
|
// And because there doesn't have to be an extra vertical piece here, we need to preserve the leading space
|
||
|
// So we set the prefix to 2 non-breaking spaces, Markdown won't remove these
|
||
|
prefix = " "
|
||
|
} else {
|
||
|
// Else we're not at the last group yet, so we wanna use the rotated "T" piece
|
||
|
file.WriteString(fmt.Sprintf("├── [%s](https://tildes.net/%s/wiki)\n", group[1:], group))
|
||
|
prefix = "│ "
|
||
|
}
|
||
|
for pageIndex, page := range pages[group] {
|
||
|
// Increment the total pages
|
||
|
pagesTotal++
|
||
|
// Write the prefix determined earlier
|
||
|
file.WriteString(prefix)
|
||
|
if pageIndex == len(pages[group])-1 {
|
||
|
// Same reasoning as with the groups, if we're on the last page use the "L" piece
|
||
|
file.WriteString("└")
|
||
|
} else {
|
||
|
// Otherwise the rotated "T"
|
||
|
file.WriteString("├")
|
||
|
}
|
||
|
// And then write the Markdown link
|
||
|
file.WriteString(fmt.Sprintf("── [%s](%s)\n", page.name, page.url))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// And finally write how many groups and pages there are, like `tree` writes directories and files
|
||
|
file.WriteString(fmt.Sprintf("\n%v groups, %v pages\n", len(groups), pagesTotal))
|
||
|
log.Printf("Done! Found %v groups and %v pages, see sitemap.md for the output", len(groups), pagesTotal)
|
||
|
}
|