2019-07-02 13:59:41 +00:00
package main
import (
"fmt"
2019-10-10 20:41:09 +00:00
"io/ioutil"
2019-07-02 13:59:41 +00:00
"os"
"sort"
"strings"
2019-08-14 10:41:06 +00:00
"time"
2019-07-02 13:59:41 +00:00
"github.com/gocolly/colly"
log "github.com/sirupsen/logrus"
)
// wikiPage is a single wiki page link found in a group's navigation.
type wikiPage struct {
	// name is the link text of the page; url is the link's href attribute,
	// stored exactly as it was found in the HTML.
	name, url string
}
func main ( ) {
log . Infof ( "Tildes Wiki Sitemap\n" )
2019-10-10 20:41:09 +00:00
// Create a variable we'll use to check if a current sitemap already exists
_ , sitemapExists := os . Stat ( "sitemap.md" )
if sitemapExists == nil {
// If it does exist, rename it to "previous"
os . Rename ( "sitemap.md" , "previous-sitemap.md" )
}
2019-07-02 13:59:41 +00:00
// Create a groups array, which will be used to sort the output
// (couldn't figure out how to sort a map by keys)
groups := make ( [ ] string , 0 )
// Create a map for the pages where the key will be the group name
pages := make ( map [ string ] [ ] wikiPage )
// Create a new collector that's only allowed to visit Tildes.net
collector := colly . NewCollector ( colly . AllowedDomains ( "tildes.net" ) )
// When receiving HTML:
collector . OnHTML ( "html" , func ( page * colly . HTMLElement ) {
2019-08-14 10:41:06 +00:00
time . Sleep ( time . Second )
2019-07-02 13:59:41 +00:00
// Define the URL for brevity
url := page . Request . URL . String ( )
if strings . HasSuffix ( url , "/groups" ) {
// If the URL ends with /groups we want to visit each group found in the table
page . ForEach ( "td>.link-group" , func ( _ int , element * colly . HTMLElement ) {
log . Printf ( "Visiting group %s" , element . Text )
groups = append ( groups , element . Text )
// Make the pages for all groups start out as an empty array
// This makes it so groups without any wiki pages also get added to the output, instead of being skipped
pages [ element . Text ] = make ( [ ] wikiPage , 0 )
collector . Visit ( fmt . Sprintf ( "https://tildes.net/%s" , element . Text ) )
} )
} else if strings . Contains ( url , "~" ) {
// Else if the URL has a tilde in it, we want to extract the wiki pages
group := url [ strings . LastIndex ( url , "/" ) + 1 : ]
page . ForEach ( ".nav>.nav-item>a" , func ( _ int , element * colly . HTMLElement ) {
// Append the new page to the array and set its name and URL
pages [ group ] = append ( pages [ group ] , wikiPage {
name : element . Text ,
url : element . Attr ( "href" ) ,
} )
log . Printf ( "Found wiki page: %s/%s" , group , element . Text )
} )
}
} )
// After defining the OnHTML callback, visit the group listing
// This won't finish until all groups have been visited inside and the entire callback is done
collector . Visit ( "https://tildes.net/groups" )
// Create the sitemap.md file
file , _ := os . Create ( "sitemap.md" )
defer file . Close ( )
// Write the boilerplate stuff first
file . WriteString ( "# Tildes Wiki Sitemap\n\n" )
file . WriteString ( "Automatically generated by [this program](https://gitlab.com/bauke/tildes-wiki-sitemap). [PM @Bauke](https://tildes.net/user/Bauke/new_message) if this page is outdated and you can't run the program yourself.\n\n" )
file . WriteString ( "This page is a temporary placeholder to help wiki contributors navigate. Find this page easily by bookmarking it!\n\n" )
// Sort the groups
sort . Strings ( groups )
// Define a variable that will keep track of the pages count
pagesTotal := 0
// Write a . to indicate the start of the tree
file . WriteString ( ".\n" )
// Iterate over the group names
for index , group := range groups {
prefix := ""
if index == len ( groups ) - 1 {
// If we're at the last group, write the "L" piece instead
file . WriteString ( fmt . Sprintf ( "└── [%s](https://tildes.net/%s/wiki)\n" , group [ 1 : ] , group ) )
// And because there doesn't have to be an extra vertical piece here, we need to preserve the leading space
// So we set the prefix to 2 non-breaking spaces, Markdown won't remove these
prefix = " "
} else {
// Else we're not at the last group yet, so we wanna use the rotated "T" piece
file . WriteString ( fmt . Sprintf ( "├── [%s](https://tildes.net/%s/wiki)\n" , group [ 1 : ] , group ) )
prefix = "│ "
}
for pageIndex , page := range pages [ group ] {
// Increment the total pages
pagesTotal ++
// Write the prefix determined earlier
file . WriteString ( prefix )
if pageIndex == len ( pages [ group ] ) - 1 {
// Same reasoning as with the groups, if we're on the last page use the "L" piece
file . WriteString ( "└" )
} else {
// Otherwise the rotated "T"
file . WriteString ( "├" )
}
// And then write the Markdown link
file . WriteString ( fmt . Sprintf ( "── [%s](%s)\n" , page . name , page . url ) )
}
}
// And finally write how many groups and pages there are, like `tree` writes directories and files
file . WriteString ( fmt . Sprintf ( "\n%v groups, %v pages\n" , len ( groups ) , pagesTotal ) )
2019-10-10 20:41:09 +00:00
// If the sitemap exists we want to read both sitemaps and check if they're the same
if sitemapExists == nil {
file , err := os . Open ( "sitemap.md" )
if err != nil {
log . Fatal ( err )
}
current , _ := ioutil . ReadAll ( file )
file . Close ( )
file , err = os . Open ( "previous-sitemap.md" )
if err != nil {
log . Fatal ( err )
}
previous , _ := ioutil . ReadAll ( file )
file . Close ( )
// If they're the same just log that all is good, if not warn that we need to update
if string ( current ) == string ( previous ) {
log . Infof ( "Current and previous sitemaps are the same, no need to update.\n" )
} else {
log . Warnf ( "Current and previous sitemaps are not the same, you should update it.\n" )
}
}
2019-07-02 13:59:41 +00:00
log . Printf ( "Done! Found %v groups and %v pages, see sitemap.md for the output" , len ( groups ) , pagesTotal )
}