Rewrite the program in Rust.
This commit is contained in:
parent
18a96e9d54
commit
ecb5dea427
|
@ -1,23 +1,7 @@
|
|||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.exe~
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
# Compiled files and executables.
|
||||
/target/
|
||||
|
||||
# Test binary, built with `go test -c`
|
||||
*.test
|
||||
# Backup files generated by rustfmt.
|
||||
**/*.rs.bk
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
||||
|
||||
# Dependency directories (remove the comment below to include it)
|
||||
# vendor/
|
||||
|
||||
### Go Patch ###
|
||||
/vendor/
|
||||
/Godeps/
|
||||
|
||||
# Sitemap files
|
||||
previous-sitemap.md
|
||||
sitemap.md
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,15 @@
|
|||
# https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[package]
|
||||
name = "tildes-wiki-sitemap"
|
||||
version = "0.1.0"
|
||||
authors = ["Bauke <me@bauke.xyz>"]
|
||||
edition = "2018"
|
||||
|
||||
[[bin]]
|
||||
name = "tildes-wiki-sitemap"
|
||||
path = "source/main.rs"
|
||||
|
||||
[dependencies]
|
||||
reqwest = { version = "0.10.4", features = ["blocking"] }
|
||||
scraper = "0.11.0"
|
14
ReadMe.md
14
ReadMe.md
|
@ -1,22 +1,24 @@
|
|||
# Tildes Wiki Sitemap
|
||||
|
||||
> Generates a `tree`-like [sitemap](https://tildes.net/~tildes/wiki/sitemap) of all group wiki pages of Tildes.net
|
||||
> Generates a Markdown file with all group wiki pages of Tildes.net
|
||||
|
||||
## Usage
|
||||
|
||||
Precombiled binaries are not provided, if you can't or don't know how to run this program and the sitemap is outdated please [PM me (@Bauke)](https://tildes.net/user/Bauke/new_message) and I'll update it.
|
||||
Precompiled binaries are not provided. If you can't or don't know how to run this program and the sitemap is outdated, please [PM me (@Bauke)](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap&message=Update%20the%20sitemap%20you%20doofus!) and I'll update it.
|
||||
|
||||
### Quick Command
|
||||
|
||||
Only tested on `go1.12.1 linux/amd64` and `go version go1.13.1 linux/amd64`, if something breaks please [create an issue](https://gitlab.com/Bauke/tildes-wiki-sitemap/issues/new).
|
||||
|
||||
```sh
|
||||
$ git clone https://gitlab.com/Bauke/tildes-wiki-sitemap.git
|
||||
cd tildes-wiki-sitemap
|
||||
go run .
|
||||
cargo run
|
||||
edit sitemap.md
|
||||
```
|
||||
|
||||
## Previous Version
|
||||
|
||||
If you're looking for the previous version of this program written in Go, [click here](https://gitlab.com/Bauke/tildes-wiki-sitemap/-/tree/18a96e9d541fd1e231574ceec4d4bdf5783e3b5f) to go to the commit before the Rust rewrite.
|
||||
|
||||
## License
|
||||
|
||||
Licensed under [AGPL-3.0-or-later](License).
|
||||
Open-sourced with the [AGPL-3.0-or-later license](License).
|
||||
|
|
25
go.mod
25
go.mod
|
@ -1,25 +0,0 @@
|
|||
module gitlab.com/bauke/tildes-wiki-sitemap
|
||||
|
||||
go 1.12
|
||||
|
||||
require (
|
||||
github.com/PuerkitoBio/goquery v1.5.0 // indirect
|
||||
github.com/andybalholm/cascadia v1.1.0 // indirect
|
||||
github.com/antchfx/htmlquery v1.0.0 // indirect
|
||||
github.com/antchfx/xmlquery v1.0.0 // indirect
|
||||
github.com/antchfx/xpath v1.0.0 // indirect
|
||||
github.com/gobwas/glob v0.2.3 // indirect
|
||||
github.com/gocolly/colly v1.2.0
|
||||
github.com/golang/protobuf v1.3.2 // indirect
|
||||
github.com/kennygrant/sanitize v1.2.4 // indirect
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
|
||||
github.com/sirupsen/logrus v1.4.2
|
||||
github.com/stretchr/objx v0.2.0 // indirect
|
||||
github.com/temoto/robotstxt v1.1.1 // indirect
|
||||
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 // indirect
|
||||
golang.org/x/net v0.0.0-20191009170851-d66e71096ffb // indirect
|
||||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47 // indirect
|
||||
golang.org/x/tools v0.0.0-20190701194522-38ae2c8f6412 // indirect
|
||||
google.golang.org/appengine v1.6.5 // indirect
|
||||
)
|
73
go.sum
73
go.sum
|
@ -1,73 +0,0 @@
|
|||
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
|
||||
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
||||
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
||||
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
||||
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
github.com/antchfx/htmlquery v1.0.0 h1:O5IXz8fZF3B3MW+B33MZWbTHBlYmcfw0BAxgErHuaMA=
|
||||
github.com/antchfx/htmlquery v1.0.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
|
||||
github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM=
|
||||
github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
|
||||
github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk=
|
||||
github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
|
||||
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
|
||||
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
|
||||
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
|
||||
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
|
||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
|
||||
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
|
||||
github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=
|
||||
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea h1:hH8P1IiDpzRU6ZDbDh/RDnVuezi2oOXJpApa06M0zyI=
|
||||
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea/go.mod h1:aOux3gHPCftJ3KHq6Pz/AlDjYJ7Y+yKfm1gU/3B0u04=
|
||||
github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
|
||||
github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20190628185345-da137c7871d7 h1:rTIdg5QFRR7XCaK4LCjBiPbx8j4DQRpdYMnGn/bJUEU=
|
||||
golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20191009170851-d66e71096ffb h1:TR699M2v0qoKTOHxeLgp6zPqaQNs74f01a/ob9W0qko=
|
||||
golang.org/x/net v0.0.0-20191009170851-d66e71096ffb/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb h1:fgwFCsaw9buMuxNd6+DQfAuSFqbNiQZpcgJQAgJsK6k=
|
||||
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47 h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY=
|
||||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
|
||||
golang.org/x/tools v0.0.0-20190701194522-38ae2c8f6412/go.mod h1:jcCCGcm9btYwXyDqrUWc6MKQKKGJCWEQ3AfLSRIbEuI=
|
||||
google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I=
|
||||
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
|
||||
google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM=
|
||||
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
144
main.go
144
main.go
|
@ -1,144 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gocolly/colly"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// wikiPage is a single wiki page link collected from a group's page.
type wikiPage struct {
	// name is the link text of the page, url is the link's href attribute.
	name, url string
}
|
||||
|
||||
func main() {
|
||||
log.Infof("Tildes Wiki Sitemap\n")
|
||||
// Create a variable we'll use to check if a current sitemap already exists
|
||||
_, sitemapExists := os.Stat("sitemap.md")
|
||||
if sitemapExists == nil {
|
||||
// If it does exist, rename it to "previous"
|
||||
os.Rename("sitemap.md", "previous-sitemap.md")
|
||||
}
|
||||
|
||||
// Create a groups array, which will be used to sort the output
|
||||
// (couldn't figure out how to sort a map by keys)
|
||||
groups := make([]string, 0)
|
||||
// Create a map for the pages where the key will be the group name
|
||||
pages := make(map[string][]wikiPage)
|
||||
|
||||
// Create a new collector that's only allowed to visit Tildes.net
|
||||
collector := colly.NewCollector(colly.AllowedDomains("tildes.net"))
|
||||
|
||||
// When receiving HTML:
|
||||
collector.OnHTML("html", func(page *colly.HTMLElement) {
|
||||
time.Sleep(time.Second)
|
||||
// Define the URL for brevity
|
||||
url := page.Request.URL.String()
|
||||
if strings.HasSuffix(url, "/groups") {
|
||||
// If the URL ends with /groups we want to visit each group found in the table
|
||||
page.ForEach("td>.link-group", func(_ int, element *colly.HTMLElement) {
|
||||
log.Printf("Visiting group %s", element.Text)
|
||||
groups = append(groups, element.Text)
|
||||
// Make the pages for all groups start out as an empty array
|
||||
// This makes it so groups without any wiki pages also get added to the output, instead of being skipped
|
||||
pages[element.Text] = make([]wikiPage, 0)
|
||||
collector.Visit(fmt.Sprintf("https://tildes.net/%s", element.Text))
|
||||
})
|
||||
} else if strings.Contains(url, "~") {
|
||||
// Else if the URL has a tilde in it, we want to extract the wiki pages
|
||||
group := url[strings.LastIndex(url, "/")+1:]
|
||||
page.ForEach(".nav>.nav-item>a", func(_ int, element *colly.HTMLElement) {
|
||||
// Append the new page to the array and set its name and URL
|
||||
pages[group] = append(pages[group], wikiPage{
|
||||
name: element.Text,
|
||||
url: element.Attr("href"),
|
||||
})
|
||||
log.Printf("Found wiki page: %s/%s", group, element.Text)
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
// After defining the OnHTML callback, visit the group listing
|
||||
// This won't finish until all groups have been visited inside and the entire callback is done
|
||||
collector.Visit("https://tildes.net/groups")
|
||||
|
||||
// Create the sitemap.md file
|
||||
file, _ := os.Create("sitemap.md")
|
||||
defer file.Close()
|
||||
// Write the boilerplate stuff first
|
||||
file.WriteString("# Tildes Wiki Sitemap\n\n")
|
||||
file.WriteString("Automatically generated by [this program](https://gitlab.com/bauke/tildes-wiki-sitemap). [PM @Bauke](https://tildes.net/user/Bauke/new_message) if this page is outdated and you can't run the program yourself.\n\n")
|
||||
file.WriteString("This page is a temporary placeholder to help wiki contributors navigate. Find this page easily by bookmarking it!\n\n")
|
||||
// Sort the groups
|
||||
sort.Strings(groups)
|
||||
// Define a variable that will keep track of the pages count
|
||||
pagesTotal := 0
|
||||
// Write a . to indicate the start of the tree
|
||||
file.WriteString(".\n")
|
||||
// Iterate over the group names
|
||||
for index, group := range groups {
|
||||
prefix := ""
|
||||
if index == len(groups)-1 {
|
||||
// If we're at the last group, write the "L" piece instead
|
||||
file.WriteString(fmt.Sprintf("└── [%s](https://tildes.net/%s/wiki)\n", group[1:], group))
|
||||
// And because there doesn't have to be an extra vertical piece here, we need to preserve the leading space
|
||||
// So we set the prefix to 2 non-breaking spaces, Markdown won't remove these
|
||||
prefix = " "
|
||||
} else {
|
||||
// Else we're not at the last group yet, so we wanna use the rotated "T" piece
|
||||
file.WriteString(fmt.Sprintf("├── [%s](https://tildes.net/%s/wiki)\n", group[1:], group))
|
||||
prefix = "│ "
|
||||
}
|
||||
for pageIndex, page := range pages[group] {
|
||||
// Increment the total pages
|
||||
pagesTotal++
|
||||
// Write the prefix determined earlier
|
||||
file.WriteString(prefix)
|
||||
if pageIndex == len(pages[group])-1 {
|
||||
// Same reasoning as with the groups, if we're on the last page use the "L" piece
|
||||
file.WriteString("└")
|
||||
} else {
|
||||
// Otherwise the rotated "T"
|
||||
file.WriteString("├")
|
||||
}
|
||||
// And then write the Markdown link
|
||||
file.WriteString(fmt.Sprintf("── [%s](%s)\n", page.name, page.url))
|
||||
}
|
||||
}
|
||||
|
||||
// And finally write how many groups and pages there are, like `tree` writes directories and files
|
||||
file.WriteString(fmt.Sprintf("\n%v groups, %v pages\n", len(groups), pagesTotal))
|
||||
|
||||
// If the sitemap exists we want to read both sitemaps and check if they're the same
|
||||
if sitemapExists == nil {
|
||||
file, err := os.Open("sitemap.md")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
current, _ := ioutil.ReadAll(file)
|
||||
file.Close()
|
||||
file, err = os.Open("previous-sitemap.md")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
previous, _ := ioutil.ReadAll(file)
|
||||
file.Close()
|
||||
|
||||
// If they're the same just log that all is good, if not warn that we need to update
|
||||
if string(current) == string(previous) {
|
||||
log.Infof("Current and previous sitemaps are the same, no need to update.\n")
|
||||
} else {
|
||||
log.Warnf("Current and previous sitemaps are not the same, you should update it.\n")
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("Done! Found %v groups and %v pages, see sitemap.md for the output", len(groups), pagesTotal)
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
tab_spaces = 2
|
||||
max_width = 80
|
|
@ -0,0 +1,101 @@
|
|||
use std::error::Error;
|
||||
use std::fs;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
use reqwest;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
let client = reqwest::blocking::Client::new();
|
||||
|
||||
// Get the HTML from the groups list.
|
||||
let response = client
|
||||
.get("https://tildes.net/groups")
|
||||
.header(reqwest::header::USER_AGENT, "Tildes Wiki Sitemap")
|
||||
.send()?;
|
||||
let body = response.text()?;
|
||||
|
||||
// Parse the HTML.
|
||||
let html = Html::parse_document(body.as_str());
|
||||
|
||||
// Create a selector to grab all anchors that link to a group.
|
||||
let selector = Selector::parse(".group-list .link-group").unwrap();
|
||||
|
||||
// Get all the group link elements from the HTML.
|
||||
let group_links = html.select(&selector).collect::<Vec<ElementRef>>();
|
||||
|
||||
// Create the sitemap with the info.
|
||||
let mut sitemap = String::from("# Tildes Wiki Sitemap\n\n");
|
||||
sitemap += "Automatically generated by \
|
||||
[this program](https://gitlab.com/bauke/tildes-wiki-sitemap). \
|
||||
[PM @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap\
|
||||
&message=Update%20the%20sitemap%20you%20doofus!) if this page is outdated and \
|
||||
you can't run the program yourself.\n\n\
|
||||
This page is a temporary placeholder to help wiki contributors navigate. \
|
||||
Find this page easily by bookmarking it!\n\n";
|
||||
|
||||
for group_link in group_links {
|
||||
// Get the group name without the tilde.
|
||||
let group_name = String::from(&group_link.inner_html()[1..]);
|
||||
println!("┌ Processing ~{}!", group_name);
|
||||
|
||||
// Get the HTML from the group page.
|
||||
let response = client
|
||||
.get(format!("https://tildes.net/~{}", group_name).as_str())
|
||||
.header(reqwest::header::USER_AGENT, "Tildes Wiki Sitemap")
|
||||
.send()?;
|
||||
let body = response.text()?;
|
||||
|
||||
// Parse the HTML.
|
||||
let html = Html::parse_document(body.as_str());
|
||||
|
||||
// Create a selector to grab all the anchors in the sidebar that lead to a wiki page.
|
||||
let selector =
|
||||
Selector::parse("#sidebar .nav a[href*=\"/wiki/\"]").unwrap();
|
||||
|
||||
// Get all the wiki URL elements from the HTML.
|
||||
let wiki_links = html.select(&selector).collect::<Vec<ElementRef>>();
|
||||
let wiki_links_amount = wiki_links.len();
|
||||
|
||||
// Create a selector to grab the group description.
|
||||
let selector =
|
||||
Selector::parse("#sidebar .group-short-description").unwrap();
|
||||
|
||||
// Get all the wiki URL elements from the HTML.
|
||||
let group_description = html.select(&selector).collect::<Vec<ElementRef>>();
|
||||
|
||||
// Add the group as a new header.
|
||||
sitemap += format!("## ~{}\n\n", group_name).as_str();
|
||||
|
||||
// If a group description is found, add it to the Markdown.
|
||||
if !group_description.is_empty() {
|
||||
let description = group_description.first().unwrap().inner_html();
|
||||
sitemap += format!("> {}\n\n", description).as_str();
|
||||
}
|
||||
|
||||
// If there's no wiki pages, add a little blurb with a link to create one.
|
||||
if wiki_links.is_empty() {
|
||||
sitemap += format!("There are no wiki pages for ~{} yet, \
|
||||
[click here and be the first to create one](https://tildes.net/~{}/wiki/new_page), \
|
||||
if you have the necessary permission!\n", group_name, group_name).as_str();
|
||||
}
|
||||
|
||||
// Loop over the links and add them in a list.
|
||||
for wiki_link in wiki_links {
|
||||
let wiki_page_title = wiki_link.inner_html();
|
||||
let wiki_page_link = wiki_link.value().attr("href").unwrap_or("");
|
||||
sitemap +=
|
||||
format!("* [{}]({})\n", wiki_page_title, wiki_page_link).as_str();
|
||||
}
|
||||
|
||||
sitemap += "\n";
|
||||
println!("└ Processed {} wiki links.", wiki_links_amount);
|
||||
thread::sleep(Duration::from_millis(500));
|
||||
}
|
||||
|
||||
sitemap = sitemap.trim_end().to_string() + "\n";
|
||||
fs::write("./sitemap.md", sitemap)?;
|
||||
println!("✓ Done!");
|
||||
Ok(())
|
||||
}
|
Loading…
Reference in New Issue