Compare commits
5 Commits
d97f145c89
...
e0361b0631
Author | SHA1 | Date |
---|---|---|
Bauke | e0361b0631 | |
Bauke | ecb5dea427 | |
Bauke | 18a96e9d54 | |
Bauke | bf19c87916 | |
Bauke | ff1e0129b7 |
|
@ -1,23 +1,8 @@
|
||||||
# Binaries for programs and plugins
|
# Compiled files and executables.
|
||||||
*.exe
|
/target/
|
||||||
*.exe~
|
|
||||||
*.dll
|
|
||||||
*.so
|
|
||||||
*.dylib
|
|
||||||
|
|
||||||
# Test binary, built with `go test -c`
|
# Backup files generated by rustfmt.
|
||||||
*.test
|
**/*.rs.bk
|
||||||
|
|
||||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
# The actual Sitemap, to be copied to https://tildes.net/~tildes/wiki/sitemap.
|
||||||
*.out
|
|
||||||
|
|
||||||
# Dependency directories (remove the comment below to include it)
|
|
||||||
# vendor/
|
|
||||||
|
|
||||||
### Go Patch ###
|
|
||||||
/vendor/
|
|
||||||
/Godeps/
|
|
||||||
|
|
||||||
# Sitemap files
|
|
||||||
previous-sitemap.md
|
|
||||||
sitemap.md
|
sitemap.md
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,18 @@
|
||||||
|
# https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[package]
|
||||||
|
name = "tildes-wiki-sitemap"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Bauke <me@bauke.xyz>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "tildes-wiki-sitemap"
|
||||||
|
path = "source/main.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
scraper = "0.12.0"
|
||||||
|
|
||||||
|
[dependencies.reqwest]
|
||||||
|
version = "0.10.7"
|
||||||
|
features = ["blocking"]
|
43
README.md
43
README.md
|
@ -1,33 +1,38 @@
|
||||||
<img src="images/tildes-wiki-sitemap.png" align="right">
|
|
||||||
|
|
||||||
# Tildes Wiki Sitemap
|
# Tildes Wiki Sitemap
|
||||||
|
|
||||||
> Generates a `tree`-like [sitemap](https://tildes.net/~tildes/wiki/sitemap) of all group wiki pages of Tildes.net
|
> Generates a Markdown file with all group wiki pages of Tildes.
|
||||||
|
|
||||||
---
|
## Installation
|
||||||
|
|
||||||
|
### Binary
|
||||||
|
|
||||||
|
Precompiled binaries are available [here](https://git.holllo.cc/Bauke/tildes-wiki-sitemap/releases).
|
||||||
|
|
||||||
|
### Source
|
||||||
|
|
||||||
|
Requires [Rust and Cargo](https://www.rust-lang.org/tools/install) to be installed.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
git clone https://git.holllo.cc/Bauke/tildes-wiki-sitemap.git
|
||||||
|
cd tildes-wiki-sitemap
|
||||||
|
cargo build --release
|
||||||
|
mv target/release/tildes-wiki-sitemap ./
|
||||||
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
Precompiled binaries are not provided. If you can't or don't know how to run this program and the sitemap is outdated, please [PM me (@Bauke)](https://tildes.net/user/Bauke/new_message) and I'll update it.
|
Make sure the file is executable, then run it. A `sitemap.md` file will be created with the results.
|
||||||
|
|
||||||
### Quick Command
|
|
||||||
|
|
||||||
Only tested on `go1.12.1 linux/amd64`. If something breaks, please [create an issue](https://gitlab.com/Bauke/tildes-wiki-sitemap/issues/new).
|
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
$ git clone https://gitlab.com/Bauke/tildes-wiki-sitemap.git
|
chmod +x ./tildes-wiki-sitemap
|
||||||
cd tildes-wiki-sitemap
|
./tildes-wiki-sitemap
|
||||||
go run .
|
less sitemap.md
|
||||||
edit sitemap.md
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Attributes
|
## Previous Version
|
||||||
|
|
||||||
- [Colly](https://github.com/gocolly/colly) (Apache License 2.0)
|
If you're looking for the previous version of this program written in Go, [click here](https://git.holllo.cc/Bauke/tildes-wiki-sitemap/src/commit/18a96e9d541fd1e231574ceec4d4bdf5783e3b5f) to go to the commit before the Rust rewrite.
|
||||||
- [Logrus](https://github.com/Sirupsen/logrus) (MIT License)
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
Licensed under [AGPL-3.0-or-later](LICENSE).
|
Open-sourced with the [AGPL-3.0-or-later license](https://git.holllo.cc/Bauke/tildes-wiki-sitemap/src/branch/main/LICENSE).
|
||||||
|
|
||||||
![AGPL-3.0-or-later Logo](images/license.png)
|
|
||||||
|
|
23
go.mod
23
go.mod
|
@ -1,23 +0,0 @@
|
||||||
module gitlab.com/bauke/tildes-wiki-sitemap
|
|
||||||
|
|
||||||
go 1.12
|
|
||||||
|
|
||||||
require (
|
|
||||||
github.com/PuerkitoBio/goquery v1.5.0 // indirect
|
|
||||||
github.com/antchfx/htmlquery v1.0.0 // indirect
|
|
||||||
github.com/antchfx/xmlquery v1.0.0 // indirect
|
|
||||||
github.com/antchfx/xpath v1.0.0 // indirect
|
|
||||||
github.com/gobwas/glob v0.2.3 // indirect
|
|
||||||
github.com/gocolly/colly v1.2.0
|
|
||||||
github.com/kennygrant/sanitize v1.2.4 // indirect
|
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
|
|
||||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
|
|
||||||
github.com/sirupsen/logrus v1.4.2
|
|
||||||
github.com/stretchr/objx v0.2.0 // indirect
|
|
||||||
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea // indirect
|
|
||||||
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 // indirect
|
|
||||||
golang.org/x/net v0.0.0-20190628185345-da137c7871d7 // indirect
|
|
||||||
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb // indirect
|
|
||||||
golang.org/x/tools v0.0.0-20190701194522-38ae2c8f6412 // indirect
|
|
||||||
google.golang.org/appengine v1.6.1 // indirect
|
|
||||||
)
|
|
60
go.sum
60
go.sum
|
@ -1,60 +0,0 @@
|
||||||
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
|
|
||||||
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
|
||||||
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
|
||||||
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
|
||||||
github.com/antchfx/htmlquery v1.0.0 h1:O5IXz8fZF3B3MW+B33MZWbTHBlYmcfw0BAxgErHuaMA=
|
|
||||||
github.com/antchfx/htmlquery v1.0.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
|
|
||||||
github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM=
|
|
||||||
github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
|
|
||||||
github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk=
|
|
||||||
github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
|
||||||
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
|
|
||||||
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
|
|
||||||
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
|
|
||||||
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
|
|
||||||
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
|
|
||||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
|
||||||
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
|
|
||||||
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
|
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
|
||||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
|
|
||||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
|
|
||||||
github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=
|
|
||||||
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
|
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
|
||||||
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
|
||||||
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
|
|
||||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
|
||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
|
||||||
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea h1:hH8P1IiDpzRU6ZDbDh/RDnVuezi2oOXJpApa06M0zyI=
|
|
||||||
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea/go.mod h1:aOux3gHPCftJ3KHq6Pz/AlDjYJ7Y+yKfm1gU/3B0u04=
|
|
||||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
|
||||||
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
|
||||||
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
|
||||||
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
|
||||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
|
||||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
|
||||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
|
||||||
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
|
||||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
|
||||||
golang.org/x/net v0.0.0-20190628185345-da137c7871d7 h1:rTIdg5QFRR7XCaK4LCjBiPbx8j4DQRpdYMnGn/bJUEU=
|
|
||||||
golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
|
||||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
|
||||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
|
||||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb h1:fgwFCsaw9buMuxNd6+DQfAuSFqbNiQZpcgJQAgJsK6k=
|
|
||||||
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
|
||||||
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
|
|
||||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
|
||||||
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
|
|
||||||
golang.org/x/tools v0.0.0-20190701194522-38ae2c8f6412/go.mod h1:jcCCGcm9btYwXyDqrUWc6MKQKKGJCWEQ3AfLSRIbEuI=
|
|
||||||
google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I=
|
|
||||||
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
|
|
Binary file not shown.
Before Width: | Height: | Size: 3.5 KiB |
Binary file not shown.
Before Width: | Height: | Size: 6.4 KiB |
144
main.go
144
main.go
|
@ -1,144 +0,0 @@
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
|
||||||
"sort"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/gocolly/colly"
|
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
)
|
|
||||||
|
|
||||||
// wikiPage is one wiki page link collected from a group page's navigation.
type wikiPage struct {
	// name holds the link text and url the link's href attribute, both stored
	// exactly as they were found on the page.
	name, url string
}
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
log.Infof("Tildes Wiki Sitemap\n")
|
|
||||||
// Create a variable we'll use to check if a current sitemap already exists
|
|
||||||
_, sitemapExists := os.Stat("sitemap.md")
|
|
||||||
if sitemapExists == nil {
|
|
||||||
// If it does exist, rename it to "previous"
|
|
||||||
os.Rename("sitemap.md", "previous-sitemap.md")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a groups array, which will be used to sort the output
|
|
||||||
// (couldn't figure out how to sort a map by keys)
|
|
||||||
groups := make([]string, 0)
|
|
||||||
// Create a map for the pages where the key will be the group name
|
|
||||||
pages := make(map[string][]wikiPage)
|
|
||||||
|
|
||||||
// Create a new collector that's only allowed to visit Tildes.net
|
|
||||||
collector := colly.NewCollector(colly.AllowedDomains("tildes.net"))
|
|
||||||
|
|
||||||
// When receiving HTML:
|
|
||||||
collector.OnHTML("html", func(page *colly.HTMLElement) {
|
|
||||||
time.Sleep(time.Second)
|
|
||||||
// Define the URL for brevity
|
|
||||||
url := page.Request.URL.String()
|
|
||||||
if strings.HasSuffix(url, "/groups") {
|
|
||||||
// If the URL ends with /groups we want to visit each group found in the table
|
|
||||||
page.ForEach("td>.link-group", func(_ int, element *colly.HTMLElement) {
|
|
||||||
log.Printf("Visiting group %s", element.Text)
|
|
||||||
groups = append(groups, element.Text)
|
|
||||||
// Make the pages for all groups start out as an empty array
|
|
||||||
// This makes it so groups without any wiki pages also get added to the output, instead of being skipped
|
|
||||||
pages[element.Text] = make([]wikiPage, 0)
|
|
||||||
collector.Visit(fmt.Sprintf("https://tildes.net/%s", element.Text))
|
|
||||||
})
|
|
||||||
} else if strings.Contains(url, "~") {
|
|
||||||
// Else if the URL has a tilde in it, we want to extract the wiki pages
|
|
||||||
group := url[strings.LastIndex(url, "/")+1:]
|
|
||||||
page.ForEach(".nav>.nav-item>a", func(_ int, element *colly.HTMLElement) {
|
|
||||||
// Append the new page to the array and set its name and URL
|
|
||||||
pages[group] = append(pages[group], wikiPage{
|
|
||||||
name: element.Text,
|
|
||||||
url: element.Attr("href"),
|
|
||||||
})
|
|
||||||
log.Printf("Found wiki page: %s/%s", group, element.Text)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
// After defining the OnHTML callback, visit the group listing
|
|
||||||
// This won't finish until all groups have been visited inside and the entire callback is done
|
|
||||||
collector.Visit("https://tildes.net/groups")
|
|
||||||
|
|
||||||
// Create the sitemap.md file
|
|
||||||
file, _ := os.Create("sitemap.md")
|
|
||||||
defer file.Close()
|
|
||||||
// Write the boilerplate stuff first
|
|
||||||
file.WriteString("# Tildes Wiki Sitemap\n\n")
|
|
||||||
file.WriteString("Automatically generated by [this program](https://gitlab.com/bauke/tildes-wiki-sitemap). [PM @Bauke](https://tildes.net/user/Bauke/new_message) if this page is outdated and you can't run the program yourself.\n\n")
|
|
||||||
file.WriteString("This page is a temporary placeholder to help wiki contributors navigate. Find this page easily by bookmarking it!\n\n")
|
|
||||||
// Sort the groups
|
|
||||||
sort.Strings(groups)
|
|
||||||
// Define a variable that will keep track of the pages count
|
|
||||||
pagesTotal := 0
|
|
||||||
// Write a . to indicate the start of the tree
|
|
||||||
file.WriteString(".\n")
|
|
||||||
// Iterate over the group names
|
|
||||||
for index, group := range groups {
|
|
||||||
prefix := ""
|
|
||||||
if index == len(groups)-1 {
|
|
||||||
// If we're at the last group, write the "L" piece instead
|
|
||||||
file.WriteString(fmt.Sprintf("└── [%s](https://tildes.net/%s/wiki)\n", group[1:], group))
|
|
||||||
// And because there doesn't have to be an extra vertical piece here, we need to preserve the leading space
|
|
||||||
// So we set the prefix to 2 non-breaking spaces, Markdown won't remove these
|
|
||||||
prefix = " "
|
|
||||||
} else {
|
|
||||||
// Else we're not at the last group yet, so we wanna use the rotated "T" piece
|
|
||||||
file.WriteString(fmt.Sprintf("├── [%s](https://tildes.net/%s/wiki)\n", group[1:], group))
|
|
||||||
prefix = "│ "
|
|
||||||
}
|
|
||||||
for pageIndex, page := range pages[group] {
|
|
||||||
// Increment the total pages
|
|
||||||
pagesTotal++
|
|
||||||
// Write the prefix determined earlier
|
|
||||||
file.WriteString(prefix)
|
|
||||||
if pageIndex == len(pages[group])-1 {
|
|
||||||
// Same reasoning as with the groups, if we're on the last page use the "L" piece
|
|
||||||
file.WriteString("└")
|
|
||||||
} else {
|
|
||||||
// Otherwise the rotated "T"
|
|
||||||
file.WriteString("├")
|
|
||||||
}
|
|
||||||
// And then write the Markdown link
|
|
||||||
file.WriteString(fmt.Sprintf("── [%s](%s)\n", page.name, page.url))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// And finally write how many groups and pages there are, like `tree` writes directories and files
|
|
||||||
file.WriteString(fmt.Sprintf("\n%v groups, %v pages\n", len(groups), pagesTotal))
|
|
||||||
|
|
||||||
// If the sitemap exists we want to read both sitemaps and check if they're the same
|
|
||||||
if sitemapExists == nil {
|
|
||||||
file, err := os.Open("sitemap.md")
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
current, _ := ioutil.ReadAll(file)
|
|
||||||
file.Close()
|
|
||||||
file, err = os.Open("previous-sitemap.md")
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
previous, _ := ioutil.ReadAll(file)
|
|
||||||
file.Close()
|
|
||||||
|
|
||||||
// If they're the same just log that all is good, if not warn that we need to update
|
|
||||||
if string(current) == string(previous) {
|
|
||||||
log.Infof("Current and previous sitemaps are the same, no need to update.\n")
|
|
||||||
} else {
|
|
||||||
log.Warnf("Current and previous sitemaps are not the same, you should update it.\n")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Printf("Done! Found %v groups and %v pages, see sitemap.md for the output", len(groups), pagesTotal)
|
|
||||||
}
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
max_width = 80
|
||||||
|
tab_spaces = 2
|
|
@ -0,0 +1,97 @@
|
||||||
|
use std::{error::Error, fs, thread, time::Duration};
|
||||||
|
|
||||||
|
use reqwest::blocking::Client;
|
||||||
|
use scraper::{ElementRef, Html, Selector};
|
||||||
|
|
||||||
|
fn main() -> Result<(), Box<dyn Error>> {
|
||||||
|
let client = Client::builder()
|
||||||
|
.user_agent("Tildes Wiki Sitemap")
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
// Get the HTML from the groups list.
|
||||||
|
let response = client.get("https://tildes.net/groups").send()?;
|
||||||
|
let body = response.text()?;
|
||||||
|
|
||||||
|
// Parse the HTML.
|
||||||
|
let html = Html::parse_document(&body);
|
||||||
|
|
||||||
|
// Create a selector to grab all anchors that link to a group.
|
||||||
|
let selector = Selector::parse(".group-list .link-group").unwrap();
|
||||||
|
|
||||||
|
// Get all the group link elements from the HTML.
|
||||||
|
let group_links = html.select(&selector).collect::<Vec<ElementRef>>();
|
||||||
|
|
||||||
|
// Create the sitemap with the info.
|
||||||
|
let mut sitemap = "# Tildes Wiki Sitemap\n\n".to_string();
|
||||||
|
sitemap += "Automatically generated by \
|
||||||
|
[this program](https://git.holllo.cc/Bauke/tildes-wiki-sitemap). \
|
||||||
|
[message @Bauke](https://tildes.net/user/Bauke/new_message?subject=Tildes%20Wiki%20Sitemap\
|
||||||
|
&message=Update%20the%20sitemap%20you%20doofus!) if this page is outdated and \
|
||||||
|
you can't run the program yourself.\n\n\
|
||||||
|
This page is a temporary placeholder to help wiki contributors navigate. \
|
||||||
|
Find this page easily by bookmarking it!\n\n";
|
||||||
|
|
||||||
|
for group_link in group_links {
|
||||||
|
// Get the group name without the tilde.
|
||||||
|
let group_name = group_link.inner_html()[1..].to_string();
|
||||||
|
println!("┌ Processing ~{}!", group_name);
|
||||||
|
|
||||||
|
// Get the HTML from the group page.
|
||||||
|
let response = client
|
||||||
|
.get(&format!("https://tildes.net/~{}", group_name))
|
||||||
|
.send()?;
|
||||||
|
let body = response.text()?;
|
||||||
|
|
||||||
|
// Parse the HTML.
|
||||||
|
let html = Html::parse_document(&body);
|
||||||
|
|
||||||
|
// Create a selector to grab all the anchors in the sidebar that lead to a wiki page.
|
||||||
|
let selector =
|
||||||
|
Selector::parse("#sidebar .nav a[href*=\"/wiki/\"]").unwrap();
|
||||||
|
|
||||||
|
// Get all the wiki URL elements from the HTML.
|
||||||
|
let wiki_links = html.select(&selector).collect::<Vec<ElementRef>>();
|
||||||
|
let wiki_links_amount = wiki_links.len();
|
||||||
|
|
||||||
|
// Create a selector to grab the group description.
|
||||||
|
let selector =
|
||||||
|
Selector::parse("#sidebar .group-short-description").unwrap();
|
||||||
|
|
||||||
|
// Get the group description from the HTML.
|
||||||
|
let group_description = html.select(&selector).collect::<Vec<ElementRef>>();
|
||||||
|
|
||||||
|
// Add the group as a new header.
|
||||||
|
sitemap += format!("## ~{}\n\n", group_name).as_str();
|
||||||
|
|
||||||
|
// If a group description is found, add it to the Markdown.
|
||||||
|
if !group_description.is_empty() {
|
||||||
|
let description = group_description.first().unwrap().inner_html();
|
||||||
|
sitemap += format!("> {}\n\n", description).as_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there's no wiki pages, add a little blurb with a link to create one.
|
||||||
|
if wiki_links.is_empty() {
|
||||||
|
sitemap += format!("There are no wiki pages for ~{} yet, \
|
||||||
|
[click here and be the first to create one](https://tildes.net/~{}/wiki/new_page), \
|
||||||
|
if you were granted the necessary permission to do so!\n", group_name, group_name).as_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop over the links and add them in a list.
|
||||||
|
for wiki_link in wiki_links {
|
||||||
|
let wiki_page_title = wiki_link.inner_html();
|
||||||
|
let wiki_page_link = wiki_link.value().attr("href").unwrap_or("");
|
||||||
|
sitemap += &format!("* [{}]({})\n", wiki_page_title, wiki_page_link);
|
||||||
|
}
|
||||||
|
|
||||||
|
sitemap += "\n";
|
||||||
|
println!("└ Processed {} wiki links.", wiki_links_amount);
|
||||||
|
|
||||||
|
// Sleep 500ms between HTTP requests.
|
||||||
|
thread::sleep(Duration::from_millis(500));
|
||||||
|
}
|
||||||
|
|
||||||
|
sitemap = sitemap.trim_end().to_string() + "\n";
|
||||||
|
fs::write("./sitemap.md", sitemap)?;
|
||||||
|
println!("✓ Done!");
|
||||||
|
Ok(())
|
||||||
|
}
|
Loading…
Reference in New Issue