2020-02-29 22:48:09 +00:00
|
|
|
import {promises as fsp} from 'fs';
|
|
|
|
import {join} from 'path';
|
|
|
|
import cheerio from 'cheerio';
|
|
|
|
import got, {Response} from 'got';
|
|
|
|
|
|
|
|
/**
 * A single topic scraped from the `~tildes.official` group listing.
 */
export interface OfficialTopic {
  /** Publication time, parsed from the `datetime` attribute of the topic's `<time>` element. */
  date: Date;

  /** Topic ID: the listing element's `id` attribute with the leading `topic-` removed. */
  id: string;

  /** Text of the topic's title link (`.topic-title > a`). */
  title: string;

  /** Short link to the topic, built as `https://tild.es/<id>`. */
  url: string;
}
|
|
|
|
|
|
|
|
/**
 * Scrapes every topic from the `~tildes.official` listing and writes the result
 * to `../pages/data/official-topics.json` (relative to this file's directory).
 *
 * The listing is walked page by page, using the ID of the last collected topic
 * as the `&after=` cursor. Scraping stops when a page contains no topics or
 * when the pagination no longer offers anything but "previous" buttons.
 *
 * @returns A promise that resolves once the JSON file has been written.
 *   Rejects if a download or a file system operation fails.
 */
async function entry(): Promise<void> {
  // Pause between two page requests, in milliseconds.
  const timeout = 250;
  const officialTopics: OfficialTopic[] = [];
  const baseURL = 'https://tildes.net/~tildes.official?order=new&per_page=100';

  let hasNextButton = true;

  while (hasNextButton) {
    // Set the URL to be downloaded, on the first iteration we won't have an ID
    // for `&after=` yet so we make sure to check for that.
    let getURL: string = baseURL;
    if (officialTopics.length > 0) {
      getURL += `&after=${officialTopics[officialTopics.length - 1].id}`;
    }

    // Download the page and load the HTML into Cheerio.
    const response: Response<string> = await got(getURL, {
      headers: {
        'User-Agent': 'Tildes Issue Log scraping Official Topics'
      }
    });
    const html: CheerioStatic = cheerio.load(response.body);

    // Grab all topics from the listing.
    const topics: CheerioElement[] = html(
      '.topic-listing > li > article'
    ).toArray();

    // A page without topics cannot advance the `&after=` cursor, so continuing
    // would request the exact same URL forever. Treat it as the end.
    if (topics.length === 0) {
      break;
    }

    for (const topic of topics) {
      const $topic: CheerioStatic = cheerio.load(topic);

      // Topic IDs in the listing will start with `topic-` so remove that first.
      const id: string = topic.attribs.id.slice(6);

      officialTopics.push({
        date: new Date($topic('time').attr('datetime')!),
        id,
        title: $topic('.topic-title > a').text(),
        url: `https://tild.es/${id}`
      });
    }

    const paginationButtons: CheerioElement[] = html('.pagination')
      .find('.page-item.btn')
      .toArray();

    // If all pagination buttons are "previous" buttons, stop the loop. This is
    // also the case when there are no buttons at all, since `every()` is true
    // for an empty array. A button without a child node is not a "previous"
    // button; the optional chaining keeps it from throwing.
    const onlyPreviousButtons = paginationButtons.every((value) =>
      value.firstChild?.data?.toLowerCase().includes('prev')
    );
    hasNextButton = !onlyPreviousButtons;

    // Only pause when another request is actually going to follow.
    if (hasNextButton) {
      await wait(timeout);
    }
  }

  await fsp.mkdir(join(__dirname, '../pages/data/'), {recursive: true});
  await fsp.writeFile(
    join(__dirname, '../pages/data/official-topics.json'),
    JSON.stringify(officialTopics, null, 2)
  );
}
|
|
|
|
|
|
|
|
/**
 * Pauses asynchronous execution for the given amount of time.
 *
 * @param timeout Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer has fired.
 */
export async function wait(timeout: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, timeout);
  });
}
|
|
|
|
|
|
|
|
/**
 * Selects the topics that were posted in a given month, oldest first.
 *
 * Every topic's `date` is re-wrapped with `new Date(...)` and written back to
 * the topic, so entries of `data` are modified in place. Presumably this is
 * there so dates that are still strings (e.g. data read back from the JSON
 * file) become real `Date` objects — verify against callers.
 *
 * Year and month are compared using the local time zone (`getFullYear()` and
 * `getMonth()`). Topics whose date cannot be parsed never match and are
 * therefore left out.
 *
 * @param data Topics to filter; their `date` fields are normalised in place.
 * @param year Full year to match, e.g. `2020`.
 * @param month Month to match, 1-based (January is `1`).
 * @returns A new array holding the matching topics (the same objects as in
 *   `data`), sorted chronologically from oldest to newest.
 */
export async function getTopicsFromMonth(
  data: OfficialTopic[],
  year: number,
  month: number
): Promise<OfficialTopic[]> {
  const topics: OfficialTopic[] = [];

  for (const topic of data) {
    topic.date = new Date(topic.date);

    const isSameYear = topic.date.getFullYear() === year;
    // `getMonth()` is 0-based while the `month` parameter is 1-based.
    const isSameMonth = topic.date.getMonth() + 1 === month;
    if (isSameYear && isSameMonth) {
      topics.push(topic);
    }
  }

  // Compare full timestamps instead of only the day of the month, so topics
  // posted on the same day also end up in chronological order.
  topics.sort((a, b) => a.date.getTime() - b.date.getTime());
  return topics;
}
|
|
|
|
|
|
|
|
// Only run the scraper when this file is executed directly, not when it is
// imported for its exports.
if (require.main === module) {
  entry().catch((error: unknown) => {
    // Report the failure and signal it through the exit code instead of
    // leaving an unhandled promise rejection behind.
    console.error(error);
    process.exitCode = 1;
  });
}
|