import {promises as fsp} from 'fs';
import {join} from 'path';
import cheerio from 'cheerio';
import got, {Response} from 'got';

/** A single topic scraped from the `~tildes.official` topic listing. */
export interface OfficialTopic {
  /** Parsed from the `datetime` attribute of the topic's `<time>` element. */
  date: Date;
  /** The listing element's `id` attribute with its `topic-` prefix removed. */
  id: string;
  /** Text content of the topic's title link. */
  title: string;
  /** Short link to the topic, in the form `https://tild.es/<id>`. */
  url: string;
}

/**
 * Scrapes every page of the `~tildes.official` listing and writes the
 * collected topics to `../pages/data/official-topics.json` (relative to this
 * file) as pretty-printed JSON.
 *
 * Pages are fetched one at a time, newest first, waiting `timeout`
 * milliseconds between requests.
 */
async function entry(): Promise<void> {
  const timeout = 250;
  const officialTopics: OfficialTopic[] = [];
  const baseURL = 'https://tildes.net/~tildes.official?order=new&per_page=100';

  // ID of the last topic collected so far, used as the `&after=` cursor.
  let lastID: string | undefined;
  let hasNextButton = true;

  while (hasNextButton) {
    // On the first iteration there is no ID for `&after=` yet, so the base
    // URL is requested as-is.
    let getURL: string = baseURL;
    if (lastID !== undefined) {
      getURL += `&after=${lastID}`;
    }

    // Download the page and load the HTML into Cheerio.
    const response: Response<string> = await got(getURL, {
      headers: {
        'User-Agent': 'Tildes Issue Log scraping Official Topics'
      }
    });
    const html: CheerioStatic = cheerio.load(response.body);

    // Grab all topics from the listing.
    const topics: CheerioElement[] = html(
      '.topic-listing > li > article'
    ).toArray();

    // A page without topics cannot advance the `&after=` cursor, so
    // continuing would request the same URL forever. Stop instead.
    if (topics.length === 0) {
      break;
    }

    for (const topic of topics) {
      const $topic: CheerioStatic = cheerio.load(topic);
      // Topic IDs in the listing start with `topic-`, so remove that first.
      const id: string = topic.attribs.id.slice(6);
      officialTopics.push({
        date: new Date($topic('time').attr('datetime')!),
        id,
        title: $topic('.topic-title > a').text(),
        url: `https://tild.es/${id}`
      });
      lastID = id;
    }

    const paginationButtons: CheerioElement[] = html('.pagination')
      .find('.page-item.btn')
      .toArray();

    // If all pagination buttons are "previous" buttons (or there are none),
    // stop the loop. `firstChild` is accessed optionally because a button
    // without any child node has no text to inspect.
    if (
      paginationButtons.every((value) =>
        value.firstChild?.data?.toLowerCase().includes('prev')
      )
    ) {
      hasNextButton = false;
    }

    await wait(timeout);
  }

  const dataDirectory = join(__dirname, '../pages/data/');
  await fsp.mkdir(dataDirectory, {recursive: true});
  await fsp.writeFile(
    join(dataDirectory, 'official-topics.json'),
    JSON.stringify(officialTopics, null, 2)
  );
}

/**
 * Resolves after the given delay.
 *
 * @param timeout - Delay in milliseconds, passed straight to `setTimeout`.
 */
export async function wait(timeout: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, timeout));
}

/**
 * Returns the topics from `data` that were posted in the given year and
 * month, sorted from oldest to newest.
 *
 * Year and month are compared using the local-time `Date` getters.
 *
 * Side effect: every entry's `date` in `data` is replaced in place with
 * `new Date(date)`.
 * NOTE(review): presumably this revives string dates read back from the JSON
 * file; confirm against callers before removing the mutation.
 *
 * @param data - Topics to filter; the `date` fields are normalised in place.
 * @param year - Full year, compared against `getFullYear()`.
 * @param month - 1-based month (1 = January), compared against `getMonth() + 1`.
 * @returns A new array holding the matching entries of `data`.
 */
export async function getTopicsFromMonth(
  data: OfficialTopic[],
  year: number,
  month: number
): Promise<OfficialTopic[]> {
  const topics: OfficialTopic[] = [];

  for (const topic of data) {
    topic.date = new Date(topic.date);
    if (
      topic.date.getFullYear() === year &&
      topic.date.getMonth() + 1 === month
    ) {
      topics.push(topic);
    }
  }

  // Compare full timestamps rather than only the day of the month, so topics
  // posted on the same day are also ordered chronologically.
  topics.sort((a, b) => a.date.getTime() - b.date.getTime());
  return topics;
}

if (require.main === module) {
  // Report a failed scrape explicitly instead of leaving the rejection
  // unhandled, and make the process exit with a failure status.
  void entry().catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
  });
}