Initial code commit with basic functionality.
This commit is contained in:
parent
6f142f5c39
commit
c2965641be
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,41 @@
|
|||
[package]
|
||||
name = "tildes-statistics"
|
||||
description = "Statistics for Tildes.net."
|
||||
repository = "https://git.bauke.xyz/Bauke/tildes-statistics"
|
||||
license = "AGPL-3.0-or-later"
|
||||
version = "0.1.0"
|
||||
authors = ["Bauke <me@bauke.xyz>"]
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "tildes-statistics"
|
||||
path = "source/main.rs"
|
||||
|
||||
[dependencies]
|
||||
async-std = "1.12.0"
|
||||
chrono = "0.4.22"
|
||||
color-eyre = "0.6.2"
|
||||
dotenvy = "0.15.5"
|
||||
sea-orm-migration = "0.9.3"
|
||||
tracing = "0.1.36"
|
||||
|
||||
[dependencies.clap]
|
||||
features = ["derive"]
|
||||
version = "4.0.10"
|
||||
|
||||
[dependencies.sea-orm]
|
||||
features = ["macros", "mock", "runtime-async-std-rustls", "sqlx-postgres"]
|
||||
version = "0.9.3"
|
||||
|
||||
[dependencies.surf]
|
||||
default-features = false
|
||||
features = ["encoding", "h1-client-rustls", "middleware-logger"]
|
||||
version = "2.3.2"
|
||||
|
||||
[dependencies.tildes-parser]
|
||||
git = "https://git.bauke.xyz/Bauke/tildes-parser.git"
|
||||
rev = "08bf7ed"
|
||||
|
||||
[dependencies.tracing-subscriber]
|
||||
features = ["env-filter"]
|
||||
version = "0.3.15"
|
|
@ -0,0 +1,88 @@
|
|||
//! All CLI-related code.
|
||||
|
||||
use {
|
||||
chrono::NaiveDate,
|
||||
clap::{Parser, Subcommand},
|
||||
};
|
||||
|
||||
mod run;
|
||||
|
||||
pub use run::run;
|
||||
|
||||
/// The Clap Derive CLI struct.
|
||||
#[derive(Debug, Parser)]
|
||||
#[command(author, version, about)]
|
||||
#[command(propagate_version = true)]
|
||||
pub struct Cli {
|
||||
/// The CLI subcommand.
|
||||
#[command(subcommand)]
|
||||
pub command: MainSubcommands,
|
||||
|
||||
/// Don't run pending migrations automatically.
|
||||
#[clap(long)]
|
||||
pub no_migrate: bool,
|
||||
|
||||
/// Output SQL queries in logging.
|
||||
#[clap(long, global = true)]
|
||||
pub sql_logging: bool,
|
||||
}
|
||||
|
||||
/// Main CLI subcommands.
|
||||
#[derive(Debug, Subcommand)]
|
||||
pub enum MainSubcommands {
|
||||
/// Database migrations.
|
||||
Migrate {
|
||||
/// Database migrations.
|
||||
#[command(subcommand)]
|
||||
command: MigrateSubcommands,
|
||||
},
|
||||
|
||||
/// Snapshot management.
|
||||
Snapshot {
|
||||
/// Snapshot management.
|
||||
#[command(subcommand)]
|
||||
command: SnapshotSubcommands,
|
||||
},
|
||||
}
|
||||
|
||||
/// Migrate subcommands.
|
||||
#[derive(Debug, Subcommand)]
|
||||
pub enum MigrateSubcommands {
|
||||
/// Rollback applied migrations.
|
||||
Down {
|
||||
/// How many migrations to rollback.
|
||||
#[clap(default_value = "1")]
|
||||
amount: u32,
|
||||
},
|
||||
|
||||
/// View the status of all migrations.
|
||||
Status,
|
||||
|
||||
/// Apply pending migrations.
|
||||
Up {
|
||||
/// How many migrations to apply.
|
||||
#[clap(default_value = "1")]
|
||||
amount: u32,
|
||||
},
|
||||
}
|
||||
|
||||
/// Snapshot subcommands.
|
||||
#[derive(Debug, Subcommand)]
|
||||
pub enum SnapshotSubcommands {
|
||||
/// Create a snapshot for today.
|
||||
Create {
|
||||
/// If a snapshot for today already exists, remove it and remake it.
|
||||
#[clap(long)]
|
||||
force: bool,
|
||||
},
|
||||
|
||||
/// List available snapshots.
|
||||
List {},
|
||||
|
||||
/// Show a snapshot.
|
||||
Show {
|
||||
/// The date of the snapshot to show, defaults to today.
|
||||
#[clap(short, long)]
|
||||
date: Option<NaiveDate>,
|
||||
},
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
//! All logic for running the CLI.
|
||||
|
||||
use {
|
||||
clap::Parser, color_eyre::Result, sea_orm_migration::MigratorTrait,
|
||||
tracing::info,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
cli::{Cli, MainSubcommands, MigrateSubcommands, SnapshotSubcommands},
|
||||
group_data::get_all_by_snapshot,
|
||||
migrations::Migrator,
|
||||
snapshots::{self, get_by_date},
|
||||
utilities::{create_db, today},
|
||||
};
|
||||
|
||||
/// Run the CLI.
|
||||
pub async fn run() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
let db = create_db(cli.sql_logging).await?;
|
||||
|
||||
if !cli.no_migrate {
|
||||
Migrator::up(&db, None).await?;
|
||||
}
|
||||
|
||||
match cli.command {
|
||||
MainSubcommands::Migrate {
|
||||
command: migrate_command,
|
||||
} => match migrate_command {
|
||||
MigrateSubcommands::Down { amount } => {
|
||||
Migrator::down(&db, Some(amount)).await?;
|
||||
}
|
||||
|
||||
MigrateSubcommands::Status => {
|
||||
Migrator::status(&db).await?;
|
||||
}
|
||||
|
||||
MigrateSubcommands::Up { amount } => {
|
||||
Migrator::up(&db, Some(amount)).await?;
|
||||
}
|
||||
},
|
||||
|
||||
MainSubcommands::Snapshot {
|
||||
command: snapshot_command,
|
||||
} => match snapshot_command {
|
||||
SnapshotSubcommands::Create { force } => {
|
||||
snapshots::create(&db, force).await?;
|
||||
}
|
||||
|
||||
SnapshotSubcommands::List {} => {
|
||||
for snapshot in snapshots::get_all(&db).await? {
|
||||
info!("Snapshot {snapshot:?}")
|
||||
}
|
||||
}
|
||||
|
||||
SnapshotSubcommands::Show { date } => {
|
||||
let date = date.unwrap_or_else(today);
|
||||
let snapshot = if let Some(snapshot) = get_by_date(&db, date).await? {
|
||||
info!("Snapshot {snapshot:?}");
|
||||
snapshot
|
||||
} else {
|
||||
info!("No snapshot exists for {date}");
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let groups = get_all_by_snapshot(&db, &snapshot).await?;
|
||||
for group in groups {
|
||||
info!(
|
||||
id = group.id,
|
||||
name = group.name,
|
||||
subscribers = group.subscribers,
|
||||
);
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
//! SeaORM Entity. Generated by sea-orm-codegen 0.9.3
|
||||
|
||||
use sea_orm::entity::prelude::*;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
|
||||
#[sea_orm(table_name = "group_data")]
|
||||
pub struct Model {
|
||||
#[sea_orm(primary_key)]
|
||||
pub id: i64,
|
||||
#[sea_orm(column_type = "Text", nullable)]
|
||||
pub description: Option<String>,
|
||||
#[sea_orm(column_type = "Text")]
|
||||
pub name: String,
|
||||
pub snapshot_id: i64,
|
||||
pub subscribers: i64,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
|
||||
pub enum Relation {
|
||||
#[sea_orm(
|
||||
belongs_to = "super::snapshot::Entity",
|
||||
from = "Column::SnapshotId",
|
||||
to = "super::snapshot::Column::Id",
|
||||
on_update = "NoAction",
|
||||
on_delete = "Cascade"
|
||||
)]
|
||||
Snapshot,
|
||||
}
|
||||
|
||||
impl Related<super::snapshot::Entity> for Entity {
|
||||
fn to() -> RelationDef {
|
||||
Relation::Snapshot.def()
|
||||
}
|
||||
}
|
||||
|
||||
// No custom hooks: the trait's default behavior is used as-is.
impl ActiveModelBehavior for ActiveModel {}
|
|
@ -0,0 +1,6 @@
|
|||
//! SeaORM Entity. Generated by sea-orm-codegen 0.9.3
|
||||
|
||||
pub mod prelude;
|
||||
|
||||
pub mod group_data;
|
||||
pub mod snapshot;
|
|
@ -0,0 +1,4 @@
|
|||
//! SeaORM Entity. Generated by sea-orm-codegen 0.9.3
|
||||
|
||||
pub use super::group_data::Entity as GroupData;
|
||||
pub use super::snapshot::Entity as Snapshot;
|
|
@ -0,0 +1,25 @@
|
|||
//! SeaORM Entity. Generated by sea-orm-codegen 0.9.3
|
||||
|
||||
use sea_orm::entity::prelude::*;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
|
||||
#[sea_orm(table_name = "snapshot")]
|
||||
pub struct Model {
|
||||
#[sea_orm(primary_key)]
|
||||
pub id: i64,
|
||||
pub date: Date,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
|
||||
pub enum Relation {
|
||||
#[sea_orm(has_many = "super::group_data::Entity")]
|
||||
GroupData,
|
||||
}
|
||||
|
||||
impl Related<super::group_data::Entity> for Entity {
|
||||
fn to() -> RelationDef {
|
||||
Relation::GroupData.def()
|
||||
}
|
||||
}
|
||||
|
||||
// No custom hooks: the trait's default behavior is used as-is.
impl ActiveModelBehavior for ActiveModel {}
|
|
@ -0,0 +1,14 @@
|
|||
//! All logic for group data.
|
||||
|
||||
use {color_eyre::Result, sea_orm::prelude::*};
|
||||
|
||||
use crate::entities::{group_data, snapshot};
|
||||
|
||||
/// Get all group datas from a given snapshot.
|
||||
pub async fn get_all_by_snapshot(
|
||||
db: &DatabaseConnection,
|
||||
snapshot: &snapshot::Model,
|
||||
) -> Result<Vec<group_data::Model>> {
|
||||
let groups = snapshot.find_related(group_data::Entity).all(db).await?;
|
||||
Ok(groups)
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
//! # Tildes Statistics
|
||||
//!
|
||||
//! > **Statistics for Tildes.net.**
|
||||
|
||||
#![forbid(unsafe_code)]
|
||||
#![warn(missing_docs)]
|
||||
|
||||
use {
|
||||
color_eyre::{install, Result},
|
||||
dotenvy::dotenv,
|
||||
tracing_subscriber::filter::{EnvFilter, LevelFilter},
|
||||
};
|
||||
|
||||
pub mod cli;
|
||||
pub mod group_data;
|
||||
pub mod migrations;
|
||||
pub mod snapshots;
|
||||
pub mod utilities;
|
||||
|
||||
/// The entities code is auto-generated using `sea-orm-cli`. With a database
|
||||
/// and `.env` file setup, run the following command.
|
||||
///
|
||||
/// ```sh
|
||||
/// sea-orm-cli generate entity -o source/entities
|
||||
/// ```
|
||||
#[allow(missing_docs, clippy::derive_partial_eq_without_eq)]
|
||||
pub mod entities;
|
||||
|
||||
/// The main function.
|
||||
pub fn main() -> Result<()> {
|
||||
install()?;
|
||||
dotenv().ok();
|
||||
|
||||
let env_filter = EnvFilter::builder()
|
||||
.with_default_directive(LevelFilter::INFO.into())
|
||||
.from_env_lossy();
|
||||
tracing_subscriber::fmt().with_env_filter(env_filter).init();
|
||||
|
||||
async_std::task::block_on(async { cli::run().await })
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
//! The migration for initial setup.
|
||||
|
||||
use sea_orm_migration::prelude::*;
|
||||
|
||||
/// Marker type for the initial setup migration.
pub struct Migration;
|
||||
|
||||
impl MigrationName for Migration {
    /// The identifier of this migration.
    fn name(&self) -> &str {
        const NAME: &str = "m20221004_000001_initial_setup";
        NAME
    }
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MigrationTrait for Migration {
|
||||
async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
|
||||
manager
|
||||
.create_table(
|
||||
Table::create()
|
||||
.table(Snapshot::Table)
|
||||
.if_not_exists()
|
||||
.col(
|
||||
ColumnDef::new(Snapshot::Id)
|
||||
.big_integer()
|
||||
.not_null()
|
||||
.auto_increment()
|
||||
.primary_key(),
|
||||
)
|
||||
.col(ColumnDef::new(Snapshot::Date).date().not_null())
|
||||
.to_owned(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
manager
|
||||
.create_table(
|
||||
Table::create()
|
||||
.table(GroupData::Table)
|
||||
.if_not_exists()
|
||||
.foreign_key(
|
||||
ForeignKey::create()
|
||||
.from(GroupData::Table, GroupData::SnapshotId)
|
||||
.to(Snapshot::Table, Snapshot::Id)
|
||||
.on_delete(ForeignKeyAction::Cascade),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(GroupData::Id)
|
||||
.big_integer()
|
||||
.not_null()
|
||||
.auto_increment()
|
||||
.primary_key(),
|
||||
)
|
||||
.col(ColumnDef::new(GroupData::Description).text())
|
||||
.col(ColumnDef::new(GroupData::Name).text().not_null())
|
||||
.col(
|
||||
ColumnDef::new(GroupData::SnapshotId)
|
||||
.big_integer()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(GroupData::Subscribers)
|
||||
.big_integer()
|
||||
.not_null(),
|
||||
)
|
||||
.to_owned(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
|
||||
manager
|
||||
.drop_table(Table::drop().table(GroupData::Table).to_owned())
|
||||
.await?;
|
||||
|
||||
manager
|
||||
.drop_table(Table::drop().table(Snapshot::Table).to_owned())
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Iden)]
|
||||
enum Snapshot {
|
||||
Table,
|
||||
Id,
|
||||
Date,
|
||||
}
|
||||
|
||||
#[derive(Iden)]
|
||||
enum GroupData {
|
||||
Table,
|
||||
Id,
|
||||
SnapshotId,
|
||||
Name,
|
||||
Description,
|
||||
Subscribers,
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
//! Database migrations.
|
||||
|
||||
use sea_orm_migration::prelude::*;
|
||||
|
||||
mod m20221004_000001_initial_setup;
|
||||
|
||||
/// The [`sea_orm_migration`] migrator for this crate. See
/// [Migration (API)](https://www.sea-ql.org/sea-orm-tutorial/ch01-03-migration-api.html)
/// for details.
pub struct Migrator;
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MigratorTrait for Migrator {
|
||||
fn migrations() -> Vec<Box<dyn MigrationTrait>> {
|
||||
vec![Box::new(m20221004_000001_initial_setup::Migration)]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
//! Code for creating a new snapshot.
|
||||
|
||||
use {
|
||||
color_eyre::Result,
|
||||
sea_orm::{prelude::*, ActiveValue::*, TransactionTrait},
|
||||
tildes_parser::{Group, GroupList},
|
||||
tracing::{debug, info},
|
||||
};
|
||||
|
||||
use crate::{
|
||||
entities::{group_data, snapshot},
|
||||
snapshots::get_by_date,
|
||||
utilities::{create_http_client, download_html, today},
|
||||
};
|
||||
|
||||
/// Create a snapshot for today.
|
||||
pub async fn create(db: &DatabaseConnection, force: bool) -> Result<()> {
|
||||
let snapshot_date = today();
|
||||
match (force, get_by_date(db, snapshot_date).await?) {
|
||||
(true, Some(existing)) => {
|
||||
info!("Removing existing snapshot {:?}", existing);
|
||||
existing.delete(db).await?;
|
||||
}
|
||||
|
||||
(false, Some(existing)) => {
|
||||
info!("Snapshot for today already exists");
|
||||
info!("Use --force to override snapshot {:?}", existing);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
(_, None) => (),
|
||||
};
|
||||
|
||||
let transaction = db.begin().await?;
|
||||
let snapshot = snapshot::ActiveModel {
|
||||
date: Set(snapshot_date),
|
||||
..Default::default()
|
||||
}
|
||||
.insert(&transaction)
|
||||
.await?;
|
||||
|
||||
info!("Scraping data for snapshot {:?}", snapshot);
|
||||
|
||||
let http = create_http_client()?;
|
||||
let group_list = GroupList::from_html(
|
||||
&download_html(&http, "https://tildes.net/groups").await?,
|
||||
)?;
|
||||
|
||||
let mut groups_to_insert = vec![];
|
||||
|
||||
for summary in group_list.summaries {
|
||||
debug!(summary = ?summary);
|
||||
let group = Group::from_html(
|
||||
&download_html(&http, format!("https://tildes.net/{}", summary.name))
|
||||
.await?,
|
||||
)?;
|
||||
|
||||
debug!(group = ?group);
|
||||
groups_to_insert.push(group_data::ActiveModel {
|
||||
description: Set(group.description),
|
||||
name: Set(group.name),
|
||||
snapshot_id: Set(snapshot.id),
|
||||
subscribers: Set(group.subscribers.into()),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
|
||||
info!("Inserting {} groups", groups_to_insert.len());
|
||||
group_data::Entity::insert_many(groups_to_insert)
|
||||
.exec(&transaction)
|
||||
.await?;
|
||||
|
||||
transaction.commit().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
//! All logic for snapshots.
|
||||
|
||||
use {
|
||||
color_eyre::Result,
|
||||
sea_orm::{prelude::*, QueryOrder},
|
||||
};
|
||||
|
||||
use crate::entities::snapshot;
|
||||
|
||||
mod create;
|
||||
|
||||
pub use create::create;
|
||||
|
||||
/// Get a snapshot for a given date.
|
||||
pub async fn get_by_date(
|
||||
db: &DatabaseConnection,
|
||||
date: ChronoDate,
|
||||
) -> Result<Option<snapshot::Model>> {
|
||||
let existing = snapshot::Entity::find()
|
||||
.filter(snapshot::Column::Date.eq(date))
|
||||
.order_by_desc(snapshot::Column::Date)
|
||||
.one(db)
|
||||
.await?;
|
||||
|
||||
Ok(existing)
|
||||
}
|
||||
|
||||
/// Get all snapshots.
|
||||
pub async fn get_all(db: &DatabaseConnection) -> Result<Vec<snapshot::Model>> {
|
||||
let snapshots = snapshot::Entity::find().all(db).await?;
|
||||
|
||||
Ok(snapshots)
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
//! Helper functions and miscellaneous utilities.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use {
|
||||
async_std::task::sleep,
|
||||
chrono::{NaiveDate, Utc},
|
||||
color_eyre::{
|
||||
eyre::{eyre, WrapErr},
|
||||
Result,
|
||||
},
|
||||
sea_orm::{ConnectOptions, Database, DatabaseConnection},
|
||||
surf::{Client, Config},
|
||||
tildes_parser::Html,
|
||||
};
|
||||
|
||||
/// Creates the SeaQL [`DatabaseConnection`].
|
||||
pub async fn create_db(sql_logging: bool) -> Result<DatabaseConnection> {
|
||||
let database_url = get_env_var("DATABASE_URL")?;
|
||||
|
||||
let mut connect_options = ConnectOptions::new(database_url);
|
||||
connect_options.sqlx_logging(sql_logging);
|
||||
|
||||
Database::connect(connect_options)
|
||||
.await
|
||||
.wrap_err("Failed to connect to database")
|
||||
}
|
||||
|
||||
/// Creates the HTTP [`Client`].
|
||||
pub fn create_http_client() -> Result<Client> {
|
||||
let user_agent = get_env_var("USER_AGENT")?;
|
||||
let http: Client = Config::default()
|
||||
.add_header("User-Agent", user_agent)
|
||||
.map_err(|err| eyre!(err))?
|
||||
.try_into()?;
|
||||
|
||||
Ok(http)
|
||||
}
|
||||
|
||||
/// Shorthand to download a URL and parse it to [`Html`].
|
||||
pub async fn download_html(
|
||||
http: &Client,
|
||||
url: impl AsRef<str>,
|
||||
) -> Result<Html> {
|
||||
sleep(Duration::from_millis(500)).await;
|
||||
let html = http
|
||||
.get(url)
|
||||
.recv_string()
|
||||
.await
|
||||
.map_err(|err| eyre!(err))?;
|
||||
|
||||
Ok(Html::parse_document(&html))
|
||||
}
|
||||
|
||||
/// Shorthand for [`std::env::var`] with wrapped error message.
|
||||
pub fn get_env_var(key: &str) -> Result<String> {
|
||||
std::env::var(key).wrap_err(key.to_string())
|
||||
}
|
||||
|
||||
/// Create a [`NaiveDate`] for today.
|
||||
pub fn today() -> NaiveDate {
|
||||
Utc::now().date().naive_utc()
|
||||
}
|
Loading…
Reference in New Issue