Initial code commit with basic functionality.

This commit is contained in:
Bauke 2022-10-06 14:54:25 +02:00
parent 6f142f5c39
commit c2965641be
Signed by: Bauke
GPG Key ID: C1C0F29952BCF558
15 changed files with 4139 additions and 0 deletions

3520
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

41
Cargo.toml Normal file
View File

@ -0,0 +1,41 @@
# Package metadata for the `tildes-statistics` crate.
[package]
name = "tildes-statistics"
description = "Statistics for Tildes.net."
repository = "https://git.bauke.xyz/Bauke/tildes-statistics"
license = "AGPL-3.0-or-later"
version = "0.1.0"
authors = ["Bauke <me@bauke.xyz>"]
edition = "2021"
# The single binary target; its entry point lives under `source/`, so the
# path is spelled out explicitly.
[[bin]]
name = "tildes-statistics"
path = "source/main.rs"
# Dependencies that only need a version requirement.
[dependencies]
async-std = "1.12.0"
chrono = "0.4.22"
color-eyre = "0.6.2"
dotenvy = "0.15.5"
sea-orm-migration = "0.9.3"
tracing = "0.1.36"
# Command-line parsing, using the derive API.
[dependencies.clap]
features = ["derive"]
version = "4.0.10"
# Database access: PostgreSQL through sqlx on the async-std runtime with
# rustls.
# NOTE(review): the `mock` feature is enabled for regular builds - confirm it
# is needed outside of tests.
[dependencies.sea-orm]
features = ["macros", "mock", "runtime-async-std-rustls", "sqlx-postgres"]
version = "0.9.3"
# HTTP client with default features disabled; only the listed features are
# enabled.
[dependencies.surf]
default-features = false
features = ["encoding", "h1-client-rustls", "middleware-logger"]
version = "2.3.2"
# Parser for Tildes pages, pinned to a specific Git revision.
[dependencies.tildes-parser]
git = "https://git.bauke.xyz/Bauke/tildes-parser.git"
rev = "08bf7ed"
# Log output, with filtering configured through `env-filter`.
[dependencies.tracing-subscriber]
features = ["env-filter"]
version = "0.3.15"

88
source/cli/mod.rs Normal file
View File

@ -0,0 +1,88 @@
//! All CLI-related code.
use {
chrono::NaiveDate,
clap::{Parser, Subcommand},
};
mod run;
pub use run::run;
/// The Clap Derive CLI struct.
#[derive(Debug, Parser)]
#[command(author, version, about)]
#[command(propagate_version = true)]
pub struct Cli {
/// The CLI subcommand.
#[command(subcommand)]
pub command: MainSubcommands,
/// Don't run pending migrations automatically.
#[clap(long)]
pub no_migrate: bool,
/// Output SQL queries in logging.
#[clap(long, global = true)]
pub sql_logging: bool,
}
/// Main CLI subcommands.
// Each variant only wraps a nested subcommand enum; `cli::run` matches on
// those nested enums to perform the actual work.
// NOTE: clap uses the `///` comments below as help text, so they are
// user-facing strings and are kept as-is.
#[derive(Debug, Subcommand)]
pub enum MainSubcommands {
/// Database migrations.
Migrate {
/// Database migrations.
#[command(subcommand)]
command: MigrateSubcommands,
},
/// Snapshot management.
Snapshot {
/// Snapshot management.
#[command(subcommand)]
command: SnapshotSubcommands,
},
}
/// Migrate subcommands.
#[derive(Debug, Subcommand)]
pub enum MigrateSubcommands {
/// Rollback applied migrations.
Down {
/// How many migrations to rollback.
#[clap(default_value = "1")]
amount: u32,
},
/// View the status of all migrations.
Status,
/// Apply pending migrations.
Up {
/// How many migrations to apply.
#[clap(default_value = "1")]
amount: u32,
},
}
/// Snapshot subcommands.
#[derive(Debug, Subcommand)]
pub enum SnapshotSubcommands {
/// Create a snapshot for today.
Create {
/// If a snapshot for today already exists, remove it and remake it.
#[clap(long)]
force: bool,
},
/// List available snapshots.
List {},
/// Show a snapshot.
Show {
/// The date of the snapshot to show, defaults to today.
#[clap(short, long)]
date: Option<NaiveDate>,
},
}

78
source/cli/run.rs Normal file
View File

@ -0,0 +1,78 @@
//! All logic for running the CLI.
use {
clap::Parser, color_eyre::Result, sea_orm_migration::MigratorTrait,
tracing::info,
};
use crate::{
cli::{Cli, MainSubcommands, MigrateSubcommands, SnapshotSubcommands},
group_data::get_all_by_snapshot,
migrations::Migrator,
snapshots::{self, get_by_date},
utilities::{create_db, today},
};
/// Run the CLI.
///
/// Parses the command-line arguments, connects to the database and then
/// dispatches to the selected subcommand.
///
/// Pending migrations are applied automatically before the subcommand runs,
/// unless `--no-migrate` was passed or the subcommand is one of the `migrate`
/// subcommands, which manage migrations explicitly.
///
/// # Errors
///
/// Returns an error when the database connection, a migration, or the
/// selected subcommand fails.
pub async fn run() -> Result<()> {
  let cli = Cli::parse();
  let db = create_db(cli.sql_logging).await?;

  // Auto-migrating ahead of an explicit `migrate` subcommand would apply
  // every pending migration first: `migrate status` could never show a
  // pending migration and `migrate up <amount>` would ignore its amount.
  let is_migrate_command =
    matches!(cli.command, MainSubcommands::Migrate { .. });
  if !cli.no_migrate && !is_migrate_command {
    Migrator::up(&db, None).await?;
  }

  match cli.command {
    MainSubcommands::Migrate {
      command: migrate_command,
    } => match migrate_command {
      MigrateSubcommands::Down { amount } => {
        Migrator::down(&db, Some(amount)).await?;
      }

      MigrateSubcommands::Status => {
        Migrator::status(&db).await?;
      }

      MigrateSubcommands::Up { amount } => {
        Migrator::up(&db, Some(amount)).await?;
      }
    },

    MainSubcommands::Snapshot {
      command: snapshot_command,
    } => match snapshot_command {
      SnapshotSubcommands::Create { force } => {
        snapshots::create(&db, force).await?;
      }

      SnapshotSubcommands::List {} => {
        for snapshot in snapshots::get_all(&db).await? {
          info!("Snapshot {snapshot:?}");
        }
      }

      SnapshotSubcommands::Show { date } => {
        // Without an explicit date, show today's snapshot.
        let date = date.unwrap_or_else(today);

        let snapshot = match get_by_date(&db, date).await? {
          Some(snapshot) => snapshot,
          None => {
            info!("No snapshot exists for {date}");
            return Ok(());
          }
        };
        info!("Snapshot {snapshot:?}");

        for group in get_all_by_snapshot(&db, &snapshot).await? {
          info!(
            id = group.id,
            name = group.name,
            subscribers = group.subscribers,
          );
        }
      }
    },
  }

  Ok(())
}

View File

@ -0,0 +1,36 @@
//! SeaORM Entity. Generated by sea-orm-codegen 0.9.3
use sea_orm::entity::prelude::*;
// One row of the `group_data` table: the data stored for a single group as
// part of one snapshot.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
#[sea_orm(table_name = "group_data")]
pub struct Model {
// Primary key of the row.
#[sea_orm(primary_key)]
pub id: i64,
// Description text; the column is nullable, hence the `Option`.
#[sea_orm(column_type = "Text", nullable)]
pub description: Option<String>,
// Name of the group.
#[sea_orm(column_type = "Text")]
pub name: String,
// Id of the snapshot row this entry belongs to (see `Relation::Snapshot`).
pub snapshot_id: i64,
// Subscriber count stored for the group.
pub subscribers: i64,
}
// Relations from `group_data` to other entities.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
// `group_data.snapshot_id` points at `snapshot.id`. Deleting the snapshot
// cascades to this table; updates take no action.
#[sea_orm(
belongs_to = "super::snapshot::Entity",
from = "Column::SnapshotId",
to = "super::snapshot::Column::Id",
on_update = "NoAction",
on_delete = "Cascade"
)]
Snapshot,
}
// Links `group_data` to `snapshot` through `Relation::Snapshot`.
impl Related<super::snapshot::Entity> for Entity {
// Returns the relation definition pointing at the owning snapshot.
fn to() -> RelationDef {
Relation::Snapshot.def()
}
}
// Empty impl: no `ActiveModelBehavior` methods are overridden for this entity.
impl ActiveModelBehavior for ActiveModel {}

6
source/entities/mod.rs Normal file
View File

@ -0,0 +1,6 @@
//! SeaORM Entity. Generated by sea-orm-codegen 0.9.3
pub mod prelude;
pub mod group_data;
pub mod snapshot;

View File

@ -0,0 +1,4 @@
//! SeaORM Entity. Generated by sea-orm-codegen 0.9.3
pub use super::group_data::Entity as GroupData;
pub use super::snapshot::Entity as Snapshot;

View File

@ -0,0 +1,25 @@
//! SeaORM Entity. Generated by sea-orm-codegen 0.9.3
use sea_orm::entity::prelude::*;
// One row of the `snapshot` table.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
#[sea_orm(table_name = "snapshot")]
pub struct Model {
// Primary key of the row.
#[sea_orm(primary_key)]
pub id: i64,
// The date the snapshot was made for.
pub date: Date,
}
// Relations from `snapshot` to other entities.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
// A snapshot has many `group_data` rows.
#[sea_orm(has_many = "super::group_data::Entity")]
GroupData,
}
// Links `snapshot` to `group_data` through `Relation::GroupData`.
impl Related<super::group_data::Entity> for Entity {
// Returns the relation definition pointing at the snapshot's group data.
fn to() -> RelationDef {
Relation::GroupData.def()
}
}
// Empty impl: no `ActiveModelBehavior` methods are overridden for this entity.
impl ActiveModelBehavior for ActiveModel {}

14
source/group_data/mod.rs Normal file
View File

@ -0,0 +1,14 @@
//! All logic for group datas.
use {color_eyre::Result, sea_orm::prelude::*};
use crate::entities::{group_data, snapshot};
/// Fetch every group data row that is related to the given snapshot.
///
/// # Errors
///
/// Returns an error when the database query fails.
pub async fn get_all_by_snapshot(
  db: &DatabaseConnection,
  snapshot: &snapshot::Model,
) -> Result<Vec<group_data::Model>> {
  let related = snapshot.find_related(group_data::Entity);
  Ok(related.all(db).await?)
}

40
source/main.rs Normal file
View File

@ -0,0 +1,40 @@
//! # Tildes Statistics
//!
//! > **Statistics for Tildes.net.**
#![forbid(unsafe_code)]
#![warn(missing_docs)]
use {
color_eyre::{install, Result},
dotenvy::dotenv,
tracing_subscriber::filter::{EnvFilter, LevelFilter},
};
pub mod cli;
pub mod group_data;
pub mod migrations;
pub mod snapshots;
pub mod utilities;
/// The entities code is auto-generated using `sea-orm-cli`. With a database
/// and `.env` file setup, run the following command.
///
/// ```sh
/// sea-orm-cli generate entity -o source/entities
/// ```
#[allow(missing_docs, clippy::derive_partial_eq_without_eq)]
pub mod entities;
/// Program entry point.
///
/// Installs the `color_eyre` report handlers, loads a `.env` file when one is
/// present (a failure to load it is ignored), sets up the tracing subscriber
/// with an environment-based filter that defaults to `INFO`, and then runs the
/// CLI to completion on the async-std executor.
pub fn main() -> Result<()> {
  install()?;

  // Load `.env` before the filter is built so it can influence the filter.
  dotenv().ok();

  let default_directive = LevelFilter::INFO.into();
  let env_filter = EnvFilter::builder()
    .with_default_directive(default_directive)
    .from_env_lossy();
  tracing_subscriber::fmt().with_env_filter(env_filter).init();

  async_std::task::block_on(cli::run())
}

View File

@ -0,0 +1,98 @@
//! The migration for initial setup.
use sea_orm_migration::prelude::*;
/// The initial setup migration, which creates the tables identified by the
/// `Snapshot` and `GroupData` enums at the bottom of this file.
pub struct Migration;
impl MigrationName for Migration {
/// Returns the fixed name of this migration.
// NOTE(review): presumably this name is what gets recorded for applied
// migrations, so renaming it would affect existing databases - confirm.
fn name(&self) -> &str {
"m20221004_000001_initial_setup"
}
}
#[async_trait::async_trait]
impl MigrationTrait for Migration {
async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
manager
.create_table(
Table::create()
.table(Snapshot::Table)
.if_not_exists()
.col(
ColumnDef::new(Snapshot::Id)
.big_integer()
.not_null()
.auto_increment()
.primary_key(),
)
.col(ColumnDef::new(Snapshot::Date).date().not_null())
.to_owned(),
)
.await?;
manager
.create_table(
Table::create()
.table(GroupData::Table)
.if_not_exists()
.foreign_key(
ForeignKey::create()
.from(GroupData::Table, GroupData::SnapshotId)
.to(Snapshot::Table, Snapshot::Id)
.on_delete(ForeignKeyAction::Cascade),
)
.col(
ColumnDef::new(GroupData::Id)
.big_integer()
.not_null()
.auto_increment()
.primary_key(),
)
.col(ColumnDef::new(GroupData::Description).text())
.col(ColumnDef::new(GroupData::Name).text().not_null())
.col(
ColumnDef::new(GroupData::SnapshotId)
.big_integer()
.not_null(),
)
.col(
ColumnDef::new(GroupData::Subscribers)
.big_integer()
.not_null(),
)
.to_owned(),
)
.await?;
Ok(())
}
async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
manager
.drop_table(Table::drop().table(GroupData::Table).to_owned())
.await?;
manager
.drop_table(Table::drop().table(Snapshot::Table).to_owned())
.await?;
Ok(())
}
}
// Identifiers for the snapshot table and its columns, as used by the
// migration in this file.
#[derive(Iden)]
enum Snapshot {
// The table itself.
Table,
// Auto-incrementing big integer primary key.
Id,
// Non-null date column.
Date,
}
// Identifiers for the group data table and its columns, as used by the
// migration in this file.
#[derive(Iden)]
enum GroupData {
// The table itself.
Table,
// Auto-incrementing big integer primary key.
Id,
// Non-null big integer referencing the snapshot table's id column.
SnapshotId,
// Non-null text column.
Name,
// Text column without a not-null constraint.
Description,
// Non-null big integer column.
Subscribers,
}

17
source/migrations/mod.rs Normal file
View File

@ -0,0 +1,17 @@
//! Database migrations.
use sea_orm_migration::prelude::*;
mod m20221004_000001_initial_setup;
/// The [`sea_orm_migration`] migrator for this crate, see
/// [Migration (API)](https://www.sea-ql.org/sea-orm-tutorial/ch01-03-migration-api.html)
/// for details.
pub struct Migrator;

#[async_trait::async_trait]
impl MigratorTrait for Migrator {
  /// Lists every migration of this crate; currently only the initial setup.
  fn migrations() -> Vec<Box<dyn MigrationTrait>> {
    let initial_setup: Box<dyn MigrationTrait> =
      Box::new(m20221004_000001_initial_setup::Migration);
    vec![initial_setup]
  }
}

View File

@ -0,0 +1,76 @@
//! Code for creating a new snapshot.
use {
color_eyre::Result,
sea_orm::{prelude::*, ActiveValue::*, TransactionTrait},
tildes_parser::{Group, GroupList},
tracing::{debug, info},
};
use crate::{
entities::{group_data, snapshot},
snapshots::get_by_date,
utilities::{create_http_client, download_html, today},
};
/// Create a snapshot for today.
///
/// When a snapshot for today already exists nothing is changed, unless `force`
/// is set, in which case the existing snapshot is replaced.
///
/// Removing an existing snapshot, inserting the new snapshot row and inserting
/// its group data all happen inside a single transaction that is only
/// committed at the very end. A failure while scraping or inserting therefore
/// leaves a previously stored snapshot untouched.
///
/// # Errors
///
/// Returns an error when a database operation fails, when the HTTP client
/// cannot be created, or when a page cannot be downloaded or parsed.
pub async fn create(db: &DatabaseConnection, force: bool) -> Result<()> {
  let snapshot_date = today();
  let existing = get_by_date(db, snapshot_date).await?;

  if let (false, Some(existing)) = (force, &existing) {
    info!("Snapshot for today already exists");
    info!("Use --force to override snapshot {:?}", existing);
    return Ok(());
  }

  let transaction = db.begin().await?;

  // Delete through the transaction rather than the plain connection, so the
  // old snapshot is only gone once the replacement has been committed.
  if let Some(existing) = existing {
    info!("Removing existing snapshot {:?}", existing);
    existing.delete(&transaction).await?;
  }

  let snapshot = snapshot::ActiveModel {
    date: Set(snapshot_date),
    ..Default::default()
  }
  .insert(&transaction)
  .await?;

  info!("Scraping data for snapshot {:?}", snapshot);

  let http = create_http_client()?;
  let group_list = GroupList::from_html(
    &download_html(&http, "https://tildes.net/groups").await?,
  )?;

  // Every group has its own page that is downloaded and parsed separately.
  let mut groups_to_insert = vec![];
  for summary in group_list.summaries {
    debug!(summary = ?summary);
    let group = Group::from_html(
      &download_html(&http, format!("https://tildes.net/{}", summary.name))
        .await?,
    )?;

    debug!(group = ?group);
    groups_to_insert.push(group_data::ActiveModel {
      description: Set(group.description),
      name: Set(group.name),
      snapshot_id: Set(snapshot.id),
      subscribers: Set(group.subscribers.into()),
      ..Default::default()
    });
  }

  info!("Inserting {} groups", groups_to_insert.len());
  group_data::Entity::insert_many(groups_to_insert)
    .exec(&transaction)
    .await?;

  transaction.commit().await?;

  Ok(())
}

33
source/snapshots/mod.rs Normal file
View File

@ -0,0 +1,33 @@
//! All logic for snapshots.
use {
color_eyre::Result,
sea_orm::{prelude::*, QueryOrder},
};
use crate::entities::snapshot;
mod create;
pub use create::create;
/// Get a snapshot for a given date.
///
/// Returns `None` when no snapshot exists for `date`. Should more than one
/// snapshot exist for the same date, the one with the highest id is returned.
///
/// # Errors
///
/// Returns an error when the database query fails.
pub async fn get_by_date(
  db: &DatabaseConnection,
  date: ChronoDate,
) -> Result<Option<snapshot::Model>> {
  let existing = snapshot::Entity::find()
    .filter(snapshot::Column::Date.eq(date))
    // Every matching row has the same date, so ordering by date would not
    // decide anything. Ordering by id keeps the result deterministic.
    .order_by_desc(snapshot::Column::Id)
    .one(db)
    .await?;

  Ok(existing)
}
/// Fetch every stored snapshot.
///
/// # Errors
///
/// Returns an error when the database query fails.
pub async fn get_all(db: &DatabaseConnection) -> Result<Vec<snapshot::Model>> {
  Ok(snapshot::Entity::find().all(db).await?)
}

63
source/utilities.rs Normal file
View File

@ -0,0 +1,63 @@
//! Helper functions and miscellaneous utilities.
use std::time::Duration;
use {
async_std::task::sleep,
chrono::{NaiveDate, Utc},
color_eyre::{
eyre::{eyre, WrapErr},
Result,
},
sea_orm::{ConnectOptions, Database, DatabaseConnection},
surf::{Client, Config},
tildes_parser::Html,
};
/// Connects to the database named by the `DATABASE_URL` environment variable
/// and returns the SeaQL [`DatabaseConnection`].
///
/// `sql_logging` is passed on to the connection options as the sqlx logging
/// setting.
///
/// # Errors
///
/// Returns an error when `DATABASE_URL` cannot be read or the connection
/// cannot be established.
pub async fn create_db(sql_logging: bool) -> Result<DatabaseConnection> {
  let mut options = ConnectOptions::new(get_env_var("DATABASE_URL")?);
  options.sqlx_logging(sql_logging);

  let connection = Database::connect(options).await;
  connection.wrap_err("Failed to connect to database")
}
/// Creates the HTTP [`Client`].
pub fn create_http_client() -> Result<Client> {
let user_agent = get_env_var("USER_AGENT")?;
let http: Client = Config::default()
.add_header("User-Agent", user_agent)
.map_err(|err| eyre!(err))?
.try_into()?;
Ok(http)
}
/// Shorthand to download a URL and parse it to [`Html`].
///
/// Every call first waits for 500 milliseconds, which spaces out consecutive
/// requests.
///
/// # Errors
///
/// Returns an error when the request fails, when the response has a
/// non-success HTTP status, or when the body cannot be read as a string.
pub async fn download_html(
  http: &Client,
  url: impl AsRef<str>,
) -> Result<Html> {
  let url = url.as_ref();

  sleep(Duration::from_millis(500)).await;

  let mut response = http.get(url).send().await.map_err(|err| eyre!(err))?;

  // Without this check the body of an error page would be handed to the
  // parser as if it were the requested page.
  let status = response.status();
  if !status.is_success() {
    return Err(eyre!("Request to {} failed with status {}", url, status));
  }

  let html = response
    .body_string()
    .await
    .map_err(|err| eyre!(err))?;

  Ok(Html::parse_document(&html))
}
/// Shorthand for [`std::env::var`] with wrapped error message.
pub fn get_env_var(key: &str) -> Result<String> {
std::env::var(key).wrap_err(key.to_string())
}
/// Returns the current date in UTC as a [`NaiveDate`].
pub fn today() -> NaiveDate {
  let now = Utc::now();
  now.naive_utc().date()
}