diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 5e9481d..2ab76e1 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -29,5 +29,5 @@ jobs: - name: Install Rust run: rustup toolchain install stable - uses: Swatinem/rust-cache@v2 - - name: Generate code coverage - run: cargo run --example export_private_jets + - name: Run etl + run: cargo run --features="build-binary" --bin etl_private_jets diff --git a/Cargo.toml b/Cargo.toml index b788ba5..2b98df0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,10 +38,47 @@ aws-config = { version = "1.1.4", features = ["behavior-version-latest"] } aws-sdk-s3 = "*" aws-credential-types = "*" +clap = { version = "4.4.6", features = ["derive"], optional = true } +tokio = { version="1.0", features=["rt", "macros", "rt-multi-thread"], optional = true } +tinytemplate = { version = "1.1", optional = true } +itertools = { version = "*", optional = true } +num-format = { version = "*", optional = true } +simple_logger = { version = "*", optional = true } + [dev-dependencies] -tinytemplate = "1.1" -clap = { version = "4.4.6", features = ["derive"] } tokio = {version="1.0", features=["rt", "macros", "rt-multi-thread"]} -simple_logger = "*" -num-format = "*" itertools = "*" + +[features] +build-binary = [ + "clap", + "tokio", + "tinytemplate", + "itertools", + "num-format", + "simple_logger", +] + +[[bin]] +name = "etl_legs" +required-features = ["build-binary"] + +[[bin]] +name = "etl_positions" +required-features = ["build-binary"] + +[[bin]] +name = "etl_private_jets" +required-features = ["build-binary"] + +[[bin]] +name = "period" +required-features = ["build-binary"] + +[[bin]] +name = "country" +required-features = ["build-binary"] + +[[bin]] +name = "single_day" +required-features = ["build-binary"] diff --git a/README.md b/README.md index f97a6a2..25a3983 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ This repository contains both a Rust library and a set of [`examples/`](./exampl to perform actual calculations. To use one of such examples: 1. Install Rust -2. run `cargo run --example single_day -- --tail-number "OY-GFS" --date "2023-10-20"` +2. run `cargo run --features="build-binary" --bin single_day -- --tail-number "OY-GFS" --date "2023-10-20"` 3. open `OY-GFS_2023-10-20_0.md` Step 2. has an optional arguments, `--access-key`, `--secret-access-key`, specifying @@ -55,22 +55,22 @@ that preserves data integrity. ```bash # Story about Danish private jets that flew to Davos between two dates -cargo run --example country -- --from=2024-01-13 --to=2024-01-21 --country=denmark --location=davos +cargo run --features="build-binary" --bin country -- --from=2024-01-13 --to=2024-01-21 --country=denmark --location=davos # Story about Danish private jets that flew between two dates -cargo run --example country -- --from=2024-01-13 --to=2024-01-21 --country=denmark +cargo run --features="build-binary" --bin country -- --from=2024-01-13 --to=2024-01-21 --country=denmark # Story about Portuguese private jets that flew between two dates -cargo run --example country -- --from=2024-01-13 --to=2024-01-21 --country=portugal +cargo run --features="build-binary" --bin country -- --from=2024-01-13 --to=2024-01-21 --country=portugal # Story about German private jets that flew in 2023, where secret is on a file -cargo run --example country -- --from=2023-01-01 --to=2024-01-01 --country=germany --access-key=DO00AUDGL32QLFKV8CEP --secret-access-key=$(cat secrets.txt) +cargo run --features="build-binary" --bin country -- --from=2023-01-01 --to=2024-01-01 --country=germany --access-key=DO00AUDGL32QLFKV8CEP --secret-access-key=$(cat secrets.txt) # Build database of positions `[2020, 2023]` -cargo run --release --example etl_positions -- --access-key=DO00AUDGL32QLFKV8CEP --secret-access-key=$(cat secrets.txt) +cargo run --features="build-binary" --release --bin etl_positions -- --access-key=DO00AUDGL32QLFKV8CEP --secret-access-key=$(cat secrets.txt) # they are available at # https://private-jets.fra1.digitaloceanspaces.com/position/icao_number={icao}/month={year}-{month}/data.json # Build database of legs `[2020, 2023]` (over existing positions computed by `etl_positions`) -cargo run --release --example etl_legs -- --access-key=DO00AUDGL32QLFKV8CEP --secret-access-key=$(cat secrets.txt) +cargo run --features="build-binary" --release --bin etl_legs -- --access-key=DO00AUDGL32QLFKV8CEP --secret-access-key=$(cat secrets.txt) # they are available at # https://private-jets.fra1.digitaloceanspaces.com/leg/v1/data/icao_number={icao}/month={year}-{month}/data.csv ``` diff --git a/examples/cache_state.rs b/examples/cache_state.rs deleted file mode 100644 index 0e6a412..0000000 --- a/examples/cache_state.rs +++ /dev/null @@ -1,51 +0,0 @@ -/// Prints how much of the 2023 dataset has been computed. -use std::collections::HashSet; - -use itertools::Itertools; - -use flights::Aircraft; - -async fn private_jets( - client: Option<&flights::fs_s3::ContainerClient>, -) -> Result, Box> { - // load datasets to memory - let aircrafts = flights::load_aircrafts(client).await?; - let models = flights::load_private_jet_models()?; - - Ok(aircrafts - .into_iter() - // its primary use is to be a private jet - .filter_map(|(_, a)| models.contains_key(&a.model).then_some(a)) - .collect()) -} - -#[tokio::main(flavor = "multi_thread")] -async fn main() -> Result<(), Box> { - let client = flights::fs_s3::anonymous_client().await; - - let months = (2023..2024) - .cartesian_product(1..=12u8) - .map(|(year, month)| { - time::Date::from_calendar_date(year, time::Month::try_from(month).unwrap(), 1) - .expect("day 1 never errors") - }); - let private_jets = private_jets(Some(&client)).await?; - println!("jets : {}", private_jets.len()); - let required = private_jets - .into_iter() - .map(|a| a.icao_number) - .cartesian_product(months) - .collect::>(); - println!("required : {}", required.len()); - - let completed = flights::existing_months_positions(&client).await?; - println!("completed: {}", completed.len()); - println!( - "progress : {:.2}%", - (completed.len() as f64) / (required.len() as f64) * 100.0 - ); - let todo = required.difference(&completed).collect::>(); - println!("todo : {}", todo.len()); - - Ok(()) -} diff --git a/examples/clean_cache.rs b/examples/clean_cache.rs new file mode 100644 index 0000000..9f2d384 --- /dev/null +++ b/examples/clean_cache.rs @@ -0,0 +1,57 @@ +use clap::Parser; + +use flights::{fs_s3::ContainerClient, BlobStorageProvider}; +use futures::StreamExt; +use simple_logger::SimpleLogger; + +async fn delete(client: &ContainerClient) -> Result<(), Box> { + let tasks = client.list("position/icao_number=3b9b60").await?; + + log::info!("{}", tasks.len()); + let tasks = tasks + .into_iter() + .map(|blob| async move { client.delete(&blob).await }); + + futures::stream::iter(tasks) + // limit concurrent tasks + .buffered(200) + // continue if error + .map(|r| { + if let Err(e) = r { + log::error!("{e}"); + } + }) + .collect::>() + .await; + + Ok(()) +} + +#[derive(Parser, Debug)] +#[command(author, version)] +struct Cli { + /// The token to the remote storage + #[arg(long)] + access_key: String, + /// The token to the remote storage + #[arg(long)] + secret_access_key: String, + /// Optional country to fetch from (in ISO 3166); defaults to whole world + #[arg(long)] + country: Option, +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + SimpleLogger::new() + .with_level(log::LevelFilter::Info) + .init() + .unwrap(); + + let cli = Cli::parse(); + + let client = flights::fs_s3::client(cli.access_key, cli.secret_access_key).await; + + delete(&client).await?; + Ok(()) +} diff --git a/examples/country.md b/src/bin/country.md similarity index 100% rename from examples/country.md rename to src/bin/country.md diff --git a/examples/country.rs b/src/bin/country.rs similarity index 99% rename from examples/country.rs rename to src/bin/country.rs index 4e2ea39..e05bd76 100644 --- a/examples/country.rs +++ b/src/bin/country.rs @@ -14,7 +14,7 @@ use time::Date; fn render(context: &Context) -> Result<(), Box> { let path = format!("{}_story.md", context.country.name.to_lowercase()); - let template = std::fs::read_to_string("examples/country.md")?; + let template = std::fs::read_to_string("src/bin/country.md")?; let mut tt = tinytemplate::TinyTemplate::new(); tt.set_default_formatter(&tinytemplate::format_unescaped); diff --git a/examples/etl_legs.rs b/src/bin/etl_legs.rs similarity index 99% rename from examples/etl_legs.rs rename to src/bin/etl_legs.rs index 78572f6..ea5861b 100644 --- a/examples/etl_legs.rs +++ b/src/bin/etl_legs.rs @@ -16,7 +16,6 @@ static DATABASE: &'static str = "leg/v1/data/"; #[derive(serde::Serialize, serde::Deserialize)] struct LegOut { - icao_number: String, tail_number: String, model: String, #[serde(with = "time::serde::rfc3339")] @@ -73,7 +72,6 @@ fn transform<'a>( legs.into_iter().map(|leg| { let aircraft = private_jets.get(icao_number).expect(icao_number); LegOut { - icao_number: icao_number.to_string(), tail_number: aircraft.tail_number.to_string(), model: aircraft.model.to_string(), start: leg.from().datetime(), diff --git a/examples/etl_positions.rs b/src/bin/etl_positions.rs similarity index 100% rename from examples/etl_positions.rs rename to src/bin/etl_positions.rs diff --git a/examples/export_private_jets.rs b/src/bin/etl_private_jets.rs similarity index 100% rename from examples/export_private_jets.rs rename to src/bin/etl_private_jets.rs diff --git a/examples/period.rs b/src/bin/period.rs similarity index 98% rename from examples/period.rs rename to src/bin/period.rs index 8e8dd98..4f41e4f 100644 --- a/examples/period.rs +++ b/src/bin/period.rs @@ -24,7 +24,7 @@ pub struct Context { fn render(context: &Context) -> Result<(), Box> { let path = "story.md"; - let template = std::fs::read_to_string("examples/period_template.md")?; + let template = std::fs::read_to_string("src/bin/period_template.md")?; let mut tt = tinytemplate::TinyTemplate::new(); tt.set_default_formatter(&tinytemplate::format_unescaped); diff --git a/examples/period_template.md b/src/bin/period_template.md similarity index 100% rename from examples/period_template.md rename to src/bin/period_template.md diff --git a/examples/single_day.rs b/src/bin/single_day.rs similarity index 98% rename from examples/single_day.rs rename to src/bin/single_day.rs index 0b301b1..4930637 100644 --- a/examples/single_day.rs +++ b/src/bin/single_day.rs @@ -157,7 +157,7 @@ fn process_leg( dane_years, }; - let template = std::fs::read_to_string("examples/single_day_template.md")?; + let template = std::fs::read_to_string("src/bin/single_day_template.md")?; let mut tt = TinyTemplate::new(); tt.set_default_formatter(&tinytemplate::format_unescaped); diff --git a/examples/single_day_template.md b/src/bin/single_day_template.md similarity index 100% rename from examples/single_day_template.md rename to src/bin/single_day_template.md