diff --git a/README.md b/README.md index 7b49370..190e4af 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,9 @@ and has anonymous and public read permissions. ## How to use +This repository contains both a Rust library and a set of [`examples/`](./examples) used +to perform actual calculations. To use one of such examples: + 1. Install Rust 2. run `cargo run --example single_day -- --tail-number "OY-GFS" --date "2023-10-20"` 3. open `OY-GFS_2023-10-20_0.md` @@ -47,106 +50,13 @@ As of today, the flag `--azure-sas-token` is only available when the code is exe from `main`, as writing to the blob storage must be done through a controlled code base that preserves data integrity. -## Assumptions - -* Aircrafts are uniquely identified by a tail number (aka registration number), e.g. - `OY-EUR`, by the owner of the aircraft. -* Civil aviation in Europe is mandated to have an ADS-B transponder turned on in-flight. -* Every aircraft flying has a unique transponder identifier (hereby denoted the ICAO number), - e.g. `4596B2`. -* At any given point in time, there is a one-to-one relationship between the ICAO number and a tail number (`OY-EUR -> 4596B2`) - -## Functional specification - -### FS-1 - Behaviour - -This solution is a CLI executed in a terminal on Windows, Linux or Mac OS. - -It receives two arguments, a tail number and a date, and writes a -markdown file with a description of: -* the owner of said tail number -* the legs that tail number flew on that date -* how many emissions (CO2e) were emitted -* how many emissions (CO2e) would have been emitted if a commercial flight would - have been taken instead. -* how many emissions per year (CO2e/y) a Dane emits -* The source of each of the claims. - -templated based on [`src/template.md`](./src/template.md). - -### FS-2 - Methodology - -The methodology used to support this solution is the follow: - -#### 1. 
Identify aircraft types whose primary use is private jet flying - -This was performed by a human, and consisted in going through different aircraft -manufacturers' websites and identifying the aircrafts that were advertised as used -for private flying. - -For example, `Dassault Falcon 2000` (`F2TH` in https://www.icao.int) is advertised as a -private jet on https://www.dassaultfalcon.com/aircraft/overview-of-the-fleet/. - -This is stored in [`./src/types.csv`](./src/types.csv). - -#### 2. Identify all aircrafts, ICAO number tail number and type - -This is performed automatically by the computer program and consists -in extracting the database of all aircrafts in https://globe.adsbexchange.com. - -Details are available in the source code, [src/aircraft_db.rs](./src/aircraft_db.rs). - -#### 3. Identify aircraft owner in denmark - -This was performed by a human, and consisted in extracting the ownership of the active -tail number from website https://www.danishaircraft.dk. - -For example `OY-CKK` results in 3 records, whose most recent, `OY-CKK(3)`, is registered -to owned by `Kirkbi Invest A/S`. - -This is stored in [`./src/owners.csv`](./src/owners.csv). - -It also consisted in extracting statements or slogans from these owners from their websites -to illustrate the incompatibility between owning a private jet and their sustainability goals. - -This is stored in [`./src/owners.json`](./src/owners.json). - -#### 4. Identify ICAO number's route in a day - -This is performed automatically by the computer program and consists in looking for -the historical route of the ICAO number in https://globe.adsbexchange.com. -This contains the sequence of `(latitude, longitude)` and other information. - -Details are available in the source code, [src/legs.rs](./src/legs.rs). - -#### 5. 
Identify legs of a route - -This is performed automatically by the computer program and consists in identifying -points during the flight that the aircraft is in mode "ground", and computing the leg -between two ground situations. - -Since some aircrafts only turn on the transponder while in flight, we set that below 1000 feet -the aircraft is considered on the ground. - -Details are available in the source code, [src/legs.rs](./src/legs.rs). - -#### 8. Compute emissions of leg - -This is performed automatically by the computer program and consists in using the same -metholodogy as used by myclimate.org, available [here](https://www.myclimate.org/en/information/about-myclimate/downloads/flight-emission-calculator/), to compute the emissions of a commercial -flight in first class. - -Details are available in the source code, [src/emissions.rs](./src/emissions.rs). - -#### 9. Write output +## Methodology -This is performed automatically by the computer program and consists in a template, available -in [`src/template.md`](./src/template.md), to produce a complete document. +The methodology used to extract information is available at [`methodology.md`](./methodology.md). -Details are available in the source code, [src/main.rs](./src/main.rs). +## Generated datasets -## Design +### Set of worldwide aircraft whose primary use is to be a private jet: -* Information can only be obtained from trustworthy publicly available sources that can -be easily verified. 
-* Main statements must be referenced against these sources +* [Data](https://privatejets.blob.core.windows.net/data/database/private_jets/2023/11/06/data.csv) +* [Description](https://privatejets.blob.core.windows.net/data/database/private_jets/2023/11/06/description.md) diff --git a/examples/dk_jets.rs b/examples/dk_jets.rs index 29c976b..b58bf33 100644 --- a/examples/dk_jets.rs +++ b/examples/dk_jets.rs @@ -67,7 +67,7 @@ async fn legs( let tasks = dates.map(|date| async move { Result::<_, Box>::Ok( - flights::positions(&aircraft.icao_number, date, 1000.0, client) + flights::positions(&aircraft.icao_number, date, client) .await? .collect::>(), ) @@ -75,11 +75,12 @@ async fn legs( let positions = futures::stream::iter(tasks) // limit concurrent tasks - .buffered(50) + .buffered(5) .try_collect::>() .await?; - Ok(flights::real_legs(positions.into_iter().flatten())) + log::info!("Computing legs {}", aircraft.icao_number); + Ok(flights::legs(positions.into_iter().flatten())) } #[tokio::main] diff --git a/examples/export_private_jets.rs b/examples/export_private_jets.rs new file mode 100644 index 0000000..d233b4a --- /dev/null +++ b/examples/export_private_jets.rs @@ -0,0 +1,96 @@ +use std::error::Error; + +use clap::Parser; +use simple_logger::SimpleLogger; + +use flights::BlobStorageProvider; +use flights::{load_aircraft_types, load_aircrafts}; + +#[derive(clap::ValueEnum, Debug, Clone)] +enum Backend { + Disk, + Azure, +} + +const ABOUT: &'static str = r#"Exports the database of all worldwide aircrafts whose primary use is to be a private jet to "data.csv" +and its description at `description.md` (in disk). +If `azure_sas_token` is provided, data is written to the public blob storage instead. 
+"#; + +#[derive(Parser, Debug)] +#[command(author, version, about = ABOUT)] +struct Cli { + /// The Azure token + #[arg(short, long)] + azure_sas_token: Option, + #[arg(short, long, value_enum, default_value_t=Backend::Azure)] + backend: Backend, +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + SimpleLogger::new() + .with_level(log::LevelFilter::Info) + .init() + .unwrap(); + + let cli = Cli::parse(); + + // optionally initialize Azure client + let client = match (cli.backend, cli.azure_sas_token.clone()) { + (Backend::Disk, None) => None, + (Backend::Azure, None) => Some(flights::fs_azure::initialize_anonymous( + "privatejets", + "data", + )), + (_, Some(token)) => Some(flights::fs_azure::initialize_sas( + &token, + "privatejets", + "data", + )?), + }; + + // load datasets to memory + let aircrafts = load_aircrafts(client.as_ref()).await?; + let types = load_aircraft_types()?; + + let private_jets = aircrafts + .values() + // its primary use is to be a private jet + .filter(|a| types.contains_key(&a.model)) + .collect::>(); + + let mut wtr = csv::Writer::from_writer(vec![]); + for jet in private_jets { + wtr.serialize(jet).unwrap() + } + let data_csv = wtr.into_inner().unwrap(); + let specification_md = r#"This dataset was created according to +[this methodology](https://github.com/jorgecardleitao/private-jets/methdology.md). + +It contains 3 columns: +* `icao_number`: The transponder identifier +* `tail_number`: The tail number of the aircraft +* `model`: The icao number of the aircraft type. It is only one of the ones + identified as private jet according to the methodology. + +Both `icao_number` and `tail_number` are unique keys (independently). 
+"#; + + if cli.azure_sas_token.is_some() { + let client = client.unwrap(); + client + .put("database/private_jets/2023/11/06/data.csv", data_csv) + .await?; + client + .put( + "database/private_jets/2023/11/06/description.md", + specification_md.as_bytes().to_vec(), + ) + .await?; + } else { + std::fs::write("data.csv", data_csv)?; + std::fs::write("description.md", specification_md.as_bytes())?; + } + Ok(()) +} diff --git a/examples/period.rs b/examples/period.rs index a447bdc..a6cbcfa 100644 --- a/examples/period.rs +++ b/examples/period.rs @@ -112,13 +112,13 @@ async fn main() -> Result<(), Box> { increment: time::Duration::days(1), }; - let iter = iter.map(|date| flights::positions(icao, date, 1000.0, client.as_ref())); + let iter = iter.map(|date| flights::positions(icao, date, client.as_ref())); let positions = futures::future::try_join_all(iter).await?; let mut positions = positions.into_iter().flatten().collect::>(); positions.sort_unstable_by_key(|x| x.datetime()); - let legs = flights::real_legs(positions.into_iter()); + let legs = flights::legs(positions.into_iter()); log::info!("number_of_legs: {}", legs.len()); for leg in &legs { log::info!( diff --git a/examples/single_day.rs b/examples/single_day.rs index 667c4cd..240b1f9 100644 --- a/examples/single_day.rs +++ b/examples/single_day.rs @@ -35,9 +35,18 @@ enum Backend { Azure, } -/// Simple program to greet a person +const ABOUT: &'static str = r#"Writes a markdown file per leg (named `{tail-number}_{date}_{leg}.md`) on disk with a description of: +* the owner of said tail number +* the from and to +* how many emissions (CO2e) were emitted +* how many emissions (CO2e) would have been emitted if a commercial flight would + have been taken instead. 
+* how many emissions per year (CO2e/y) a Dane emits +* The source of each of the claims +"#; + #[derive(Parser, Debug)] -#[command(author, version, about, long_about = None)] +#[command(author, version, about = ABOUT)] struct Cli { /// The tail number #[arg(short, long)] @@ -45,9 +54,10 @@ struct Cli { /// The date in format `yyyy-mm-dd` #[arg(short, long, value_parser = parse_date)] date: time::Date, - /// The Azure token + /// Optional azure token to write any new data to the blob storage #[arg(short, long)] azure_sas_token: Option, + /// The backend to read cached data from. #[arg(short, long, value_enum, default_value_t=Backend::Azure)] backend: Backend, } @@ -88,7 +98,7 @@ async fn flight_date( let icao = &aircraft.icao_number; log::info!("ICAO number: {}", icao); - let positions = positions(icao, date, 1000.0, client).await?; + let positions = positions(icao, date, client).await?; let legs = legs(positions); log::info!("Number of legs: {}", legs.len()); diff --git a/methodology.md b/methodology.md new file mode 100644 index 0000000..ae62e6e --- /dev/null +++ b/methodology.md @@ -0,0 +1,90 @@ +# Methodology + +This document describes the general methodology used by this solution. + +## Assumptions + +* Aircrafts are uniquely identified by a tail number (aka registration number), e.g. + `OY-EUR`, by the owner of the aircraft. +* Civil aviation in most of the world is mandated to have an ADS-B transponder turned on in-flight. +* Every aircraft flying has a unique transponder identifier (hereby denoted the ICAO number), + e.g. `4596B2`. +* At any given point in time, there is a one-to-one relationship between the ICAO number and a tail number (`OY-EUR -> 4596B2`) + +## Design + +* Information can only be obtained from trustworthy publicly available sources that can +be easily verified. +* Statements must be referenced against either existing sources or this methodology. 
+ +## Methodology + +The methodology used to support this solution is the following: + +### M-1: Identify all aircrafts, ICAO number, tail number and type + +This is performed automatically by the solution and consists +in extracting the database of all aircrafts in https://globe.adsbexchange.com. + +Details are available in the source code, [src/aircraft_db.rs](./src/aircraft_db.rs). + +### M-2: Identify aircraft types whose primary use is private flying + +This was performed by a human, and consisted in going through different aircraft +manufacturers' websites and identifying the aircrafts that were advertised as used +for private flying. + +For example, `Dassault Falcon 2000` (`F2TH` in https://www.icao.int) is advertised as a +private jet on https://www.dassaultfalcon.com/aircraft/overview-of-the-fleet/. + +This is stored in [`./src/types.csv`](./src/types.csv). + +**NOTE**: not every use of a model whose primary use is to be a private jet is as a +private jet. For example, private jets are often used for emergency services. + +### M-3: Identify ICAO number's route in a day + +This is performed automatically by the computer program and consists in looking for +the historical route of the ICAO number in https://globe.adsbexchange.com. +This contains the sequence of `(latitude, longitude)` and other information. + +Each position is assigned the state `Grounded` if +the transponder returns "grounded" or the (barometric) altitude is lower than 1000 feet, +else it is assigned the state `Flying`. + +Source code is available at [src/icao_to_trace.rs](./src/icao_to_trace.rs). + +### M-4: Identify legs of a route + +This is performed automatically by the computer program and consists in identifying +legs: contiguous sequences of positions that start and end in the state `Grounded`.
+ +Furthermore, only legs fulfilling the below conditions are considered: + +* Its distance is higher than 3 km +* Its duration is longer than 5 minutes + +Source code is available at [src/legs.rs](./src/legs.rs). + +### M-5: Compute emissions of leg in a commercial flight + +This is performed automatically by the computer program and consists in using the same +methodology as used by myclimate.org, available [here](https://www.myclimate.org/en/information/about-myclimate/downloads/flight-emission-calculator/), to compute the emissions of a commercial +flight in first class. + +Details are available in the source code, [src/emissions.rs](./src/emissions.rs). + +### M-6: Identify aircraft owner in Denmark + +This was performed by a human, and consisted in extracting the ownership of the active +tail number from website https://www.danishaircraft.dk. + +For example `OY-CKK` results in 3 records, whose most recent, `OY-CKK(3)`, is registered +as owned by `Kirkbi Invest A/S`. + +This is stored in [`./src/owners.csv`](./src/owners.csv). + +It also consisted in extracting statements or slogans from these owners from their websites +to illustrate the incompatibility between owning a private jet and their sustainability goals. + +This is stored in [`./src/owners.json`](./src/owners.json).
diff --git a/src/fs_azure.rs b/src/fs_azure.rs index b289656..f3fbd99 100644 --- a/src/fs_azure.rs +++ b/src/fs_azure.rs @@ -69,8 +69,6 @@ fn get_code(e: &azure_core::Error) -> Option { Some(a.status()) } -pub struct AzureContainer<'a>(pub &'a ContainerClient); - #[async_trait::async_trait] impl BlobStorageProvider for ContainerClient { type Error = Error; diff --git a/src/icao_to_trace.rs b/src/icao_to_trace.rs index 248c682..3fd4dda 100644 --- a/src/icao_to_trace.rs +++ b/src/icao_to_trace.rs @@ -177,12 +177,11 @@ pub async fn trace_cached( Ok(std::mem::take(trace)) } -/// Returns an iterator of [`Position`] over the trace of `icao` on day `date` assuming that -/// a flight below `threshold` feet is grounded. +/// Returns an iterator of [`Position`] over the trace of `icao` on day `date` according +/// to the [methodology `M-3`](../methodology.md). pub async fn positions( icao_number: &str, date: time::Date, - threshold: f64, client: Option<&fs_azure::ContainerClient>, ) -> Result, Box> { use time::ext::NumericalDuration; @@ -211,7 +210,8 @@ pub async fn positions( entry[3] .as_f64() .and_then(|altitude| { - Some(if altitude < threshold { + // < 1000 feet => grounded, see M-3 + Some(if altitude < 1000.0 { Position::Grounded { icao: icao.clone(), datetime, diff --git a/src/legs.rs b/src/legs.rs index d175a9a..b76d1ca 100644 --- a/src/legs.rs +++ b/src/legs.rs @@ -23,7 +23,7 @@ impl Leg { } /// Returns a set of [`Leg`]s from a sequence of [`Position`]s. 
-pub fn legs(mut positions: impl Iterator) -> Vec { +fn all_legs(mut positions: impl Iterator) -> Vec { let Some(mut prev_position) = positions.next() else { return vec![]; }; @@ -70,18 +70,14 @@ pub fn legs(mut positions: impl Iterator) -> Vec { legs } -/// Computes legs that, under the below heuristic, is a real leg: -/// * Its maximum altitude is higher than 1000 feet -/// * Its distance is higher than 3km -/// * Its duration is longer than 5m -pub fn real_legs(positions: impl Iterator) -> Vec { - legs(positions) +/// Returns a set of [`Leg`]s from a sequence of [`Position`]s according +/// to the [methodology `M-4`](../methodology.md). +pub fn legs(positions: impl Iterator) -> Vec { + all_legs(positions) .into_iter() // ignore legs that are too fast, as they are likely noise .filter(|leg| leg.duration() > time::Duration::minutes(5)) // ignore legs that are too short, as they are likely noise .filter(|leg| leg.distance() > 3.0) - // ignore legs that are too low, as they are likely noise - .filter(|leg| leg.maximum_altitude > 1000.0) .collect() } diff --git a/src/lib.rs b/src/lib.rs index ed87b2c..731e8c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,7 @@ pub use aircraft_owners::*; pub use aircraft_types::*; pub use airports::*; pub use emissions::*; +pub use fs::BlobStorageProvider; pub use icao_to_trace::*; pub use legs::*; pub use model::*; diff --git a/tests/it/main.rs b/tests/it/main.rs index f24bac8..d7cf79b 100644 --- a/tests/it/main.rs +++ b/tests/it/main.rs @@ -7,7 +7,7 @@ use time::macros::date; /// https://globe.adsbexchange.com/?icao=45d2ed&lat=54.128&lon=9.185&zoom=5.0&showTrace=2023-10-13 #[tokio::test] async fn acceptance_legs() -> Result<(), Box> { - let positions = flights::positions("45d2ed", date!(2023 - 10 - 13), 1000.0, None).await?; + let positions = flights::positions("45d2ed", date!(2023 - 10 - 13), None).await?; let legs = flights::legs(positions); assert_eq!(legs.len(), 2); @@ -47,8 +47,8 @@ fn acceptance_test_emissions() { 
#[tokio::test] async fn legs_() -> Result<(), Box> { - let positions = flights::positions("459cd3", date!(2023 - 11 - 17), 1000.0, None).await?; - let legs = flights::real_legs(positions); + let positions = flights::positions("459cd3", date!(2023 - 11 - 17), None).await?; + let legs = flights::legs(positions); // same as ads-b computes: https://globe.adsbexchange.com/?icao=459cd3&lat=53.265&lon=8.038&zoom=6.5&showTrace=2023-11-17 assert_eq!(legs.len(), 5);