Skip to content

Commit

Permalink
Added support for remote storage (#10)
Browse files Browse the repository at this point in the history
See updates to README
  • Loading branch information
jorgecardleitao authored Nov 22, 2023
1 parent 148fbe9 commit 219618a
Show file tree
Hide file tree
Showing 11 changed files with 523 additions and 121 deletions.
18 changes: 17 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0", default_features = false }

# perform requests to the internet
reqwest = {version="*", features = ["blocking", "gzip"]}
reqwest = {version="*", features = ["gzip"]}

# create random string for cookies
rand = {version="*", default_features = false, features = ["std", "std_rng", "getrandom"]}
Expand All @@ -24,6 +24,22 @@ geoutils = {version="*", default_features = false}
# read airport names
csv = {version="*", default_features = false}

# async functions in traits
async-trait = "*"

# logging
log = "*"

# azure integration
azure_storage = "*"
azure_storage_blobs = "*"
azure_core = "*"
futures = "0.3"
bytes = "1.5"
async-recursion = "1.0"

[dev-dependencies]
tinytemplate = "1.1"
clap = { version = "4.4.6", features = ["derive"] }
tokio = {version="1.0", features=["rt", "macros", "rt-multi-thread"]}
simple_logger = "*"
43 changes: 41 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,52 @@
# Danish private flights
This repository contains a small application that generates a text based summary of
private jet's flight information targetted to a Danish audience.
This repository contains a CLI application that generates a text-based summary of
private jets' flight information targeted at a Danish audience.

It is supported by an Azure Blob storage container for caching data, thereby
reducing its impact on [https://adsbexchange.com/](https://adsbexchange.com/).

## Risk and impact

This code performs API calls to [https://adsbexchange.com/](https://adsbexchange.com/),
a production website of a company.

**Use critical thinking** when using this code, and consider how it impacts them.

We strongly recommend that, if you plan to perform large-scale analysis (e.g. over long time periods or many aircraft),
you reach out via an issue _beforehand_, so that we can work together
to cache all hits to [https://adsbexchange.com/](https://adsbexchange.com/)
on a horizontally scaled remote storage and thereby remove the impact of future calls
on adsbexchange.com.

All data cached is available on Azure blob storage:
* account: `privatejets`
* container: `data`

and has anonymous and public read permissions.

## How to use

1. Install Rust
2. run `cargo run --example single_day -- --tail-number "OY-GFS" --date "2023-10-20"`
3. open `OY-GFS_2023-10-20_0.md`

Step 2 has an optional argument, `--azure-sas-token`, specifying an Azure storage container SAS token
for account `privatejets`, container `data`.
When used, the cache is written to the remote container instead of to disk.

Finally, setting `--backend disk` ignores Azure's remote storage altogether and
only uses disk for caching (resulting in more cache misses and thus more
interactions with ADS-B exchange).

In general:
* Use the default parameters when creating ad-hoc stories
* Use `--azure-sas-token` when improving the database with new data.
* Use `--backend disk` when testing the caching system

As of today, the flag `--azure-sas-token` is only available when the code is executed
from `main`, as writing to the blob storage must be done through a controlled code base
that preserves data integrity.

## Assumptions

* Aircraft are uniquely identified by a tail number (aka registration number), e.g.
Expand Down
67 changes: 55 additions & 12 deletions examples/period.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use std::error::Error;

use clap::Parser;
use simple_logger::SimpleLogger;

use flights::{
emissions, load_aircraft_owners, load_aircrafts, load_owners, Aircraft, Class, Company, Fact,
};
Expand Down Expand Up @@ -29,15 +32,54 @@ fn render(context: &Context) -> Result<(), Box<dyn Error>> {

let rendered = tt.render("t", context)?;

println!("Story written to {path}");
log::info!("Story written to {path}");
std::fs::write(path, rendered)?;
Ok(())
}

fn main() -> Result<(), Box<dyn Error>> {
// Cache backend selectable from the command line via `--backend`.
#[derive(clap::ValueEnum, Debug, Clone)]
enum Backend {
// Paired with `None` for the Azure client in `main` when no SAS token is given.
Disk,
// Paired with an anonymous Azure client in `main` when no SAS token is given.
Azure,
}

// Command-line arguments of this example, parsed by clap's derive `Parser`.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Cli {
/// The Azure token
#[arg(short, long)]
azure_sas_token: Option<String>,
// Cache backend; defaults to `Backend::Azure` when the flag is omitted.
#[arg(short, long, value_enum, default_value_t=Backend::Azure)]
backend: Backend,
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
SimpleLogger::new()
.with_level(log::LevelFilter::Info)
.init()
.unwrap();

let cli = Cli::parse();

// optionally initialize Azure client
let client = match (cli.backend, cli.azure_sas_token) {
(Backend::Disk, None) => None,
(Backend::Azure, None) => Some(flights::fs_azure::initialize_anonymous(
"privatejets",
"data",
)),
(_, Some(token)) => Some(flights::fs_azure::initialize_sas(
&token,
"privatejets",
"data",
)?),
};

// load datasets to memory
let owners = load_owners()?;
let aircraft_owners = load_aircraft_owners()?;
let aircrafts = load_aircrafts()?;
let aircrafts = load_aircrafts(client.as_ref()).await?;

let to = time::OffsetDateTime::now_utc().date() - time::Duration::days(1);
let from = to - time::Duration::days(90);
Expand All @@ -50,30 +92,31 @@ fn main() -> Result<(), Box<dyn Error>> {
let aircraft_owner = aircraft_owners
.get(tail_number)
.ok_or_else(|| Into::<Box<dyn Error>>::into("Owner of tail number not found"))?;
println!("Aircraft owner: {}", aircraft_owner.owner);
log::info!("Aircraft owner: {}", aircraft_owner.owner);
let company = owners
.get(&aircraft_owner.owner)
.ok_or_else(|| Into::<Box<dyn Error>>::into("Owner not found"))?;
println!("Owner information found");
log::info!("Owner information found");
let owner = Fact {
claim: company.clone(),
source: aircraft_owner.source.clone(),
date: aircraft_owner.date.clone(),
};

let icao = &aircraft.icao_number;
println!("ICAO number: {}", icao);
log::info!("ICAO number: {}", icao);

let iter = flights::DateIter {
from,
to,
increment: time::Duration::days(1),
};

let mut positions = vec![];
for date in iter {
positions.extend(flights::positions(icao, &date, 1000.0)?);
}
let iter = iter.map(|date| flights::positions(icao, date, 1000.0, client.as_ref()));

let positions = futures::future::try_join_all(iter).await?;
let mut positions = positions.into_iter().flatten().collect::<Vec<_>>();
positions.sort_unstable_by_key(|x| x.datetime());

let legs = flights::legs(positions.into_iter());
let legs = legs
Expand All @@ -85,9 +128,9 @@ fn main() -> Result<(), Box<dyn Error>> {
// ignore legs that are too low, as they are likely noise
.filter(|leg| leg.maximum_altitude > 1000.0)
.collect::<Vec<_>>();
println!("number_of_legs: {}", legs.len());
log::info!("number_of_legs: {}", legs.len());
for leg in &legs {
println!(
log::info!(
"{},{},{},{},{},{},{},{},{}",
leg.from.datetime(),
leg.from.latitude(),
Expand Down
80 changes: 57 additions & 23 deletions examples/single_day.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use std::error::Error;

use clap::Parser;
use simple_logger::SimpleLogger;
use tinytemplate::TinyTemplate;

use flights::*;

use clap::Parser;

static TEMPLATE_NAME: &'static str = "t";

#[derive(serde::Serialize, serde::Deserialize, Debug)]
Expand All @@ -29,6 +29,12 @@ pub struct Context {
pub dane_years: String,
}

// Cache backend selectable from the command line via `--backend`.
#[derive(clap::ValueEnum, Debug, Clone)]
enum Backend {
// Paired with `None` for the Azure client in `main` when no SAS token is given.
Disk,
// Paired with an anonymous Azure client in `main` when no SAS token is given.
Azure,
}

/// Simple program to greet a person
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
Expand All @@ -37,26 +43,39 @@ struct Cli {
#[arg(short, long)]
tail_number: String,
/// The date in format `yyyy-mm-dd`
#[arg(short, long, value_parser = parse_date)]
date: time::Date,
/// The Azure token
#[arg(short, long)]
date: String,
azure_sas_token: Option<String>,
#[arg(short, long, value_enum, default_value_t=Backend::Azure)]
backend: Backend,
}

/// Parses a command-line argument in `yyyy-mm-dd` form into a [`time::Date`].
///
/// Used as the clap `value_parser` for the `date` argument.
///
/// # Errors
/// Returns the `time::error::Parse` produced by `time::Date::parse` when `arg`
/// does not match the `[year]-[month]-[day]` format.
fn parse_date(arg: &str) -> Result<time::Date, time::error::Parse> {
    let format = time::macros::format_description!("[year]-[month]-[day]");
    time::Date::parse(arg, format)
}

pub fn flight_date(
async fn flight_date(
tail_number: &str,
date: &time::Date,
date: time::Date,
owners: &Owners,
aircraft_owners: &AircraftOwners,
aircrafts: &Aircrafts,
client: Option<&fs_azure::ContainerClient>,
) -> Result<Vec<Event>, Box<dyn Error>> {
let airports = airports_cached()?;
let airports = airports_cached().await?;
let aircraft_owner = aircraft_owners
.get(tail_number)
.ok_or_else(|| Into::<Box<dyn Error>>::into("Owner of tail number not found"))?;
println!("Aircraft owner: {}", aircraft_owner.owner);
log::info!("Aircraft owner: {}", aircraft_owner.owner);
let company = owners
.get(&aircraft_owner.owner)
.ok_or_else(|| Into::<Box<dyn Error>>::into("Owner not found"))?;
println!("Owner information found");
log::info!("Owner information found");
let owner = Fact {
claim: company.clone(),
source: aircraft_owner.source.clone(),
Expand All @@ -67,17 +86,17 @@ pub fn flight_date(
.get(tail_number)
.ok_or_else(|| Into::<Box<dyn Error>>::into("Aircraft ICAO number not found"))?;
let icao = &aircraft.icao_number;
println!("ICAO number: {}", icao);
log::info!("ICAO number: {}", icao);

let positions = positions(icao, date, 1000.0)?;
let positions = positions(icao, date, 1000.0, client).await?;
let legs = legs(positions);

println!("Number of legs: {}", legs.len());
log::info!("Number of legs: {}", legs.len());

Ok(legs.into_iter().filter_map(|leg| {
let is_leg = matches!(leg.from, Position::Grounded{..}) & matches!(leg.to, Position::Grounded{..});
if !is_leg {
println!("{:?} -> {:?} skipped", leg.from, leg.to);
log::info!("{:?} -> {:?} skipped", leg.from, leg.to);
}
is_leg.then_some((leg.from, leg.to))
}).map(|(from, to)| {
Expand Down Expand Up @@ -132,39 +151,54 @@ fn process_leg(

let rendered = tt.render(TEMPLATE_NAME, &context)?;

println!("Story written to {path}");
log::info!("Story written to {path}");
std::fs::write(path, rendered)?;

Ok(())
}

pub fn main() -> Result<(), Box<dyn Error>> {
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
SimpleLogger::new()
.with_level(log::LevelFilter::Info)
.init()
.unwrap();

let cli = Cli::parse();

std::fs::create_dir_all("database")?;
// optionally initialize Azure client
let client = match (cli.backend, cli.azure_sas_token) {
(Backend::Disk, None) => None,
(Backend::Azure, None) => Some(flights::fs_azure::initialize_anonymous(
"privatejets",
"data",
)),
(_, Some(token)) => Some(flights::fs_azure::initialize_sas(
&token,
"privatejets",
"data",
)?),
};

let owners = load_owners()?;
let aircraft_owners = load_aircraft_owners()?;
let aircrafts = load_aircrafts()?;
let aircrafts = load_aircrafts(client.as_ref()).await?;

let dane_emissions_kg = Fact {
claim: 5100,
source: "https://ourworldindata.org/co2/country/denmark Denmark emits 5.1 t CO2/person/year in 2019.".to_string(),
date: "2023-10-08".to_string(),
};

let date = time::Date::parse(
&cli.date,
time::macros::format_description!("[year]-[month]-[day]"),
)?;

let mut events = flight_date(
&cli.tail_number,
&date,
cli.date,
&owners,
&aircraft_owners,
&aircrafts,
)?;
client.as_ref(),
)
.await?;

if events.len() == 2 && events[0].from_airport == events[1].to_airport {
let mut event = events.remove(0);
Expand Down
Loading

0 comments on commit 219618a

Please sign in to comment.