Skip to content

Commit

Permalink
Added high-altitude
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecardleitao committed Aug 7, 2024
1 parent a9a0020 commit c69d1be
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 98 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,7 @@ required-features = ["build-binary"]
[[bin]]
name = "etl_aircrafts"
required-features = ["build-binary"]

[[bin]]
name = "altitude"
required-features = ["build-binary"]
101 changes: 18 additions & 83 deletions analysis.sql
Original file line number Diff line number Diff line change
@@ -1,86 +1,21 @@
SET s3_endpoint='fra1.digitaloceanspaces.com';
-- calculates the distance between two points on Earth (https://rosettacode.org/wiki/Haversine_formula)
CREATE MACRO distance(start_latitude, start_longitude, end_latitude, end_longitude) AS (
6372.8 * 2 * asin(sqrt(sin(radians(end_latitude - start_latitude) / 2)**2 + cos(radians(start_latitude)) * cos(radians(end_latitude)) * sin(radians(end_longitude - start_longitude) / 2)**2))
);
-- calculates emissions using M-emissions-private-jet
CREATE MACRO leg_tons_co2e(gph, leg_h) AS (
gph -- [gallon/h]
* 3.78541 -- liters / gallons [L/gallon]
* 0.8 -- liters to kg of jet fuel [L/kg]
* 3.16 -- emissions per kg [kg CO2 / kg jet fuel]
* 3.0 -- Radiative Forcing index [kg CO2e / kg CO2]
* 1.68 -- Radiative Forcing index [kg CO2e / kg CO2]
* leg_h -- leg time [h]
/ 1000 -- tons
);
-- used to avoid downloading the data on every run
--COPY
-- (SELECT *
-- FROM read_csv_auto("s3://private-jets/leg/v2/all/year=*/data.csv", header = true))
--TO 'results/leg/' (FORMAT 'parquet', PARTITION_BY "year");
-- COPY (
-- -- set of models for private use and corresponding gph averaged over sources
-- WITH "private_jet_model" AS (
-- SELECT model, AVG(gph) AS gph
-- FROM read_csv_auto("s3://private-jets/model/db/data.csv", header = true)
-- GROUP BY model
-- )
-- -- all legs from all icao numbers of private jets
-- , "leg" AS (
-- SELECT *, "end" - "start" AS duration
-- FROM 'results/leg/year=**/*.parquet'
-- )
-- , "aircraft" AS (
-- SELECT *
-- -- this uses a fixed time, but the correct way is to get all and compute the file closest to the day of the leg.
-- FROM read_csv_auto("s3://private-jets/aircraft/db/date=2024-06-25/data.csv", header = true)
-- )
-- , "private_jet" AS (
-- SELECT "aircraft".*, "gph"
-- FROM "aircraft"
-- JOIN "private_jet_model" ON "aircraft"."model" = "private_jet_model"."model"
-- )
-- , "private_jet_leg" AS (
-- SELECT "tail_number", "model", "country", "leg".*, "gph"
-- FROM "leg"
-- JOIN "private_jet" ON "leg"."icao_number" = "private_jet"."icao_number"
-- )
-- SELECT
-- "year",
-- country,
-- tail_number,
-- model,
-- COUNT(*) AS "flights",
-- SUM(epoch(duration)) / 3600 AS "flying_time_h",
-- SUM(distance("start_lat", "start_lon", "end_lat", "end_lon")) AS "flying_distance_km",
-- SUM("length") AS "flying_length_km",
-- SUM(leg_tons_co2e(gph, epoch(duration) / 3600)) AS "emissions_co2e_tons",
-- FROM "private_jet_leg"
-- WHERE
-- -- exceptions that seem to be incorrectly assigned in ADS-B db
-- (tail_number, year) NOT IN (('C-GSAT', 2019), ('C-GSAT', 2020), ('C-GSAT', 2021), ('C-FOGT', 2019))
-- AND tail_number NOT IN ('VQ-BIO', 'C-GPAT')
-- GROUP BY year, country, tail_number, model
-- ORDER BY "emissions_co2e_tons" DESC
-- )
-- TO 'all.csv' (FORMAT 'csv');

WITH "leg_dates" AS (
COPY (
SELECT
"icao_number",
CAST("start" AS DATE) AS "date",
FROM 'results/leg/year=**/*.parquet'
)
, "aircraft_db_dates" AS (
SELECT distinct("date") AS "date"
-- this uses a fixed time, but the correct way is to get all and compute the file closest to the day of the leg.
FROM read_csv_auto("s3://private-jets/aircraft/db/date=*/data.csv", header = true)
)
SELECT
"icao_number",
"leg_dates"."date",
--"leg_dates"."date" AS "d",
min(abs("aircraft_db_dates"."date" - "leg_dates"."date")) AS "diff",
FROM "aircraft_db_dates", "leg_dates"
GROUP BY "icao_number", "leg_dates"."date"
"aircraft_model",
"year",
COUNT(*) AS "legs",
SUM(epoch("end" - "start")) / 3600 AS "flight_time",
SUM("hours_above_30000") / (SUM(epoch("end" - "start")) / 3600) AS "ratio_above_30000",
SUM("hours_above_40000") / (SUM(epoch("end" - "start")) / 3600) AS "ratio_above_40000"
FROM read_csv_auto("s3://private-jets/leg/v2.1/all/year=2023/data.csv", header = true)
GROUP BY "aircraft_model", "year"
ORDER BY "ratio_above_30000" DESC
) TO 'altitude.csv' (FORMAT 'csv');

COPY (
SELECT model, AVG("gph") AS avg_gph, stddev_pop("gph") AS std_gph, stddev_pop("gph")/AVG("gph") AS ratio_gph
FROM read_csv_auto("s3://private-jets/model/db/data.csv", header = true)
GROUP BY model
ORDER BY "ratio_gph" DESC
) TO 'variation.csv' (FORMAT 'csv');
33 changes: 18 additions & 15 deletions src/bin/etl_legs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ struct LegOut {
end_altitude: f64,
/// The total two-dimensional length of the leg in km
length: f64,
/// The time above 30.000 feet
hours_above_30000: f64,
}

#[derive(serde::Serialize)]
Expand Down Expand Up @@ -90,6 +92,15 @@ fn transform<'a>(
end_lon: leg.to().longitude(),
end_altitude: leg.to().altitude(),
length: leg.length(),
hours_above_30000: leg
.positions()
.windows(2)
.filter_map(|w| {
(w[0].altitude() > 30000.0 && w[1].altitude() > 30000.0).then(|| {
(w[1].datetime() - w[0].datetime()).whole_seconds() as f64 / 60.0 / 60.0
})
})
.sum::<f64>(),
})
}

Expand Down Expand Up @@ -252,27 +263,16 @@ async fn main() -> Result<(), Box<dyn Error>> {
let client = flights::fs_s3::client(cli.access_key, cli.secret_access_key).await;
let client = &client;

log::info!("deleting");
futures::stream::iter(
client
.list(DATABASE)
.await?
.into_iter()
.map(|blob| async move { client.delete(&blob).await }),
)
.buffered(200)
.collect::<Vec<_>>()
.await;
log::info!("deleted");

log::info!("computing required tasks...");
let required =
flights::private_jets_in_month((2019..2025).rev(), maybe_country, client).await?;

log::info!("required : {}", required.len());

let completed = HashSet::new(); //;list(client).await?.into_iter().collect::<HashSet<_>>();
log::info!("computing completed tasks...");
let completed = list(client).await?.into_iter().collect::<HashSet<_>>();
log::info!("completed: {}", completed.len());

log::info!("computing ready tasks...");
let ready = flights::icao_to_trace::list_months_positions(client)
.await?
.into_iter()
Expand All @@ -284,6 +284,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
todo.sort_unstable_by_key(|(icao, date)| (date, icao));
log::info!("todo : {}", todo.len());

log::info!("executing todos...");
let tasks = todo.into_iter().map(|icao_month| async {
let aircraft = required.get(icao_month).expect("limited to required above");
let (icao_number, month) = icao_month;
Expand All @@ -294,6 +295,8 @@ async fn main() -> Result<(), Box<dyn Error>> {
.buffered(50)
.collect::<Vec<_>>()
.await;
log::info!("todos completed");

log::info!("aggregating...");
aggregate(required, client).await
}

0 comments on commit c69d1be

Please sign in to comment.