Skip to content

Commit

Permalink
fix(compute_indicators): don't write intermediary dataframes
Browse files (browse the repository at this point in the history)
  • Loading branch information
yannforget committed Oct 20, 2024
1 parent 2fa1316 commit f91aaa5
Showing 1 changed file with 0 additions and 5 deletions.
5 changes: 0 additions & 5 deletions compute_indicators/pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -63,23 +63,18 @@ def compute(survey_dir: Path, cdr_dir: Path):

 df = indicators.join_metadata(df, indicators_metadata=pl.read_csv(Path(cdr_dir, "indicators_metadata.csv")))
 current_run.log_info(f"Joined metadata ({len(df)} values)")
-df.write_parquet(Path(cdr_dir, "df1.parquet"))

 df = indicators.spatial_aggregation(df)
 current_run.log_info(f"Applied spatial aggregation ({len(df)} values)")
-df.write_parquet(Path(cdr_dir, "df2.parquet"))

 df = indicators.fill_missing_values(df)
 current_run.log_info(f"Filled missing values ({len(df)} values)")
-df.write_parquet(Path(cdr_dir, "df3.parquet"))

 df = indicators.cumulate_indicators(df)
 current_run.log_info(f"Cumulated indicators ({len(df)} values)")
-df.write_parquet(Path(cdr_dir, "df4.parquet"))

 df = indicators.retro_compatibility(df)
 current_run.log_info(f"Modified columns for retro-compatibility ({len(df)} values)")
-df.write_parquet(Path(cdr_dir, "df5.parquet"))

fp_parquet = Path(cdr_dir, "indicateurs.parquet")
fp_xlsx = Path(cdr_dir, "indicateurs.xlsx")
Expand Down

0 comments on commit f91aaa5

Please sign in to comment.