Skip to content

Commit

Permalink
feat: add excel CDR pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
yannforget committed Sep 27, 2024
1 parent b9419df commit 16a9cb3
Show file tree
Hide file tree
Showing 3 changed files with 321 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/push-generate-excel-cdr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Pushes the "generate_excel_cdr" pipeline to OpenHEXA whenever the pipeline
# sources or this workflow file change.
name: Push "generate excel CDR" pipeline

on:
  push:
    paths:
      - ".github/workflows/push-generate-excel-cdr.yml"
      - "generate_excel_cdr/**"

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      # v2 of checkout/setup-python runs on a retired Node runtime; use the
      # currently supported major versions.
      - name: Checkout
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Configure OpenHEXA CLI
        uses: blsq/openhexa-cli-action@v1
        with:
          workspace: "praps-f5e786"
          token: ${{ secrets.OH_TOKEN }}

      # The pushed version is named after the commit SHA and linked back to
      # the commit; --yes skips the interactive confirmation.
      - name: Push pipeline to OpenHEXA
        run: |
          openhexa pipelines push generate_excel_cdr \
            -n ${{ github.sha }} \
            -l "https://github.com/BLSQ/openhexa-pipelines-praps2/commit/${{ github.sha }}" \
            --yes
289 changes: 289 additions & 0 deletions generate_excel_cdr/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
from pathlib import Path

import polars as pl
import xlsxwriter
from openhexa.sdk import current_run, parameter, pipeline, workspace


@pipeline("generate-excel-cdr", name="generate-excel-cdr")
@parameter(
    "targets_fp",
    name="Cibles CDR",
    help="Fichier contenant les cibles CDR",
    type=str,
    default="data/targets/CDR_Targets.csv",
)
@parameter(
    "cdr_dir",
    name="Dossier CDR",
    help="Répertoire où le CDR est enregistré",
    type=str,
    default="data/cdr",
)
@parameter(
    "dst_file",
    name="Fichier de sortie",
    help="Fichier Excel de sortie",
    type=str,
    default="data/cdr/praps_cadre_de_resultat.xlsx",
)
def generate_excel_cdr(targets_fp: str, cdr_dir: str, dst_file: str):
    """Pipeline entry point: resolve the parameters and build the workbook.

    The three parameters are paths given relative to the workspace files
    directory; each one is joined onto that directory before being handed
    to ``generate``.
    """
    files_root = Path(workspace.files_path)
    generate(
        targets_fp=files_root / targets_fp,
        cdr_dir=files_root / cdr_dir,
        dst_file=files_root / dst_file,
    )


# Reporting entities shown in the workbook, in display order.
# Key: the label written in the "Pays" column of the sheet; it is also the
#      value matched against the `country` column of the indicators table.
# Value: the short code handed to get_value()/get_target(); get_target matches
#        it against the `Pays` column of the targets table.
# "Régional" is the only entry kept for indicators whose code starts with "Reg".
COUNTRIES = {
"Burkina-Faso": "BF",
"Mali": "ML",
"Mauritanie": "MR",
"Niger": "NE",
"Sénégal": "SN",
"Tchad": "TD",
"Régional": "REGIONAL",
}

def _component(name: str, *indicators: str) -> dict:
    """Build one component entry: its title and its ordered indicator codes."""
    return {"name": name, "indicators": list(indicators)}


# Layout of the results framework: each section has a short name, a header
# label and an ordered list of components; each component lists the indicator
# codes rendered under it, in display order.
SECTIONS = [
    {
        "name": "ODP",
        "label": "Indicateurs ODP par objectif/résultat",
        "components": [
            _component("Actifs soutenus et maintenus", "IR-1", "IR-2"),
            _component("Ecosystèmes soutenus et maintenus", "IR-3"),
            _component("Moyens soutenus et maintenus", "IR-4"),
        ],
    },
    {
        "name": "IRI",
        "label": "Indicateurs de résultats intermédiaires par composante",
        "components": [
            _component(
                "Amélioration de la santé animale et contrôle des médicaments vétérinaires",
                "IRI-1", "IRI-2", "IRI-3", "IRI-4", "Reg Int 1",
            ),
            _component(
                "Gestion durable des paysages et amélioration de la gouvernance",
                "IRI-5", "IRI-6", "IRI-7", "IRI-FA", "Reg Int 2", "Reg Int 3",
            ),
            _component(
                "Amélioration des chaînes de valeur du bétail",
                "IRI-8", "IRI-9", "IRI-10", "IRI-101", "IRI-102", "IRI-103", "Reg Int 4",
            ),
            _component(
                "Amélioration de l'inclusion sociale et économique, femmes et jeunes",
                "IRI-11", "IRI-111", "IRI-112", "IRI-113", "IRI-12",
            ),
            _component(
                "Amélioration de l'inclusion sociale et économique, femmes et jeunes (suite)",
                "IRI-13", "IRI-131", "IRI-132", "IRI-133",
            ),
            _component(
                "Coordination de projet, renforcement institutionnel, et prévention et réponse aux urgences",
                "IRI-14", "IRI-141", "IRI-15", "IRI-16", "IRI-17",
            ),
            _component(
                "Coordination de projet, renforcement institutionnel, et prévention et réponse aux urgences (suite)",
                "IRI-18", "IRI-181", "IRI-19", "Reg Int 5", "Reg Int 6", "Reg Int 7",
            ),
        ],
    },
]


def get_target(df: pl.DataFrame, indicator: str, country: str, year: int) -> int | float | None:
    """Get indicator target for a given country and year.

    Args:
        df: Targets table. Rows are matched on its "Code", "Pays" and "année"
            columns and the result is read from its "valeur" column.
        indicator: Indicator code to match against "Code".
        country: Value to match against "Pays".
        year: Year to match against "année".

    Returns:
        The "valeur" cell of the single matching row, or None when no row
        matches (the cell itself may also be null).

    Raises:
        pl.exceptions.TooManyRowsReturnedError: If more than one row matches;
            this is raised by ``DataFrame.row`` and is not handled here.
    """
    matches = (pl.col("Code") == indicator) & (pl.col("Pays") == country) & (pl.col("année") == year)
    try:
        row = df.row(by_predicate=matches, named=True)
    except pl.exceptions.NoRowsReturnedError:
        # A missing target is an expected situation, not an error.
        return None
    return row.get("valeur")


def get_value(df: pl.DataFrame, indicator: str, country: str, year: int) -> int | float | None:
    """Get indicator value for a given country and year.

    Args:
        df: Indicators table. Rows are matched on its "indicator_code",
            "country" and "year" columns.
        indicator: Indicator code to match against "indicator_code".
        country: Short country code (a value of ``COUNTRIES``); it is
            translated to the matching ``COUNTRIES`` key before filtering.
        year: Year to match against "year".

    Returns:
        The row's "cumulative_value" when it is not None, otherwise its
        "value"; None when no row matches.

    Raises:
        KeyError: If ``country`` is not one of the codes in ``COUNTRIES``.
        pl.exceptions.TooManyRowsReturnedError: If more than one row matches;
            this is raised by ``DataFrame.row`` and is not handled here.
    """
    # The "country" column is matched against the COUNTRIES key, while callers
    # pass the short code, so invert the mapping first.
    names_by_code = {code: name for name, code in COUNTRIES.items()}
    country_name = names_by_code[country]

    matches = (
        (pl.col("indicator_code") == indicator)
        & (pl.col("country") == country_name)
        & (pl.col("year") == year)
    )
    try:
        row = df.row(by_predicate=matches, named=True)
    except pl.exceptions.NoRowsReturnedError:
        return None

    # The cumulative figure takes precedence whenever it is available.
    cumulative = row.get("cumulative_value")
    return cumulative if cumulative is not None else row.get("value")


# Build the "Cadre de résultats" Excel workbook.
#
# Reads the targets CSV (targets_fp), the indicators parquet file and the
# indicators metadata CSV (both under cdr_dir), lays every indicator listed in
# SECTIONS out on a single worksheet, saves the workbook to dst_file and
# registers that file as an output of the current run.
#
# Sheet layout: columns 0-4 hold the indicator code, its label, an empty
# "Indicateur corporate" cell, the unit and the country; column 5 holds 2021;
# columns 6-17 hold a (year, %) pair for each year from 2022 to 2027.
# Each country takes two rows: the value on top and the target below.
#
# NOTE(review): indentation is not visible in this view of the file, so the
# loop level of the trailing `label = None` / `unit = None` / `row += 1`
# statements cannot be confirmed here; check them against the real file.
def generate(targets_fp: Path, cdr_dir: Path, dst_file: Path):
targets = pl.read_csv(targets_fp)
current_run.log_info(f"Loaded {len(targets)} targets")
indicators = pl.read_parquet(cdr_dir / "indicateurs.parquet")
# NaN becomes null so lookups return None; only rows with level <= 2 are kept.
indicators = indicators.fill_nan(None).filter(pl.col("level") <= 2)
current_run.log_info(f"Loaded {len(indicators)} indicators")
meta = pl.read_csv(cdr_dir / "indicators_metadata.csv")

# Make sure the output directory exists before xlsxwriter writes to it.
dst_file.parent.mkdir(parents=True, exist_ok=True)
workbook = xlsxwriter.Workbook(dst_file.absolute().as_posix())
sheet = workbook.add_worksheet("Cadre de résultats")

# Cell formats, one per kind of cell written below.
default_fmt = workbook.add_format({"text_wrap": 1, "align": "left", "valign": "vcenter"})

country_fmt = workbook.add_format({"text_wrap": 1, "valign": "vcenter", "align": "left"})

value_fmt = workbook.add_format({"font_size": 10, "align": "center", "valign": "vcenter"})

# Targets use the same layout as values but are greyed out.
target_fmt = workbook.add_format({"font_size": 10, "align": "center", "valign": "vcenter", "font_color": "#9e9e9e"})

section_header_fmt = workbook.add_format(
{"bold": 1, "fg_color": "#fff9c4", "text_wrap": 1, "align": "left", "valign": "vcenter"}
)

section_header_year_fmt = workbook.add_format(
{"bold": 1, "fg_color": "#fff9c4", "text_wrap": 1, "align": "center", "valign": "vcenter"}
)

component_fmt = workbook.add_format(
{"bold": 1, "fg_color": "#bbdefb", "text_wrap": 1, "align": "left", "valign": "vcenter"}
)

indicator_fmt = workbook.add_format({"bold": 1, "text_wrap": 1, "align": "left", "valign": "vcenter"})

# num_format 9 is Excel's built-in "0%" format, so a ratio of 0.5 shows as 50%.
ratio_fmt = workbook.add_format({"align": "center", "valign": "vcenter", "font_size": 10, "num_format": 9})

# `row` / `col` are the zero-based write cursor on the sheet.
row = 0
col = 0
for section in SECTIONS:
col = 0
# Section header row: 2021 has no "%" column, every later year has one.
sheet.write(row, col, section["name"], section_header_fmt)
sheet.write(row, col + 1, section["label"], section_header_fmt)
sheet.write(row, col + 2, "Indicateur corporate", section_header_fmt)
sheet.write(row, col + 3, "Unité de mesure", section_header_fmt)
sheet.write(row, col + 4, "Pays", section_header_fmt)
sheet.write(row, col + 5, "2021", section_header_year_fmt)
sheet.write(row, col + 6, "2022", section_header_year_fmt)
sheet.write(row, col + 7, "%", section_header_year_fmt)
sheet.write(row, col + 8, "2023", section_header_year_fmt)
sheet.write(row, col + 9, "%", section_header_year_fmt)
sheet.write(row, col + 10, "2024", section_header_year_fmt)
sheet.write(row, col + 11, "%", section_header_year_fmt)
sheet.write(row, col + 12, "2025", section_header_year_fmt)
sheet.write(row, col + 13, "%", section_header_year_fmt)
sheet.write(row, col + 14, "2026", section_header_year_fmt)
sheet.write(row, col + 15, "%", section_header_year_fmt)
sheet.write(row, col + 16, "2027", section_header_year_fmt)
sheet.write(row, col + 17, "%", section_header_year_fmt)
row += 1

for component in section["components"]:
col = 0
# Component title spans all 18 columns of the table.
sheet.merge_range(row, col, row, col + 17, component["name"], component_fmt)
row += 1

for indicator in component["indicators"]:
# Height of the indicator block: two rows (value + target) per country shown.
if indicator.startswith("Reg"):
nrows = 1 * 2 # regional only (value + target)
else:
nrows = 7 * 2 # all countries + regional (value + target)

col = 0

# Column 0: indicator code, merged over the whole indicator block.
sheet.merge_range(row, col, row + nrows - 1, col, indicator, indicator_fmt)
col += 1

# Label comes from the metadata table; left empty when the code is unknown there.
try:
label = meta.row(by_predicate=pl.col("code") == indicator, named=True)["designation"]
except pl.exceptions.NoRowsReturnedError:
label = None

# Unit is taken from the first targets row of this indicator, if any.
try:
unit = targets.filter(pl.col("Code") == indicator)["unite"][0]
except IndexError:
unit = None

# Column 1: indicator label.
sheet.merge_range(row, col, row + nrows - 1, col, label, default_fmt)
col += 1

# Column 2 ("Indicateur corporate") is written empty.
sheet.merge_range(row, col, row + nrows - 1, col, "", default_fmt)
col += 1

# Column 3: unit of measure.
sheet.merge_range(row, col, row + nrows - 1, col, unit, default_fmt)
col += 1

# Remember the "Pays" column so the cursor can be rewound after each country.
col_ = col
for country_name, country_code in COUNTRIES.items():
# "Reg" indicators only have the "Régional" entry.
if indicator.startswith("Reg") and country_name != "Régional":
continue

# Country name spans the value row and the target row.
sheet.merge_range(row, col, row + 1, col, country_name, country_fmt)
col += 1

# Remember the value row; the year loop steps down to the target row and back.
row_ = row
for year in range(2021, 2028):
value = get_value(indicators, indicator, country_code, year)
target = get_target(targets, indicator, country_code, year)

# Values of "Pourcentage" indicators are scaled by 100 before being written
# (presumably stored as fractions - TODO confirm).
if unit == "Pourcentage" and value is not None:
value *= 100

# Value on the upper row, target directly below it.
sheet.write(row, col, value, value_fmt)
row += 1
sheet.write(row, col, target, target_fmt)

# From 2022 on, the next column holds value / target, merged over both rows.
if year >= 2022:
col += 1
# No ratio when the value is missing or the target is missing or zero.
if value is not None and target:
ratio = round(value / target, 2)
else:
ratio = None
sheet.merge_range(row - 1, col, row, col, ratio, ratio_fmt)

# Back to the value row, one column to the right, for the next year.
row = row_
col += 1

# Next country: rewind to the "Pays" column and move down two rows.
col = col_
row += 2

# NOTE(review): label and unit are reassigned at the top of every indicator
# iteration, so this reset looks redundant - confirm its loop level.
label = None
unit = None

# NOTE(review): leaves one empty row; whether that is per component or per
# section depends on indentation that is not visible here.
row += 1

# Column widths for the code, label, corporate, unit, country and year/% columns.
sheet.set_column(0, 0, 15)
sheet.set_column(1, 1, 50)
sheet.set_column(2, 2, 10)
sheet.set_column(3, 3, 20)
sheet.set_column(4, 4, 30)
sheet.set_column(5, 17, 10)

# close() is the call that writes the .xlsx file to disk.
workbook.close()

current_run.log_info(f"Saved {dst_file.name}")
current_run.add_file_output(dst_file.absolute().as_posix())
1 change: 1 addition & 0 deletions generate_excel_cdr/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
xlsxwriter

0 comments on commit 16a9cb3

Please sign in to comment.