diff --git a/mappings/mast/groups.json b/mappings/mast/groups.json new file mode 100644 index 0000000..5d9b338 --- /dev/null +++ b/mappings/mast/groups.json @@ -0,0 +1,26 @@ +{ + "abm": {"name": "bolometer", "imas": "bolometer"}, + "act": {"name": "charge_exchange", "imas": "charge_exchange"}, + "aga": {"name": "gas_injection", "imas": "gas_injection"}, + "ahx": {"name": "hard_x_rays", "imas": "hard_x_rays"}, + "ait": {"name": "camera_ir", "imas": "camera_ir"}, + "alp": {"name": "langmuir_probes", "imas": "langmuir_probes"}, + "anb": {"name": "nbi", "imas": "nbi"}, + "ane": {"name": "interferometer", "imas": "interferometer"}, + "amb": {"name": "magnetics_b", "imas": "magnetics"}, + "amc": {"name": "magnetics", "imas": "magnetics"}, + "asm": {"name": "magnetics_saddle", "imas": "magnetics"}, + "atm": {"name": "thomson_scattering", "imas": "thomson_scattering"}, + "ayc": {"name": "thomson_scattering", "imas": "thomson_scattering"}, + "aye": {"name": "thomson_scattering_edge", "imas": "thomson_scattering"}, + "efm": {"name": "equilibrium", "imas": "equilibrium"}, + "rba": {"name": "camera_visible_a", "imas": "camera_visible"}, + "rbb": {"name": "camera_visible_b", "imas": "camera_visible"}, + "rbc": {"name": "camera_visible_c", "imas": "camera_visible"}, + "xdc": {"name": "controllers", "imas": "controllers"}, + "xim": {"name": "spectrometer_visible", "imas": "spectrometer_visible"}, + "xma": {"name": "magnetics_a", "imas": "magnetics"}, + "xmc": {"name": "magnetics_c", "imas": "magnetics"}, + "xmo": {"name": "magnetics_mirnov","imas": "magnetics"}, + "xsx": {"name": "soft_x_rays", "imas": "soft_x_rays"} +} \ No newline at end of file diff --git a/mappings/mastu/groups.json b/mappings/mastu/groups.json new file mode 100644 index 0000000..b87efd3 --- /dev/null +++ b/mappings/mastu/groups.json @@ -0,0 +1,26 @@ +{ + "abm": {"name": "bolometer", "imas": "bolometer"}, + "act": {"name": "charge_exchange", "imas": "charge_exchange"}, + "aga": {"name": "gas_injection", 
"imas": "gas_injection"}, + "ahx": {"name": "hard_x_rays", "imas": "hard_x_rays"}, + "ait": {"name": "camera_ir", "imas": "camera_ir"}, + "alp": {"name": "langmuir_probes", "imas": "langmuir_probes"}, + "anb": {"name": "nbi", "imas": "nbi"}, + "ane": {"name": "interferometer", "imas": "interferometer"}, + "amb": {"name": "magnetics_b", "imas": "magnetics"}, + "amc": {"name": "magnetics", "imas": "magnetics"}, + "asm": {"name": "magnetics_saddle", "imas": "magnetics"}, + "ayc": {"name": "thomson_scattering", "imas": "thomson_scattering"}, + "ayd": {"name": "thomson_scattering_divertor", "imas": "thomson_scattering"}, + "epm": {"name": "equilibrium", "imas": "equilibrium"}, + "epq": {"name": "equilibrium_kinematic", "imas": "equilibrium"}, + "rba": {"name": "camera_visible_a", "imas": "camera_visible"}, + "rbb": {"name": "camera_visible_b", "imas": "camera_visible"}, + "rbc": {"name": "camera_visible_c", "imas": "camera_visible"}, + "xdc": {"name": "controllers", "imas": "controllers"}, + "xim": {"name": "spectrometer_visible", "imas": "spectrometer_visible"}, + "xma": {"name": "magnetics_a", "imas": "magnetics"}, + "xmc": {"name": "magnetics_c", "imas": "magnetics"}, + "xmo": {"name": "magnetics_mirnov","imas": "magnetics"}, + "xsx": {"name": "soft_x_rays", "imas": "soft_x_rays"} +} \ No newline at end of file diff --git a/src/builder.py b/src/builder.py index 0fd9f25..f287eca 100644 --- a/src/builder.py +++ b/src/builder.py @@ -5,7 +5,7 @@ from src.load import BaseLoader, MissingProfileError from src.log import logger from src.pipelines import Pipelines -from src.utils import harmonise_name +from src.utils import harmonise_name, read_json_file from src.writer import DatasetWriter @@ -23,18 +23,31 @@ def __init__( self.loader = loader self.include_datasets = include_datasets self.exclude_datasets = exclude_datasets + self.group_name_mapping = read_json_file(self.pipelines.group_mapping_file) def create(self, shot: int): dataset_infos = self.list_datasets(shot) + for 
dataset_info in dataset_infos: group_name = dataset_info.name logger.info(f"Loading dataset {group_name} for shot #{shot}") - dataset = self.load_datasets(shot, group_name) + datasets = self.load_datasets(shot, group_name) logger.info(f"Processing {group_name} for shot #{shot}") pipeline = self.pipelines.get(group_name) - dataset = pipeline(dataset) + + dataset = pipeline(datasets) + + # tag the loaded inputs with the IMAS name, then rename the group (the loop variable must not rebind `dataset`, which holds the pipeline output) + if group_name in self.group_name_mapping: + mapping = self.group_name_mapping[group_name] + imas_name = mapping["imas"] + + for loaded in datasets.values(): + loaded.attrs["imas"] = imas_name + + group_name = mapping["name"] logger.info(f"Writing {group_name} for shot #{shot}") file_name = f"{shot}.{self.writer.file_extension}" diff --git a/src/pipelines.py b/src/pipelines.py index 98e4b60..04bcde5 100644 --- a/src/pipelines.py +++ b/src/pipelines.py @@ -32,8 +32,13 @@ def __call__(self, x: Any) -> Any: class Pipelines: - def __init__(self) -> None: - pass + @property + def group_mapping_file(self): + raise NotImplementedError() + + @property + def dimension_mapping_file(self): + raise NotImplementedError() def get(self, name: str) -> Pipeline: if name not in self.pipelines: @@ -42,20 +47,26 @@ def get(self, name: str) -> Pipeline: class MASTUPipelines(Pipelines): - def __init__(self) -> None: - dim_mapping_file = "mappings/mastu/dimensions.json" + @property + def group_mapping_file(self): + return "mappings/mastu/groups.json" + + @property + def dimension_mapping_file(self): + return "mappings/mastu/dimensions.json" + def __init__(self) -> None: self.pipelines = { "amb": Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "amc": Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), RenameVariables( @@ -67,56 +78,56 @@ def __init__(self) -> None: ), "anb": 
Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "act": Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "acu": Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "ayc": Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "ayd": Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "epm": Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "esm": Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "xsx": Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), TensoriseChannels("hcam_l", regex=r"hcam_l_ch(\d+)"), @@ -126,7 +137,7 @@ def __init__(self) -> None: ), "xdc": Pipeline( [ - MapDict(RenameDimensions(dim_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] @@ -135,11 +146,19 @@ def __init__(self) -> None: class MASTPipelines(Pipelines): + @property + def group_mapping_file(self): + return "mappings/mast/groups.json" + + @property + def dimension_mapping_file(self): + return "mappings/mast/dimensions.json" + def __init__(self) -> None: self.pipelines = { "abm": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), 
MapDict(DropZeroDimensions()), MergeDatasets(), TransformUnits(), @@ -147,86 +166,86 @@ def __init__(self) -> None: ), "acc": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "act": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "ada": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "aga": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "adg": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "ahx": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "aim": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "air": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "ait": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "alp": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MapDict(DropZeroDimensions()), - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "ama": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "amb": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), 
TransformUnits(), TensoriseChannels("ccbv"), @@ -272,7 +291,7 @@ def __init__(self) -> None: ), "amc": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), DropZeroDataset(), TransformUnits(), @@ -394,14 +413,14 @@ def __init__(self) -> None: ), "amh": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "amm": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), AddGeometry("botcol", "geometry/data/amm/amm_botcol.parquet"), @@ -459,64 +478,64 @@ def __init__(self) -> None: ), "ams": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "anb": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "ane": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "ant": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "anu": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "aoe": Pipeline( [ - MapDict(RenameDimensions()), - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "arp": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "asb": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "asm": 
Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TensoriseChannels("sad_m"), TransformUnits(), @@ -524,7 +543,7 @@ def __init__(self) -> None: ), "asx": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MapDict(ASXTransform()), MergeDatasets(), TransformUnits(), @@ -532,7 +551,7 @@ def __init__(self) -> None: ), "atm": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), RenameVariables( @@ -544,7 +563,7 @@ def __init__(self) -> None: ), "ayc": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), DropCoordinates("segment_number", ["time_segment"]), DropDatasets(["time"]), MergeDatasets(), @@ -558,7 +577,7 @@ def __init__(self) -> None: ), "aye": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] @@ -579,7 +598,7 @@ def __init__(self) -> None: ), MapDict(ReplaceInvalidValues()), MapDict(DropZeroDimensions()), - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), LCFSTransform(), TransformUnits(), @@ -597,14 +616,14 @@ def __init__(self) -> None: "esm": Pipeline( [ MapDict(DropZeroDimensions()), - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "esx": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] @@ -622,35 +641,35 @@ def __init__(self) -> None: "rit": Pipeline([ProcessImage()]), "xdc": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "xim": Pipeline( [ - MapDict(RenameDimensions()), + 
MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "xmo": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "xpc": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "xsx": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), RenameVariables( { @@ -743,7 +762,7 @@ def __init__(self) -> None: ), "xma": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), TensoriseChannels("ccbv", regex=r"ccbv_(\d+)"), @@ -789,7 +808,7 @@ def __init__(self) -> None: ), "xmb": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), TensoriseChannels("sad_out_l"), @@ -805,7 +824,7 @@ def __init__(self) -> None: ), "xmc": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), TensoriseChannels("cc_mt", regex=r"cc_mt_(\d+)"), @@ -821,14 +840,14 @@ def __init__(self) -> None: ), "xmp": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] ), "xms": Pipeline( [ - MapDict(RenameDimensions()), + MapDict(RenameDimensions(self.dimension_mapping_file)), MergeDatasets(), TransformUnits(), ] diff --git a/src/utils.py b/src/utils.py index 002cdac..212de9b 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,5 +1,7 @@ +import json import sys import uuid +from pathlib import Path from src.log import logger @@ -48,3 +50,8 @@ def read_shot_file(shot_file: str) -> list[int]: shot_nums = map(lambda x: x.strip(), shot_nums) shot_nums = list(sorted(map(int, shot_nums))) return shot_nums + + +def 
read_json_file(file_name: str): + with Path(file_name).open("r", encoding="utf-8") as handle: + return json.load(handle)