From 853656fbf59a973364afe99d42ab9690ecc79508 Mon Sep 17 00:00:00 2001 From: Gavin Macaulay Date: Wed, 21 Aug 2024 15:06:38 +1200 Subject: [PATCH] Polish API and prepare for a release (#21) * Add a new page about other modelling software * add more modelling software * add more software * Revert "add more software" This reverts commit 0b9bf6c83da4ee7e85eec15a21ae039dfd2517b4. * Resources dir for docs, better coordinate system figure * Simplify API for BenchmarkData and ReferenceModels * Update to work with API changes * API polishing Move model_type into the dataframe/dataset function parameter Make reference model API more pythonic Update example code to match these changes * Up the version number --- pyproject.toml | 2 +- src/echosms/__init__.py | 4 ++-- src/echosms/referencemodels.py | 5 ++--- src/echosms/scattermodelbase.py | 21 +++++++++------------ src/echosms/utils.py | 19 +++++++++++-------- src/example_code.py | 23 ++++++++++++++--------- 6 files changed, 39 insertions(+), 35 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c69d642..1d95d0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ packages = ["src/echosms"] [project] name = 'echosms' -version = '0.0.3' +version = '0.1.0' license = {file = "LICENSE"} keywords = ["acoustic", "backscatter", "model"] authors = [ diff --git a/src/echosms/__init__.py b/src/echosms/__init__.py index 214fbcf..c7f2e9f 100644 --- a/src/echosms/__init__.py +++ b/src/echosms/__init__.py @@ -1,5 +1,5 @@ """Setup the public API for echoSMs.""" -from .utils import k, eta, h1, df_from_dict, da_from_dict +from .utils import k, eta, h1, as_dataframe, as_dataarray from .scattermodelbase import ScatterModelBase from .benchmarkdata import BenchmarkData from .referencemodels import ReferenceModels @@ -8,4 +8,4 @@ from .dcmmodel import DCMModel __all__ = ['ScatterModelBase', 'BenchmarkData', 'ReferenceModels', 'MSSModel', 'PSMSModel', - 'DCMModel', 'k', 'eta', 'h1', 'da_from_dict', 'df_from_dict'] + 
'DCMModel', 'k', 'eta', 'h1', 'as_dataframe', 'as_dataarray'] diff --git a/src/echosms/referencemodels.py b/src/echosms/referencemodels.py index 446cfb2..1b4dbc6 100644 --- a/src/echosms/referencemodels.py +++ b/src/echosms/referencemodels.py @@ -70,8 +70,8 @@ def specification(self, name): def parameters(self, name): """Model parameters for a particular model. - Model parameters are a subset of the model specification where the non-numerical - items have been removed. + Model parameters are a subset of the model specification where the metadata items have + been removed. Parameters ---------- @@ -94,6 +94,5 @@ def parameters(self, name): del p['name'] del p['shape'] del p['description'] - del p['model_type'] del p['source'] return p diff --git a/src/echosms/scattermodelbase.py b/src/echosms/scattermodelbase.py index c43d925..89ef0a0 100644 --- a/src/echosms/scattermodelbase.py +++ b/src/echosms/scattermodelbase.py @@ -2,7 +2,7 @@ import abc import numpy as np -from .utils import df_from_dict +from .utils import as_dataframe import pandas as pd import xarray as xr @@ -42,7 +42,7 @@ def __init__(self): # An indication of the maximum ka value that this model provides accurate results for self.max_ka = np.nan # [1] - def calculate_ts(self, data, model_type, multiprocess=False): + def calculate_ts(self, data, multiprocess=False): """Calculate the TS for many parameter sets. Parameters @@ -54,10 +54,6 @@ def calculate_ts(self, data, model_type, multiprocess=False): parameters in calculate_ts_single(). The TS will be calculated for all combinations of the coordinate variables. If dictionary, it will be converted to a DataFrame first. - model_type : string - The type of model boundary to apply. Valid values are given in the model_types class - variable. - multiprocess : boolean Split the ts calculation across CPU cores. 
@@ -68,7 +64,7 @@ def calculate_ts(self, data, model_type, multiprocess=False): """ if isinstance(data, dict): - data = df_from_dict(data) + data = as_dataframe(data) elif isinstance(data, pd.DataFrame): pass elif isinstance(data, xr.DataArray): @@ -80,19 +76,20 @@ def calculate_ts(self, data, model_type, multiprocess=False): if multiprocess: # Using mapply: - # ts = mapply(data, self.__ts_helper, args=(model_type,), axis=1) + # ts = mapply(data, self.__ts_helper, axis=1) # Using swifter - # ts = df.swifter.apply(self.__ts_helper, args=(model_type,), axis=1) - ts = data.apply(self.__ts_helper, args=(model_type,), axis=1) + # ts = df.swifter.apply(self.__ts_helper, axis=1) + ts = data.apply(self.__ts_helper, axis=1) else: # this uses just one CPU - ts = data.apply(self.__ts_helper, args=(model_type,), axis=1) + # ts = data.apply(self.__ts_helper, args=(model_type,), axis=1) + ts = data.apply(self.__ts_helper, axis=1) return ts.to_numpy() # TODO - return data type that matches the input data type def __ts_helper(self, *args): """Convert function arguments and call calculate_ts_single().""" p = args[0].to_dict() # so we can use it for keyword arguments - return self.calculate_ts_single(**p, model_type=args[1]) + return self.calculate_ts_single(**p) @abc.abstractmethod def calculate_ts_single(self): diff --git a/src/echosms/utils.py b/src/echosms/utils.py index fc6a26d..31c6b93 100644 --- a/src/echosms/utils.py +++ b/src/echosms/utils.py @@ -3,9 +3,10 @@ import pandas as pd import xarray as xr from scipy.special import spherical_jn, spherical_yn +from collections.abc import Iterable -def df_from_dict(params: dict) -> pd.DataFrame: +def as_dataframe(params: dict) -> pd.DataFrame: """Convert model parameters from dict form to a Pandas DataFrame. Parameters @@ -20,14 +21,15 @@ def df_from_dict(params: dict) -> pd.DataFrame: input dict. 
""" - # Use meshgrid to do the Cartesian product, then reshape into a 2D array, then create a - # Pandas DataFrame() from that - return pd.DataFrame(np.array( - np.meshgrid(*tuple(params.values()))).T.reshape(-1, len(params)), - columns=params.keys()) + # Use meshgrid to do the Cartesian product then create a Pandas DataFrame from that, having + # flattened the multidimensional arrays and using a dict to provide column names. + # This preserves the differing dtypes in each column compared to other ways of + # constructing the DataFrame). + return pd.DataFrame({k: t.flatten() + for k, t in zip(params.keys(), np.meshgrid(*tuple(params.values())))}) -def da_from_dict(params: dict) -> xr.DataArray: +def as_dataarray(params: dict) -> xr.DataArray: """Convert model parameters from dict form to a Xarray DataArray. Parameters @@ -44,8 +46,9 @@ def da_from_dict(params: dict) -> xr.DataArray: """ # Convert scalars to iterables so xarray is happier later on for k, v in params.items(): - if not hasattr(v, '__iter__'): + if not isinstance(v, Iterable) or isinstance(v, str): params[k] = [v] + # Lengths of each parameter array sz = [len(v) for k, v in params.items()] # Create the DataArray diff --git a/src/example_code.py b/src/example_code.py index 5490b27..c853cfd 100644 --- a/src/example_code.py +++ b/src/example_code.py @@ -7,7 +7,7 @@ from echosms import MSSModel, PSMSModel, DCMModel from echosms import BenchmarkData from echosms import ReferenceModels -from echosms import df_from_dict, da_from_dict +from echosms import as_dataframe, as_dataarray # Load the reference model defintiions rm = ReferenceModels() @@ -60,14 +60,14 @@ for name in names: # Get the model parameters used in Jech et al. (2015) for a particular model. 
s = rm.specification(name[0]) - m = rm.parameters(name[0]) # the subset of s with string items removed + m = rm.parameters(name[0]) # Add frequencies and angle to the model parameters m['f'] = bm.freq_dataset['Frequency_kHz']*1e3 # [Hz] m['theta'] = 90.0 # and run these - ts = mod.calculate_ts(m, model_type=s['model_type']) + ts = mod.calculate_ts(m) jech_index = np.mean(np.abs(ts - bmf[name[1]])) @@ -108,14 +108,14 @@ for name in names: # Get the model parameters used in Jech et al. (2015) for a particular model. s = rm.specification(name[0]) - m = rm.parameters(name[0]) # the subset of s with string items removed + m = rm.parameters(name[0]) # Add frequencies and angle to the model parameters m['f'] = 38000 # [Hz] m['theta'] = bmt['Angle_deg'] # and run these - ts = mod.calculate_ts(m, model_type=s['model_type']) + ts = mod.calculate_ts(m) jech_index = np.mean(np.abs(ts - bmt[name[1]])) @@ -146,13 +146,13 @@ m['target_rho'] = np.arange(1020, 1030, 1) # [kg/m^3] m['theta'] = [0, 90.0, 180.0] # can convert this to a dataframe -models_df = df_from_dict(m) +models_df = as_dataframe(m) # could also make a DataFrame of parameters that are not just the combination of all input # parameters. This offers a way to specify a more tailored set of model parameters. print(f'Running {len(models_df)} models') # and run -ts = mss.calculate_ts(models_df, model_type='fluid filled', multiprocess=True) +ts = mss.calculate_ts(models_df, multiprocess=True) # And can then add the ts to the params dataframe for ease of selecting and plotting the results models_df['ts'] = ts @@ -174,14 +174,19 @@ 'f': np.linspace(12, 100, num=400) * 1000, 'theta': np.arange(0, 180, 1), 'a': 0.07, + 'model_type': 'fluid filled', 'target_c': 1450, 'target_rho': 1250} # Instead of converting those to a dataframe, an xarray can be used. -params_xa = da_from_dict(params) +params_xa = as_dataarray(params) # how many models runs would that be? 
print(f'Running {np.prod(params_xa.shape)} models!') # and is called the same way as for the dataframe -ts = mss.calculate_ts(params_xa, model_type='fluid filled', multiprocess=True) +if False: # disabled because it takes a long time to run (multiprocessing is not enabled internally) + ts = mss.calculate_ts(params_xa, multiprocess=True) + +# and it can be inserted into params_xa +# TODO once the data is returned in an appropriate form