Polish API and prepare for a release (#21)
* Add a new page about other modelling software

* add more modelling software

* add more software

* Revert "add more software"

This reverts commit 0b9bf6c.

* Resources dir for docs, better coordinate system figure

* Simplify API for BenchmarkData and ReferenceModels

* Update to work with API changes

* API polishing

Move model_type into the dataframe/dataset function parameter
Make reference model API more pythonic
Update example code to match these changes

* Up the version number
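A minimal before/after sketch of the calculate_ts() change described above. The reference model name and the added parameters mirror the example code in this commit; treat it as an illustration of the new calling convention under those assumptions, not a complete usage guide.

```python
from echosms import MSSModel, ReferenceModels

rm = ReferenceModels()
mod = MSSModel()

# 'weakly scattering sphere' is assumed to be a valid reference model name
m = rm.parameters('weakly scattering sphere')
m['f'] = 38000     # [Hz]
m['theta'] = 90.0  # [deg]

# Before (0.0.3): the boundary type was a separate argument
# ts = mod.calculate_ts(m, model_type='fluid filled')

# After (0.1.0): model_type stays inside the parameter set, because
# ReferenceModels.parameters() no longer strips it out
ts = mod.calculate_ts(m)
```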
gavinmacaulay authored Aug 21, 2024
1 parent bb5f2ab commit 853656f
Showing 6 changed files with 39 additions and 35 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -7,7 +7,7 @@ packages = ["src/echosms"]

[project]
name = 'echosms'
-version = '0.0.3'
+version = '0.1.0'
license = {file = "LICENSE"}
keywords = ["acoustic", "backscatter", "model"]
authors = [
4 changes: 2 additions & 2 deletions src/echosms/__init__.py
@@ -1,5 +1,5 @@
"""Setup the public API for echoSMs."""
-from .utils import k, eta, h1, df_from_dict, da_from_dict
+from .utils import k, eta, h1, as_dataframe, as_dataarray
from .scattermodelbase import ScatterModelBase
from .benchmarkdata import BenchmarkData
from .referencemodels import ReferenceModels
@@ -8,4 +8,4 @@
from .dcmmodel import DCMModel

__all__ = ['ScatterModelBase', 'BenchmarkData', 'ReferenceModels', 'MSSModel', 'PSMSModel',
-'DCMModel', 'k', 'eta', 'h1', 'da_from_dict', 'df_from_dict']
+'DCMModel', 'k', 'eta', 'h1', 'as_dataframe', 'as_dataarray']
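For reference, the public imports after this rename (a quick sanity check; only the two helper names change):

```python
# df_from_dict / da_from_dict are gone from the public API;
# the same functionality is now exported as as_dataframe / as_dataarray.
from echosms import (ScatterModelBase, BenchmarkData, ReferenceModels,
                     MSSModel, PSMSModel, DCMModel,
                     k, eta, h1, as_dataframe, as_dataarray)
```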
5 changes: 2 additions & 3 deletions src/echosms/referencemodels.py
@@ -70,8 +70,8 @@ def specification(self, name):
def parameters(self, name):
"""Model parameters for a particular model.
-Model parameters are a subset of the model specification where the non-numerical
-items have been removed.
+Model parameters are a subset of the model specification where the metadata items have
+been removed.
Parameters
----------
@@ -94,6 +94,5 @@ def parameters(self, name):
del p['name']
del p['shape']
del p['description']
-del p['model_type']
del p['source']
return p
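A short sketch of the revised parameters() behaviour: the descriptive metadata is still stripped, but model_type now stays in the returned dict so it can be passed straight to a model's calculate_ts(). This assumes, as the del statements above suggest, that parameters() starts from a copy of the full specification; the model name is illustrative.

```python
from echosms import ReferenceModels

rm = ReferenceModels()

s = rm.specification('weakly scattering sphere')  # full entry, metadata included
p = rm.parameters('weakly scattering sphere')     # numeric parameters plus model_type

# Expected to hold after this commit, given the del statements above
assert set(s) - set(p) == {'name', 'shape', 'description', 'source'}
assert 'model_type' in p
```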
21 changes: 9 additions & 12 deletions src/echosms/scattermodelbase.py
@@ -2,7 +2,7 @@

import abc
import numpy as np
-from .utils import df_from_dict
+from .utils import as_dataframe
import pandas as pd
import xarray as xr

@@ -42,7 +42,7 @@ def __init__(self):
# An indication of the maximum ka value that this model provides accurate results for
self.max_ka = np.nan # [1]

-def calculate_ts(self, data, model_type, multiprocess=False):
+def calculate_ts(self, data, multiprocess=False):
"""Calculate the TS for many parameter sets.
Parameters
@@ -54,10 +54,6 @@ def calculate_ts(self, data, model_type, multiprocess=False):
parameters in calculate_ts_single(). The TS will be calculated for all combinations of
the coordinate variables. If dictionary, it will be converted to a DataFrame first.
-model_type : string
-The type of model boundary to apply. Valid values are given in the model_types class
-variable.
multiprocess : boolean
Split the ts calculation across CPU cores.
@@ -68,7 +64,7 @@
"""
if isinstance(data, dict):
-data = df_from_dict(data)
+data = as_dataframe(data)
elif isinstance(data, pd.DataFrame):
pass
elif isinstance(data, xr.DataArray):
@@ -80,19 +76,20 @@

if multiprocess:
# Using mapply:
-# ts = mapply(data, self.__ts_helper, args=(model_type,), axis=1)
+# ts = mapply(data, self.__ts_helper, axis=1)
# Using swifter
-# ts = df.swifter.apply(self.__ts_helper, args=(model_type,), axis=1)
-ts = data.apply(self.__ts_helper, args=(model_type,), axis=1)
+# ts = df.swifter.apply(self.__ts_helper, axis=1)
+ts = data.apply(self.__ts_helper, axis=1)
else: # this uses just one CPU
-ts = data.apply(self.__ts_helper, args=(model_type,), axis=1)
+# ts = data.apply(self.__ts_helper, args=(model_type,), axis=1)
+ts = data.apply(self.__ts_helper, axis=1)

return ts.to_numpy() # TODO - return data type that matches the input data type

def __ts_helper(self, *args):
"""Convert function arguments and call calculate_ts_single()."""
p = args[0].to_dict() # so we can use it for keyword arguments
-return self.calculate_ts_single(**p, model_type=args[1])
+return self.calculate_ts_single(**p)

@abc.abstractmethod
def calculate_ts_single(self):
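The dispatch above accepts a dict, a DataFrame or an xarray DataArray and then applies calculate_ts_single() row by row, with each row unpacked into keyword arguments. A self-contained sketch of that mechanism, using a toy class (not a real echoSMs scattering model) and a copy of the as_dataframe expansion:

```python
import numpy as np
import pandas as pd


def as_dataframe(params: dict) -> pd.DataFrame:
    # Same Cartesian-product expansion as echosms.utils.as_dataframe
    return pd.DataFrame({k: t.flatten()
                         for k, t in zip(params.keys(), np.meshgrid(*tuple(params.values())))})


class ToyModel:
    """Stand-in for a ScatterModelBase subclass (illustrative only)."""

    def calculate_ts(self, data):
        if isinstance(data, dict):
            data = as_dataframe(data)
        # Each DataFrame row becomes the keyword arguments of calculate_ts_single()
        ts = data.apply(lambda row: self.calculate_ts_single(**row.to_dict()), axis=1)
        return ts.to_numpy()

    def calculate_ts_single(self, f, a, model_type):
        return 20 * np.log10(f * a)  # placeholder formula, not a scattering model


ts = ToyModel().calculate_ts({'f': [38000, 120000], 'a': 0.01, 'model_type': 'fluid filled'})
print(ts)  # one value per parameter combination
```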
19 changes: 11 additions & 8 deletions src/echosms/utils.py
@@ -3,9 +3,10 @@
import pandas as pd
import xarray as xr
from scipy.special import spherical_jn, spherical_yn
+from collections.abc import Iterable


-def df_from_dict(params: dict) -> pd.DataFrame:
+def as_dataframe(params: dict) -> pd.DataFrame:
"""Convert model parameters from dict form to a Pandas DataFrame.
Parameters
@@ -20,14 +21,15 @@ def df_from_dict(params: dict) -> pd.DataFrame:
input dict.
"""
-# Use meshgrid to do the Cartesian product, then reshape into a 2D array, then create a
-# Pandas DataFrame() from that
-return pd.DataFrame(np.array(
-np.meshgrid(*tuple(params.values()))).T.reshape(-1, len(params)),
-columns=params.keys())
+# Use meshgrid to do the Cartesian product then create a Pandas DataFrame from that, having
+# flattened the multidimensional arrays and using a dict to provide column names.
+# This preserves the differing dtypes in each column compared to other ways of
+# constructing the DataFrame).
+return pd.DataFrame({k: t.flatten()
+for k, t in zip(params.keys(), np.meshgrid(*tuple(params.values())))})


-def da_from_dict(params: dict) -> xr.DataArray:
+def as_dataarray(params: dict) -> xr.DataArray:
"""Convert model parameters from dict form to a Xarray DataArray.
Parameters
@@ -44,8 +46,9 @@
"""
# Convert scalars to iterables so xarray is happier later on
for k, v in params.items():
-if not hasattr(v, '__iter__'):
+if not isinstance(v, Iterable) or isinstance(v, str):
params[k] = [v]

# Lengths of each parameter array
sz = [len(v) for k, v in params.items()]
# Create the DataArray
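A quick illustration of what the two rewritten helpers produce from the same parameter dict: as_dataframe gives one row per combination of the parameter values, while as_dataarray gives one dimension per parameter. The dimension names and ordering of the DataArray are assumptions here, since its construction is not shown in this hunk.

```python
import numpy as np
from echosms import as_dataframe, as_dataarray

params = {'f': np.array([38e3, 70e3, 120e3]),  # [Hz]
          'theta': [0.0, 90.0, 180.0],         # [deg]
          'a': 0.01}                           # [m]; scalars are accepted

df = as_dataframe(params)
print(df.shape)  # (9, 3): 3 frequencies x 3 angles x 1 radius, one column per parameter

da = as_dataarray(params)
print(da.shape)  # expected (3, 3, 1), one dimension per parameter
print(da.dims)   # assumed to follow the dict keys: ('f', 'theta', 'a')
```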
23 changes: 14 additions & 9 deletions src/example_code.py
@@ -7,7 +7,7 @@
from echosms import MSSModel, PSMSModel, DCMModel
from echosms import BenchmarkData
from echosms import ReferenceModels
-from echosms import df_from_dict, da_from_dict
+from echosms import as_dataframe, as_dataarray

# Load the reference model definitions
rm = ReferenceModels()
@@ -60,14 +60,14 @@
for name in names:
# Get the model parameters used in Jech et al. (2015) for a particular model.
s = rm.specification(name[0])
-m = rm.parameters(name[0]) # the subset of s with string items removed
+m = rm.parameters(name[0])

# Add frequencies and angle to the model parameters
m['f'] = bm.freq_dataset['Frequency_kHz']*1e3 # [Hz]
m['theta'] = 90.0

# and run these
-ts = mod.calculate_ts(m, model_type=s['model_type'])
+ts = mod.calculate_ts(m)

jech_index = np.mean(np.abs(ts - bmf[name[1]]))

@@ -108,14 +108,14 @@
for name in names:
# Get the model parameters used in Jech et al. (2015) for a particular model.
s = rm.specification(name[0])
-m = rm.parameters(name[0]) # the subset of s with string items removed
+m = rm.parameters(name[0])

# Add frequencies and angle to the model parameters
m['f'] = 38000 # [Hz]
m['theta'] = bmt['Angle_deg']

# and run these
-ts = mod.calculate_ts(m, model_type=s['model_type'])
+ts = mod.calculate_ts(m)

jech_index = np.mean(np.abs(ts - bmt[name[1]]))

@@ -146,13 +146,13 @@
m['target_rho'] = np.arange(1020, 1030, 1) # [kg/m^3]
m['theta'] = [0, 90.0, 180.0]
# can convert this to a dataframe
-models_df = df_from_dict(m)
+models_df = as_dataframe(m)
# could also make a DataFrame of parameters that are not just the combination of all input
# parameters. This offers a way to specify a more tailored set of model parameters.

print(f'Running {len(models_df)} models')
# and run
-ts = mss.calculate_ts(models_df, model_type='fluid filled', multiprocess=True)
+ts = mss.calculate_ts(models_df, multiprocess=True)

# And can then add the ts to the params dataframe for ease of selecting and plotting the results
models_df['ts'] = ts
@@ -174,14 +174,19 @@
'f': np.linspace(12, 100, num=400) * 1000,
'theta': np.arange(0, 180, 1),
'a': 0.07,
+'model_type': 'fluid filled',
'target_c': 1450,
'target_rho': 1250}

# Instead of converting those to a dataframe, an xarray can be used.
-params_xa = da_from_dict(params)
+params_xa = as_dataarray(params)

# how many model runs would that be?
print(f'Running {np.prod(params_xa.shape)} models!')

# and is called the same way as for the dataframe
-ts = mss.calculate_ts(params_xa, model_type='fluid filled', multiprocess=True)
+if False: # cause it takes a long time to run (as multiprocess is not enabled internally)
+    ts = mss.calculate_ts(params_xa, multiprocess=True)

# and it can be inserted into params_xa
# TODO once the data is returned in an appropriate form
