Skip to content

Commit

Permalink
Update of docstring formatting (#183)
Browse files Browse the repository at this point in the history
  • Loading branch information
ericspod authored May 18, 2021
1 parent 6242dcd commit 8ef52e8
Show file tree
Hide file tree
Showing 13 changed files with 502 additions and 463 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'classic'
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
Expand Down
2 changes: 1 addition & 1 deletion docs/exetera.core.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ exetera.core.dataset module
:show-inheritance:

exetera.core.dataframe module
---------------------------
-----------------------------

.. automodule:: exetera.core.dataframe
:members:
Expand Down
8 changes: 8 additions & 0 deletions docs/exetera.processing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ exetera.processing.numpy\_buffer module
:members:
:undoc-members:
:show-inheritance:

exetera.processing.date\_time\_helpers module
---------------------------------------------

.. automodule:: exetera.processing.date_time_helpers
:members:
:undoc-members:
:show-inheritance:

Module contents
---------------
Expand Down
5 changes: 5 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
Welcome to ExeTera's documentation!
===================================

ExeTera is a tool for creating reproducible analysis pipelines for large tabular datasets.

This documentation is for the API of the `ExeTera Package <https://github.com/KCL-BMEIS/ExeTera>`_.
The `wiki <https://github.com/KCL-BMEIS/ExeTera/wiki>`_ contains in-depth documentation on the concepts behind this software, usage examples, and developer resources such as the roadmap for future releases.

.. toctree::
:maxdepth: 2
:caption: Contents:
Expand Down
29 changes: 15 additions & 14 deletions exetera/bin/exetera_perf_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,17 +350,18 @@ def read_fields_from_hdf5(file_name, field_count):
data = field.data[:]


# generate_dataset(1 << 25, 16)

new_prep(1<<28, 4)
# new_to_hdf5(16)
# new_hs_test(4)
new_pd_test(1)
# pd_test_1(1 << 24, 64)
#hs_test_1(1 << 21, 16)
# raw_np_test_1(1 << 29, 4)
# minimal_test_1(1 << 29, 16)
# iterator_test_1(1 << 24)

# read_id_from_csv('/home/ben/covid/patients_export_geocodes_20200830040058.csv', 1)
# read_fields_from_hdf5('/home/ben/covid/ds_20200830_full.hdf5', 16)
if __name__ == "__main__":
# generate_dataset(1 << 25, 16)

new_prep(1<<28, 4)
# new_to_hdf5(16)
# new_hs_test(4)
new_pd_test(1)
# pd_test_1(1 << 24, 64)
#hs_test_1(1 << 21, 16)
# raw_np_test_1(1 << 29, 4)
# minimal_test_1(1 << 29, 16)
# iterator_test_1(1 << 24)

# read_id_from_csv('/home/ben/covid/patients_export_geocodes_20200830040058.csv', 1)
# read_fields_from_hdf5('/home/ben/covid/ds_20200830_full.hdf5', 16)
17 changes: 9 additions & 8 deletions exetera/bin/journal_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
from exetera.core.session import Session
from exetera.core.journal import journal_test_harness

schema_fn = '/home/ben/covid/covid_schema.json'
old_fn = '/home/ben/covid/ds_20200801_base.hdf5'
new_fn = '/home/ben/covid/ds_20200901_base.hdf5'
dest_fn = '/home/ben/covid/ds_journal.hdf5'

with open(schema_fn) as f:
schema = load_schema(f)
journal_test_harness(Session(), schema, old_fn, new_fn, dest_fn)
if __name__ == "__main__":
schema_fn = '/home/ben/covid/covid_schema.json'
old_fn = '/home/ben/covid/ds_20200801_base.hdf5'
new_fn = '/home/ben/covid/ds_20200901_base.hdf5'
dest_fn = '/home/ben/covid/ds_journal.hdf5'

with open(schema_fn) as f:
schema = load_schema(f)
journal_test_harness(Session(), schema, old_fn, new_fn, dest_fn)
79 changes: 40 additions & 39 deletions exetera/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ class HDF5DataFrame(DataFrame):
For a detailed explanation of DataFrame along with examples of its use, please refer to the
wiki documentation at
https://github.com/KCL-BMEIS/ExeTera/wiki/DataFrame-API
:param name: name of the dataframe.
:param dataset: the dataset object to which this dataframe belongs.
:param h5group: the h5group object in which to store the fields. If the h5group is not empty, data is acquired from
the h5group object directly. The h5group structure is h5group<-h5group-dataset; the latter group has a
'fieldtype' attribute and only one dataset named 'values', so that the structure is mapped to
Dataframe<-Field-Field.data automatically.
:param dataframe: optional - replicate data from another dictionary of (name:str, field: Field).
"""
def __init__(self,
dataset: Dataset,
Expand All @@ -44,14 +52,6 @@ def __init__(self,
"""
Create a Dataframe object that contains a dictionary of fields. Users should always create a dataframe via
dataset.create_dataframe; otherwise the dataframe is not stored in the dataset.
:param name: name of the dataframe.
:param dataset: a dataset object, where this dataframe belongs to.
:param h5group: the h5group object to store the fields. If the h5group is not empty, acquire data from h5group
object directly. The h5group structure is h5group<-h5group-dataset structure, the later group has a
'fieldtype' attribute and only one dataset named 'values'. So that the structure is mapped to
Dataframe<-Field-Field.data automatically.
:param dataframe: optional - replicate data from another dictionary of (name:str, field: Field).
"""

self.name = name
Expand Down Expand Up @@ -207,7 +207,7 @@ def __contains__(self, name):
:param name: the name of the field to check
:return: A boolean value indicating whether this DataFrame contains a Field with the
name in question
name in question
"""
if not isinstance(name, str):
raise TypeError("The name must be a str object.")
Expand Down Expand Up @@ -309,21 +309,22 @@ def rename(self,
a single field to be renamed or you can provide a dictionary with a set of fields to be
renamed.
```
# rename a single field
df.rename('a', 'b')
# rename multiple fields
df.rename({'a': 'b', 'b': 'c', 'c': 'a'})
```
Example::
# rename a single field
df.rename('a', 'b')
# rename multiple fields
df.rename({'a': 'b', 'b': 'c', 'c': 'a'})
Field renaming can fail if the resulting set of renamed fields would have name clashes. If
this is the case, none of the rename operations go ahead and the dataframe remains unmodified.
:param field: Either a string or a dictionary of name pairs, each of which is the existing
field name and the destination field name
field name and the destination field name
:param field_to: Optional parameter containing a string, if `field` is a string. If `field`
is a dictionary, parameter should not be set.
Field references remain valid after this operation and reflect their renaming.
is a dictionary, parameter should not be set.
Field references remain valid after this operation and reflect their renaming.
:return: None
"""

Expand Down Expand Up @@ -498,26 +499,26 @@ def merge(left: DataFrame,
Fields are written to the destination dataframe. If the field names clash, they will get
appended with the strings specified in 'left_suffix' and 'right_suffix' respectively.
:params left: The left dataframe
:params right: The right dataframe
:left_on: The field corresponding to the left key used to perform the join. This is either the
the name of the field, or a field object. If it is a field object, it can be from another
dataframe but it must be the same length as the fields being joined. This can also be a tuple
of such values when performing joins on compound keys
:right_on: The field corresponding to the right key used to perform the join. This is either
the name of the field, or a field object. If it is a field object, it can be from another
dataframe but it must be the same length as the fields being joined. This can also be a tuple
of such values when performing joins on compound keys
:left_fields: Optional parameter listing which fields are to be joined from the left table. If
this is not set, all fields from the left table are joined
:right_fields: Optional parameter listing which fields are to be joined from the right table.
If this is not set, all fields from the right table are joined
:left_suffix: A string to be appended to fields from the left table if they clash with fields
from the right table.
:right_suffix: A string to be appended to fields from the right table if they clash with fields
from the left table.
:how: Optional parameter specifying the merge mode. It must be one of ('left', 'right',
'inner', 'outer' or 'cross). If not set, the 'left' join is performed.
:param left: The left dataframe
:param right: The right dataframe
:param left_on: The field corresponding to the left key used to perform the join. This is either the
name of the field, or a field object. If it is a field object, it can be from another
dataframe but it must be the same length as the fields being joined. This can also be a tuple
of such values when performing joins on compound keys
:param right_on: The field corresponding to the right key used to perform the join. This is either
the name of the field, or a field object. If it is a field object, it can be from another
dataframe but it must be the same length as the fields being joined. This can also be a tuple
of such values when performing joins on compound keys
:param left_fields: Optional parameter listing which fields are to be joined from the left table. If
this is not set, all fields from the left table are joined
:param right_fields: Optional parameter listing which fields are to be joined from the right table.
If this is not set, all fields from the right table are joined
:param left_suffix: A string to be appended to fields from the left table if they clash with fields from the
right table.
:param right_suffix: A string to be appended to fields from the right table if they clash with fields from the
left table.
:param how: Optional parameter specifying the merge mode. It must be one of ('left', 'right',
'inner', 'outer' or 'cross'). If not set, the 'left' join is performed.
"""

Expand Down
16 changes: 9 additions & 7 deletions exetera/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,20 @@ class HDF5Dataset(Dataset):
For a detailed explanation of Dataset along with examples of its use, please refer to the
wiki documentation at
https://github.com/KCL-BMEIS/ExeTera/wiki/Dataset-API
:param session: The session instance in which this dataset is included.
:param dataset_path: The path of HDF5 file.
:param mode: the mode in which the dataset should be opened. This is one of "r", "r+" or "w".
:param name: the name that is associated with this dataset. This can be used to retrieve the dataset when
calling :py:meth:`~session.Session.get_dataset`.
:return: A HDF5Dataset instance.
"""

def __init__(self, session, dataset_path, mode, name):
"""
Create a Dataset instance that contains dataframes. The dataframes are represented in a dict() with the
name (str) as the key. The constructor should always be called via Session.open_dataset(); otherwise the instance
is not included in Session.datasets. If the HDF5 data file contains groups, their content is loaded into dataframes.
:param session: The session instance to include this dataset to.
:param dataset_path: The path of HDF5 file.
:param mode: the mode in which the dataset should be opened. This is one of "r", "r+" or "w".
:param name: the name that is associated with this dataset. This can be used to retrieve the dataset when
calling :py:meth:`~session.Session.get_dataset`.
:return: A HDF5Dataset instance.
"""
self.name = name
self._session = session
Expand Down Expand Up @@ -202,6 +202,7 @@ def __setitem__(self, name: str, dataframe: DataFrame):
def __delitem__(self, name: str):
"""
Delete a dataframe by del dataset[name].
:param name: The name of the dataframe to delete.
:return: Boolean indicating whether the dataframe is deleted.
"""
Expand All @@ -215,6 +216,7 @@ def __delitem__(self, name: str):
def delete_dataframe(self, dataframe: DataFrame):
"""
Remove dataframe from this dataset by the dataframe object.
:param dataframe: The dataframe instance to delete.
:return: Boolean indicating whether the dataframe is deleted.
"""
Expand Down
Loading

0 comments on commit 8ef52e8

Please sign in to comment.