Update of docstring formatting (#183)

KCL-BMEIS · May 18, 2021 · 8ef52e8 · 8ef52e8
1 parent 6242dcd
commit 8ef52e8
Show file tree

Hide file tree

Showing 13 changed files with 502 additions and 463 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -45,7 +45,7 @@
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'classic'
+html_theme = 'sphinx_rtd_theme'
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,

diff --git a/docs/exetera.core.rst b/docs/exetera.core.rst
@@ -29,7 +29,7 @@ exetera.core.dataset module
    :show-inheritance:
 
 exetera.core.dataframe module
----------------------------
+-----------------------------
 
 .. automodule:: exetera.core.dataframe
    :members:

diff --git a/docs/exetera.processing.rst b/docs/exetera.processing.rst
@@ -11,6 +11,14 @@ exetera.processing.numpy\_buffer module
    :members:
    :undoc-members:
    :show-inheritance:
+
+exetera.processing.date\_time\_helpers module
+---------------------------------------------
+
+.. automodule:: exetera.processing.date_time_helpers
+   :members:
+   :undoc-members:
+   :show-inheritance:
 
 Module contents
 ---------------

diff --git a/docs/index.rst b/docs/index.rst
@@ -6,6 +6,11 @@
 Welcome to ExeTera's documentation!
 ===================================
 
+ExeTera is a tool for creating reproducible analysis pipelines for large tabular datasets.
+
+This documentation is for the API of the `ExeTera Package <https://github.com/KCL-BMEIS/ExeTera>`_. 
+The `wiki <https://github.com/KCL-BMEIS/ExeTera/wiki>`_ contains in-depth documentation on the concepts behind this software, usage examples, and developer resources such as the roadmap for future releases.
+
 .. toctree::
    :maxdepth: 2
    :caption: Contents:

diff --git a/exetera/bin/exetera_perf_tests.py b/exetera/bin/exetera_perf_tests.py
@@ -350,17 +350,18 @@ def read_fields_from_hdf5(file_name, field_count):
                     data = field.data[:]
 
 
-# generate_dataset(1 << 25, 16)
-
-new_prep(1<<28, 4)
-# new_to_hdf5(16)
-# new_hs_test(4)
-new_pd_test(1)
-# pd_test_1(1 << 24, 64)
-#hs_test_1(1 << 21, 16)
-# raw_np_test_1(1 << 29, 4)
-# minimal_test_1(1 << 29, 16)
-# iterator_test_1(1 << 24)
-
-# read_id_from_csv('/home/ben/covid/patients_export_geocodes_20200830040058.csv', 1)
-# read_fields_from_hdf5('/home/ben/covid/ds_20200830_full.hdf5', 16)
+if __name__ == "__main__":
+    # generate_dataset(1 << 25, 16)
+
+    new_prep(1<<28, 4)
+    # new_to_hdf5(16)
+    # new_hs_test(4)
+    new_pd_test(1)
+    # pd_test_1(1 << 24, 64)
+    #hs_test_1(1 << 21, 16)
+    # raw_np_test_1(1 << 29, 4)
+    # minimal_test_1(1 << 29, 16)
+    # iterator_test_1(1 << 24)
+
+    # read_id_from_csv('/home/ben/covid/patients_export_geocodes_20200830040058.csv', 1)
+    # read_fields_from_hdf5('/home/ben/covid/ds_20200830_full.hdf5', 16)
diff --git a/exetera/bin/journal_test.py b/exetera/bin/journal_test.py
@@ -3,11 +3,12 @@
 from exetera.core.session import Session
 from exetera.core.journal import journal_test_harness
 
-schema_fn = '/home/ben/covid/covid_schema.json'
-old_fn = '/home/ben/covid/ds_20200801_base.hdf5'
-new_fn = '/home/ben/covid/ds_20200901_base.hdf5'
-dest_fn = '/home/ben/covid/ds_journal.hdf5'
-
-with open(schema_fn) as f:
-    schema = load_schema(f)
-journal_test_harness(Session(), schema, old_fn, new_fn, dest_fn)
+if __name__ == "__main__":
+    schema_fn = '/home/ben/covid/covid_schema.json'
+    old_fn = '/home/ben/covid/ds_20200801_base.hdf5'
+    new_fn = '/home/ben/covid/ds_20200901_base.hdf5'
+    dest_fn = '/home/ben/covid/ds_journal.hdf5'
+
+    with open(schema_fn) as f:
+        schema = load_schema(f)
+    journal_test_harness(Session(), schema, old_fn, new_fn, dest_fn)
diff --git a/exetera/core/dataframe.py b/exetera/core/dataframe.py
@@ -36,6 +36,14 @@ class HDF5DataFrame(DataFrame):
     For a detailed explanation of DataFrame along with examples of its use, please refer to the
     wiki documentation at
     https://github.com/KCL-BMEIS/ExeTera/wiki/DataFrame-API
+    
+    :param name: name of the dataframe.
+    :param dataset: the dataset object to which this dataframe belongs.
+    :param h5group: the h5group object to store the fields. If the h5group is not empty, data is acquired from
+        the h5group object directly. The h5group structure is h5group<-h5group-dataset, where the latter group
+        has a 'fieldtype' attribute and only one dataset named 'values', so that the structure is mapped to
+        Dataframe<-Field-Field.data automatically.
+    :param dataframe: optional - replicate data from another dictionary of (name:str, field: Field).
     """
     def __init__(self,
                  dataset: Dataset,
@@ -44,14 +52,6 @@ def __init__(self,
         """
         Create a Dataframe object, that contains a dictionary of fields. User should always create dataframe by
         dataset.create_dataframe, otherwise the dataframe is not stored in the dataset.
-
-        :param name: name of the dataframe.
-        :param dataset: a dataset object, where this dataframe belongs to.
-        :param h5group: the h5group object to store the fields. If the h5group is not empty, acquire data from h5group
-        object directly. The h5group structure is h5group<-h5group-dataset structure, the later group has a
-        'fieldtype' attribute and only one dataset named 'values'. So that the structure is mapped to
-        Dataframe<-Field-Field.data automatically.
-        :param dataframe: optional - replicate data from another dictionary of (name:str, field: Field).
         """
 
         self.name = name
@@ -207,7 +207,7 @@ def __contains__(self, name):
 
         :param name: the name of the field to check
         :return: A boolean value indicating whether this DataFrame contains a Field with the
-        name in question
+            name in question
         """
         if not isinstance(name, str):
             raise TypeError("The name must be a str object.")
@@ -309,21 +309,22 @@ def rename(self,
         a single field to be renamed or you can provide a dictionary with a set of fields to be
         renamed.
 
-        ```
-        # rename a single field
-        df.rename('a', 'b')
-
-        # rename multiple fields
-        df.rename({'a': 'b', 'b': 'c', 'c': 'a'})
-        ```
+        Example::
+        
+            # rename a single field
+            df.rename('a', 'b')
+    
+            # rename multiple fields
+            df.rename({'a': 'b', 'b': 'c', 'c': 'a'})
 
         Field renaming can fail if the resulting set of renamed fields would have name clashes. If
         this is the case, none of the rename operations go ahead and the dataframe remains unmodified.
+        
         :param field: Either a string or a dictionary of name pairs, each of which is the existing
-        field name and the destination field name
+            field name and the destination field name
         :param field_to: Optional parameter containing a string, if `field` is a string. If 'field'
-        is a dictionary, parameter should not be set.
-        Field references remain valid after this operation and reflect their renaming.
+            is a dictionary, parameter should not be set.
+            Field references remain valid after this operation and reflect their renaming.
         :return: None
         """
 
@@ -498,26 +499,26 @@ def merge(left: DataFrame,
     Fields are written to the destination dataframe. If the field names clash, they will get
     appended with the strings specified in 'left_suffix' and 'right_suffix' respectively.
 
-    :params left: The left dataframe
-    :params right: The right dataframe
-    :left_on: The field corresponding to the left key used to perform the join. This is either the
-    the name of the field, or a field object. If it is a field object, it can be from another
-    dataframe but it must be the same length as the fields being joined. This can also be a tuple
-    of such values when performing joins on compound keys
-    :right_on: The field corresponding to the right key used to perform the join. This is either
-    the name of the field, or a field object. If it is a field object, it can be from another
-    dataframe but it must be the same length as the fields being joined. This can also be a tuple
-    of such values when performing joins on compound keys
-    :left_fields: Optional parameter listing which fields are to be joined from the left table. If
-    this is not set, all fields from the left table are joined
-    :right_fields: Optional parameter listing which fields are to be joined from the right table.
-    If this is not set, all fields from the right table are joined
-    :left_suffix: A string to be appended to fields from the left table if they clash with fields
-    from the right table.
-    :right_suffix: A string to be appended to fields from the right table if they clash with fields
-    from the left table.
-    :how: Optional parameter specifying the merge mode. It must be one of ('left', 'right',
-    'inner', 'outer' or 'cross). If not set, the 'left' join is performed.
+    :param left: The left dataframe
+    :param right: The right dataframe
+    :param left_on: The field corresponding to the left key used to perform the join. This is either the
+        name of the field, or a field object. If it is a field object, it can be from another
+        dataframe but it must be the same length as the fields being joined. This can also be a tuple
+        of such values when performing joins on compound keys
+    :param right_on: The field corresponding to the right key used to perform the join. This is either
+        the name of the field, or a field object. If it is a field object, it can be from another
+        dataframe but it must be the same length as the fields being joined. This can also be a tuple
+        of such values when performing joins on compound keys
+    :param left_fields: Optional parameter listing which fields are to be joined from the left table. If
+        this is not set, all fields from the left table are joined
+    :param right_fields: Optional parameter listing which fields are to be joined from the right table.
+        If this is not set, all fields from the right table are joined
+    :param left_suffix: A string to be appended to fields from the left table if they clash with fields from the 
+        right table.
+    :param right_suffix: A string to be appended to fields from the right table if they clash with fields from the 
+        left table.
+    :param how: Optional parameter specifying the merge mode. It must be one of ('left', 'right',
+        'inner', 'outer' or 'cross'). If not set, the 'left' join is performed.
 
     """
 

diff --git a/exetera/core/dataset.py b/exetera/core/dataset.py
@@ -28,20 +28,20 @@ class HDF5Dataset(Dataset):
     For a detailed explanation of Dataset along with examples of its use, please refer to the
     wiki documentation at
     https://github.com/KCL-BMEIS/ExeTera/wiki/Dataset-API
+
+    :param session: The session instance in which this dataset is to be included.
+    :param dataset_path: The path of HDF5 file.
+    :param mode: the mode in which the dataset should be opened. This is one of "r", "r+" or "w".
+    :param name: the name that is associated with this dataset. This can be used to retrieve the dataset when
+        calling :py:meth:`~session.Session.get_dataset`.
+    :return: An HDF5Dataset instance.
     """
 
     def __init__(self, session, dataset_path, mode, name):
         """
         Create a Dataset instance that contains dataframes. The dataframes are represented in a dict() with the
         name(str) as a key. The construction should always be called by Session.open_dataset() otherwise the instance
         is not included in Session.datasets. If the HDF5 datafile contains group, the content in loaded into dataframes.
-
-        :param session: The session instance to include this dataset to.
-        :param dataset_path: The path of HDF5 file.
-        :param mode: the mode in which the dataset should be opened. This is one of "r", "r+" or "w".
-        :param name: the name that is associated with this dataset. This can be used to retrieve the dataset when
-        calling :py:meth:`~session.Session.get_dataset`.
-        :return: A HDF5Dataset instance.
         """
         self.name = name
         self._session = session
@@ -202,6 +202,7 @@ def __setitem__(self, name: str, dataframe: DataFrame):
     def __delitem__(self, name: str):
         """
         Delete a dataframe by del dataset[name].
+        
         :param name: The name of dataframe to delete.
         :return: Boolean if the dataframe is deleted.
         """
@@ -215,6 +216,7 @@ def __delitem__(self, name: str):
     def delete_dataframe(self, dataframe: DataFrame):
         """
         Remove dataframe from this dataset by the dataframe object.
+        
         :param dataframe: The dataframe instance to delete.
         :return: Boolean if the dataframe is deleted.
         """