Skip to content

Commit

Permalink
Removing persistence code from the repo (#286)
Browse files Browse the repository at this point in the history
Removal of obsolete code from Exetera. This primarily relates to:

 - early code for processing the dataset as CSVs (before the codebase morphed into Exetera)
 - the readers / writers and the old implementation of dataset, which were later refactored into field classes and the session class, respectively. These were documented as deprecated but were left in for compatibility while the scripts that relied on them were modernised.
 - support code for the above

Files that have been removed:
 - core/csvdataset.py
 - core/exporter.py
 - core/filtered_field.py
 - core/persistence.py
 - core/readerwriter.py
 - core/regression.py
 - core/split.py
  • Loading branch information
atbenmurray authored Apr 20, 2022
1 parent 24cc402 commit 8628710
Show file tree
Hide file tree
Showing 28 changed files with 164 additions and 5,463 deletions.
32 changes: 0 additions & 32 deletions docs/exetera.core.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,6 @@ exetera.core.fields module
:undoc-members:
:show-inheritance:

exetera.core.filtered\_field module
-----------------------------------

.. automodule:: exetera.core.filtered_field
:members:
:undoc-members:
:show-inheritance:

exetera.core.indexed\_array module
----------------------------------

Expand All @@ -76,22 +68,6 @@ exetera.core.operations module
:undoc-members:
:show-inheritance:

exetera.core.persistence module
-------------------------------

.. automodule:: exetera.core.persistence
:members:
:undoc-members:
:show-inheritance:

exetera.core.readerwriter module
--------------------------------

.. automodule:: exetera.core.readerwriter
:members:
:undoc-members:
:show-inheritance:

exetera.core.regression module
------------------------------

Expand All @@ -108,14 +84,6 @@ exetera.core.session module
:undoc-members:
:show-inheritance:

exetera.core.split module
-------------------------

.. automodule:: exetera.core.split
:members:
:undoc-members:
:show-inheritance:

exetera.core.utils module
-------------------------

Expand Down
119 changes: 0 additions & 119 deletions exetera/bin/add_imd.py

This file was deleted.

2 changes: 1 addition & 1 deletion exetera/bin/journaling_prototype.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import numpy as np

from exetera.core import csvdataset
# from exetera.core import csvdataset
from exetera.core import utils


Expand Down
33 changes: 23 additions & 10 deletions exetera/bin/journalling_prototype_import.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import argparse
import os
import sys

import h5py

from exetera.core.importer import DatasetImporter
from exetera.core.persistence import DataStore
from exetera.covidspecific import data_schemas
from exetera.io import importer
# from exetera.core.importer import DatasetImporter
# from exetera.core.persistence import DataStore
from exetera.core.session import Session
# from exetera.covidspecific import data_schemas


def consolidate(datastore, existing_group, new_group):
Expand Down Expand Up @@ -41,17 +44,27 @@ def import_and_consolidate(datastore, dataset, source_file, data_schema, timesta


if __name__ == '__main__':
if len(sys.argv) != 4:
print("Usage: check_for_duplicates.py <datastore> <directory> <pattern>")
exit(1)

parser = argparse.ArgumentParser()
parser.add_argument('--schema', help='The path and name of the schema file')
parser.add_argument('--source_dir', help='The directory containing the source files')
parser.add_argument('--pattern', help="The pattern that identifies files of interest in '--source_dir'")
parser.add_argument('--dest', help='The path and name of the datatset to be created or appended to')

# if len(sys.argv) != 4:
# print("Usage: check_for_duplicates.py <datastore> <directory> <pattern>")
# exit(1)

args = parser.parse_args()


show_progress_every = 500000

filenames = sorted(fn for fn in os.listdir(sys.argv[2]) if sys.argv[3] in fn)

with h5py.File(sys.argv[1], 'w') as dataset:
datastore = DataStore()
data_schema = data_schemas.DataSchema(1)
with Session() as s:
dataset = s.open_dataset(sys.argv[1], 'w', 'dataset')

for fn in filenames:
with open(fn) as src:
import_and_consolidate(datastore, dataset, src, data_schema)
import_and_consolidate(s, dataset, src, data_schema)
8 changes: 6 additions & 2 deletions exetera/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@

from . import data_writer, csvdataset, exporter, fields, filtered_field, operations, \
persistence, readerwriter, regression, session, split, utils, validation
from . import (data_writer,
fields,
operations,
session,
utils,
validation)
5 changes: 0 additions & 5 deletions exetera/core/abstract_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,11 +426,6 @@ def join(self, destination_pkey, fkey_indices, values_to_join,
writer=None, fkey_index_spans=None):
raise NotImplementedError()

@abstractmethod
def predicate_and_join(self, predicate, destination_pkey, fkey_indices,
reader=None, writer=None, fkey_index_spans=None):
raise NotImplementedError()

@abstractmethod
def get(self, field):
raise NotImplementedError()
Expand Down
Loading

0 comments on commit 8628710

Please sign in to comment.