Skip to content

Commit

Permalink
more unittests for code coverage (#279)
Browse files Browse the repository at this point in the history
* init commit for more unittests for code coverage

* Test tweaks

* add numeric field data ops, timestamp

* done tests for memfields, gonna add dataset and dataframe

* more tests for dataframes and datasets

* minor typo fix

* update on test memory field apply_span_ ops

* minor update remove duplicate

* minor update remove duplicates

* add tests for general functions including create_like, get_spans, is_sorted, unique

* update test session: create_like, list_datasets

* more tests capture exceptions

* more tests for fields

* more tests for session

* more tests

* more test for utils

* confirm updates with Ben

* remove njit decorator as field is not available in numba

* updates on Eric's comment

Co-authored-by: eric <eric@bioeng471-lap.isd.kcl.ac.uk>
  • Loading branch information
deng113jie and eric authored Apr 21, 2022
1 parent 8628710 commit 49411f2
Show file tree
Hide file tree
Showing 11 changed files with 1,185 additions and 241 deletions.
21 changes: 9 additions & 12 deletions exetera/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def add(self,
:param field: field to add to this dataframe, copy the underlying dataset
"""
dname = field.name[field.name.index('/', 1)+1:]
dname = field.name if '/' not in field.name else field.name[field.name.index('/', 1)+1:]
nfield = field.create_like(self, dname)
if field.indexed:
nfield.indices.write(field.indices[:])
Expand Down Expand Up @@ -330,10 +330,7 @@ def delete_field(self, field):
if field.dataframe != self:
raise ValueError("This field is owned by a different dataframe")
name = field.name
if name is None:
raise ValueError("This dataframe does not contain the field to delete.")
else:
self.__delitem__(name)
self.__delitem__(name)

def keys(self):
"""
Expand Down Expand Up @@ -390,7 +387,7 @@ def rename(self,
"""

if not isinstance(field, (str, dict)):
raise ValueError("'field' must be of type str or dict but is {}").format(type(field))
raise ValueError("'field' must be of type str or dict but is {}".format(type(field)))

dict_ = None
if isinstance(field, dict):
Expand Down Expand Up @@ -518,8 +515,6 @@ def apply_index(self, index_to_apply, ddf=None):
:returns: a dataframe contains all the fields re-indexed, self if ddf is not set
"""
if ddf is not None:
val.validate_all_field_length_in_df(ddf)

if not isinstance(ddf, DataFrame):
raise TypeError("The destination object must be an instance of DataFrame.")
for name, field in self._columns.items():
Expand Down Expand Up @@ -1589,7 +1584,7 @@ def _ordered_merge(left: DataFrame,
ops.generate_ordered_map_to_left_left_unique_streamed(
a_on[0], b_on[0], a_result, b_result, invalid, rdtype=npdtype)
else:
if right_keys_unique:
if b_unique:
b_result = dest.create_numeric('_b_map', strdtype)
ops.generate_ordered_map_to_left_right_unique_streamed(
a_on[0], b_on[0], b_result, invalid, rdtype=npdtype)
Expand All @@ -1600,12 +1595,14 @@ def _ordered_merge(left: DataFrame,
a_on[0], b_on[0], a_result, b_result, invalid, rdtype=npdtype)

if how == 'right':
dest.rename('_a_map', '_right_map')
if "_a_map" in dest:
dest.rename('_a_map', '_right_map')
dest.rename('_b_map', '_left_map')
else:
dest.rename('_a_map', '_left_map')
if "_a_map" in dest:
dest.rename('_a_map', '_left_map')
dest.rename('_b_map', '_right_map')
else:
else: # how = inner
left_result = dest.create_numeric('_left_map', strdtype)
right_result = dest.create_numeric('_right_map', strdtype)
if left_keys_unique:
Expand Down
32 changes: 27 additions & 5 deletions exetera/core/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,9 +479,13 @@ def __len__(self):
@property
def dtype(self):
"""
Get datatype of field.
Get datatype of field. Please note constructing a numpy array from IndexedString data can be very memory expensive.
"""
return self._dtype
if len(self._indices) > 0:
max_len = np.max(self._indices[1:] - self._indices[:-1])
else:
max_len = 0
return np.dtype('S'+str(max_len))

def __getitem__(self, item):
"""
Expand Down Expand Up @@ -579,10 +583,14 @@ def __len__(self):
@property
def dtype(self):
"""
Returns datatype of field
Returns datatype of field. Please note constructing a numpy array from IndexedString data can be very memory expensive.
:return: dtype
"""
return self._dtype
if len(self._indices) > 0:
max_len = np.max(self._indices[1:] - self._indices[:-1])
else:
max_len = 0
return np.dtype('S' + str(max_len))

def __getitem__(self, item):
"""
Expand Down Expand Up @@ -1528,6 +1536,13 @@ def remap(self, key_map, new_key):
:param key_map: The mapping rule of convert the old key into the new key.
:param new_key: The new key.
:return: A CategoricalMemField with the new key.
Example::
cat_field = df.create_categorical('cat', 'int32', {"a": 1, "b": 2})
cat_field.data.write([1,2,1,2])
newfield = cat_field.remap([(1, 4), (2, 5)], {"a": 4, "b": 5})
print(newfield.data[:]) # [4,5,4,5]
"""
# make sure all key values are included in the key_map
for k in self._keys.values():
Expand Down Expand Up @@ -1917,7 +1932,7 @@ def __le__(self, value):
def __eq__(self, value):
return FieldDataOps.equal(self._session, self, value)

def __eq__(self, value):
def __ne__(self, value):
return FieldDataOps.not_equal(self._session, self, value)

def __gt__(self, value):
Expand Down Expand Up @@ -2943,6 +2958,13 @@ def remap(self, key_map, new_key):
:param key_map: The mapping rule of convert the old key into the new key.
:param new_key: The new key.
:return: A CategoricalMemField with the new key.
Example::
cat_field = df.create_categorical('cat', 'int32', {"a": 1, "b": 2})
cat_field.data.write([1,2,1,2])
newfield = cat_field.remap([(1, 4), (2, 5)], {"a": 4, "b": 5})
print(newfield.data[:])
"""
self._ensure_valid()
# make sure all key values are included in the key_map
Expand Down
Loading

0 comments on commit 49411f2

Please sign in to comment.