Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datetimes #684

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions anndata/_io/specs/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,59 @@ def read_nullable_boolean(elem):
return pd.array(read_elem(elem["values"]))


#############
# datetimes #
#############


@_REGISTRY.register_write(
    ZarrGroup, (views.ArrayView, "M"), IOSpec("datetime-array", "0.1.0")
)
@_REGISTRY.register_write(
    H5Group, (views.ArrayView, "M"), IOSpec("datetime-array", "0.1.0")
)
@_REGISTRY.register_write(
    ZarrGroup, (np.ndarray, "M"), IOSpec("datetime-array", "0.1.0")
)
@_REGISTRY.register_write(H5Group, (np.ndarray, "M"), IOSpec("datetime-array", "0.1.0"))
def write_datetime_array(f, k, elem, dataset_kwargs=MappingProxyType({})):
    """Write a numpy datetime array to ``f[k]`` as 64-bit integers plus a unit.

    The array is reinterpreted (not converted) as ``uint64`` and stored via
    ``f.create_dataset``; the datetime unit reported by ``np.datetime_data``
    is recorded in the dataset's ``"unit"`` attribute.

    Parameters
    ----------
    f
        Group to create the dataset in.
    k
        Name of the dataset to create.
    elem
        Array with a datetime dtype.
    dataset_kwargs
        Extra keyword arguments forwarded to ``f.create_dataset``.

    Raises
    ------
    ValueError
        If the dtype's step count is not 1.
    """
    time_unit, step = np.datetime_data(elem.dtype)

    # Only the unit is persisted, so a multi-step dtype could not be restored.
    if not step == 1:
        raise ValueError(
            f"Datetime had non-unit step '{step}', which is currently unsupported."
        )

    raw_values = elem.view("uint64")
    written = f.create_dataset(k, data=raw_values, **dataset_kwargs)
    written.attrs["unit"] = time_unit


@_REGISTRY.register_write(
    ZarrGroup, pd.arrays.DatetimeArray, IOSpec("datetime-array", "0.1.0")
)
@_REGISTRY.register_write(
    H5Group, pd.arrays.DatetimeArray, IOSpec("datetime-array", "0.1.0")
)
@_REGISTRY.register_write(
    ZarrGroup, pd.DatetimeIndex, IOSpec("datetime-array", "0.1.0")
)
@_REGISTRY.register_write(H5Group, pd.DatetimeIndex, IOSpec("datetime-array", "0.1.0"))
def write_datetime_array_from_pandas(f, k, elem, dataset_kwargs=MappingProxyType({})):
    """Write a pandas datetime container by delegating to ``write_elem``.

    The element is converted with ``elem.to_numpy()`` and the result is handed
    to ``write_elem`` under the same key.

    Parameters
    ----------
    f
        Group to write into.
    k
        Key to write the element under.
    elem
        Pandas datetime array or index; must not carry a timezone.
    dataset_kwargs
        Forwarded to ``write_elem`` as ``dataset_kwargs``.

    Raises
    ------
    NotImplementedError
        If ``elem.tz`` is set.
    """
    has_timezone = elem.tz is not None
    if has_timezone:
        raise NotImplementedError(
            "Datetime had timezone specified, which is currently not supported."
        )

    as_numpy = elem.to_numpy()
    write_elem(f, k, as_numpy, dataset_kwargs=dataset_kwargs)


@_REGISTRY.register_read(ZarrArray, IOSpec("datetime-array", "0.1.0"))
@_REGISTRY.register_read(H5Array, IOSpec("datetime-array", "0.1.0"))
def read_np_datetime_array(elem):
    """Read a stored datetime array back as a numpy ``datetime64`` array.

    The ``"unit"`` attribute of ``elem`` selects the datetime unit; the full
    contents of ``elem`` are loaded and reinterpreted with that dtype.
    """
    unit = _read_attr(elem.attrs, "unit")
    datetime_dtype = f"datetime64[{unit}]"
    stored_values = elem[()]
    return stored_values.view(datetime_dtype)


###########
# Scalars #
###########
Expand Down
34 changes: 17 additions & 17 deletions anndata/_io/specs/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,28 @@ def _register(func):

return _register

def get_writer(self, dest, elem, modifiers=frozenset()):
    """Return the registered write function for storing ``elem`` in ``dest``.

    Lookup is keyed on ``(type(dest), <source key>, modifiers)``. A
    dtype-specific registration ``(type(elem), elem.dtype.kind)`` takes
    precedence over a registration for ``type(elem)`` alone.

    Parameters
    ----------
    dest
        The container the element will be written to.
    elem
        The element to be written.
    modifiers
        Iterable of modifiers; converted to a ``frozenset`` for the lookup.

    Raises
    ------
    TypeError
        If no writer is registered for this destination/element combination.
    """
    import h5py

    typ = type(elem)
    dest_type = type(dest)

    # Writers are looked up under h5py.Group, so map h5py.File onto it.
    if dest_type is h5py.File:
        dest_type = h5py.Group
    modifiers = frozenset(modifiers)

    # Consult this registry instance (not the module-level global) so the
    # membership test and the returned entry always come from the same mapping.
    if hasattr(elem, "dtype"):
        dtype_key = (dest_type, (typ, elem.dtype.kind), modifiers)
        if dtype_key in self.write:
            return self.write[dtype_key]

    type_key = (dest_type, typ, modifiers)
    if type_key in self.write:
        return self.write[type_key]

    raise TypeError(
        f"No method has been defined for writing {typ} elements to {dest_type}"
    )

def has_writer(self, dest_type, typ, modifiers):
    """Report whether a writer is registered for this exact lookup key.

    ``modifiers`` is converted to a ``frozenset`` before the key
    ``(dest_type, typ, modifiers)`` is tested against ``self.write``.
    """
    lookup_key = (dest_type, typ, frozenset(modifiers))
    return lookup_key in self.write
Expand Down Expand Up @@ -156,23 +164,15 @@ def write_elem(
elem
The element to write as k to f.
"""
dest_type = type(f)
if elem is None:
return
t = type(elem)
if k == "/":
f.clear()
elif k in f:
del f[k]
if (
hasattr(elem, "dtype")
and (dest_type, (t, elem.dtype.kind), modifiers) in _REGISTRY.write
):
_REGISTRY.get_writer(dest_type, (t, elem.dtype.kind), modifiers)(
f, k, elem, *args, **kwargs
)
else:
_REGISTRY.get_writer(dest_type, t, modifiers)(f, k, elem, *args, **kwargs)

if elem is None:
return

_REGISTRY.get_writer(f, elem, modifiers)(f, k, elem, *args, **kwargs)


def read_elem(elem, modifiers: frozenset(str) = frozenset()):
Expand Down
22 changes: 22 additions & 0 deletions anndata/tests/test_io_elementwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ def store(request):
"nullable-boolean",
),
(pd.array([True, False, True, True]), "nullable-boolean"),
(pd.array(pd.date_range("2018-01-01", periods=3, freq="H")), "datetime-array"),
(
pd.Categorical(
np.repeat(pd.date_range("2018-01-01", periods=3, freq="Y"), 3)
),
"categorical",
),
(
pd.date_range("2000-01-01", periods=5, freq="ms").to_numpy(),
"datetime-array",
),
# (bytes, b"some bytes", "bytes"), # Does not work for zarr
# TODO consider how specific encodings should be. Should we be fully describing the written type?
# Currently the info we add is: "what you wouldn't be able to figure out yourself"
Expand Down Expand Up @@ -110,3 +121,14 @@ def test_write_to_root(store):

assert "anndata" == _read_attr(store.attrs, "encoding-type")
assert_equal(from_disk, adata)


def test_write_none_clears(store):
    """Writing ``None`` under an existing key must leave that key absent."""
    key = "key"

    # Populate the key first so there is something to clear.
    write_elem(store, key, np.ones(3))
    assert key in store

    write_elem(store, key, None)
    # Would also be fine if read_elem(store["key"]) returned None
    assert key not in store