Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pin numpy version and update python version #49

Merged
merged 10 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.9", "3.10", "3.11"]

steps:
- uses: actions/checkout@v2
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/deploy_public.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ jobs:
steps:
- uses: actions/checkout@v3

- name: Set up Python 3.9
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.9"
python-version: "3.10"

- name: Install dependencies
run: |
Expand Down
4 changes: 2 additions & 2 deletions docs/user_guide/concepts_utils/data_source.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ next time it is requested.
drio_client,
labels,
cache='.cache',
cache_size=pd.to_timedelta("3H")
cache_size=pd.to_timedelta("3h")
)

.. tip::
Expand Down Expand Up @@ -137,7 +137,7 @@ the index is datetime-like, fixed-frequency start and end index pairs can be gen


start, end = iter_index.date_range(
start="2020-01-01 00:00", end="2020-02-01 00:00", freq="1H"
start="2020-01-01 00:00", end="2020-02-01 00:00", freq="1h"
)

for index_i, data_i in source.iter(start, end):
Expand Down
2 changes: 1 addition & 1 deletion docs/user_guide/concepts_utils/example.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ are stored in *Azure Blob Storage* using the :class:`~fourinsight.engineroom.uti
end = pd.to_datetime("now", utc=True)

# Iterate over the data in 1-hour chunks
for index_i, data_i in source.iter(*iter_index.date_range(start, end, freq="1H")):
for index_i, data_i in source.iter(*iter_index.date_range(start, end, freq="1h")):
results.new_row(index_i)

series_a = data_i["A"]
Expand Down
6 changes: 5 additions & 1 deletion fourinsight/engineroom/utils/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,11 @@ def pull(self, raise_on_missing=True, strict=True):

self._handler.seek(0)
df_source = pd.read_csv(
self._handler, index_col=0, parse_dates=True, dtype=self._headers
self._handler,
index_col=0,
parse_dates=True,
dtype=self._headers,
date_format="ISO8601",
)

if strict and set(df_source.columns) != set(self._headers.keys()):
Expand Down
10 changes: 8 additions & 2 deletions fourinsight/engineroom/utils/_datamanage.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ class DrioDataSource(BaseDataSource):
cache : str, optional
Cache folder. If ``None`` (default), caching is disabled.
cache_size :
Cache size as an index partition (see Notes). Defaults to ``'24H'`` if the
Cache size as an index partition (see Notes). Defaults to ``'24h'`` if the
`index_type` is 'datetime'; otherwise, the default is ``None``.
**get_kwargs : optional
Keyword arguments that will be passed on to the ``drio_client.get`` method.
Expand Down Expand Up @@ -578,7 +578,7 @@ def __init__(

if index_type == "datetime":
index_converter = DatetimeIndexConverter()
cache_size = cache_size or "24H"
cache_size = cache_size or "24h"
elif index_type == "integer":
index_converter = IntegerIndexConverter()
elif isinstance(index_type, BaseIndexConverter):
Expand Down Expand Up @@ -798,4 +798,10 @@ def get(self, start, end, refresh_cache=False):
source_i.get(start_i, end_i, refresh_cache=refresh_cache)
for (start_i, end_i), source_i in zip(pairwise(index_list), sources_list)
]

data_list = [df.dropna(how="all", axis=1) for df in data_list if not df.empty]

if not data_list:
return pd.DataFrame([], columns=self._labels)

return pd.concat(data_list).infer_objects()
5 changes: 3 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,18 @@ url = https://4insight.io/
classifiers =
License :: OSI Approved :: MIT License
Operating System :: OS Independent
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11

[options]
packages = find_namespace:
python_requires = >=3.8
python_requires = >=3.9
install_requires =
pandas
azure-storage-blob >= 12.4.0
pyarrow
numpy < 2.0.0

[options.packages.find]
include =
Expand Down
20 changes: 10 additions & 10 deletions tests/test_datamanage.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ def test_partition_start_end_float(self):
def test_partition_start_end_datetime(self):
start = pd.to_datetime("2020-01-01 03:00", utc=True)
end = pd.to_datetime("2020-01-01 09:00", utc=True)
partition = pd.to_timedelta("3H")
partition = pd.to_timedelta("3h")
reference = pd.to_datetime("2020-01-01 02:00", utc=True)
out = BaseDataSourceForTesting._partition_start_end(
start, end, partition, reference
Expand All @@ -705,15 +705,15 @@ def test_partition_start_end_datetime(self):
def test__is_cached_false(self, tmp_path):
cache_dir = tmp_path / ".cache"
source = BaseDataSourceForTesting(
DatetimeIndexConverter(), cache=cache_dir, cache_size="1H"
DatetimeIndexConverter(), cache=cache_dir, cache_size="1h"
)
assert cache_dir.exists()
assert source._is_cached("filename") is False

def test__is_cached_true(self, tmp_path):
cache_dir = tmp_path / ".cache"
source = BaseDataSourceForTesting(
DatetimeIndexConverter(), cache=cache_dir, cache_size="1H"
DatetimeIndexConverter(), cache=cache_dir, cache_size="1h"
)
assert cache_dir.exists()
(cache_dir / "filename").touch()
Expand All @@ -722,7 +722,7 @@ def test__is_cached_true(self, tmp_path):
def test__cache_read(self, tmp_path):
cache_dir = tmp_path / ".cache"
source = BaseDataSourceForTesting(
DatetimeIndexConverter(), cache=cache_dir, cache_size="1H"
DatetimeIndexConverter(), cache=cache_dir, cache_size="1h"
)

df = pd.DataFrame(data={"filename": [2, 4, 6], "a": [1, 2, 3]})
Expand All @@ -735,7 +735,7 @@ def test__cache_read(self, tmp_path):
def test__cache_write(self, tmp_path):
cache_dir = tmp_path / ".cache"
source = BaseDataSourceForTesting(
DatetimeIndexConverter(), cache=cache_dir, cache_size="1H"
DatetimeIndexConverter(), cache=cache_dir, cache_size="1h"
)

df = pd.DataFrame(data={"a": [1, 2, 3]}, index=[2, 4, 6])
Expand Down Expand Up @@ -967,7 +967,7 @@ def test__init__(self, tmp_path):
assert source._get_kwargs == {"convert_date": False, "raise_empty": True}
assert isinstance(source._index_converter, DatetimeIndexConverter)
assert source._cache == Path(cache_dir)
assert source._cache_size == pd.to_timedelta("24H")
assert source._cache_size == pd.to_timedelta("24h")

def test__init__integer(self):
drio_client = Mock()
Expand Down Expand Up @@ -1579,15 +1579,15 @@ def test_to_universal_index_arraylike(self):
np.testing.assert_array_equal(out, expect)

def test_to_universal_delta(self):
out = DatetimeIndexConverter().to_universal_delta("3H")
expect = pd.to_timedelta("3H")
out = DatetimeIndexConverter().to_universal_delta("3h")
expect = pd.to_timedelta("3h")
assert out == expect

def test_to_universal_delta_arraylike(self):
out = DatetimeIndexConverter().to_universal_delta(
["3H", "24H", pd.to_timedelta("2D")]
["3h", "24h", pd.to_timedelta("2D")]
)
expect = pd.to_timedelta(["3H", "24H", pd.to_timedelta("2D")])
expect = pd.to_timedelta(["3h", "24h", pd.to_timedelta("2D")])
np.testing.assert_array_equal(out, expect)

def test_to_native_index(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_iter_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Test_date_range:
def test_start_end_freq(self):
start = "2020-01-01 00:00"
end = "2020-01-01 05:00"
freq = "1H"
freq = "1h"
start_out, end_out = iter_index.date_range(start=start, end=end, freq=freq)

date_range = pd.date_range(start=start, end=end, freq=freq)
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ deps =
pytest-cov

[testenv:docs]
basepython = python3.9
basepython = python3.10
commands = sphinx-build -W -b html -d {toxworkdir}/docs_doctree docs {toxworkdir}/docs_out
deps =
sphinx == 5.3.0
Expand Down
Loading