Skip to content

Commit

Permalink
More robustness when dealing with pandas column multi-index.
Browse files Browse the repository at this point in the history
  • Loading branch information
yohplala committed Aug 21, 2024
1 parent 010bed4 commit 3e841fb
Showing 1 changed file with 43 additions and 0 deletions.
43 changes: 43 additions & 0 deletions oups/store/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,45 @@ def to_midx(idx: Index, levels: List[str] = None) -> MultiIndex:
return MultiIndex.from_tuples(tuples, names=levels)


def check_cmidx(chunk):
"""
Check cmidx to have it complying with fastparquet limitations.
Most notably, fastparquet requires names for each level in a Multiindex.
If these are not set, there are set to '', an empty string.
Multiindex is modified in-place.
Parameters
----------
chunk : DataFrame
DataFrame which multi-index is to check.
Returns
-------
None
"""
cmidx = chunk.columns
# If an item of the column name is not a string, turn it into a string.
# Using 'set_levels()' instead of rconstructing a MultiIndex to preserve
# index names directly.
level_updated_idx = []
level_updated = []
for i, level in enumerate(cmidx.levels):
str_level = [name if isinstance(name, str) else str(name) for name in level]
if level.to_list() != str_level:
level_updated_idx.append(i)
level_updated.append(str_level)
if level_updated:
chunk.columns = chunk.columns.set_levels(level_updated, level=level_updated_idx)
# If a name is 'None', set it to '' instead.
cmidx = chunk.columns
for i, name in enumerate(cmidx.names):
if name is None:
cmidx.set_names("", level=i, inplace=True)


def write_metadata(
pf: ParquetFile,
metadata: Dict[str, str] = None,
Expand Down Expand Up @@ -563,6 +602,10 @@ def write(
chunk = next(iter_data)
if cmidx_expand:
chunk.columns = to_midx(chunk.columns, cmidx_levels)
# In case multi-index is used, check that it complies with fastparquet
# limitations.
if isinstance(chunk.columns, MultiIndex):
check_cmidx(chunk)
fp_write(
dirpath,
chunk,
Expand Down

0 comments on commit 3e841fb

Please sign in to comment.