Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Rename unnamed index columns as col_{depth} in Arrow arrays and dataframes #492

Merged
merged 1 commit into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion spinedb_api/arrow_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def crawl_map_uneven(
typed_xs = []
ys = []
max_nested_depth = 0
index_names = root_index_names + [loaded_value.get("index_name", "x")]
index_names = root_index_names + [loaded_value.get("index_name", f"col_{depth}")]
index_metadata = {}
deepest_nested_index_names = []
index_type = loaded_value["index_type"]
Expand Down
12 changes: 6 additions & 6 deletions tests/test_arrow_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ def test_empty_map(self):
value, value_type = parameter_value.to_database(parameter_value.Map([], [], str))
map_ = arrow_value.from_database(value, value_type)
self.assertEqual(len(map_), 0)
self.assertEqual(map_.column_names, ["x", "value"])
self.assertEqual(map_.column("x").type, pyarrow.string())
self.assertEqual(map_.column_names, ["col_1", "value"])
self.assertEqual(map_.column("col_1").type, pyarrow.string())
self.assertEqual(map_.column("value").type, pyarrow.null())

def test_string_to_string_map_with_index_name(self):
Expand Down Expand Up @@ -141,15 +141,15 @@ def test_unevenly_nested_map_with_fixed_resolution_time_series(self):
)
map_ = arrow_value.from_database(value, value_type)
self.assertEqual(len(map_), 6)
self.assertEqual(map_.column_names, ["main index", "nested index", "x", "t", "value"])
self.assertEqual(map_.column_names, ["main index", "nested index", "col_3", "t", "value"])
self.assertEqual(map_.column("main index").type, pyarrow.string())
self.assertEqual(
map_.column("main index").to_pylist(),
["not nested", "strings", "time series", "time series", "time series", "floats"],
)
self.assertEqual(map_.column("nested index").to_pylist(), [None, 11.0, "ts", "ts", "no ts", "key"])
self.assertEqual(
map_.column("x").to_pylist(),
map_.column("col_3").to_pylist(),
[
None,
None,
Expand Down Expand Up @@ -191,7 +191,7 @@ def test_unevenly_nested_map(self):
)
map_ = arrow_value.from_database(value, value_type)
self.assertEqual(len(map_), 6)
self.assertEqual(map_.column_names, ["main index", "nested index", "x", "value"])
self.assertEqual(map_.column_names, ["main index", "nested index", "col_3", "value"])
self.assertEqual(map_.column("main index").type, pyarrow.string())
self.assertEqual(
map_.column("main index").to_pylist(),
Expand All @@ -201,7 +201,7 @@ def test_unevenly_nested_map(self):
map_.column("nested index").to_pylist(), [None, 11.0, "date time", "more date time", "non nested", "key"]
)
self.assertEqual(
map_.column("x").to_pylist(),
map_.column("col_3").to_pylist(),
[None, None, "time of my life", datetime.datetime.fromisoformat("2024-02-26T17:45:00"), None, None],
)
self.assertEqual(
Expand Down
27 changes: 27 additions & 0 deletions tests/test_dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,33 @@ def test_simple_map_value(self):
)
self.assertTrue(dataframe.equals(expected))

def test_unnamed_indexes_are_named_as_col_x(self):
    """Check that a Map stored without an index name gets a ``col_1`` index column.

    A single-row Map value is added for entity ``fork`` of class ``Object``
    and converted with ``spine_df.to_dataframe``; the resulting frame must
    equal the hand-built expectation below, including the ``col_1`` column.
    """
    with DatabaseMapping("sqlite://", create=True) as db:
        # Minimal scaffolding: one class, one parameter definition, one entity.
        created_class = db.add_entity_class_item(name="Object")
        self._assert_success(created_class)
        created_definition = db.add_parameter_definition_item(name="y", entity_class_name="Object")
        self._assert_success(created_definition)
        created_entity = db.add_entity_item(name="fork", entity_class_name="Object")
        self._assert_success(created_entity)
        # The Map is built without any index name on purpose.
        created_value = db.add_parameter_value_item(
            entity_class_name="Object",
            entity_byname=("fork",),
            parameter_definition_name="y",
            alternative_name="Base",
            parsed_value=Map(["A"], [1.1]),
        )
        item = self._assert_success(created_value)
        actual = spine_df.to_dataframe(item)
    # Column order is significant for DataFrame.equals, so keep it as is.
    columns = {
        "entity_class_name": pd.Series(["Object"], dtype="category"),
        "Object": pd.Series(["fork"], dtype="string"),
        "parameter_definition_name": pd.Series(["y"], dtype="category"),
        "alternative_name": pd.Series(["Base"], dtype="category"),
        "col_1": ["A"],
        "value": [1.1],
    }
    wanted = pd.DataFrame(columns)
    frames_match = actual.equals(wanted)
    self.assertTrue(frames_match)

def test_time_series_value(self):
with DatabaseMapping("sqlite://", create=True) as db_map:
self._assert_success(db_map.add_entity_class_item(name="Object"))
Expand Down