From 5a7c84396f5c9afa591cea1215b3abe89aafa266 Mon Sep 17 00:00:00 2001 From: Antti Soininen Date: Fri, 11 Oct 2024 15:24:33 +0300 Subject: [PATCH 1/2] Fix importing pivoted data with skip rows Re spine-tools/Spine-Toolbox#2962 --- spinedb_api/import_mapping/generator.py | 35 ++++++++++++++++++------- tests/import_mapping/test_generator.py | 22 ++++++++++++++++ 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/spinedb_api/import_mapping/generator.py b/spinedb_api/import_mapping/generator.py index afba9de2..999ed9f9 100644 --- a/spinedb_api/import_mapping/generator.py +++ b/spinedb_api/import_mapping/generator.py @@ -13,14 +13,13 @@ """ Contains `get_mapped_data()` that converts rows of tabular data into a dictionary for import to a Spine DB, using ``import_functions.import_data()`` - """ from copy import deepcopy from operator import itemgetter from ..exception import ParameterValueFormatError from ..helpers import string_to_bool -from ..mapping import Position +from ..mapping import Position, is_pivoted from ..parameter_value import ( Array, Map, @@ -97,36 +96,47 @@ def get_mapped_data( mapping_names = [] _ensure_mapping_name_consistency(mappings, mapping_names) for mapping, mapping_name in zip(mappings, mapping_names): - read_state = {} mapping = deepcopy(mapping) mapping.polish(table_name, data_header, mapping_name, column_count) mapping_errors = check_validity(mapping) if mapping_errors: errors += mapping_errors continue + read_state = {} # Find pivoted and unpivoted mappings pivoted, non_pivoted, pivoted_from_header, last = _split_mapping(mapping) # If there are no pivoted mappings, we can just feed the rows to our mapping directly if not (pivoted or pivoted_from_header): start_pos = mapping.read_start_row - for k, row in enumerate(rows[mapping.read_start_row :]): + for k, row in enumerate(rows[start_pos:]): if not _is_valid_row(row): continue row = _convert_row(row, column_convert_fns, start_pos + k, errors) mapping.import_row(row, read_state, 
mapped_data) continue # There are pivoted mappings. We unpivot the table + pivoted_by_leaf = all( + not is_pivoted(m.position) and m.position != Position.header for m in mapping.flatten()[:-1] + ) unpivoted_rows, pivoted_pos, non_pivoted_pos, unpivoted_column_pos = _unpivot_rows( - rows, data_header, pivoted, non_pivoted, pivoted_from_header, mapping.skip_columns + rows, + data_header, + pivoted, + non_pivoted, + pivoted_from_header, + mapping.skip_columns, + mapping.read_start_row, + pivoted_by_leaf, ) if not unpivoted_column_pos: continue + if not is_pivoted(last.position): + last.position = -1 # Reposition row convert functions row_convert_fns = {k: row_convert_fns[pos] for k, pos in enumerate(pivoted_pos) if pos in row_convert_fns} # If there are only pivoted mappings, we can just feed the unpivoted rows if not non_pivoted: # Reposition pivoted mappings: - last.position = -1 for k, m in enumerate(pivoted): m.position = k for k, row in enumerate(unpivoted_rows): @@ -140,7 +150,6 @@ def get_mapped_data( # - The last mapping (typically, parameter value) will read from the last position in the row # - The pivoted mappings will read from positions to the left of that k = None - last.position = -1 for k, m in enumerate(reversed(pivoted)): m.position = -(k + 2) # Feed rows: To each regular row, we append each unpivoted row, plus the item at the intersection, @@ -226,7 +235,9 @@ def _split_mapping(mapping): return pivoted, non_pivoted, pivoted_from_header, flattened[-1] -def _unpivot_rows(rows, data_header, pivoted, non_pivoted, pivoted_from_header, skip_columns): +def _unpivot_rows( + rows, data_header, pivoted, non_pivoted, pivoted_from_header, skip_columns, read_start_row, pivoted_by_leaf +): """Unpivots rows. 
Args: @@ -235,6 +246,9 @@ def _unpivot_rows(rows, data_header, pivoted, non_pivoted, pivoted_from_header, pivoted (list of ImportMapping): Pivoted mappings (reading from rows) non_pivoted (list of ImportMapping): Non-pivoted mappings ('regular', reading from columns) pivoted_from_header (list of ImportMapping): Mappings pivoted from header + skip_columns (list of int): columns that should be skipped + read_start_row (int): first row to include + pivoted_by_leaf (bool): whether only the leaf mapping is pivoted Returns: list of list: Unpivoted rows @@ -246,7 +260,10 @@ def _unpivot_rows(rows, data_header, pivoted, non_pivoted, pivoted_from_header, pivoted_pos = [-(m.position + 1) for m in pivoted] # (-1) -> (0), (-2) -> (1), (-3) -> (2), etc. non_pivoted_pos = [m.position for m in non_pivoted] # Collect pivoted rows - pivoted_rows = [rows[pos] for pos in pivoted_pos] if non_pivoted_pos else rows + if not pivoted_by_leaf: + pivoted_rows = [rows[pos] for pos in pivoted_pos] if non_pivoted_pos else rows + else: + pivoted_rows = [rows[pos + read_start_row] for pos in pivoted_pos] if non_pivoted_pos else rows[read_start_row:] # Prepend as many headers as needed for m in pivoted_from_header: pivoted.insert(0, m) diff --git a/tests/import_mapping/test_generator.py b/tests/import_mapping/test_generator.py index 04888b29..ab69edcd 100644 --- a/tests/import_mapping/test_generator.py +++ b/tests/import_mapping/test_generator.py @@ -955,6 +955,28 @@ def test_import_parameter_types(self): }, ) + def test_skip_first_row_when_importing_pivoted_data(self): + data_source = iter( + [ + [None, "alternative1", "alternative2", "alternative3"], + ["Scenario1", "Base", "fixed_prices", None], + ] + ) + mappings = [ + [ + {"map_type": "Scenario", "position": 0, "read_start_row": 1}, + {"map_type": "ScenarioAlternative", "position": -1}, + ] + ] + convert_function_specs = {0: "string", 1: "string", 2: "string", 3: "string"} + convert_functions = {column: value_to_convert_spec(spec) for 
column, spec in convert_function_specs.items()} + mapped_data, errors = get_mapped_data(data_source, mappings, column_convert_fns=convert_functions) + self.assertEqual(errors, []) + self.assertEqual( + mapped_data, + {"scenario_alternatives": [["Scenario1", "Base"], ["Scenario1", "fixed_prices"]]}, + ) + if __name__ == "__main__": unittest.main() From 9d1c7632ccef071619fcda9d3c9b65a48ee643c5 Mon Sep 17 00:00:00 2001 From: Antti Soininen Date: Wed, 16 Oct 2024 10:12:55 +0300 Subject: [PATCH 2/2] Make the "before alternative" import/export mappings legacy The "before alternative" concept is not very user-friendly and not widely used (except for the Excel import in DB editor). We now promote importing and exporting scenario alternatives as correctly ordered data. Re spine-tools/Spine-Toolbox#2962 --- spinedb_api/export_mapping/export_mapping.py | 13 ++++++++ spinedb_api/export_mapping/generator.py | 5 +-- spinedb_api/export_mapping/settings.py | 9 ++---- spinedb_api/import_mapping/import_mapping.py | 3 +- tests/export_mapping/test_export_mapping.py | 34 +++++++++++++++++++- 5 files changed, 50 insertions(+), 14 deletions(-) diff --git a/spinedb_api/export_mapping/export_mapping.py b/spinedb_api/export_mapping/export_mapping.py index a8f186bb..7ceac7eb 100644 --- a/spinedb_api/export_mapping/export_mapping.py +++ b/spinedb_api/export_mapping/export_mapping.py @@ -1309,12 +1309,25 @@ class ScenarioAlternativeMapping(ExportMapping): MAP_TYPE = "ScenarioAlternative" def add_query_columns(self, db_map, query): + if self._child is None: + return query.add_columns( + db_map.ext_scenario_sq.c.alternative_id, + db_map.ext_scenario_sq.c.alternative_name, + db_map.ext_scenario_sq.c.rank, + ) + # Legacy: expecting child to be ScenarioBeforeAlternativeMapping return query.add_columns( db_map.ext_linked_scenario_alternative_sq.c.alternative_id, db_map.ext_linked_scenario_alternative_sq.c.alternative_name, ) def filter_query(self, db_map, query): + if self._child is None: + return 
query.outerjoin( + db_map.ext_scenario_sq, + db_map.ext_scenario_sq.c.id == db_map.scenario_sq.c.id, + ).order_by(db_map.ext_scenario_sq.c.name, db_map.ext_scenario_sq.c.rank) + # Legacy: expecting child to be ScenarioBeforeAlternativeMapping return query.outerjoin( db_map.ext_linked_scenario_alternative_sq, db_map.ext_linked_scenario_alternative_sq.c.scenario_id == db_map.scenario_sq.c.id, diff --git a/spinedb_api/export_mapping/generator.py b/spinedb_api/export_mapping/generator.py index e1c359e3..5d3bd3ff 100644 --- a/spinedb_api/export_mapping/generator.py +++ b/spinedb_api/export_mapping/generator.py @@ -9,10 +9,7 @@ # Public License for more details. You should have received a copy of the GNU Lesser General Public License along with # this program. If not, see . ###################################################################################################################### -""" -Contains generator functions that convert a Spine database into rows of tabular data. - -""" +""" Contains generator functions that convert a Spine database into rows of tabular data. """ from copy import deepcopy from ..mapping import Position from .export_mapping import pair_header_buddies diff --git a/spinedb_api/export_mapping/settings.py b/spinedb_api/export_mapping/settings.py index e2e4973c..8a01fa73 100644 --- a/spinedb_api/export_mapping/settings.py +++ b/spinedb_api/export_mapping/settings.py @@ -333,24 +333,19 @@ def scenario_export( return scenario_mapping -def scenario_alternative_export( - scenario_position=Position.hidden, alternative_position=Position.hidden, before_alternative_position=Position.hidden -): +def scenario_alternative_export(scenario_position=Position.hidden, alternative_position=Position.hidden): """ Sets up export mappings for exporting scenario alternatives. 
Args: scenario_position (int or Position): position of scenarios alternative_position (int or Position): position of alternatives - before_alternative_position (int or Position): position of 'before' alternatives - (for each row, the 'alternative' goes *before* the 'before alternative' in the scenario rank) Returns: Mapping: root mapping """ scenario_mapping = ScenarioMapping(scenario_position) - alternative_mapping = scenario_mapping.child = ScenarioAlternativeMapping(alternative_position) - alternative_mapping.child = ScenarioBeforeAlternativeMapping(before_alternative_position) + scenario_mapping.child = ScenarioAlternativeMapping(alternative_position) return scenario_mapping diff --git a/spinedb_api/import_mapping/import_mapping.py b/spinedb_api/import_mapping/import_mapping.py index 9eaf416a..310f9f5f 100644 --- a/spinedb_api/import_mapping/import_mapping.py +++ b/spinedb_api/import_mapping/import_mapping.py @@ -974,8 +974,7 @@ def _default_scenario_alternative_mapping(): ScenarioAlternativeMapping: root mapping """ root_mapping = ScenarioMapping(Position.hidden) - scen_alt_mapping = root_mapping.child = ScenarioAlternativeMapping(Position.hidden) - scen_alt_mapping.child = ScenarioBeforeAlternativeMapping(Position.hidden) + root_mapping.child = ScenarioAlternativeMapping(Position.hidden) return root_mapping diff --git a/tests/export_mapping/test_export_mapping.py b/tests/export_mapping/test_export_mapping.py index cefa351d..8688047f 100644 --- a/tests/export_mapping/test_export_mapping.py +++ b/tests/export_mapping/test_export_mapping.py @@ -55,6 +55,7 @@ ParameterValueMapping, ParameterValueTypeMapping, ScenarioAlternativeMapping, + ScenarioBeforeAlternativeMapping, ScenarioDescriptionMapping, ScenarioMapping, drop_non_positioned_tail, @@ -972,7 +973,38 @@ def test_scenario_alternative_mapping(self): tables = {} for title, title_key in titles(scenario_mapping, db_map): tables[title] = list(rows(scenario_mapping, db_map, title_key)) - self.assertEqual(tables, 
{None: [["s1", "a1"], ["s1", "a2"], ["s2", "a2"], ["s2", "a3"]]}) + self.assertEqual(tables, {None: [["s1", "a1"], ["s1", "a2"], ["s2", "a3"], ["s2", "a2"]]}) + db_map.close() + + def test_scenario_alternative_mapping_exports_alternatives_in_correct_order(self): + db_map = DatabaseMapping("sqlite://", create=True) + import_alternatives(db_map, ("a1", "a2", "a3")) + import_scenarios(db_map, ("s1",)) + import_scenario_alternatives(db_map, (("s1", "a2"), ("s1", "a1", "a2"), ("s1", "a3", "a2"))) + db_map.commit_session("Add test data.") + scenario_mapping = ScenarioMapping(0) + scenario_alternative_mapping = ScenarioAlternativeMapping(1) + scenario_mapping.child = scenario_alternative_mapping + tables = {} + for title, title_key in titles(scenario_mapping, db_map): + tables[title] = list(rows(scenario_mapping, db_map, title_key)) + self.assertEqual(tables, {None: [["s1", "a1"], ["s1", "a3"], ["s1", "a2"]]}) + db_map.close() + + def test_legacy_scenario_alternative_mapping_with_before_alternatives(self): + db_map = DatabaseMapping("sqlite://", create=True) + import_alternatives(db_map, ("a1", "a2", "a3")) + import_scenarios(db_map, ("s1",)) + import_scenario_alternatives(db_map, (("s1", "a2"), ("s1", "a1", "a2"), ("s1", "a3", "a2"))) + db_map.commit_session("Add test data.") + scenario_mapping = ScenarioMapping(0) + scenario_alternative_mapping = ScenarioAlternativeMapping(1) + scenario_mapping.child = scenario_alternative_mapping + scenario_alternative_mapping.child = ScenarioBeforeAlternativeMapping(2) + tables = {} + for title, title_key in titles(scenario_mapping, db_map): + tables[title] = list(rows(scenario_mapping, db_map, title_key)) + self.assertEqual(tables, {None: [["s1", "a1", "a3"], ["s1", "a3", "a2"]]}) db_map.close() def test_header(self):