Skip to content

Commit

Permalink
Bugfix when LLM doesn't return all schema fields during field mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
pipliggins committed Dec 20, 2024
1 parent 707d405 commit 4edb6b3
Showing 2 changed files with 24 additions and 20 deletions.
37 changes: 22 additions & 15 deletions src/adtl/autoparser/mapping.py
Original file line number Diff line number Diff line change
@@ -213,7 +213,7 @@ def match_values_to_schema(self) -> pd.DataFrame:

values_tuples = []
for f in self.target_fields:
s = self.common_values_mapped[f]
s = self.common_values_mapped.get(f)
t = self.target_values[f]
if s and t:
values_tuples.append((f, s, t))
@@ -260,6 +260,10 @@ def create_mapping(self, save=True, file_name="mapping_file") -> pd.DataFrame:
mapped_vals = self.match_values_to_schema()

mapping_dict.drop(columns=["source_type"], inplace=True)

# reindex to add in any schema fields that weren't returned by the LLM
mapping_dict = mapping_dict.reindex(self.target_fields)

mapping_dict["target_values"] = mapping_dict.index.map(self.target_values)
mapping_dict["value_mapping"] = mapping_dict.index.map(mapped_vals)

@@ -269,23 +273,22 @@ def create_mapping(self, save=True, file_name="mapping_file") -> pd.DataFrame:
f"The following schema fields have not been mapped: {list(unmapped)}",
UserWarning,
)

# turn lists & dicts into strings for consistency with saved CSV
mapping_dict["target_values"] = mapping_dict["target_values"].apply(
lambda x: (", ".join(str(item) for item in x) if isinstance(x, list) else x)
)
mapping_dict["value_mapping"] = mapping_dict["value_mapping"].apply(
lambda x: (
", ".join(f"{k}={v}" for k, v in x.items())
if isinstance(x, dict)
else x
)
)

if save is False:
return mapping_dict
else:
# turn lists & dicts into strings to save to CSV
mapping_dict["target_values"] = mapping_dict["target_values"].apply(
lambda x: (
", ".join(str(item) for item in x) if isinstance(x, list) else x
)
)
mapping_dict["value_mapping"] = mapping_dict["value_mapping"].apply(
lambda x: (
", ".join(f"{k}={v}" for k, v in x.items())
if isinstance(x, dict)
else x
)
)

# Write to CSV
if not file_name.endswith(".csv"):
file_name += ".csv"
@@ -324,6 +327,10 @@ def create_mapping(
Which LLM to use; currently only 'openai' is supported.
config
Path to a JSON file containing the configuration for autoparser.
save
Whether to save the mapping to a CSV file.
file_name
Name of the file to save the mapping to.
Returns
-------
7 changes: 2 additions & 5 deletions tests/test_autoparser/test_mapper.py
Original file line number Diff line number Diff line change
@@ -344,11 +344,8 @@ def test_class_create_mapping_no_save():
"source_description": "Pet Animal",
"source_field": "AnimalDeCompagnie",
"common_values": "Oui, Non, non",
"target_values": ["True", "False", "None"],
"value_mapping": {
"oui": "True",
"non": "False",
},
"target_values": "True, False, None",
"value_mapping": "oui=True, non=False",
},
name="pet",
)

0 comments on commit 4edb6b3

Please sign in to comment.