diff --git a/adtl/__init__.py b/adtl/__init__.py index 7783079..3df939a 100644 --- a/adtl/__init__.py +++ b/adtl/__init__.py @@ -134,6 +134,10 @@ def get_value_unhashed(row: StrDict, rule: Rule, ctx: Context = None) -> Any: value = rule["values"].get(value, value) else: value = rule["values"].get(value) + + # recheck if value is empty after mapping (used to map values to None) + if value == "": + return None # Either source_unit / unit OR source_date / date triggers conversion # do not parse units if value is empty if "source_unit" in rule and "unit" in rule: @@ -1055,6 +1059,10 @@ def main(argv=None): include_defs = args.include_def or [] spec = Parser(args.spec, include_defs=include_defs, quiet=args.quiet) + # check for incompatible options + if spec.header.get("returnUnmatched") and args.parquet: + raise ValueError("returnUnmatched and parquet options are incompatible") + # run adtl adtl_output = spec.parse(args.file, encoding=args.encoding) adtl_output.save(args.output or spec.name, args.parquet) diff --git a/docs/specification.md b/docs/specification.md index 716cf80..26ddcfd 100644 --- a/docs/specification.md +++ b/docs/specification.md @@ -58,8 +58,8 @@ if not present in a datafile, following the same syntax as `fieldPattern` key. terminal describing the error in the transformation. Transformations requiring multiple parameters will only return the current field value that was not transformed. > :warning: This is likely to return columns with non-matching datatypes. External json - validation may fail, as will attempting to use the `--parquet` option to save outputs as - parquet files (which required a consistent type down each column). + validation may fail. This option is incompatible with the `--parquet` option to save + outputs as parquet files (which require a consistent type down each column). 
## Validation diff --git a/tests/test_parser.py b/tests/test_parser.py index 9640e02..da6fe3b 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1227,6 +1227,23 @@ def test_main_parquet(): Path("output-table.parquet").unlink() +def test_main_parquet_error(): + + ARG = [ + str(TEST_PARSERS_PATH / "return-unmapped.toml"), + str(TEST_SOURCES_PATH / "return-unmapped.csv"), + "-o", + "output", + "--encoding", + "utf-8", + ] + + with pytest.raises( + ValueError, match="returnUnmatched and parquet options are incompatible" + ): + parser.main(ARG + ["--parquet"]) + + @responses.activate def test_main_web_schema(snapshot): # test with schema on the web