diff --git a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java index a3cc60eba..b9a7c5faa 100644 --- a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java +++ b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java @@ -19,6 +19,8 @@ package org.apache.iceberg.avro; +import java.util.ArrayList; +import java.util.Collection; import java.util.Deque; import java.util.List; import java.util.Map; @@ -103,7 +105,8 @@ public Schema struct(Types.StructType struct, List fieldSchemas) { String fieldName = isValidFieldName ? origFieldName : AvroSchemaUtil.sanitize(origFieldName); Object defaultValue = structField.hasDefaultValue() ? structField.getDefaultValue() : (structField.isOptional() ? JsonProperties.NULL_VALUE : null); - Schema.Field field = new Schema.Field(fieldName, fieldSchemas.get(i), structField.doc(), defaultValue); + Schema.Field field = new Schema.Field(fieldName, fieldSchemas.get(i), structField.doc(), + convertToJsonNull(defaultValue)); if (!isValidFieldName) { field.addProp(AvroSchemaUtil.ICEBERG_FIELD_NAME_PROP, origFieldName); } @@ -232,4 +235,32 @@ public Schema primitive(Type.PrimitiveType primitive) { return primitiveSchema; } + + // This function ensures that all nested null are converted to JsonProperties.NULL_VALUE + // to make sure JacksonUtils.toJsonNode() converts them properly. + private Object convertToJsonNull(Object defaultValue) { + if (defaultValue instanceof Map) { + for (Map.Entry entry : ((Map) defaultValue).entrySet()) { + if (entry.getValue() instanceof Map || entry.getValue() instanceof Collection) { + entry.setValue(convertToJsonNull(entry.getValue())); + } else { + entry.setValue(JsonProperties.NULL_VALUE); + } + } + return defaultValue; + } else if (defaultValue instanceof List) { + List originalList = (List) defaultValue; + List copiedList = new ArrayList<>(); + + for (Object element : originalList) { + if (element instanceof Map || element instanceof Collection) { + copiedList.add(convertToJsonNull(element)); + } else { + copiedList.add(JsonProperties.NULL_VALUE); + } + } + return copiedList; + } + return defaultValue; + } } diff --git a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java index b1ebe5bd3..560a54b70 100644 --- a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java +++ b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java @@ -516,4 +516,72 @@ public void testVariousTypesDefaultValues() { Assert.assertTrue(IntStream.range(0, roundTripiSchema.columns().size()) .allMatch(i -> roundTripiSchema.columns().get(i).equals(iSchema.columns().get(i)))); } + + @Test + public void testConversionOfRecordWithNestedSubElement() { + String schemaString = "{\n" + + " \"type\": \"record\",\n" + + " \"name\": \"Root\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\": \"OuterRecord1\",\n" + + " \"type\": [\n" + + " \"null\",\n" + + " {\n" + + " \"type\": \"record\",\n" + + " \"name\": \"InnerElement\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\": \"InnerField\",\n" + + " \"type\": {\n" + + " \"type\": \"record\",\n" + + " \"name\": \"InnerField1\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\": \"InnerField1Param\",\n" + + " \"type\": [\n" + + " \"null\",\n" + + " \"string\"\n" + + " ],\n" + + " \"default\": null\n" + + " }\n" + + " ]\n" + + " },\n" + + " \"default\": {\n" + + " \"InnerField1Param\": null\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " ],\n" + + " \"default\": null\n" + + " },\n" + + " {\n" + + " \"name\": \"InnerElementV2\",\n" + + " \"type\": [\n" + + " \"null\",\n" + + " {\n" + + " \"type\": \"record\",\n" + + " \"name\": \"InnerElementV2\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\": \"InnerField2\",\n" + + " \"type\": {\n" + + " \"type\": \"array\",\n" + + " \"items\": \"InnerElement\"\n" + + " },\n" + + " \"default\": []\n" + + " }\n" + + " ]\n" + + " }\n" + + " ],\n" + + " \"default\": null\n" + + " }\n" + + " ]\n" + + "}"; + Schema schema = new Schema.Parser().parse(schemaString); + org.apache.iceberg.Schema iSchema = AvroSchemaUtil.toIceberg(schema); + String jSchema = SchemaParser.toJson(iSchema); + org.apache.iceberg.Schema roundTripiSchema = SchemaParser.fromJson(jSchema); + } }