From 397aefa173ce508d7e803b1b35cbaafdb65842e3 Mon Sep 17 00:00:00 2001 From: jsolanke Date: Thu, 23 Jan 2025 16:44:32 -0800 Subject: [PATCH 1/7] Adding config option to disable utf8 encoding --- .../avroutil1/builder/SchemaBuilder.java | 17 ++- .../operations/codegen/CodeGenOpConfig.java | 45 ++++++ .../codegen/own/AvroUtilCodeGenPlugin.java | 2 +- avro-builder/tests/codegen-110/build.gradle | 3 +- .../codegen-no-utf8-encoding/build.gradle | 133 ++++++++++++++++++ .../charseqmethod/ArrayOfStringRecord.avsc | 15 ++ .../avro/charseqmethod/HasNoSimpleString.avsc | 23 +++ .../avro/charseqmethod/TestCollections.avsc | 95 +++++++++++++ avro-builder/tests/tests-allavro/build.gradle | 3 + .../avroutil1/builder/SpecificRecordTest.java | 103 ++++++++++++++ .../codegen/SpecificRecordClassGenerator.java | 82 +++++------ .../SpecificRecordGenerationConfig.java | 58 +++++++- .../codegen/SpecificRecordGeneratorUtil.java | 17 ++- settings.gradle | 23 +-- 14 files changed, 558 insertions(+), 61 deletions(-) create mode 100644 avro-builder/tests/codegen-no-utf8-encoding/build.gradle create mode 100644 avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/ArrayOfStringRecord.avsc create mode 100644 avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/HasNoSimpleString.avsc create mode 100644 avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/TestCollections.avsc diff --git a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java index e266b27e2..0197372a5 100644 --- a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java +++ b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java @@ -51,6 +51,8 @@ public static void main(String[] args) throws Exception { long optionParseStart = System.currentTimeMillis(); OptionParser parser = new OptionParser(); + + 
// ADD NEW OPTION HERE(1 option) OptionSpec inputOpt = parser.accepts("input", "Schema or directory of schemas to compile [REQUIRED]") .withRequiredArg().required() .describedAs("file"); @@ -114,6 +116,11 @@ public static void main(String[] args) throws Exception { .defaultsTo("false") .describedAs("true/false"); + OptionSpec enableUtf8Encoding = parser.accepts("enableUtf8Encoding", "adds codegen of UTF8 type for strings.") + .withOptionalArg() + .defaultsTo("true") + .describedAs("true/false"); + //allow plugins to add CLI options for (BuilderPlugin plugin : plugins) { plugin.customizeCLI(parser); @@ -238,6 +245,13 @@ public static void main(String[] args) throws Exception { skipCodegenIfSchemaOnClasspath = Boolean.TRUE.equals(Boolean.parseBoolean(value)); } + boolean handleUtf8Encoding = true; + if (options.has(enableUtf8Encoding)) { + String value = options.valueOf(enableUtf8Encoding); + handleUtf8Encoding = Boolean.TRUE.equals(Boolean.parseBoolean(value)); + handleUtf8EncodingInPutByIndex = handleUtf8Encoding; + } + //allow plugins to parse and validate their own added options for (BuilderPlugin plugin : plugins) { plugin.parseAndValidateOptions(options); @@ -258,7 +272,8 @@ public static void main(String[] args) throws Exception { minAvroVer, handleAvro702, handleUtf8EncodingInPutByIndex, - skipCodegenIfSchemaOnClasspath + skipCodegenIfSchemaOnClasspath, + handleUtf8Encoding ); opConfig.validateParameters(); diff --git a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/operations/codegen/CodeGenOpConfig.java b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/operations/codegen/CodeGenOpConfig.java index 9826bb2ae..f60912686 100644 --- a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/operations/codegen/CodeGenOpConfig.java +++ b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/operations/codegen/CodeGenOpConfig.java @@ -49,6 +49,7 @@ public class CodeGenOpConfig { boolean avro702Handling; 
boolean utf8EncodingPutByIndex; boolean skipCodegenIfSchemaOnClasspath; + boolean enableUtf8Encoding; @Deprecated public CodeGenOpConfig( @@ -77,6 +78,7 @@ public CodeGenOpConfig( this.minAvroVersion = minAvroVersion; this.avro702Handling = avro702Handling; this.utf8EncodingPutByIndex = true; + this.enableUtf8Encoding = true; } @Deprecated @@ -107,6 +109,7 @@ public CodeGenOpConfig( this.minAvroVersion = minAvroVersion; this.avro702Handling = avro702Handling; this.utf8EncodingPutByIndex = true; + this.enableUtf8Encoding = true; } @Deprecated @@ -136,6 +139,7 @@ public CodeGenOpConfig(List inputRoots, this.minAvroVersion = minAvroVersion; this.avro702Handling = avro702Handling; this.utf8EncodingPutByIndex = handleUtf8EncodingInPutByIndex; + this.enableUtf8Encoding = true; } @Deprecated @@ -167,8 +171,10 @@ public CodeGenOpConfig(List inputRoots, this.avro702Handling = avro702Handling; this.utf8EncodingPutByIndex = handleUtf8EncodingInPutByIndex; this.skipCodegenIfSchemaOnClasspath = skipCodegenIfSchemaOnClasspath; + this.enableUtf8Encoding = true; } + @Deprecated public CodeGenOpConfig(List inputRoots, List nonImportableSourceRoots, boolean includeClasspath, @@ -199,6 +205,41 @@ public CodeGenOpConfig(List inputRoots, this.avro702Handling = avro702Handling; this.utf8EncodingPutByIndex = handleUtf8EncodingInPutByIndex; this.skipCodegenIfSchemaOnClasspath = skipCodegenIfSchemaOnClasspath; + this.enableUtf8Encoding = true; + } + + public CodeGenOpConfig(List inputRoots, + List nonImportableSourceRoots, + boolean includeClasspath, + File outputSpecificRecordClassesRoot, + File outputExpandedSchemasRoot, + List resolverPath, + CodeGenerator generatorType, + DuplicateSchemaBehaviour dupBehaviour, + List duplicateSchemasToIgnore, + StringRepresentation stringRepresentation, + StringRepresentation methodStringRepresentation, + AvroVersion minAvroVersion, + boolean avro702Handling, + boolean handleUtf8EncodingInPutByIndex, + boolean skipCodegenIfSchemaOnClasspath, + boolean 
handleUtf8Encoding) { + this.inputRoots = inputRoots; + this.nonImportableSourceRoots = nonImportableSourceRoots; + this.includeClasspath = includeClasspath; + this.outputSpecificRecordClassesRoot = outputSpecificRecordClassesRoot; + this.outputExpandedSchemasRoot = outputExpandedSchemasRoot; + this.resolverPath = resolverPath; + this.generatorType = generatorType; + this.dupBehaviour = dupBehaviour; + this.duplicateSchemasToIgnore = duplicateSchemasToIgnore; + this.stringRepresentation = stringRepresentation; + this.methodStringRepresentation = methodStringRepresentation; + this.minAvroVersion = minAvroVersion; + this.avro702Handling = avro702Handling; + this.utf8EncodingPutByIndex = handleUtf8EncodingInPutByIndex; + this.skipCodegenIfSchemaOnClasspath = skipCodegenIfSchemaOnClasspath; + this.enableUtf8Encoding = handleUtf8Encoding; } /** @@ -312,6 +353,10 @@ public boolean shouldSkipCodegenIfSchemaOnClasspath() { return skipCodegenIfSchemaOnClasspath; } + public boolean isUtf8EncodingEnabled() { + return enableUtf8Encoding; + } + private void validateInput(Collection files, String desc) { for (File f : files) { if (!f.exists()) { diff --git a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/operations/codegen/own/AvroUtilCodeGenPlugin.java b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/operations/codegen/own/AvroUtilCodeGenPlugin.java index 141ed62b5..1c793e3d7 100644 --- a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/operations/codegen/own/AvroUtilCodeGenPlugin.java +++ b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/operations/codegen/own/AvroUtilCodeGenPlugin.java @@ -102,7 +102,7 @@ private void generateCode(OperationContext opContext) { SpecificRecordGenerationConfig.getBroadCompatibilitySpecificRecordGenerationConfig( AvroJavaStringRepresentation.fromJson(config.getStringRepresentation().toString()), 
AvroJavaStringRepresentation.fromJson(config.getMethodStringRepresentation().toString()), - config.getMinAvroVersion(), config.isUtf8EncodingPutByIndexEnabled()); + config.getMinAvroVersion(), config.isUtf8EncodingPutByIndexEnabled(), config.isUtf8EncodingEnabled()); final SpecificRecordClassGenerator generator = new SpecificRecordClassGenerator(); List generatedClasses = allNamedSchemas.stream().collect(StreamUtil.toParallelStream(namedSchema -> { try { diff --git a/avro-builder/tests/codegen-110/build.gradle b/avro-builder/tests/codegen-110/build.gradle index 742492946..4f8788e80 100644 --- a/avro-builder/tests/codegen-110/build.gradle +++ b/avro-builder/tests/codegen-110/build.gradle @@ -43,7 +43,8 @@ task runOwnCodegen { "--input", "$projectDir/src/main/avro", "--output", "$buildDir/generated/sources/avro/java/main", "--generator", "AVRO_UTIL", - "--minAvroVer", "1.4" + "--minAvroVer", "1.4", + "--enableUtf8Encoding", "true" ] } } diff --git a/avro-builder/tests/codegen-no-utf8-encoding/build.gradle b/avro-builder/tests/codegen-no-utf8-encoding/build.gradle new file mode 100644 index 000000000..d4c0ad921 --- /dev/null +++ b/avro-builder/tests/codegen-no-utf8-encoding/build.gradle @@ -0,0 +1,133 @@ +/* + * Copyright 2022 LinkedIn Corp. + * Licensed under the BSD 2-Clause License (the "License"). + * See License in the project root for license information. 
+ */ + +plugins { + id "java-library" +} + +configurations { + codegen +} + +sourceSets { + main { + java { + srcDir 'src/main/java' + srcDir "$buildDir/generated/sources/avro/java/main" + } + resources { + srcDirs = [ + "src/main/avro" + ] + } + } +} + +dependencies { + codegen project(":avro-builder:builder") +} + +task runOwnCodegen { + description = 'generate specific classes using own codegen utility' + + dependsOn configurations.codegen + + doLast { + javaexec { + classpath configurations.codegen + main = 'com.linkedin.avroutil1.builder.SchemaBuilder' + args = [ + "--input", "$projectDir/src/main/avro", + "--output", "$buildDir/generated/sources/avro/java/main", + "--generator", "AVRO_UTIL", + "--stringRepresentation", "CharSequence", + "--methodStringRepresentation", "CharSequence", + "--enableUtf8Encoding", "false" + ] + } + } +} + +compileJava.dependsOn runOwnCodegen + +dependencies { + codegen project(":avro-builder:builder") + + implementation("org.apache.avro:avro:1.4.1") { + exclude group: "org.mortbay.jetty" + exclude group: "org.apache.velocity" + exclude group: "commons-lang" + exclude group: "org.jboss.netty" + exclude group: "com.thoughtworks.paranamer", module: "paranamer-ant" + } + //required because generated code depends on the helper + implementation project(":helper:helper") +} + +// +// plugins { +// id "java-library" +// } +// +// configurations { +// codegen +// } +// +// sourceSets { +// main { +// java { +// srcDir 'src/main/java' +// srcDir "$buildDir/generated/sources/avro/java/main" +// } +// resources { +// srcDirs = [ +// "src/main/avro" +// ] +// } +// } +// } +// +// dependencies { +// codegen project(":avro-builder:builder") +// } +// +// task runOwnCodegen { +// description = 'generate specific classes using own codegen utility' +// +// dependsOn configurations.codegen +// +// doLast { +// javaexec { +// classpath configurations.codegen +// main = 'com.linkedin.avroutil1.builder.SchemaBuilder' +// args = [ +// "--input", 
"$projectDir/src/main/avro", +// "--output", "$buildDir/generated/sources/avro/java/main", +// "--generator", "AVRO_UTIL", +// "--stringRepresentation", "CharSequence", +// "--methodStringRepresentation", "CharSequence", +// "--enableUtf8Encoding", "false" +// ] +// } +// } +// } +// +// compileJava.dependsOn runOwnCodegen +// +// dependencies { +// codegen project(":avro-builder:builder") +// +// implementation ("org.apache.avro:avro:1.4.1") { +// exclude group: "org.mortbay.jetty" +// exclude group: "org.apache.velocity" +// exclude group: "commons-lang" +// exclude group: "org.jboss.netty" +// exclude group: "com.thoughtworks.paranamer", module: "paranamer-ant" +// } +// //required because generated code depends on the helper +// implementation project(":helper:helper") +// } +//} \ No newline at end of file diff --git a/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/ArrayOfStringRecord.avsc b/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/ArrayOfStringRecord.avsc new file mode 100644 index 000000000..0ae046f34 --- /dev/null +++ b/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/ArrayOfStringRecord.avsc @@ -0,0 +1,15 @@ +{ + "type": "record", + "namespace": "noutf8encoding", + "name": "ArrayOfStringRecord", + "doc": "Array of String Record", + "fields": [ + { + "name": "arOfRec", + "type": { + "type":"array", + "items": "string" + } + } + ] +} \ No newline at end of file diff --git a/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/HasNoSimpleString.avsc b/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/HasNoSimpleString.avsc new file mode 100644 index 000000000..05d8fa7ce --- /dev/null +++ b/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/HasNoSimpleString.avsc @@ -0,0 +1,23 @@ +{ + "type": "record", + "namespace": "noutf8encoding", + "name": "HasNoSimpleString", + "fields": [ + { + "name": "one", + "type": "float" + }, 
+ { + "name": "two", + "type": ["string"] + }, + { + "name": "three", + "type": "int" + }, + { + "name": "four", + "type": "boolean" + } + ] +} \ No newline at end of file diff --git a/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/TestCollections.avsc b/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/TestCollections.avsc new file mode 100644 index 000000000..576d1314d --- /dev/null +++ b/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/TestCollections.avsc @@ -0,0 +1,95 @@ +{ + "name": "TestCollections", + "namespace": "noutf8encoding", + "fields": [ + { + "name": "str", + "type": "string" + }, + { + "name": "strAr", + "type": { + "type": "array", + "items": "string" + } + }, + { + "name": "strArAr", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "string" + } + } + }, + { + "name": "unionOfArray", + "type": [ + "null", + { + "type": "array", + "items": "string" + } + ] + }, + { + "name": "arOfMap", + "type": { + "type": "array", + "items": { + "type": "map", + "values": "string" + } + } + }, + { + "name": "unionOfMap", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ] + }, + { + "name": "arOfUnionOfStr", + "type": { + "type": "array", + "items": ["null", "string"] + } + }, + { + "name": "arOfMapOfUnionOfArray", + "type": { + "type": "array", + "items": { + "type": "map", + "values": ["null", { + "type": "array", + "items": "string" + }] + } + } + }, + { + "name": "intAr", + "type": { + "type": "array", + "items": "int" + } + }, + { + "name": "unionOfIntMap", + "type": [ + "null", + { + "type": "map", + "values": "int" + } + ] + } + ], + "type": "record" +} \ No newline at end of file diff --git a/avro-builder/tests/tests-allavro/build.gradle b/avro-builder/tests/tests-allavro/build.gradle index c58cb0840..ccadff37d 100644 --- a/avro-builder/tests/tests-allavro/build.gradle +++ b/avro-builder/tests/tests-allavro/build.gradle @@ -80,6 +80,9 @@ 
dependencies { testImplementation (project(":avro-builder:tests:codegen-no-utf8-in-putbyindex")) { exclude group: "org.apache.avro" } + testImplementation (project(":avro-builder:tests:codegen-no-utf8-encoding")) { + exclude group: "org.apache.avro" + } testImplementation "com.google.guava:guava:28.2-jre" testImplementation "org.mockito:mockito-core:3.2.4" diff --git a/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java b/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java index ebdcc4928..956adee7a 100644 --- a/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java +++ b/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java @@ -1891,6 +1891,37 @@ public void modifiablePrimitiveCollectionTest() { Assert.assertEquals((int) instance.intAr.get(instance.getIntAr().size() - 1), Integer.MAX_VALUE); } + @Test + public void modifiablePrimitiveCollectionTestNot() { + String tba = "NewElement"; + RandomRecordGenerator generator = new RandomRecordGenerator(); + charseqmethod.TestCollections instance = + generator.randomSpecific(charseqmethod.TestCollections.class, RecordGenerationConfig.newConfig().withAvoidNulls(true)); + + // array of string + instance.getStrAr().add(tba); + Assert.assertTrue(instance.getStrAr().contains(tba)); + Assert.assertTrue(instance.strAr.contains(new Utf8(tba))); + + // union[null, List] + instance.getUnionOfArray().add(tba); + Assert.assertTrue(instance.getUnionOfArray().contains(tba)); + Assert.assertTrue(instance.unionOfArray.contains(new Utf8(tba))); + + // array (union[null, string]) + instance.getArOfUnionOfStr().add(tba); + Assert.assertTrue(instance.getArOfUnionOfStr().contains(tba)); + Assert.assertTrue(instance.arOfUnionOfStr.contains(new Utf8(tba))); + + + // Union (null, Map) + Assert.assertThrows(UnsupportedOperationException.class, () -> 
instance.getUnionOfMap().put("key1", tba)); + + instance.getIntAr().add(Integer.MAX_VALUE); + Assert.assertEquals((int) instance.getIntAr().get(instance.getIntAr().size() - 1), Integer.MAX_VALUE); + Assert.assertEquals((int) instance.intAr.get(instance.getIntAr().size() - 1), Integer.MAX_VALUE); + } + @Test public void modifiablePrimitiveCollectionTestForCharSeq() { String tba = "NewElement"; @@ -1965,6 +1996,78 @@ public void testIfSerializable() throws IOException { } } + /** + * Tests that both String and UTF8 fields are supported in the generated classes and can be accessed + * interchangeably directly and through getters. + * @throws IOException + */ + @Test + public void testNoUtf8Encoding() throws IOException { + RandomRecordGenerator generator = new RandomRecordGenerator(); + noutf8encoding.TestCollections instance = generator.randomSpecific(noutf8encoding.TestCollections.class, + RecordGenerationConfig.newConfig().withAvoidNulls(true)); + + // fields should contain String values + Assert.assertTrue(instance.str instanceof CharSequence); + + // getter should return String values + Assert.assertTrue(instance.getStr() instanceof CharSequence); + + // Set String and Get String + String strValue = "strValue"; + Utf8 utf8Value = new Utf8("utf8Value"); + + instance.getStrAr().add(strValue); + instance.getStrAr().add(utf8Value); + //instance + Assert.assertTrue(instance.getStrAr().contains(strValue)); + Assert.assertTrue(instance.strAr.contains(strValue)); + Assert.assertTrue(instance.getStrAr().contains(utf8Value)); + Assert.assertTrue(instance.strAr.contains(utf8Value)); + + // array (array (union[null, string]) + instance.getStrArAr().add(Arrays.asList(strValue)); + Assert.assertTrue(instance.getStrArAr().get(instance.getStrArAr().size() - 1).contains(strValue)); + Assert.assertTrue(instance.strArAr.get(instance.getStrArAr().size() - 1).contains(strValue)); + instance.getStrArAr().add(Arrays.asList(utf8Value)); + 
Assert.assertTrue(instance.getStrArAr().get(instance.getStrArAr().size() - 1).contains(utf8Value)); + Assert.assertTrue(instance.strArAr.get(instance.getStrArAr().size() - 1).contains(utf8Value)); + + // union[null, List] + instance.getUnionOfArray().add(strValue); + Assert.assertTrue(instance.getUnionOfArray().contains(strValue)); + Assert.assertTrue(instance.unionOfArray.contains(strValue)); + + instance.getUnionOfArray().add(utf8Value); + Assert.assertTrue(instance.getUnionOfArray().contains(utf8Value)); + Assert.assertTrue(instance.unionOfArray.contains(utf8Value)); + + // array (union[null, string]) + instance.getArOfUnionOfStr().add(strValue); + Assert.assertTrue(instance.getArOfUnionOfStr().contains(strValue)); + Assert.assertTrue(instance.arOfUnionOfStr.contains(strValue)); + + instance.getArOfUnionOfStr().add(utf8Value); + Assert.assertTrue(instance.getArOfUnionOfStr().contains(utf8Value)); + Assert.assertTrue(instance.arOfUnionOfStr.contains(utf8Value)); + + // Union (null, Map) + Map mapOfStr = new HashMap() {{ + put("key1", strValue); + put("key2", utf8Value); + }}; + instance.setUnionOfMap(mapOfStr); + Assert.assertTrue(instance.getUnionOfMap().containsValue(strValue)); + Assert.assertTrue(instance.getUnionOfMap().containsValue(utf8Value)); + + // array (Map>) + instance.setArOfMap(Arrays.asList(mapOfStr)); + Assert.assertTrue(instance.getArOfMap().get(0).containsValue(strValue)); + Assert.assertTrue(instance.arOfMap.get(0).containsValue(strValue)); + Assert.assertTrue(instance.getArOfMap().get(0).containsValue(utf8Value)); + Assert.assertTrue(instance.arOfMap.get(0).containsValue(strValue)); + } + @BeforeClass public void setup() { System.setProperty("org.apache.avro.specific.use_custom_coders", "true"); diff --git a/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordClassGenerator.java b/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordClassGenerator.java index 8bc637460..5e5ca3b17 100644 --- 
a/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordClassGenerator.java +++ b/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordClassGenerator.java @@ -460,13 +460,13 @@ protected JavaFile generateSpecificRecord(AvroRecordSchema recordSchema, Specifi if (!recordSchema.getFields().isEmpty()) { // add all arg constructor if #args < 254 - addAllArgsConstructor(recordSchema, config.getDefaultMethodStringRepresentation(), classBuilder); + addAllArgsConstructor(recordSchema, config.getDefaultMethodStringRepresentation(), classBuilder, !config.isUtf8EncodingEnabled()); - if (SpecificRecordGeneratorUtil.recordHasSimpleStringField(recordSchema)) { + if (config.isUtf8EncodingEnabled() && SpecificRecordGeneratorUtil.recordHasSimpleStringField(recordSchema)) { addAllArgsConstructor(recordSchema, config.getDefaultMethodStringRepresentation().equals(AvroJavaStringRepresentation.STRING) ? AvroJavaStringRepresentation.CHAR_SEQUENCE : AvroJavaStringRepresentation.STRING, - classBuilder); + classBuilder, !config.isUtf8EncodingEnabled()); } // Add public/private fields @@ -485,7 +485,7 @@ protected JavaFile generateSpecificRecord(AvroRecordSchema recordSchema, Specifi // setters classBuilder.addMethod(getSetterMethodSpec(field, config)); MethodSpec overloadedSetterIfString = getOverloadedSetterSpecIfStringField(field, config); - if(overloadedSetterIfString != null) { + if(config.isUtf8EncodingEnabled() && overloadedSetterIfString != null) { classBuilder.addMethod(getOverloadedSetterSpecIfStringField(field, config)); } } @@ -542,22 +542,22 @@ protected JavaFile generateSpecificRecord(AvroRecordSchema recordSchema, Specifi } private void addAllArgsConstructor(AvroRecordSchema recordSchema, - AvroJavaStringRepresentation defaultMethodStringRepresentation, TypeSpec.Builder classBuilder) { + AvroJavaStringRepresentation defaultMethodStringRepresentation, TypeSpec.Builder classBuilder, boolean disableStringTransform) { 
if(recordSchema.getFields().size() < 254) { MethodSpec.Builder allArgsConstructorBuilder = MethodSpec.constructorBuilder().addModifiers(Modifier.PUBLIC); for (AvroSchemaField field : recordSchema.getFields()) { //if declared schema, use fully qualified class (no import) String escapedFieldName = getFieldNameWithSuffix(field); allArgsConstructorBuilder.addParameter(getParameterSpecForField(field, defaultMethodStringRepresentation)); - if(SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { + if(!disableStringTransform && SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { allArgsConstructorBuilder.addStatement( "this.$1L = com.linkedin.avroutil1.compatibility.StringConverterUtil.getUtf8($1L)", escapedFieldName); - } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(field.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(field.getSchema(), disableStringTransform)) { allArgsConstructorBuilder.addStatement( "this.$1L = com.linkedin.avroutil1.compatibility.collectiontransformer.ListTransformer.convertToUtf8($1L)", escapedFieldName); - } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(field.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(field.getSchema(), disableStringTransform)) { allArgsConstructorBuilder.addStatement( "this.$1L = com.linkedin.avroutil1.compatibility.collectiontransformer.MapTransformer.convertToUtf8($1L)", escapedFieldName); @@ -570,20 +570,20 @@ private void addAllArgsConstructor(AvroRecordSchema recordSchema, // if union might contain string value in runtime for (SchemaOrRef unionMemberSchema : ((AvroUnionSchema) field.getSchema()).getTypes()) { - if (SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, unionMemberSchema.getSchema())) { + if (!disableStringTransform && SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, unionMemberSchema.getSchema())) { 
allArgsConstructorBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, CharSequence.class) .addStatement("this.$1L = com.linkedin.avroutil1.compatibility.StringConverterUtil.getUtf8($1L)", escapedFieldName) .endControlFlow(); } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema( - unionMemberSchema.getSchema())) { + unionMemberSchema.getSchema(), disableStringTransform)) { allArgsConstructorBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, List.class) .addStatement( "this.$1L = com.linkedin.avroutil1.compatibility.collectiontransformer.ListTransformer.convertToUtf8($1L)", escapedFieldName) .endControlFlow(); - } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema(), disableStringTransform)) { allArgsConstructorBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, Map.class) .addStatement( "this.$1L = com.linkedin.avroutil1.compatibility.collectiontransformer.MapTransformer.convertToUtf8($1L)", @@ -641,7 +641,7 @@ private void populateBuilderClassBuilder(TypeSpec.Builder recordBuilder, AvroRec if (fieldClass != null) { fieldBuilder = FieldSpec.builder(fieldClass, escapedFieldName, Modifier.PRIVATE); - if(AvroType.STRING.equals(fieldSchema.type()) || SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { + if(config.isUtf8EncodingEnabled() && (AvroType.STRING.equals(fieldSchema.type()) || SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema()))) { buildMethodCodeBlockBuilder.addStatement( "record.$1L = fieldSetFlags()[$2L] ? 
" + "com.linkedin.avroutil1.compatibility.StringConverterUtil.getUtf8(this.$1L) : " @@ -691,7 +691,7 @@ private void populateBuilderClassBuilder(TypeSpec.Builder recordBuilder, AvroRec SpecificRecordGeneratorUtil.getJavaClassForAvroTypeIfApplicable(AvroType.STRING, config.getDefaultFieldStringRepresentation(), true)) .endControlFlow(); - } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(unionMemberSchema.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(unionMemberSchema.getSchema(), !config.isUtf8EncodingEnabled())) { buildMethodCodeBlockBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, List.class) .addStatement( "record.$1L = fieldSetFlags()[$2L] ? " @@ -700,7 +700,7 @@ private void populateBuilderClassBuilder(TypeSpec.Builder recordBuilder, AvroRec escapedFieldName, fieldIndex, SpecificRecordGeneratorUtil.getTypeName(field.getSchema(), fieldAvroType, true, config.getDefaultFieldStringRepresentation())) .endControlFlow(); - } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema(), !config.isUtf8EncodingEnabled())) { buildMethodCodeBlockBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, Map.class) .addStatement( "record.$1L = fieldSetFlags()[$2L] ? 
" @@ -1460,7 +1460,7 @@ private void addPutByIndexMethod(TypeSpec.Builder classBuilder, AvroRecordSchema "case $L: this.$L = com.linkedin.avroutil1.compatibility.StringConverterUtil.getUtf8(value); break", fieldIndex++, escapedFieldName); } - } else if (config.isUtf8EncodingInPutByIndexEnabled() && SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(field.getSchema())) { + } else if (config.isUtf8EncodingInPutByIndexEnabled() && SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(field.getSchema(),!config.isUtf8EncodingEnabled())) { if (config.getDefaultFieldStringRepresentation().equals(AvroJavaStringRepresentation.STRING)) { switchBuilder.addStatement( "case $1L: this.$2L = ($3T) com.linkedin.avroutil1.compatibility.collectiontransformer.ListTransformer.convertToString(value); break", @@ -1474,7 +1474,7 @@ private void addPutByIndexMethod(TypeSpec.Builder classBuilder, AvroRecordSchema SpecificRecordGeneratorUtil.getTypeName(field.getSchemaOrRef().getSchema(), field.getSchemaOrRef().getSchema().type(), true, config.getDefaultFieldStringRepresentation())); } - } else if (config.isUtf8EncodingInPutByIndexEnabled() && SpecificRecordGeneratorUtil.isMapTransformerApplicable(field.getSchema())) { + } else if (config.isUtf8EncodingInPutByIndexEnabled() && SpecificRecordGeneratorUtil.isMapTransformerApplicable(field.getSchema(),!config.isUtf8EncodingEnabled())) { if (config.getDefaultFieldStringRepresentation().equals(AvroJavaStringRepresentation.STRING)) { switchBuilder.addStatement( "case $1L: this.$2L = ($3T) com.linkedin.avroutil1.compatibility.collectiontransformer.MapTransformer.convertToString(value); break", @@ -1507,7 +1507,7 @@ private void addPutByIndexMethod(TypeSpec.Builder classBuilder, AvroRecordSchema } switchBuilder.endControlFlow(); - } else if (config.isUtf8EncodingInPutByIndexEnabled() && SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(unionMemberSchema.getSchema())) { + } else if 
(config.isUtf8EncodingInPutByIndexEnabled() && SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(unionMemberSchema.getSchema(),!config.isUtf8EncodingEnabled())) { switchBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, List.class); if (config.getDefaultFieldStringRepresentation().equals(AvroJavaStringRepresentation.STRING)) { switchBuilder.addStatement( @@ -1522,7 +1522,7 @@ private void addPutByIndexMethod(TypeSpec.Builder classBuilder, AvroRecordSchema } switchBuilder.endControlFlow(); - } else if (config.isUtf8EncodingInPutByIndexEnabled() && SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema())) { + } else if (config.isUtf8EncodingInPutByIndexEnabled() && SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema(),!config.isUtf8EncodingEnabled())) { switchBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, Map.class); if (config.getDefaultFieldStringRepresentation().equals(AvroJavaStringRepresentation.STRING)) { switchBuilder.addStatement( @@ -1570,16 +1570,16 @@ private void addGetByIndexMethod(TypeSpec.Builder classBuilder, AvroRecordSchema switchBuilder.beginControlFlow("switch (field)"); for (AvroSchemaField field : recordSchema.getFields()) { String escapedFieldName = getFieldNameWithSuffix(field); - if (SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { + if (config.isUtf8EncodingEnabled() && SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { Class fieldClass = SpecificRecordGeneratorUtil.getJavaClassForAvroTypeIfApplicable(AvroType.STRING, config.getDefaultMethodStringRepresentation(), false); switchBuilder.addStatement("case $L: return com.linkedin.avroutil1.compatibility.StringConverterUtil.get$L(this.$L)", fieldIndex++, fieldClass.getSimpleName(), escapedFieldName); - } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(field.getSchema())) { + } else if 
(SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(field.getSchema(), !config.isUtf8EncodingEnabled())) { switchBuilder.addStatement( "case $L: return com.linkedin.avroutil1.compatibility.collectiontransformer.ListTransformer.get$LList(this.$L, $L)", fieldIndex++, config.getDefaultMethodStringRepresentation().getJsonValue(), escapedFieldName, - SpecificRecordGeneratorUtil.isCollectionSchemaValuePrimitive(field.getSchema())); - } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(field.getSchema())) { + SpecificRecordGeneratorUtil.isCollectionSchemaValuePrimitive(field.getSchema(), !config.isUtf8EncodingEnabled())); + } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(field.getSchema(), !config.isUtf8EncodingEnabled())) { switchBuilder.addStatement( "case $L: return com.linkedin.avroutil1.compatibility.collectiontransformer.MapTransformer.get$LMap(this.$L)", fieldIndex++, config.getDefaultMethodStringRepresentation().getJsonValue(), escapedFieldName); @@ -1592,21 +1592,21 @@ private void addGetByIndexMethod(TypeSpec.Builder classBuilder, AvroRecordSchema // if union might contain string value in runtime for (SchemaOrRef unionMemberSchema : ((AvroUnionSchema) field.getSchema()).getTypes()) { - if (SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, unionMemberSchema.getSchema())) { + if (config.isUtf8EncodingEnabled() && SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, unionMemberSchema.getSchema())) { switchBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, CharSequence.class) .addStatement("return com.linkedin.avroutil1.compatibility.StringConverterUtil.get$1L($2L)", config.getDefaultMethodStringRepresentation().getJsonValue(), escapedFieldName) .endControlFlow(); } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema( - unionMemberSchema.getSchema())) { + unionMemberSchema.getSchema(),!config.isUtf8EncodingEnabled())) { switchBuilder.beginControlFlow("else 
if($1L instanceof $2T)", escapedFieldName, List.class) .addStatement( "return com.linkedin.avroutil1.compatibility.collectiontransformer.ListTransformer.get$1LList($2L, $3L)", config.getDefaultMethodStringRepresentation().getJsonValue(), escapedFieldName, - SpecificRecordGeneratorUtil.isCollectionSchemaValuePrimitive(field.getSchema())) + SpecificRecordGeneratorUtil.isCollectionSchemaValuePrimitive(field.getSchema(), !config.isUtf8EncodingEnabled())) .endControlFlow(); - } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema(),!config.isUtf8EncodingEnabled())) { switchBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, Map.class) .addStatement( "return com.linkedin.avroutil1.compatibility.collectiontransformer.MapTransformer.get$1LMap($2L)", @@ -1632,7 +1632,7 @@ private MethodSpec getOverloadedSetterSpecIfStringField(AvroSchemaField field, SpecificRecordGenerationConfig config) { MethodSpec.Builder stringSetter = null; String escapedFieldName = getFieldNameWithSuffix(field); - if (SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { + if (config.isUtf8EncodingEnabled() && SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { Class fieldClass = SpecificRecordGeneratorUtil.getJavaClassForAvroTypeIfApplicable(field.getSchemaOrRef().getSchema().type(), config.getDefaultMethodStringRepresentation().equals(AvroJavaStringRepresentation.STRING) @@ -1703,7 +1703,7 @@ private MethodSpec getSetterMethodSpec(AvroSchemaField field, SpecificRecordGene } // false if field type is reference - if (SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { + if (config.isUtf8EncodingEnabled() && SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { if 
(config.getDefaultFieldStringRepresentation().equals(AvroJavaStringRepresentation.STRING)) { methodSpecBuilder.addStatement( "this.$1L = com.linkedin.avroutil1.compatibility.StringConverterUtil.getString($1L)", escapedFieldName); @@ -1712,7 +1712,7 @@ private MethodSpec getSetterMethodSpec(AvroSchemaField field, SpecificRecordGene "this.$1L = com.linkedin.avroutil1.compatibility.StringConverterUtil.getUtf8($1L)", escapedFieldName); } - } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(field.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(field.getSchema(), !config.isUtf8EncodingEnabled())) { if (config.getDefaultFieldStringRepresentation().equals(AvroJavaStringRepresentation.STRING)) { methodSpecBuilder.addStatement( "this.$1L = ($2T) com.linkedin.avroutil1.compatibility.collectiontransformer.ListTransformer.convertToString($1L)", @@ -1724,7 +1724,7 @@ private MethodSpec getSetterMethodSpec(AvroSchemaField field, SpecificRecordGene escapedFieldName, SpecificRecordGeneratorUtil.getTypeName(field.getSchemaOrRef().getSchema(), field.getSchemaOrRef().getSchema().type(), true, config.getDefaultFieldStringRepresentation())); } - } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(field.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(field.getSchema(), !config.isUtf8EncodingEnabled())) { if (config.getDefaultFieldStringRepresentation().equals(AvroJavaStringRepresentation.STRING)) { methodSpecBuilder.addStatement( "this.$1L = ($2T) com.linkedin.avroutil1.compatibility.collectiontransformer.MapTransformer.convertToString($1L)", @@ -1743,7 +1743,7 @@ private MethodSpec getSetterMethodSpec(AvroSchemaField field, SpecificRecordGene // if union might contain string value in runtime for (SchemaOrRef unionMemberSchema : ((AvroUnionSchema) field.getSchema()).getTypes()) { - if (SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, 
unionMemberSchema.getSchema())) { + if (config.isUtf8EncodingEnabled() && SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, unionMemberSchema.getSchema())) { methodSpecBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, CharSequence.class); if (config.getDefaultFieldStringRepresentation().equals(AvroJavaStringRepresentation.STRING)) { methodSpecBuilder.addStatement( @@ -1754,7 +1754,7 @@ private MethodSpec getSetterMethodSpec(AvroSchemaField field, SpecificRecordGene } methodSpecBuilder.endControlFlow(); - } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(unionMemberSchema.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(unionMemberSchema.getSchema(), !config.isUtf8EncodingEnabled())) { methodSpecBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, List.class); if (config.getDefaultFieldStringRepresentation().equals(AvroJavaStringRepresentation.STRING)) { methodSpecBuilder.addStatement( @@ -1767,7 +1767,7 @@ private MethodSpec getSetterMethodSpec(AvroSchemaField field, SpecificRecordGene } methodSpecBuilder.endControlFlow(); - } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema(), !config.isUtf8EncodingEnabled())) { methodSpecBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, Map.class); if (config.getDefaultFieldStringRepresentation().equals(AvroJavaStringRepresentation.STRING)) { methodSpecBuilder.addStatement( @@ -1813,16 +1813,16 @@ private MethodSpec getGetterMethodSpec(AvroSchemaField field, SpecificRecordGene } // if fieldRepresentation != methodRepresentation for String field // false if field type is reference - if (SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { + if (config.isUtf8EncodingEnabled() && 
SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, field.getSchema())) { methodSpecBuilder.addStatement( "return this.$1L == null ? null : com.linkedin.avroutil1.compatibility.StringConverterUtil.get$2L(this.$1L)", escapedFieldName, config.getDefaultMethodStringRepresentation().getJsonValue()); - } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(field.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(field.getSchema(), !config.isUtf8EncodingEnabled())) { methodSpecBuilder.addStatement( "return com.linkedin.avroutil1.compatibility.collectiontransformer.ListTransformer.get$LList(this.$L, $L)", config.getDefaultMethodStringRepresentation().getJsonValue(), escapedFieldName, - SpecificRecordGeneratorUtil.isCollectionSchemaValuePrimitive(field.getSchema())); - } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(field.getSchema())) { + SpecificRecordGeneratorUtil.isCollectionSchemaValuePrimitive(field.getSchema(), !config.isUtf8EncodingEnabled())); + } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(field.getSchema(), !config.isUtf8EncodingEnabled())) { methodSpecBuilder.addStatement( "return com.linkedin.avroutil1.compatibility.collectiontransformer.MapTransformer.get$LMap(this.$L)", config.getDefaultMethodStringRepresentation().getJsonValue(), @@ -1835,19 +1835,19 @@ private MethodSpec getGetterMethodSpec(AvroSchemaField field, SpecificRecordGene // if union might contain string value in runtime for (SchemaOrRef unionMemberSchema : ((AvroUnionSchema) field.getSchema()).getTypes()) { - if (SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, unionMemberSchema.getSchema())) { + if (config.isUtf8EncodingEnabled() && SpecificRecordGeneratorUtil.isNullUnionOf(AvroType.STRING, unionMemberSchema.getSchema())) { methodSpecBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, CharSequence.class) .addStatement("return 
com.linkedin.avroutil1.compatibility.StringConverterUtil.get$2L($1L)", escapedFieldName, config.getDefaultMethodStringRepresentation().getJsonValue()) .endControlFlow(); - } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(unionMemberSchema.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isListTransformerApplicableForSchema(unionMemberSchema.getSchema(),!config.isUtf8EncodingEnabled())) { methodSpecBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, List.class) .addStatement( "return com.linkedin.avroutil1.compatibility.collectiontransformer.ListTransformer.get$2LList($1L, $3L)", escapedFieldName, config.getDefaultMethodStringRepresentation().getJsonValue(), - SpecificRecordGeneratorUtil.isCollectionSchemaValuePrimitive(field.getSchema())) + SpecificRecordGeneratorUtil.isCollectionSchemaValuePrimitive(field.getSchema(), !config.isUtf8EncodingEnabled())) .endControlFlow(); - } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema())) { + } else if (SpecificRecordGeneratorUtil.isMapTransformerApplicable(unionMemberSchema.getSchema(), !config.isUtf8EncodingEnabled())) { methodSpecBuilder.beginControlFlow("else if($1L instanceof $2T)", escapedFieldName, Map.class) .addStatement( "return com.linkedin.avroutil1.compatibility.collectiontransformer.MapTransformer.get$2LMap($1L)", diff --git a/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGenerationConfig.java b/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGenerationConfig.java index 54cec2f4c..bc5383a7d 100644 --- a/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGenerationConfig.java +++ b/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGenerationConfig.java @@ -74,6 +74,27 @@ public final static SpecificRecordGenerationConfig getBroadCompatibilitySpecific ); } + public final static SpecificRecordGenerationConfig 
getBroadCompatibilitySpecificRecordGenerationConfig( + AvroJavaStringRepresentation defaultFieldStringRepresentation, + AvroJavaStringRepresentation defaultMethodStringRepresentation, + AvroVersion minimumSupportedAvroVersion, + boolean utf8EncodingPutByIndex, + boolean utf8EncodingEnabled + ) { + return new SpecificRecordGenerationConfig( + BROAD_COMPATIBILITY.publicFields, + BROAD_COMPATIBILITY.getters, + BROAD_COMPATIBILITY.setters, + BROAD_COMPATIBILITY.builders, + BROAD_COMPATIBILITY.honorStringTypeHints, + defaultFieldStringRepresentation, + defaultMethodStringRepresentation, + minimumSupportedAvroVersion, + utf8EncodingPutByIndex, + utf8EncodingEnabled + ); + } + /** * true to make generated fields public */ @@ -112,6 +133,11 @@ public final static SpecificRecordGenerationConfig getBroadCompatibilitySpecific */ private boolean utf8EncodingPutByIndex; + /** + * true to enable Utf8 encoding of strings in generated code, false to disable it + */ + private boolean utf8EncodingEnabled; + public SpecificRecordGenerationConfig( boolean publicFields, boolean getters, @@ -130,7 +156,8 @@ public SpecificRecordGenerationConfig( this.defaultFieldStringRepresentation = defaultFieldStringRepresentation; this.defaultMethodStringRepresentation = defaultMethodStringRepresentation; this.minimumSupportedAvroVersion = minimumSupportedAvroVersion; - this.utf8EncodingPutByIndex = true; + this.utf8EncodingPutByIndex = true; // Question: Should this also be false and maybe overridden? 
+ this.utf8EncodingEnabled = true; } public SpecificRecordGenerationConfig( @@ -153,6 +180,31 @@ public SpecificRecordGenerationConfig( this.defaultMethodStringRepresentation = defaultMethodStringRepresentation; this.minimumSupportedAvroVersion = minimumSupportedAvroVersion; this.utf8EncodingPutByIndex = utf8EncodingPutByIndex; + this.utf8EncodingEnabled = true; + } + + public SpecificRecordGenerationConfig( + boolean publicFields, + boolean getters, + boolean setters, + boolean builders, + boolean honorStringTypeHints, + AvroJavaStringRepresentation defaultFieldStringRepresentation, + AvroJavaStringRepresentation defaultMethodStringRepresentation, + AvroVersion minimumSupportedAvroVersion, + boolean utf8EncodingPutByIndex, + boolean utf8EncodingEnabled + ) { + this.publicFields = publicFields; + this.getters = getters; + this.setters = setters; + this.builders = builders; + this.honorStringTypeHints = honorStringTypeHints; + this.defaultFieldStringRepresentation = defaultFieldStringRepresentation; + this.defaultMethodStringRepresentation = defaultMethodStringRepresentation; + this.minimumSupportedAvroVersion = minimumSupportedAvroVersion; + this.utf8EncodingPutByIndex = utf8EncodingPutByIndex; + this.utf8EncodingEnabled = utf8EncodingEnabled; } public AvroVersion getMinimumSupportedAvroVersion() { @@ -174,4 +226,8 @@ public AvroJavaStringRepresentation getDefaultMethodStringRepresentation() { public boolean isUtf8EncodingInPutByIndexEnabled() { return utf8EncodingPutByIndex; } + + public boolean isUtf8EncodingEnabled() { + return utf8EncodingEnabled; + } } diff --git a/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGeneratorUtil.java b/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGeneratorUtil.java index e8bbcb014..bd998f737 100644 --- a/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGeneratorUtil.java +++ 
b/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGeneratorUtil.java @@ -240,12 +240,19 @@ public static boolean isNullUnionOf(AvroType type, AvroSchema schema) { * Handles list , union of list * @return true for List of String and List of Union of String */ - public static boolean isListTransformerApplicableForSchema(AvroSchema schema) { - if(schema == null) return false; + // change this + public static boolean isListTransformerApplicableForSchema(AvroSchema schema, boolean disableStringTransform) { + if (disableStringTransform || schema == null) { + return false; + } return isNullUnionOf(AvroType.ARRAY, schema) && schemaContainsString(schema); } - public static boolean isMapTransformerApplicable(AvroSchema schema) { + // change this + public static boolean isMapTransformerApplicable(AvroSchema schema, boolean disableTransform) { + if (disableTransform) { + return false; + } return isNullUnionOf(AvroType.MAP, schema); } @@ -273,8 +280,8 @@ private static boolean canBeHandledAsPrimitiveType(AvroSchema schema) { * schema type must be either a List of Map (or a null union of List/Map) * @return true if schema value is primitive */ - public static boolean isCollectionSchemaValuePrimitive(AvroSchema schema) { - if(!isListTransformerApplicableForSchema(schema) && !isMapTransformerApplicable(schema)) { + public static boolean isCollectionSchemaValuePrimitive(AvroSchema schema, boolean disableStringTransform) { + if(!isListTransformerApplicableForSchema(schema, disableStringTransform) && !isMapTransformerApplicable(schema, disableStringTransform)) { return false; } if (schema.type().equals(AvroType.UNION)) { diff --git a/settings.gradle b/settings.gradle index b0913dba9..2df9e597f 100644 --- a/settings.gradle +++ b/settings.gradle @@ -81,21 +81,22 @@ include 'avro-builder:tests:codegen-110' include 'avro-builder:tests:codegen-111' include 'avro-builder:tests:codegen-charseq-method' include 'avro-builder:tests:codegen-no-utf8-in-putbyindex' 
+include 'avro-builder:tests:codegen-no-utf8-encoding' include 'avro-builder:tests:tests-allavro' include 'demos:spotbugs-demo' -include 'fastserde:avro-fastserde' -include 'fastserde:avro-fastserde-jmh' -include 'fastserde:avro-fastserde-tests-common' -include 'fastserde:avro-fastserde-tests14' -include 'fastserde:avro-fastserde-tests15' -include 'fastserde:avro-fastserde-tests16' -include 'fastserde:avro-fastserde-tests17' -include 'fastserde:avro-fastserde-tests18' -include 'fastserde:avro-fastserde-tests19' -include 'fastserde:avro-fastserde-tests110' -include 'fastserde:avro-fastserde-tests111' +//include 'fastserde:avro-fastserde' +//include 'fastserde:avro-fastserde-jmh' +//include 'fastserde:avro-fastserde-tests-common' +//include 'fastserde:avro-fastserde-tests14' +//include 'fastserde:avro-fastserde-tests15' +//include 'fastserde:avro-fastserde-tests16' +//include 'fastserde:avro-fastserde-tests17' +//include 'fastserde:avro-fastserde-tests18' +//include 'fastserde:avro-fastserde-tests19' +//include 'fastserde:avro-fastserde-tests110' +//include 'fastserde:avro-fastserde-tests111' gradleEnterprise { buildScan { From 4257bb122ac2c0e1b952c9caf34808e3dbe76896 Mon Sep 17 00:00:00 2001 From: jsolanke Date: Fri, 24 Jan 2025 11:45:57 -0800 Subject: [PATCH 2/7] Cleanup --- .../avroutil1/builder/SchemaBuilder.java | 1 - avro-builder/tests/codegen-110/build.gradle | 3 +- .../codegen-no-utf8-encoding/build.gradle | 65 ------------------- .../avroutil1/builder/SpecificRecordTest.java | 31 --------- settings.gradle | 22 +++---- 5 files changed, 12 insertions(+), 110 deletions(-) diff --git a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java index 0197372a5..e4f4c2ddb 100644 --- a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java +++ 
b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java @@ -52,7 +52,6 @@ public static void main(String[] args) throws Exception { long optionParseStart = System.currentTimeMillis(); OptionParser parser = new OptionParser(); - // ADD NEW OPTION HERE(1 option) OptionSpec inputOpt = parser.accepts("input", "Schema or directory of schemas to compile [REQUIRED]") .withRequiredArg().required() .describedAs("file"); diff --git a/avro-builder/tests/codegen-110/build.gradle b/avro-builder/tests/codegen-110/build.gradle index 4f8788e80..742492946 100644 --- a/avro-builder/tests/codegen-110/build.gradle +++ b/avro-builder/tests/codegen-110/build.gradle @@ -43,8 +43,7 @@ task runOwnCodegen { "--input", "$projectDir/src/main/avro", "--output", "$buildDir/generated/sources/avro/java/main", "--generator", "AVRO_UTIL", - "--minAvroVer", "1.4", - "--enableUtf8Encoding", "true" + "--minAvroVer", "1.4" ] } } diff --git a/avro-builder/tests/codegen-no-utf8-encoding/build.gradle b/avro-builder/tests/codegen-no-utf8-encoding/build.gradle index d4c0ad921..ee1028d06 100644 --- a/avro-builder/tests/codegen-no-utf8-encoding/build.gradle +++ b/avro-builder/tests/codegen-no-utf8-encoding/build.gradle @@ -66,68 +66,3 @@ dependencies { //required because generated code depends on the helper implementation project(":helper:helper") } - -// -// plugins { -// id "java-library" -// } -// -// configurations { -// codegen -// } -// -// sourceSets { -// main { -// java { -// srcDir 'src/main/java' -// srcDir "$buildDir/generated/sources/avro/java/main" -// } -// resources { -// srcDirs = [ -// "src/main/avro" -// ] -// } -// } -// } -// -// dependencies { -// codegen project(":avro-builder:builder") -// } -// -// task runOwnCodegen { -// description = 'generate specific classes using own codegen utility' -// -// dependsOn configurations.codegen -// -// doLast { -// javaexec { -// classpath configurations.codegen -// main = 'com.linkedin.avroutil1.builder.SchemaBuilder' 
-// args = [ -// "--input", "$projectDir/src/main/avro", -// "--output", "$buildDir/generated/sources/avro/java/main", -// "--generator", "AVRO_UTIL", -// "--stringRepresentation", "CharSequence", -// "--methodStringRepresentation", "CharSequence", -// "--enableUtf8Encoding", "false" -// ] -// } -// } -// } -// -// compileJava.dependsOn runOwnCodegen -// -// dependencies { -// codegen project(":avro-builder:builder") -// -// implementation ("org.apache.avro:avro:1.4.1") { -// exclude group: "org.mortbay.jetty" -// exclude group: "org.apache.velocity" -// exclude group: "commons-lang" -// exclude group: "org.jboss.netty" -// exclude group: "com.thoughtworks.paranamer", module: "paranamer-ant" -// } -// //required because generated code depends on the helper -// implementation project(":helper:helper") -// } -//} \ No newline at end of file diff --git a/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java b/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java index 956adee7a..f8cbdb9bf 100644 --- a/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java +++ b/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java @@ -1891,37 +1891,6 @@ public void modifiablePrimitiveCollectionTest() { Assert.assertEquals((int) instance.intAr.get(instance.getIntAr().size() - 1), Integer.MAX_VALUE); } - @Test - public void modifiablePrimitiveCollectionTestNot() { - String tba = "NewElement"; - RandomRecordGenerator generator = new RandomRecordGenerator(); - charseqmethod.TestCollections instance = - generator.randomSpecific(charseqmethod.TestCollections.class, RecordGenerationConfig.newConfig().withAvoidNulls(true)); - - // array of string - instance.getStrAr().add(tba); - Assert.assertTrue(instance.getStrAr().contains(tba)); - Assert.assertTrue(instance.strAr.contains(new Utf8(tba))); - - // 
union[null, List] - instance.getUnionOfArray().add(tba); - Assert.assertTrue(instance.getUnionOfArray().contains(tba)); - Assert.assertTrue(instance.unionOfArray.contains(new Utf8(tba))); - - // array (union[null, string]) - instance.getArOfUnionOfStr().add(tba); - Assert.assertTrue(instance.getArOfUnionOfStr().contains(tba)); - Assert.assertTrue(instance.arOfUnionOfStr.contains(new Utf8(tba))); - - - // Union (null, Map) - Assert.assertThrows(UnsupportedOperationException.class, () -> instance.getUnionOfMap().put("key1", tba)); - - instance.getIntAr().add(Integer.MAX_VALUE); - Assert.assertEquals((int) instance.getIntAr().get(instance.getIntAr().size() - 1), Integer.MAX_VALUE); - Assert.assertEquals((int) instance.intAr.get(instance.getIntAr().size() - 1), Integer.MAX_VALUE); - } - @Test public void modifiablePrimitiveCollectionTestForCharSeq() { String tba = "NewElement"; diff --git a/settings.gradle b/settings.gradle index 2df9e597f..794ee3108 100644 --- a/settings.gradle +++ b/settings.gradle @@ -86,17 +86,17 @@ include 'avro-builder:tests:tests-allavro' include 'demos:spotbugs-demo' -//include 'fastserde:avro-fastserde' -//include 'fastserde:avro-fastserde-jmh' -//include 'fastserde:avro-fastserde-tests-common' -//include 'fastserde:avro-fastserde-tests14' -//include 'fastserde:avro-fastserde-tests15' -//include 'fastserde:avro-fastserde-tests16' -//include 'fastserde:avro-fastserde-tests17' -//include 'fastserde:avro-fastserde-tests18' -//include 'fastserde:avro-fastserde-tests19' -//include 'fastserde:avro-fastserde-tests110' -//include 'fastserde:avro-fastserde-tests111' +include 'fastserde:avro-fastserde' +include 'fastserde:avro-fastserde-jmh' +include 'fastserde:avro-fastserde-tests-common' +include 'fastserde:avro-fastserde-tests14' +include 'fastserde:avro-fastserde-tests15' +include 'fastserde:avro-fastserde-tests16' +include 'fastserde:avro-fastserde-tests17' +include 'fastserde:avro-fastserde-tests18' +include 'fastserde:avro-fastserde-tests19' 
+include 'fastserde:avro-fastserde-tests110' +include 'fastserde:avro-fastserde-tests111' gradleEnterprise { buildScan { From 4402dcbf89dcd54ba474f3e7bb1ed0c62e756172 Mon Sep 17 00:00:00 2001 From: jsolanke Date: Fri, 24 Jan 2025 11:54:47 -0800 Subject: [PATCH 3/7] Cleanup of comments --- .../linkedin/avroutil1/codegen/SpecificRecordGeneratorUtil.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGeneratorUtil.java b/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGeneratorUtil.java index bd998f737..e5eb32e57 100644 --- a/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGeneratorUtil.java +++ b/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGeneratorUtil.java @@ -240,7 +240,6 @@ public static boolean isNullUnionOf(AvroType type, AvroSchema schema) { * Handles list , union of list * @return true for List of String and List of Union of String */ - // change this public static boolean isListTransformerApplicableForSchema(AvroSchema schema, boolean disableStringTransform) { if (disableStringTransform || schema == null) { return false; @@ -248,7 +247,6 @@ public static boolean isListTransformerApplicableForSchema(AvroSchema schema, bo return isNullUnionOf(AvroType.ARRAY, schema) && schemaContainsString(schema); } - // change this public static boolean isMapTransformerApplicable(AvroSchema schema, boolean disableTransform) { if (disableTransform) { return false; From 417e5a94896026a91c42c55b62a792df3093c497 Mon Sep 17 00:00:00 2001 From: jsolanke Date: Fri, 24 Jan 2025 12:44:31 -0800 Subject: [PATCH 4/7] Cleanup of stale comments --- .../avroutil1/codegen/SpecificRecordGenerationConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGenerationConfig.java 
b/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGenerationConfig.java index bc5383a7d..ebd6b213e 100644 --- a/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGenerationConfig.java +++ b/avro-codegen/src/main/java/com/linkedin/avroutil1/codegen/SpecificRecordGenerationConfig.java @@ -156,7 +156,7 @@ public SpecificRecordGenerationConfig( this.defaultFieldStringRepresentation = defaultFieldStringRepresentation; this.defaultMethodStringRepresentation = defaultMethodStringRepresentation; this.minimumSupportedAvroVersion = minimumSupportedAvroVersion; - this.utf8EncodingPutByIndex = true; // Question: Should this also be false and maybe overridden? + this.utf8EncodingPutByIndex = true; this.utf8EncodingEnabled = true; } From 2101084eba096708ec998db37cb7f33b9e680105 Mon Sep 17 00:00:00 2001 From: Jairaj Solanke Date: Tue, 28 Jan 2025 01:14:19 +0000 Subject: [PATCH 5/7] Resolved comments. PTAL. --- .../com/linkedin/avroutil1/builder/SchemaBuilder.java | 6 ++++-- .../ArrayOfStringRecord.avsc | 0 .../HasNoSimpleString.avsc | 0 .../TestCollections.avsc | 0 .../avroutil1/builder/SpecificRecordTest.java | 11 +++++++---- 5 files changed, 11 insertions(+), 6 deletions(-) rename avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/{charseqmethod => noUtf8Encoding}/ArrayOfStringRecord.avsc (100%) rename avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/{charseqmethod => noUtf8Encoding}/HasNoSimpleString.avsc (100%) rename avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/{charseqmethod => noUtf8Encoding}/TestCollections.avsc (100%) diff --git a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java index e4f4c2ddb..24fae0b6c 100644 --- a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java +++ 
b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java @@ -115,7 +115,7 @@ public static void main(String[] args) throws Exception { .defaultsTo("false") .describedAs("true/false"); - OptionSpec enableUtf8Encoding = parser.accepts("enableUtf8Encoding", "adds codegen of UTF8 type for strings.") + OptionSpec enableUtf8Encoding = parser.accepts("enableUtf8Encoding", "enable encoding strings to their utf8 values throughout generated code.") .withOptionalArg() .defaultsTo("true") .describedAs("true/false"); @@ -248,7 +248,9 @@ public static void main(String[] args) throws Exception { if (options.has(enableUtf8Encoding)) { String value = options.valueOf(enableUtf8Encoding); handleUtf8Encoding = Boolean.TRUE.equals(Boolean.parseBoolean(value)); - handleUtf8EncodingInPutByIndex = handleUtf8Encoding; + if (methodStringRepresentation.equals(StringRepresentation.CharSeq) && stringRepresentation.equals(StringRepresentation.CharSeq)) { + handleUtf8EncodingInPutByIndex = handleUtf8Encoding; + } } //allow plugins to parse and validate their own added options diff --git a/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/ArrayOfStringRecord.avsc b/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/noUtf8Encoding/ArrayOfStringRecord.avsc similarity index 100% rename from avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/ArrayOfStringRecord.avsc rename to avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/noUtf8Encoding/ArrayOfStringRecord.avsc diff --git a/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/HasNoSimpleString.avsc b/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/noUtf8Encoding/HasNoSimpleString.avsc similarity index 100% rename from avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/HasNoSimpleString.avsc rename to avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/noUtf8Encoding/HasNoSimpleString.avsc diff --git 
a/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/TestCollections.avsc b/avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/noUtf8Encoding/TestCollections.avsc similarity index 100% rename from avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/charseqmethod/TestCollections.avsc rename to avro-builder/tests/codegen-no-utf8-encoding/src/main/avro/noUtf8Encoding/TestCollections.avsc diff --git a/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java b/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java index f8cbdb9bf..eee98abf8 100644 --- a/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java +++ b/avro-builder/tests/tests-allavro/src/test/java/com/linkedin/avroutil1/builder/SpecificRecordTest.java @@ -1976,21 +1976,23 @@ public void testNoUtf8Encoding() throws IOException { noutf8encoding.TestCollections instance = generator.randomSpecific(noutf8encoding.TestCollections.class, RecordGenerationConfig.newConfig().withAvoidNulls(true)); - // fields should contain String values + // String fields should contain CharSequence values Assert.assertTrue(instance.str instanceof CharSequence); - // getter should return String values + // String getter should return CharSequence Assert.assertTrue(instance.getStr() instanceof CharSequence); - // Set String and Get String + // Setting and getting both String and Utf8 values should work on CharSequence fields. + // Verifies that both direct field access and the getter work. 
String strValue = "strValue"; Utf8 utf8Value = new Utf8("utf8Value"); instance.getStrAr().add(strValue); instance.getStrAr().add(utf8Value); - //instance + Assert.assertTrue(instance.getStrAr().contains(strValue)); Assert.assertTrue(instance.strAr.contains(strValue)); + Assert.assertTrue(instance.getStrAr().contains(utf8Value)); Assert.assertTrue(instance.strAr.contains(utf8Value)); @@ -1998,6 +2000,7 @@ public void testNoUtf8Encoding() throws IOException { instance.getStrArAr().add(Arrays.asList(strValue)); Assert.assertTrue(instance.getStrArAr().get(instance.getStrArAr().size() - 1).contains(strValue)); Assert.assertTrue(instance.strArAr.get(instance.getStrArAr().size() - 1).contains(strValue)); + instance.getStrArAr().add(Arrays.asList(utf8Value)); Assert.assertTrue(instance.getStrArAr().get(instance.getStrArAr().size() - 1).contains(utf8Value)); Assert.assertTrue(instance.strArAr.get(instance.getStrArAr().size() - 1).contains(utf8Value)); From 99a9d08d745e1797015d4382b69ec1f5a301b5b6 Mon Sep 17 00:00:00 2001 From: jsolanke Date: Mon, 27 Jan 2025 23:36:44 -0800 Subject: [PATCH 6/7] Fixing build error. 
--- .../main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java index 24fae0b6c..94441f2d2 100644 --- a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java +++ b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java @@ -248,7 +248,7 @@ public static void main(String[] args) throws Exception { if (options.has(enableUtf8Encoding)) { String value = options.valueOf(enableUtf8Encoding); handleUtf8Encoding = Boolean.TRUE.equals(Boolean.parseBoolean(value)); - if (methodStringRepresentation.equals(StringRepresentation.CharSeq) && stringRepresentation.equals(StringRepresentation.CharSeq)) { + if (methodStringRepresentation.equals(StringRepresentation.CharSequence) && stringRepresentation.equals(StringRepresentation.CharSequence)) { handleUtf8EncodingInPutByIndex = handleUtf8Encoding; } } From 4b501bc9715b371b37bf76a1fcf8f035d17e9804 Mon Sep 17 00:00:00 2001 From: jsolanke Date: Tue, 28 Jan 2025 12:47:48 -0800 Subject: [PATCH 7/7] Throws an exception if utf8Encoding is set to true without setting methodStringRepresentation and stringRepresentation to CharSequence. 
--- .../linkedin/avroutil1/builder/SchemaBuilder.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java index 94441f2d2..9dbe02e78 100644 --- a/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java +++ b/avro-builder/builder/src/main/java/com/linkedin/avroutil1/builder/SchemaBuilder.java @@ -115,7 +115,9 @@ public static void main(String[] args) throws Exception { .defaultsTo("false") .describedAs("true/false"); - OptionSpec enableUtf8Encoding = parser.accepts("enableUtf8Encoding", "enable encoding strings to their utf8 values throughout generated code.") + OptionSpec enableUtf8Encoding = parser.accepts("enableUtf8Encoding", + "enable encoding strings to their utf8 values throughout generated code. This overrides enableUtf8EncodingInPutByIndex." + + "Requires --stringRepresentation and --methodStringRepresentation to be CharSequence") .withOptionalArg() .defaultsTo("true") .describedAs("true/false"); @@ -248,8 +250,14 @@ public static void main(String[] args) throws Exception { if (options.has(enableUtf8Encoding)) { String value = options.valueOf(enableUtf8Encoding); handleUtf8Encoding = Boolean.TRUE.equals(Boolean.parseBoolean(value)); - if (methodStringRepresentation.equals(StringRepresentation.CharSequence) && stringRepresentation.equals(StringRepresentation.CharSequence)) { - handleUtf8EncodingInPutByIndex = handleUtf8Encoding; + if (handleUtf8Encoding) { + if (methodStringRepresentation.equals(StringRepresentation.CharSequence) && stringRepresentation.equals( + StringRepresentation.CharSequence)) { + handleUtf8EncodingInPutByIndex = handleUtf8Encoding; + } else { + throw new IllegalArgumentException("--methodStringRepresentation and --stringRepresentation both need to be" + + " CharSequence for enableUtf8Encoding to be 
supported."); + } } }