From 5e0b6d6ca31bcfb70eec129eba24d53ea9019d03 Mon Sep 17 00:00:00 2001 From: James McMullan Date: Mon, 1 Apr 2024 15:30:15 -0400 Subject: [PATCH] HPCC4J-587 Character handling test failure (#701) - Modified test to ignore non-defined unicode chars - Modified test to discard invalid surrogate pairs - Modified fixed strings to have enough capacity for multi-char codepoints Signed-off-by: James McMullan James.McMullan@lexisnexis.com Signed-off-by: James McMullan James.McMullan@lexisnexis.com --- .../dfs/client/DFSReadWriteTest.java | 51 +++++++++++++++---- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/dfsclient/src/test/java/org/hpccsystems/dfs/client/DFSReadWriteTest.java b/dfsclient/src/test/java/org/hpccsystems/dfs/client/DFSReadWriteTest.java index 9ea4ea0a5..896fa35c0 100644 --- a/dfsclient/src/test/java/org/hpccsystems/dfs/client/DFSReadWriteTest.java +++ b/dfsclient/src/test/java/org/hpccsystems/dfs/client/DFSReadWriteTest.java @@ -92,25 +92,43 @@ public void readWithForcedTimeoutTest() throws Exception public void nullCharTests() throws Exception { // Unicode + boolean unicodePassed = true; { FieldDef recordDef = null; { FieldDef[] fieldDefs = new FieldDef[2]; fieldDefs[0] = new FieldDef("uni", FieldType.STRING, "STRING", 100, false, false, HpccSrcType.UTF16LE, new FieldDef[0]); - fieldDefs[1] = new FieldDef("fixedUni", FieldType.STRING, "STRING", 100, true, false, HpccSrcType.UTF16LE, new FieldDef[0]); + fieldDefs[1] = new FieldDef("fixedUni", FieldType.STRING, "STRING", 200, true, false, HpccSrcType.UTF16LE, new FieldDef[0]); recordDef = new FieldDef("RootRecord", FieldType.RECORD, "rec", 4, false, false, HpccSrcType.LITTLE_ENDIAN, fieldDefs); } List records = new ArrayList(); int maxUTF16BMPChar = Character.MAX_CODE_POINT; - for (int i = 0; i < maxUTF16BMPChar; i++) { + for (int i = 0; i < maxUTF16BMPChar; i++) + { + String strMidEOS = ""; - for (int j = 0; j < 98; j++, i++) { - if (j == 50) { + for (int j = 0; j < 98; j++, i++) + { + if 
(!Character.isValidCodePoint(i) || !Character.isDefined(i)) + { + continue; + } + + char[] chars = Character.toChars(i); + if (Character.isSurrogate(chars[0])) + { + continue; + } + + if (j == 50 && strMidEOS.length() > 0) + { strMidEOS += "\0"; } - strMidEOS += Character.toString((char) i); + + String charStr = new String(chars); + strMidEOS += charStr; } Object[] fields = {strMidEOS, strMidEOS}; @@ -123,17 +141,20 @@ public void nullCharTests() throws Exception HPCCFile file = new HPCCFile(fileName, connString , hpccUser, hpccPass); List readRecords = readFile(file, 10000, false, false, BinaryRecordReader.TRIM_STRINGS); - for (int i = 0; i < records.size(); i++) { + for (int i = 0; i < records.size(); i++) + { HPCCRecord record = records.get(i); HPCCRecord readRecord = readRecords.get(i); if (readRecord.equals(record) == false) { System.out.println("Record: " + i + " did not match\n" + record + "\n" + readRecord); + unicodePassed = false; } } } // SBC / ASCII + boolean sbcPassed = true; { FieldDef recordDef = null; { @@ -145,13 +166,16 @@ public void nullCharTests() throws Exception } List records = new ArrayList(); - for (int i = 0; i < 255; i++) { + for (int i = 0; i < 255; i++) + { String strMidEOS = ""; - for (int j = 0; j < 9; j++, i++) { - if (j == 5) { + for (int j = 0; j < 9; j++, i++) + { + if (j == 5) + { strMidEOS += "\0"; } - strMidEOS += Character.toString((char) i); + strMidEOS += new String(Character.toChars(i)); } Object[] fields = {strMidEOS, strMidEOS}; @@ -164,15 +188,20 @@ public void nullCharTests() throws Exception HPCCFile file = new HPCCFile(fileName, connString , hpccUser, hpccPass); List readRecords = readFile(file, 10000, false, false, BinaryRecordReader.TRIM_STRINGS); - for (int i = 0; i < records.size(); i++) { + for (int i = 0; i < records.size(); i++) + { HPCCRecord record = records.get(i); HPCCRecord readRecord = readRecords.get(i); if (readRecord.equals(record) == false) { System.out.println("Record: " + i + " did not match\n" + 
record + "\n" + readRecord); + sbcPassed = false; } } } + + assertTrue("Unicode EOS character test failed. See mismatches above.", unicodePassed); + assertTrue("Single byte EOS character test failed. See mismatches above.", sbcPassed); } @Test