From 289b43340e852305e338ebf77a9fa5e83e63e298 Mon Sep 17 00:00:00 2001 From: Shounak kulkarni Date: Thu, 2 Jan 2025 10:01:12 +0530 Subject: [PATCH 1/4] Avoid handling JSON_ARRAY as multi value JSON during transformation --- .../pinot/plugin/inputformat/json/JSONRecordExtractor.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONRecordExtractor.java b/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONRecordExtractor.java index 01e73f5f4041..a770e5267b0a 100644 --- a/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONRecordExtractor.java +++ b/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONRecordExtractor.java @@ -67,4 +67,10 @@ public GenericRow extract(Map from, GenericRow to) { } return to; } + + @Override + protected boolean isMultiValue(Object value) { + // multi value JSON is treated as JSON_ARRAY hence it's never a multi value column + return false; + } } From 1999aee76ca0fbb60470a8526fe26025e54b60a4 Mon Sep 17 00:00:00 2001 From: Shounak kulkarni Date: Thu, 2 Jan 2025 11:45:46 +0530 Subject: [PATCH 2/4] Revert "Avoid handling JSON_ARRAY as multi value JSON during transformation" This reverts commit 289b43340e852305e338ebf77a9fa5e83e63e298. --- .../pinot/plugin/inputformat/json/JSONRecordExtractor.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONRecordExtractor.java b/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONRecordExtractor.java index a770e5267b0a..01e73f5f4041 100644 --- a/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONRecordExtractor.java +++ b/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONRecordExtractor.java @@ -67,10 +67,4 @@ public GenericRow extract(Map from, GenericRow to) { } return to; } - - @Override - protected boolean isMultiValue(Object value) { - // multi value JSON is treated as JSON_ARRAY hence it's never a multi value column - return false; - } } From 694f0ba7668e530231bd4d63d7ca886a3eb7bf16 Mon Sep 17 00:00:00 2001 From: Shounak kulkarni Date: Thu, 2 Jan 2025 12:32:58 +0530 Subject: [PATCH 3/4] handle empty JSON array during transformation --- .../pinot/common/utils/PinotDataTypeTest.java | 17 +++++++++++++++++ .../recordtransformer/DataTypeTransformer.java | 7 ++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java b/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java index 245ea7235dc5..416f258eee0d 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java @@ -18,11 +18,13 @@ */ package org.apache.pinot.common.utils; +import com.fasterxml.jackson.core.JsonProcessingException; import java.math.BigDecimal; import java.sql.Timestamp; import java.util.Arrays; import java.util.HashMap; import java.util.Map; +import org.apache.pinot.spi.utils.JsonUtils; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -220,6 +222,21 @@ public void testJSON() { assertEquals(JSON.convert(new Timestamp(1620324238610L), TIMESTAMP), "1620324238610"); } + @Test + public void testJSONArray() + throws JsonProcessingException { + assertEquals(JSON.convert(new Object[]{false}, BOOLEAN), "[false]"); + assertEquals(JSON.convert(new Object[]{true}, BOOLEAN), "[true]"); // Base64 encoding. + assertEquals(JSON.convert(new Object[]{ + JsonUtils.stringToObject("{\"bytes\":\"AAE=\"}", Map.class), + JsonUtils.stringToObject("{\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]}}", Map.class), + JsonUtils.stringToObject("{\"timestamp\":1620324238610}", Map.class)}, JSON), + "[{\"bytes\":\"AAE=\"},{\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]}}," + + "{\"timestamp\":1620324238610}]"); + assertEquals(JSON.convert(new Object[]{}, JSON), "[]"); + assertEquals(JSON.convert(new Object[]{new Timestamp(1620324238610L)}, TIMESTAMP), "[1620324238610]"); + } + @Test public void testObject() { assertEquals(OBJECT.toInt(new NumberObject("123")), 123); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/DataTypeTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/DataTypeTransformer.java index 65019549ece2..df1722b78f1d 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/DataTypeTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/DataTypeTransformer.java @@ -94,7 +94,12 @@ public GenericRow transform(GenericRow record) { if (value instanceof Object[]) { // Multi-value column Object[] values = (Object[]) value; - source = PinotDataType.getMultiValueType(values[0].getClass()); + // JSON is not standardised for empty json array + if (dest == PinotDataType.JSON && values.length == 0) { + source = PinotDataType.JSON; + } else { + source = PinotDataType.getMultiValueType(values[0].getClass()); + } } else { // Single-value column source = PinotDataType.getSingleValueType(value.getClass()); From 5bed8c2620e40bd580d9cca7aa1a55fef9d80acf Mon Sep 17 00:00:00 2001 From: Shounak kulkarni Date: Thu, 2 Jan 2025 12:36:06 +0530 Subject: [PATCH 4/4] cosmetic --- .../java/org/apache/pinot/common/utils/PinotDataTypeTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java b/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java index 416f258eee0d..47807d674b6f 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java @@ -229,7 +229,8 @@ public void testJSONArray() assertEquals(JSON.convert(new Object[]{true}, BOOLEAN), "[true]"); // Base64 encoding. assertEquals(JSON.convert(new Object[]{ JsonUtils.stringToObject("{\"bytes\":\"AAE=\"}", Map.class), - JsonUtils.stringToObject("{\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]}}", Map.class), + JsonUtils.stringToObject("{\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]}}", + Map.class), JsonUtils.stringToObject("{\"timestamp\":1620324238610}", Map.class)}, JSON), "[{\"bytes\":\"AAE=\"},{\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]}}," + "{\"timestamp\":1620324238610}]");