Improve reported dataTypes on V1 when no rows returned (#14836)
V1 just uses STRING as the default data type for all columns when the servers return no rows.

This is an attempt to improve that with a chain of strategies. The preferred one is to use the schema validation implemented in V2 and reuse the data types it determines. As a fallback, suitable only for non-transformed fields, the base table schema is used. Otherwise, the default STRING type is still returned.
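In rough terms, the resolution chain behaves like the sketch below. This is a minimal illustration only, with hypothetical helper and map names; the actual implementation is the ParserUtils.fillEmptyResponseSchema call shown in the diff below.

import java.util.Map;

// Minimal sketch of the fallback chain described above (all names are illustrative only).
final class EmptyResponseTypeSketch {
  static String resolveDataType(String column, boolean isTransformed,
      Map<String, String> v2ValidatedTypes, Map<String, String> tableSchemaTypes) {
    String v2Type = v2ValidatedTypes.get(column);
    if (v2Type != null) {
      return v2Type;  // preferred: data type determined by the V2 schema validation
    }
    if (!isTransformed && tableSchemaTypes.containsKey(column)) {
      return tableSchemaTypes.get(column);  // fallback: base table schema, non-transformed fields only
    }
    return "STRING";  // last resort: the legacy default
  }
}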
albertobastos authored Jan 21, 2025
1 parent 701cc1e commit d819d8a
Showing 6 changed files with 403 additions and 7 deletions.
@@ -837,6 +837,10 @@ protected BrokerResponse handleRequest(long requestId, String query, SqlNodeAndO
           1);
     }

+    // server returns STRING as default dataType for all columns in (some) scenarios where no rows are returned
+    // this is an attempt to return more faithful information based on other sources
+    ParserUtils.fillEmptyResponseSchema(brokerResponse, _tableCache, schema, database, query);
+
     // Set total query processing time
     long totalTimeMs = System.currentTimeMillis() - requestContext.getRequestArrivalTimeMillis();
     brokerResponse.setTimeUsedMs(totalTimeMs);
@@ -419,6 +419,10 @@ public boolean isWholeNumberArray() {
     return INTEGRAL_ARRAY_TYPES.contains(this);
   }

+  public boolean isUnknown() {
+    return UNKNOWN.equals(this);
+  }
+
   public boolean isCompatible(ColumnDataType anotherColumnDataType) {
     // All numbers are compatible with each other
     return this == anotherColumnDataType || (this.isNumber() && anotherColumnDataType.isNumber()) || (
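A plausible call site for the new isUnknown() helper (hypothetical, not part of this diff) would backfill only the column types the servers could not determine:

// Hypothetical backfill loop over a DataSchema; lookUpBetterType is an assumed helper,
// not a real Pinot API.
ColumnDataType[] types = dataSchema.getColumnDataTypes();
for (int i = 0; i < types.length; i++) {
  if (types[i].isUnknown()) {
    types[i] = lookUpBetterType(dataSchema.getColumnName(i));
  }
}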
@@ -0,0 +1,276 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.integration.tests;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.helix.model.HelixConfigScope;
import org.apache.helix.model.IdealState;
import org.apache.helix.model.builder.HelixConfigScopeBuilder;
import org.apache.pinot.common.utils.ServiceStatus;
import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.config.table.FieldConfig.CompressionCodec;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.utils.CommonConstants;
import org.apache.pinot.spi.utils.InstanceTypeUtils;
import org.apache.pinot.spi.utils.builder.TableNameBuilder;
import org.apache.pinot.util.TestUtils;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;


/**
* Integration test that checks data types for queries with no rows returned.
*/
public class EmptyResponseIntegrationTest extends BaseClusterIntegrationTestSet {
private static final int NUM_BROKERS = 1;
private static final int NUM_SERVERS = 1;

private final List<ServiceStatus.ServiceStatusCallback> _serviceStatusCallbacks =
new ArrayList<>(getNumBrokers() + getNumServers());

protected int getNumBrokers() {
return NUM_BROKERS;
}

protected int getNumServers() {
return NUM_SERVERS;
}

@Override
protected List<FieldConfig> getFieldConfigs() {
return Collections.singletonList(
new FieldConfig("DivAirports", FieldConfig.EncodingType.DICTIONARY, Collections.emptyList(),
CompressionCodec.MV_ENTRY_DICT, null));
}

@BeforeClass
public void setUp()
throws Exception {
TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir);

// Start the Pinot cluster
startZk();
startController();
// Set hyperloglog log2m value to 12.
HelixConfigScope scope =
new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.CLUSTER).forCluster(getHelixClusterName())
.build();
_helixManager.getConfigAccessor()
.set(scope, CommonConstants.Helix.DEFAULT_HYPERLOGLOG_LOG2M_KEY, Integer.toString(12));
startBrokers();
startServers();

// Create and upload the schema and table config
Schema schema = createSchema();
addSchema(schema);
TableConfig tableConfig = createOfflineTableConfig();
addTableConfig(tableConfig);

// Unpack the Avro files
List<File> avroFiles = unpackAvroData(_tempDir);

// Create and upload segments. For exhaustive testing, concurrently upload multiple segments with the same name
// and validate correctness with parallel push protection enabled.
ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles, tableConfig, schema, 0, _segmentDir, _tarDir);
// Create a copy of _tarDir to create multiple segments with the same name.
File tarDir2 = new File(_tempDir, "tarDir2");
FileUtils.copyDirectory(_tarDir, tarDir2);

List<File> tarDirs = new ArrayList<>();
tarDirs.add(_tarDir);
tarDirs.add(tarDir2);
try {
uploadSegments(getTableName(), TableType.OFFLINE, tarDirs);
} catch (Exception e) {
// If enableParallelPushProtection is enabled and the same segment is uploaded concurrently, we could get one
// of three exceptions:
// - 409 conflict if the second call enters ProcessExistingSegment
// - segmentZkMetadata creation failure if both calls entered ProcessNewSegment
// - Failed to copy segment tar file to final location due to the same segment pushed twice concurrently
// In such cases we upload all the segments again to ensure that the data is set up correctly.
assertTrue(e.getMessage().contains("Another segment upload is in progress for segment") || e.getMessage()
.contains("Failed to create ZK metadata for segment") || e.getMessage()
.contains("java.nio.file.FileAlreadyExistsException"), e.getMessage());
uploadSegments(getTableName(), _tarDir);
}

// Set up service status callbacks
// NOTE: put this step after creating the table and uploading all segments so that brokers and servers can find the
// resources to monitor
registerCallbackHandlers();

// Wait for all documents loaded
waitForAllDocsLoaded(600_000L);
}

@BeforeMethod
public void resetMultiStage() {
setUseMultiStageQueryEngine(false);
}

protected void startBrokers()
throws Exception {
startBrokers(getNumBrokers());
}

protected void startServers()
throws Exception {
startServers(getNumServers());
}

private void registerCallbackHandlers() {
List<String> instances = _helixAdmin.getInstancesInCluster(getHelixClusterName());
instances.removeIf(
instanceId -> !InstanceTypeUtils.isBroker(instanceId) && !InstanceTypeUtils.isServer(instanceId));
List<String> resourcesInCluster = _helixAdmin.getResourcesInCluster(getHelixClusterName());
resourcesInCluster.removeIf(resource -> (!TableNameBuilder.isTableResource(resource)
&& !CommonConstants.Helix.BROKER_RESOURCE_INSTANCE.equals(resource)));
for (String instance : instances) {
List<String> resourcesToMonitor = new ArrayList<>();
for (String resourceName : resourcesInCluster) {
IdealState idealState = _helixAdmin.getResourceIdealState(getHelixClusterName(), resourceName);
for (String partitionName : idealState.getPartitionSet()) {
if (idealState.getInstanceSet(partitionName).contains(instance)) {
resourcesToMonitor.add(resourceName);
break;
}
}
}
_serviceStatusCallbacks.add(new ServiceStatus.MultipleCallbackServiceStatusCallback(ImmutableList.of(
new ServiceStatus.IdealStateAndCurrentStateMatchServiceStatusCallback(_helixManager, getHelixClusterName(),
instance, resourcesToMonitor, 100.0),
new ServiceStatus.IdealStateAndExternalViewMatchServiceStatusCallback(_helixManager, getHelixClusterName(),
instance, resourcesToMonitor, 100.0))));
}
}

@Test
public void testInstancesStarted() {
assertEquals(_serviceStatusCallbacks.size(), getNumBrokers() + getNumServers());
for (ServiceStatus.ServiceStatusCallback serviceStatusCallback : _serviceStatusCallbacks) {
assertEquals(serviceStatusCallback.getServiceStatus(), ServiceStatus.Status.GOOD);
}
}

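// NOTE: AirlineID = 0 presumably matches no rows in the test data, so every query below yields
// an empty result set and exercises the data-type backfill.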
@Test
public void testCompiledByV2StarField() throws Exception {
String sqlQuery = "SELECT * FROM myTable WHERE AirlineID = 0 LIMIT 1";
JsonNode response = postQuery(sqlQuery);
assertNoRowsReturned(response);
assertDataTypes(response, "INT", "INT", "LONG", "INT", "FLOAT", "DOUBLE", "INT", "STRING", "INT", "INT", "INT",
"INT", "STRING", "INT", "STRING", "INT", "INT", "INT", "INT", "INT", "DOUBLE", "FLOAT", "INT", "STRING", "INT",
"STRING", "INT", "INT", "INT", "STRING", "STRING", "INT", "STRING", "INT", "INT", "INT", "INT", "INT_ARRAY",
"INT", "INT_ARRAY", "STRING_ARRAY", "INT", "INT", "FLOAT_ARRAY", "INT", "STRING_ARRAY", "LONG_ARRAY",
"INT_ARRAY", "INT_ARRAY", "INT", "INT", "STRING", "INT", "INT", "INT", "INT", "INT", "INT", "STRING", "INT",
"INT", "INT", "STRING", "STRING", "INT", "STRING", "INT", "INT", "STRING_ARRAY", "INT", "STRING", "INT", "INT",
"INT", "STRING", "INT", "INT", "INT", "INT");
}

@Test
public void testCompiledByV2SelectionFields() throws Exception {
String sqlQuery = "SELECT AirlineID, ArrTime, ArrTimeBlk FROM myTable WHERE AirlineID = 0 LIMIT 1";
JsonNode response = postQuery(sqlQuery);
assertNoRowsReturned(response);
assertDataTypes(response, "LONG", "INT", "STRING");
}

@Test
public void testCompiledByV2TransformedFields() throws Exception {
String sqlQuery = "SELECT AirlineID, ArrTime, ArrTime+1 FROM myTable WHERE AirlineID = 0 LIMIT 1";
JsonNode response = postQuery(sqlQuery);
assertNoRowsReturned(response);
assertDataTypes(response, "LONG", "INT", "INT");
}

@Test
public void testCompiledByV2AggregatedFields() throws Exception {
String sqlQuery = "SELECT AirlineID, avg(ArrTime) FROM myTable WHERE AirlineID = 0 GROUP BY AirlineID LIMIT 1";
JsonNode response = postQuery(sqlQuery);
assertNoRowsReturned(response);
assertDataTypes(response, "LONG", "DOUBLE");
}

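// NOTE: the add(...) filter below presumably fails V2 compilation, so the "SchemaFallback" tests
// exercise the base-table-schema fallback rather than the V2-derived types.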
@Test
public void testSchemaFallbackStarField() throws Exception {
String sqlQuery = "SELECT * FROM myTable WHERE AirlineID = 0 AND add(AirTime, AirTime, ArrTime) > 0 LIMIT 1";
JsonNode response = postQuery(sqlQuery);
assertNoRowsReturned(response);
assertDataTypes(response, "INT", "INT", "LONG", "INT", "FLOAT", "DOUBLE", "INT", "STRING", "INT", "INT", "INT",
"INT", "STRING", "INT", "STRING", "INT", "INT", "INT", "INT", "INT", "DOUBLE", "FLOAT", "INT", "STRING", "INT",
"STRING", "INT", "INT", "INT", "STRING", "STRING", "INT", "STRING", "INT", "INT", "INT", "INT", "INT", "INT",
"INT", "STRING", "INT", "INT", "FLOAT", "INT", "STRING", "LONG", "INT", "INT", "INT", "INT", "STRING", "INT",
"INT", "INT", "INT", "INT", "INT", "STRING", "INT", "INT", "INT", "STRING", "STRING", "INT", "STRING", "INT",
"INT", "STRING", "INT", "STRING", "INT", "INT", "INT", "STRING", "INT", "INT", "INT", "INT");
}

@Test
public void testSchemaFallbackSelectionFields() throws Exception {
String sqlQuery = "SELECT AirlineID, ArrTime, ArrTimeBlk FROM myTable"
+ " WHERE AirlineID = 0 AND add(ArrTime, ArrTime, ArrTime) > 0 LIMIT 1";
JsonNode response = postQuery(sqlQuery);
assertNoRowsReturned(response);
assertDataTypes(response, "LONG", "INT", "STRING");
}

@Test
public void testSchemaFallbackTransformedFields() throws Exception {
String sqlQuery = "SELECT AirlineID, ArrTime, ArrTime+1 FROM myTable"
+ " WHERE AirlineID = 0 AND add(ArrTime, ArrTime, ArrTime) > 0 LIMIT 1";
JsonNode response = postQuery(sqlQuery);
assertNoRowsReturned(response);
assertDataTypes(response, "LONG", "INT", "STRING");
}

@Test
public void testSchemaFallbackAggregatedFields() throws Exception {
String sqlQuery = "SELECT AirlineID, avg(ArrTime) FROM myTable"
+ " WHERE AirlineID = 0 AND add(ArrTime, ArrTime, ArrTime) > 0 GROUP BY AirlineID LIMIT 1";
JsonNode response = postQuery(sqlQuery);
assertNoRowsReturned(response);
assertDataTypes(response, "LONG", "DOUBLE");
}

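// Asserts that the response contains a result table with zero rows.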
private void assertNoRowsReturned(JsonNode response) {
assertNotNull(response.get("resultTable"));
assertNotNull(response.get("resultTable").get("rows"));
assertEquals(response.get("resultTable").get("rows").size(), 0);
}

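// Asserts the reported columnDataTypes match the expected types, comparing their JSON representations.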
private void assertDataTypes(JsonNode response, String... types) throws Exception {
assertNotNull(response.get("resultTable"));
assertNotNull(response.get("resultTable").get("dataSchema"));
assertNotNull(response.get("resultTable").get("dataSchema").get("columnDataTypes"));
String expected = new ObjectMapper().writeValueAsString(types);
assertEquals(response.get("resultTable").get("dataSchema").get("columnDataTypes").toString(), expected);
}
}
@@ -135,6 +135,7 @@ protected void testQueriesValidateAgainstH2(String query)
         }
       }
     }
+
     Assert.assertFalse(h2ResultSet.next(), "Pinot result set is smaller than H2 result set after: "
         + numRows + " rows!");
   }
@@ -279,18 +279,27 @@ public List<String> getTableNamesForQuery(String sqlQuery) {
  * Returns whether the query can be successfully compiled in this query environment
  */
 public boolean canCompileQuery(String query) {
-  SqlNodeAndOptions sqlNodeAndOptions = CalciteSqlParser.compileToSqlNodeAndOptions(query);
-  try (PlannerContext plannerContext = getPlannerContext(sqlNodeAndOptions)) {
+  return getRelRootIfCanCompile(query) != null;
+}
+
+/**
+ * Returns the RelRoot node if the query can be compiled, null otherwise.
+ */
+@Nullable
+public RelRoot getRelRootIfCanCompile(String query) {
+  try {
+    SqlNodeAndOptions sqlNodeAndOptions = CalciteSqlParser.compileToSqlNodeAndOptions(query);
+    PlannerContext plannerContext = getPlannerContext(sqlNodeAndOptions);
     SqlNode sqlNode = sqlNodeAndOptions.getSqlNode();
     if (sqlNode.getKind().equals(SqlKind.EXPLAIN)) {
       sqlNode = ((SqlExplain) sqlNode).getExplicandum();
     }
-    compileQuery(sqlNode, plannerContext);
+    RelRoot node = compileQuery(sqlNode, plannerContext);
     LOGGER.debug("Successfully compiled query using the multi-stage query engine: `{}`", query);
-    return true;
-  } catch (Exception e) {
-    LOGGER.warn("Encountered an error while compiling query `{}` using the multi-stage query engine", query, e);
-    return false;
+    return node;
+  } catch (Throwable t) {
+    LOGGER.warn("Encountered an error while compiling query `{}` using the multi-stage query engine", query, t);
+    return null;
   }
 }

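Presumably the broker-side schema backfill consumes the new method roughly as sketched below; toPinotType stands in for whatever Calcite-to-Pinot type mapping is actually used and is hypothetical:

// Hypothetical consumer of getRelRootIfCanCompile: derive column types from the
// validated row type of the V2 plan.
RelRoot root = queryEnvironment.getRelRootIfCanCompile(query);
if (root != null) {
  for (RelDataTypeField field : root.validatedRowType.getFieldList()) {
    columnTypes.put(field.getName(), toPinotType(field.getType()));
  }
}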