From 377ea73d0f82543168ebf54e5930b264cbb46fbe Mon Sep 17 00:00:00 2001 From: Vivek Iyer Vaidyanathan Iyer Date: Fri, 16 Aug 2024 17:58:39 -0700 Subject: [PATCH] Return correct dataschema for empty results --- .../blocks/results/ResultsBlockUtils.java | 10 +++--- .../executor/ServerQueryExecutorV1Impl.java | 21 +++++++---- .../query/request/context/QueryContext.java | 4 ++- .../tests/OfflineClusterIntegrationTest.java | 35 +++++++++++++++++++ 4 files changed, 59 insertions(+), 11 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/blocks/results/ResultsBlockUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/blocks/results/ResultsBlockUtils.java index 9775d04d1cb1..8dbb22bb745d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/blocks/results/ResultsBlockUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/blocks/results/ResultsBlockUtils.java @@ -36,6 +36,8 @@ @SuppressWarnings("rawtypes") public class ResultsBlockUtils { + + // TODO: Remove all util functions except buildEmptyAggregationQueryResults as they are not used. private ResultsBlockUtils() { } @@ -54,7 +56,7 @@ public static BaseResultsBlock buildEmptyQueryResults(QueryContext queryContext) return buildEmptyDistinctQueryResults(queryContext); } - private static SelectionResultsBlock buildEmptySelectionQueryResults(QueryContext queryContext) { + public static SelectionResultsBlock buildEmptySelectionQueryResults(QueryContext queryContext) { List selectExpressions = queryContext.getSelectExpressions(); int numSelectExpressions = selectExpressions.size(); String[] columnNames = new String[numSelectExpressions]; @@ -68,7 +70,7 @@ private static SelectionResultsBlock buildEmptySelectionQueryResults(QueryContex return new SelectionResultsBlock(dataSchema, Collections.emptyList(), queryContext); } - private static AggregationResultsBlock buildEmptyAggregationQueryResults(QueryContext queryContext) { + public static AggregationResultsBlock buildEmptyAggregationQueryResults(QueryContext queryContext) { AggregationFunction[] aggregationFunctions = queryContext.getAggregationFunctions(); assert aggregationFunctions != null; int numAggregations = aggregationFunctions.length; @@ -79,7 +81,7 @@ private static AggregationResultsBlock buildEmptyAggregationQueryResults(QueryCo return new AggregationResultsBlock(aggregationFunctions, results, queryContext); } - private static GroupByResultsBlock buildEmptyGroupByQueryResults(QueryContext queryContext) { + public static GroupByResultsBlock buildEmptyGroupByQueryResults(QueryContext queryContext) { List> filteredAggregationFunctions = queryContext.getFilteredAggregationFunctions(); List groupByExpressions = queryContext.getGroupByExpressions(); @@ -103,7 +105,7 @@ private static GroupByResultsBlock buildEmptyGroupByQueryResults(QueryContext qu return new GroupByResultsBlock(new DataSchema(columnNames, columnDataTypes), queryContext); } - private static DistinctResultsBlock buildEmptyDistinctQueryResults(QueryContext queryContext) { + public static DistinctResultsBlock buildEmptyDistinctQueryResults(QueryContext queryContext) { List expressions = queryContext.getSelectExpressions(); int numExpressions = expressions.size(); String[] columns = new String[numExpressions]; diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/executor/ServerQueryExecutorV1Impl.java b/pinot-core/src/main/java/org/apache/pinot/core/query/executor/ServerQueryExecutorV1Impl.java index 8c2906db541e..0eae255c6e99 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/executor/ServerQueryExecutorV1Impl.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/executor/ServerQueryExecutorV1Impl.java @@ -63,6 +63,7 @@ import org.apache.pinot.core.query.request.context.QueryContext; import org.apache.pinot.core.query.request.context.TimerContext; import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils; +import org.apache.pinot.core.query.request.context.utils.QueryContextUtils; import org.apache.pinot.core.query.utils.idset.IdSet; import org.apache.pinot.core.util.trace.TraceContext; import org.apache.pinot.segment.local.data.manager.SegmentDataManager; @@ -374,13 +375,21 @@ private InstanceResponseBlock executeInternal(TableDataManager tableDataManager, int numSelectedSegments = selectedSegments.size(); LOGGER.debug("Matched {} segments after pruning", numSelectedSegments); InstanceResponseBlock instanceResponse; - if (numSelectedSegments == 0) { - if (queryContext.isExplain()) { - instanceResponse = getExplainResponseForNoMatchingSegment(numTotalSegments, queryContext); - } else { - instanceResponse = new InstanceResponseBlock(ResultsBlockUtils.buildEmptyQueryResults(queryContext)); - } + + if (numSelectedSegments == 0 && queryContext.isExplain()) { + instanceResponse = getExplainResponseForNoMatchingSegment(numTotalSegments, queryContext); + } else if (numSelectedSegments == 0 && QueryContextUtils.isAggregationQuery(queryContext) + && queryContext.getGroupByExpressions() == null) { + // For Aggregation queries, column datatype can be inferred from the aggregation function. Short-circuit to get + // the results. + instanceResponse = new InstanceResponseBlock(ResultsBlockUtils.buildEmptyAggregationQueryResults(queryContext)); } else { + // If numSelectedSegments is empty, process only a single segment by setting LIMIT 0 to get the data schema. + if (numSelectedSegments == 0) { + queryContext.setLimit(0); + selectedSegments = indexSegments.subList(0, 1); + } + TimerContext.Timer planBuildTimer = timerContext.startNewPhaseTimer(ServerQueryPhase.BUILD_QUERY_PLAN); List selectedSegmentContexts = tableDataManager.getSegmentContexts(selectedSegments, queryContext.getQueryOptions()); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java index 6c4a3d75c3df..9433916692bc 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java @@ -81,7 +81,7 @@ public class QueryContext { private final List _groupByExpressions; private final FilterContext _havingFilter; private final List _orderByExpressions; - private final int _limit; + private int _limit; private final int _offset; private final Map _queryOptions; private final Map _expressionOverrideHints; @@ -336,6 +336,8 @@ public void setSkipStarTree(boolean skipStarTree) { _skipStarTree = skipStarTree; } + public void setLimit(int limit) { _limit = limit; } + public boolean isSkipScanFilterReorder() { return _skipScanFilterReorder; } diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java index 6d0fc53284de..5624fe79ff45 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java @@ -3024,6 +3024,41 @@ public void testAggregationFunctionsWithUnderscore(boolean useMultiStageQueryEng assertEquals(postQuery(query).get("resultTable").get("rows").get(0).get(0).asInt(), 115545); } + @Test + public void testDataSchemaForFullyPrunedEmptyResults() + throws Exception { + String query; + + // Select query + query = "Select DestAirportID, Carrier from myTable WHERE FlightNum < -1231231"; + + // Parse the Json response. Assert if DestAirportID has columnDatatype INT and Carrier has columnDatatype STRING. + JsonNode queryResponse = postQuery(query); + JsonNode columnTypes = queryResponse.get("resultTable").get("dataSchema").get("columnDataTypes"); + assertEquals(columnTypes.get(0).asText(), "INT"); + assertEquals(columnTypes.get(1).asText(), "STRING"); + + // Aggregation query + query = "Select sum(FlightNum) from myTable WHERE FlightNum < -1231231"; + queryResponse = postQuery(query); + columnTypes = queryResponse.get("resultTable").get("dataSchema").get("columnDataTypes"); + assertEquals(columnTypes.get(0).asText(), "DOUBLE"); + + // GroupBy query + query = "SELECT carrier, sum(FlightNum) FROM mytable WHERE FlightNum < -1231231 group by carrier"; + queryResponse = postQuery(query); + columnTypes = queryResponse.get("resultTable").get("dataSchema").get("columnDataTypes"); + assertEquals(columnTypes.get(0).asText(), "STRING"); + assertEquals(columnTypes.get(1).asText(), "DOUBLE"); + + // Distinct query + query = "SELECT distinct(DestAirportID) FROM mytable WHERE FlightNum < -1231231"; + queryResponse = postQuery(query); + columnTypes = queryResponse.get("resultTable").get("dataSchema").get("columnDataTypes"); + assertEquals(columnTypes.get(0).asText(), "INT"); + } + + @Test public void testExplainPlanQueryV1() throws Exception {