diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java index 133385a52456..d6045cfdfba4 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DefaultGroupByExecutor.java @@ -104,7 +104,7 @@ public DefaultGroupByExecutor(QueryContext queryContext, AggregationFunction[] a } } else { _groupKeyGenerator = new DictionaryBasedGroupKeyGenerator(projectOperator, groupByExpressions, numGroupsLimit, - maxInitialResultHolderCapacity); + maxInitialResultHolderCapacity, queryContext.getFilter(), queryContext.getQueryOptions()); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java index 8650ccad9bad..d2588b4518cc 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGenerator.java @@ -26,8 +26,18 @@ import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import it.unimi.dsi.fastutil.objects.ObjectIterator; import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.FilterContext; +import org.apache.pinot.common.request.context.predicate.InPredicate; +import org.apache.pinot.common.request.context.predicate.Predicate; +import 
org.apache.pinot.common.utils.config.QueryOptionsUtils; import org.apache.pinot.core.common.BlockValSet; import org.apache.pinot.core.operator.BaseProjectOperator; import org.apache.pinot.core.operator.ColumnContext; @@ -97,9 +107,11 @@ public class DictionaryBasedGroupKeyGenerator implements GroupKeyGenerator { private final int _globalGroupIdUpperBound; private final RawKeyHolder _rawKeyHolder; + private final Map _cardinalityMap; public DictionaryBasedGroupKeyGenerator(BaseProjectOperator projectOperator, - ExpressionContext[] groupByExpressions, int numGroupsLimit, int arrayBasedThreshold) { + ExpressionContext[] groupByExpressions, int numGroupsLimit, int arrayBasedThreshold, + @Nullable FilterContext filterContext, @Nullable Map queryOptions) { assert numGroupsLimit >= arrayBasedThreshold; _groupByExpressions = groupByExpressions; @@ -113,7 +125,7 @@ public DictionaryBasedGroupKeyGenerator(BaseProjectOperator projectOperator, // no need to intern dictionary values when there is only one group by expression because // only one call will be made to the dictionary to extract each raw value. _internedDictionaryValues = _numGroupByExpressions > 1 ? 
new Object[_numGroupByExpressions][] : null; - + _cardinalityMap = new HashMap<>(_numGroupByExpressions); long cardinalityProduct = 1L; boolean longOverflow = false; for (int i = 0; i < _numGroupByExpressions; i++) { @@ -123,6 +135,7 @@ public DictionaryBasedGroupKeyGenerator(BaseProjectOperator projectOperator, assert _dictionaries[i] != null; int cardinality = _dictionaries[i].length(); _cardinalities[i] = cardinality; + _cardinalityMap.put(groupByExpression, cardinality); if (_internedDictionaryValues != null && cardinality < MAX_DICTIONARY_INTERN_TABLE_SIZE) { _internedDictionaryValues[i] = new Object[cardinality]; } @@ -135,6 +148,13 @@ public DictionaryBasedGroupKeyGenerator(BaseProjectOperator projectOperator, } _isSingleValueColumn[i] = columnContext.isSingleValue(); } + if (queryOptions != null && QueryOptionsUtils.optimizeMaxInitialResultHolderCapacityEnabled(queryOptions)) { + Pair optimizedResult = getOptimizedMaxInitialResultHolderCapacity(filterContext); + if (optimizedResult.getLeft() && optimizedResult.getRight() != null) { + longOverflow = false; + cardinalityProduct = Math.min(optimizedResult.getRight(), cardinalityProduct); + } + } // TODO: Clear the holder after processing the query instead of before if (longOverflow) { // ArrayMapBasedHolder @@ -171,6 +191,49 @@ public DictionaryBasedGroupKeyGenerator(BaseProjectOperator projectOperator, } } + // Estimates the group-by result size from the filter's IN and EQ predicates and the group-by column cardinalities. + // Each group-by expression contributes its column cardinality, capped by its IN/EQ value count when it has one. + // Returns a pair of boolean and long. The boolean is true only when an estimate was computed; it is false, with a + // null long, when there is no filter, no IN or EQ predicate was collected, or the product would exceed + // Long.MAX_VALUE. When the boolean is true, the long is the product of the per-expression sizes. 
+  private Pair<Boolean, Long> getOptimizedMaxInitialResultHolderCapacity(@Nullable FilterContext filterContext) {
+    if (filterContext == null) {
+      return Pair.of(false, null);
+    }
+
+    Set<Predicate> predicateColumns = new HashSet<>();
+    filterContext.getPredicateColumns(predicateColumns);
+
+    // Total number of IN/EQ values per predicate LHS; several predicates on the same LHS are summed
+    Map<ExpressionContext, Integer> predicateSizeMap = new HashMap<>();
+    // NOTE(review): the loop ignores where a predicate sits in the filter (AND/OR/NOT) - confirm the bound holds
+    for (Predicate predicate : predicateColumns) {
+      if (predicate.getType() == Predicate.Type.IN || predicate.getType() == Predicate.Type.EQ) {
+        ExpressionContext lhs = predicate.getLhs();
+        int size = (predicate.getType() == Predicate.Type.IN)
+            ? ((InPredicate) predicate).getValues().size()
+            : 1;
+        predicateSizeMap.merge(lhs, size, Integer::sum);
+      }
+    }
+
+    if (predicateSizeMap.isEmpty()) {
+      return Pair.of(false, null);
+    }
+
+    long cardinalityProduct = 1L;
+    for (ExpressionContext expression : _groupByExpressions) {
+      Integer predicateSize = predicateSizeMap.get(expression);
+      Integer columnCardinality = _cardinalityMap.get(expression);
+      int cardinality = predicateSize != null ? Math.min(predicateSize, columnCardinality) : columnCardinality;
+      if (cardinalityProduct > Long.MAX_VALUE / cardinality) {
+        return Pair.of(false, null);
+      }
+      cardinalityProduct *= cardinality;
+    }
+    return Pair.of(true, cardinalityProduct);
+  }
+
 @Override public int getGlobalGroupKeyUpperBound() { return _globalGroupIdUpperBound; diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java index 526ecc9564b9..2b341761c591 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java @@ -36,8 +36,6 @@ import org.apache.pinot.common.request.context.OrderByExpressionContext; import org.apache.pinot.common.request.context.RequestContextUtils; import org.apache.pinot.common.request.context.TimeSeriesContext; -import org.apache.pinot.common.request.context.predicate.InPredicate; -import org.apache.pinot.common.request.context.predicate.Predicate; import org.apache.pinot.common.utils.config.QueryOptionsUtils; import org.apache.pinot.core.plan.maker.InstancePlanMakerImplV2; import org.apache.pinot.core.query.aggregation.function.AggregationFunction; @@ -353,56 +351,9 @@ public void setMaxExecutionThreads(int maxExecutionThreads) { } public int getMaxInitialResultHolderCapacity() { - if (QueryOptionsUtils.optimizeMaxInitialResultHolderCapacityEnabled(_queryOptions)) { - return getOptimizedMaxInitialResultHolderCapacity(); - } return _maxInitialResultHolderCapacity; } - // TODO: Improve this to use segment level info to optimize the capacity - // Optimization to right-size the initial result holder capacity for group-by queries if they exist in the filter - // If any one group-by expression is not in the filter, we return the _maxInitialResultHolderCapacity. 
- public int getOptimizedMaxInitialResultHolderCapacity() { - if (getFilter() == null) { - return _maxInitialResultHolderCapacity; - } - - assert getGroupByExpressions() != null; - - Set predicateColumns = new HashSet<>(); - getFilter().getPredicateColumns(predicateColumns); - - // Map to store the size of the predicates - Map predicateSizeMap = new HashMap<>(); - - // Collect IN and EQ predicates and store their sizes - for (Predicate predicate : predicateColumns) { - if (predicate.getType() == Predicate.Type.IN || predicate.getType() == Predicate.Type.EQ) { - ExpressionContext lhs = predicate.getLhs(); - int size = (predicate.getType() == Predicate.Type.IN) - ? ((InPredicate) predicate).getValues().size() - : 1; - predicateSizeMap.merge(lhs, size, Integer::sum); - } - } - - int crossProductCapacity = 1; - for (ExpressionContext expression : getGroupByExpressions()) { - Integer size = predicateSizeMap.get(expression); - - if (size == null) { - // No matching predicate for a group-by expression, return the default capacity - return _maxInitialResultHolderCapacity; - } - crossProductCapacity *= size; - if (crossProductCapacity > _maxInitialResultHolderCapacity) { - return _maxInitialResultHolderCapacity; - } - } - return crossProductCapacity; - } - - public void setMaxInitialResultHolderCapacity(int maxInitialResultHolderCapacity) { _maxInitialResultHolderCapacity = maxInitialResultHolderCapacity; } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java index a0d61c0e52e4..7b2a49914702 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/DictionaryBasedGroupKeyGeneratorTest.java @@ -54,11 +54,10 @@ import 
org.apache.pinot.spi.utils.builder.TableConfigBuilder; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; +import static org.testng.Assert.*; public class DictionaryBasedGroupKeyGeneratorTest { @@ -167,7 +166,7 @@ public void testArrayBasedSingleValue() { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), UNIQUE_ROWS, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), UNIQUE_ROWS, _errorMessage); @@ -187,7 +186,7 @@ public void testIntMapBasedSingleValue() { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -208,7 +207,7 @@ public void testLongMapBasedSingleValue() { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), 
InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -229,7 +228,7 @@ public void testArrayMapBasedSingleValue() { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -264,7 +263,7 @@ public void testArrayBasedMultiValue() { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null, null); int groupKeyUpperBound = dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), groupKeyUpperBound, _errorMessage); @@ -285,7 +284,7 @@ public void tesIntMapBasedMultiValue() { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - 
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -308,7 +307,7 @@ public void testLongMapBasedMultiValue() { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -330,7 +329,7 @@ public void testArrayMapBasedMultiValue() { DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY); + InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, null, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -350,7 +349,7 @@ public void testNumGroupsLimit() { // NOTE: arrayBasedThreshold must be smaller or equal to numGroupsLimit DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator = new DictionaryBasedGroupKeyGenerator(_projectOperator, getExpressions(groupByColumns), 
numGroupsLimit, - numGroupsLimit); + numGroupsLimit, null, null); assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), numGroupsLimit, _errorMessage); assertEquals(dictionaryBasedGroupKeyGenerator.getCurrentGroupKeyUpperBound(), 0, _errorMessage); @@ -431,6 +430,55 @@ public void testMapDefaultValue() { GroupKeyGenerator.INVALID_ID); }
+  /**
+   * Checks that, with {@code optimizeMaxInitialResultHolderCapacity} enabled, the global group key upper bound of
+   * the generator built for {@code query} equals {@code expectedCapacity}.
+   */
+  @Test(dataProvider = "groupByResultHolderCapacityDataProvider")
+  public void testGetGroupByResultHolderCapacity(String query, Integer expectedCapacity) {
+    query = query + "SET optimizeMaxInitialResultHolderCapacity=true";
+    QueryContext queryContext = QueryContextConverterUtils.getQueryContext(query);
+    List<ExpressionContext> expressionContextList = queryContext.getGroupByExpressions();
+    ExpressionContext[] expressions = expressionContextList.toArray(new ExpressionContext[0]);
+    DictionaryBasedGroupKeyGenerator dictionaryBasedGroupKeyGenerator =
+        new DictionaryBasedGroupKeyGenerator(_projectOperator, expressions,
+            InstancePlanMakerImplV2.DEFAULT_NUM_GROUPS_LIMIT,
+            InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, queryContext.getFilter(),
+            queryContext.getQueryOptions());
+    assertEquals(dictionaryBasedGroupKeyGenerator.getGlobalGroupKeyUpperBound(), expectedCapacity, _errorMessage);
+  }
+
+  /** Provides rows of (query, expected global group key upper bound). */
+  @DataProvider(name = "groupByResultHolderCapacityDataProvider")
+  public Object[][] groupByResultHolderCapacityDataProvider() {
+    return new Object[][]{
+        // Single IN predicate
+        {"SELECT COUNT(*) FROM testTable WHERE s1 IN (1, 2, 3, 4, 5) GROUP BY s1 LIMIT 10;", 5},
+        // Multiple IN predicates but only one used in group-by
+        {"SELECT COUNT(*) FROM testTable WHERE s1 IN (1, 2, 3) AND s2 IN (4, 5) GROUP BY s1 LIMIT 10;", 3},
+        // Multiple IN predicates used in group-by
+        {"SELECT COUNT(*) FROM testTable WHERE s1 IN (1, 2, 3) AND s3 IN (4, 5) GROUP BY s1, s3 LIMIT 10;", 6},
+        // Single EQ predicate
+        {"SELECT COUNT(*) FROM testTable WHERE s1 = 1 GROUP BY s1 LIMIT 10;", 1},
+        // Multiple EQ predicates but only one used in group-by
+        {"SELECT COUNT(*) FROM testTable WHERE s1 = 1 AND s2 = 4 GROUP BY s1 LIMIT 10;", 1},
+        // Mixed predicates
+        {"SELECT COUNT(*) FROM testTable WHERE s1 IN (1, 2, 3) AND s3 = 4 GROUP BY s1, s3 LIMIT 10;", 3},
+        {"SELECT COUNT(*) FROM testTable WHERE s1 = 1 AND s3 IN (4, 5) GROUP BY s1, s3 LIMIT 10;", 2},
+        // Multiple IN Predicate columns with same column name and different values
+        {"SELECT COUNT(*) FROM testTable WHERE s1 IN (1, 2, 3) AND s1 IN (4, 5) OR s2 IN (6, 7) GROUP BY s1, s2"
+            + " LIMIT 10;", 10},
+        // No filter -> s1 has cardinality 100
+        {"SELECT COUNT(*) FROM testTable GROUP BY s1 LIMIT 1000;", 100},
+        // No matching filter EQ predicate in group-by expression -> s2 has cardinality 100
+        {"SELECT COUNT(*) FROM testTable WHERE s1 = 1 GROUP BY s2 LIMIT 1000;", 100},
+        // No matching filter IN predicate in group-by expression -> s2 has cardinality 100
+        {"SELECT COUNT(*) FROM testTable WHERE s1 IN (1, 2, 3) GROUP BY s2 LIMIT 1000;", 100},
+        // Only one matching filter predicate in group-by expression -> (3 [s1] * 100 [s2]) = 300
+        {"SELECT COUNT(*) FROM testTable WHERE s1 IN (1, 2, 3) GROUP BY s1, s2 LIMIT 1000;", 300},
+    };
+  }
+
 @AfterClass public void tearDown() { FileUtils.deleteQuietly(new File(INDEX_DIR_PATH)); diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/GroupByResultHolderTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/GroupByResultHolderTest.java deleted file mode 100644 index 65c5bbc024c0..000000000000 --- a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/groupby/GroupByResultHolderTest.java +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.pinot.core.query.aggregation.groupby; - -import org.apache.pinot.core.plan.maker.InstancePlanMakerImplV2; -import org.apache.pinot.core.query.request.context.QueryContext; -import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - - -public class GroupByResultHolderTest { - - @Test(dataProvider = "groupByResultHolderCapacityDataProvider") - public void testGetGroupByResultHolderCapacity(String query, Integer expectedCapacity, - boolean shouldOptimizeCapacity) { - if (shouldOptimizeCapacity) { - query = query + "SET optimizeMaxInitialResultHolderCapacity=true"; - } - QueryContext queryContext = QueryContextConverterUtils.getQueryContext(query); - Assert.assertEquals(queryContext.getMaxInitialResultHolderCapacity(), expectedCapacity); - } - - @DataProvider(name = "groupByResultHolderCapacityDataProvider") - public Object[][] groupByResultHolderCapacityDataProvider() { - return new Object[][]{ - // Single IN predicate - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 IN (10, 20, 30, 40, 50) GROUP BY column1" - + " LIMIT 10;", 5, true}, - // Multiple IN predicates but only one used in group-by - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 IN (10, 20, 30) AND column2 IN (100, 200)" - + " 
GROUP BY column1 LIMIT 10;", 3, true}, - // Multiple IN predicates used in group-by - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 IN (10, 20, 30) AND column3 IN (40, 50)" - + " GROUP BY column1, column3 LIMIT 10;", 6, true}, - // Single EQ predicate - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 = 10 GROUP BY column1 LIMIT 10;", 1, true}, - // Multiple EQ predicates but only one used in group-by - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 = 10 AND column2 = 100 GROUP BY column1" - + " LIMIT 10;", 1, true}, - // Mixed predicates - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 IN (10, 20, 30) AND column3 = 40" - + " GROUP BY column1, column3 LIMIT 10;", 3, true}, - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 = 10 AND column3 IN (40, 50)" - + " GROUP BY column1, column3 LIMIT 10;", 2, true}, - // Multiple IN Predicate columns with same column name and different values - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 IN (10, 20, 30) AND column1 IN (40, 50)" - + " OR column2 IN (60, 70) GROUP BY column1, column2 LIMIT 10;", 10, true}, - // Multiple EQ Predicate columns with same column name - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 = 10 OR column1 = 20" - + " GROUP BY column1 LIMIT 10;", 2, true}, - // No filter - {"SELECT COUNT(column1), MAX(column1) FROM testTable GROUP BY column1 LIMIT 10;", - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, true}, - // No matching filter EQ predicate in group-by expression - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 = 10 GROUP BY column2 LIMIT 10;", - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, true}, - // No matching filter IN predicate in group-by expression - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 IN (10, 20, 30) GROUP BY column2 LIMIT 10;", - 
InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, true}, - // Only one matching filter predicate in group-by expression - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 IN (10, 20, 30) GROUP BY column1, column2" - + " LIMIT 10;", InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, true}, - // Exceeding max size limit - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 IN (1, 2, 3, 4, 5)" - + " AND column2 IN (6, 7, 8, 9, 10) AND column3 IN (11, 12, 13, 14, 15)" - + " AND column4 IN (16, 17, 18, 19, 20)" - + " AND column5 IN (21, 22, 23, 24, 25)" - + " AND column6 IN (26, 27, 28, 29, 30)" - + " GROUP BY column1, column2, column3, column4, column5, column6;", - InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, true}, - // Disable optimization - {"SELECT COUNT(column1), MAX(column1) FROM testTable WHERE column1 IN (10, 20, 30, 40, 50) GROUP BY column1" - + " LIMIT 10;", InstancePlanMakerImplV2.DEFAULT_MAX_INITIAL_RESULT_HOLDER_CAPACITY, false}, - }; - } -}