From 9b9606898872105341b5a218c81a61f8d60d76e4 Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Mon, 30 Dec 2024 23:02:46 -0800 Subject: [PATCH] Optimize MergeEqInFilterOptimizer by reducing the hash computation of Expression (#14732) --- .../filter/MergeEqInFilterOptimizer.java | 124 +++++++++++------- 1 file changed, 74 insertions(+), 50 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java index 6836f8022617..5104587322ec 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java @@ -18,16 +18,17 @@ */ package org.apache.pinot.core.query.optimizer.filter; +import com.google.common.collect.Maps; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import javax.annotation.Nullable; import org.apache.pinot.common.request.Expression; import org.apache.pinot.common.request.ExpressionType; import org.apache.pinot.common.request.Function; +import org.apache.pinot.common.request.context.RequestContextUtils; import org.apache.pinot.common.utils.request.RequestUtils; import org.apache.pinot.spi.data.Schema; import org.apache.pinot.sql.FilterKind; @@ -61,9 +62,10 @@ private Expression optimize(Expression filterExpression) { String operator = function.getOperator(); if (operator.equals(FilterKind.OR.name())) { List children = function.getOperands(); - Map> valuesMap = new HashMap<>(); - List newChildren = new ArrayList<>(); - boolean recreateFilter = false; + // Key is the lhs of the EQ/IN predicate, value is the map from string representation of the value to the value + Map> valuesMap = new HashMap<>(); + List newChildren = new ArrayList<>(children.size()); + boolean[] recreateFilter = new boolean[1]; // Iterate over all the child filters to merge EQ and IN predicates for (Expression child : children) { @@ -80,52 +82,62 @@ private Expression optimize(Expression filterExpression) { List operands = childFunction.getOperands(); Expression lhs = operands.get(0); Expression value = operands.get(1); - Set values = valuesMap.get(lhs); - if (values == null) { - values = new HashSet<>(); - values.add(value); - valuesMap.put(lhs, values); - } else { - values.add(value); - // Recreate filter when multiple predicates can be merged - recreateFilter = true; - } + // Use string value to de-duplicate the values to prevent the overhead of Expression.hashCode(). This is + // consistent with how server handles predicates. + String stringValue = RequestContextUtils.getStringValue(value); + valuesMap.compute(lhs, (k, v) -> { + if (v == null) { + Map values = new HashMap<>(); + values.put(stringValue, value); + return values; + } else { + v.put(stringValue, value); + // Recreate filter when multiple predicates can be merged + recreateFilter[0] = true; + return v; + } + }); } else if (childOperator.equals(FilterKind.IN.name())) { List operands = childFunction.getOperands(); Expression lhs = operands.get(0); - Set inPredicateValuesSet = new HashSet<>(); - int numOperands = operands.size(); - for (int i = 1; i < numOperands; i++) { - inPredicateValuesSet.add(operands.get(i)); - } - int numUniqueValues = inPredicateValuesSet.size(); - if (numUniqueValues == 1 || numUniqueValues != numOperands - 1) { - // Recreate filter when the IN predicate contains only 1 value (can be rewritten to EQ predicate), - // or values can be de-duplicated - recreateFilter = true; - } - Set values = valuesMap.get(lhs); - if (values == null) { - valuesMap.put(lhs, inPredicateValuesSet); - } else { - values.addAll(inPredicateValuesSet); - // Recreate filter when multiple predicates can be merged - recreateFilter = true; - } + valuesMap.compute(lhs, (k, v) -> { + if (v == null) { + Map values = getInValues(operands); + int numUniqueValues = values.size(); + if (numUniqueValues == 1 || numUniqueValues != operands.size() - 1) { + // Recreate filter when the IN predicate contains only 1 value (can be rewritten to EQ predicate), or + // values can be de-duplicated + recreateFilter[0] = true; + } + return values; + } else { + int numOperands = operands.size(); + for (int i = 1; i < numOperands; i++) { + Expression value = operands.get(i); + // Use string value to de-duplicate the values to prevent the overhead of Expression.hashCode(). This + // is consistent with how server handles predicates. + String stringValue = RequestContextUtils.getStringValue(value); + v.put(stringValue, value); + } + // Recreate filter when multiple predicates can be merged + recreateFilter[0] = true; + return v; + } + }); } else { newChildren.add(child); } } } - if (recreateFilter) { + if (recreateFilter[0]) { if (newChildren.isEmpty() && valuesMap.size() == 1) { // Single range without other filters - Map.Entry> entry = valuesMap.entrySet().iterator().next(); - return getFilterExpression(entry.getKey(), entry.getValue()); + Map.Entry> entry = valuesMap.entrySet().iterator().next(); + return getFilterExpression(entry.getKey(), entry.getValue().values()); } else { - for (Map.Entry> entry : valuesMap.entrySet()) { - newChildren.add(getFilterExpression(entry.getKey(), entry.getValue())); + for (Map.Entry> entry : valuesMap.entrySet()) { + newChildren.add(getFilterExpression(entry.getKey(), entry.getValue().values())); } function.setOperands(newChildren); return filterExpression; @@ -138,17 +150,12 @@ private Expression optimize(Expression filterExpression) { return filterExpression; } else if (operator.equals(FilterKind.IN.name())) { List operands = function.getOperands(); - Expression lhs = operands.get(0); - Set values = new HashSet<>(); - int numOperands = operands.size(); - for (int i = 1; i < numOperands; i++) { - values.add(operands.get(i)); - } + Map values = getInValues(operands); int numUniqueValues = values.size(); - if (numUniqueValues == 1 || numUniqueValues != numOperands - 1) { - // Recreate filter when the IN predicate contains only 1 value (can be rewritten to EQ predicate), or values - // can be de-duplicated - return getFilterExpression(lhs, values); + if (numUniqueValues == 1 || numUniqueValues != operands.size() - 1) { + // Recreate filter when the IN predicate contains only 1 value (can be rewritten to EQ predicate), or values can + // be de-duplicated + return getFilterExpression(operands.get(0), values.values()); } else { return filterExpression; } @@ -157,10 +164,27 @@ private Expression optimize(Expression filterExpression) { } } + /** + * Helper method to get the values from the IN predicate. Returns a map from string representation of the value to the + * value. + */ + private Map getInValues(List operands) { + int numOperands = operands.size(); + Map values = Maps.newHashMapWithExpectedSize(numOperands - 1); + for (int i = 1; i < numOperands; i++) { + Expression value = operands.get(i); + // Use string value to de-duplicate the values to prevent the overhead of Expression.hashCode(). This is + // consistent with how server handles predicates. + String stringValue = RequestContextUtils.getStringValue(value); + values.put(stringValue, value); + } + return values; + } + /** * Helper method to construct a EQ or IN predicate filter Expression from the given lhs and values. */ - private static Expression getFilterExpression(Expression lhs, Set values) { + private static Expression getFilterExpression(Expression lhs, Collection values) { int numValues = values.size(); if (numValues == 1) { return RequestUtils.getFunctionExpression(FilterKind.EQUALS.name(), lhs, values.iterator().next());