Skip to content

Commit

Permalink
Optimize MergeEqInFilterOptimizer by reducing the hash computation of…
Browse files Browse the repository at this point in the history
… Expression (#14732)
  • Loading branch information
Jackie-Jiang authored Dec 31, 2024
1 parent f98b250 commit 9b96068
Showing 1 changed file with 74 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,17 @@
*/
package org.apache.pinot.core.query.optimizer.filter;

import com.google.common.collect.Maps;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.pinot.common.request.Expression;
import org.apache.pinot.common.request.ExpressionType;
import org.apache.pinot.common.request.Function;
import org.apache.pinot.common.request.context.RequestContextUtils;
import org.apache.pinot.common.utils.request.RequestUtils;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.sql.FilterKind;
Expand Down Expand Up @@ -61,9 +62,10 @@ private Expression optimize(Expression filterExpression) {
String operator = function.getOperator();
if (operator.equals(FilterKind.OR.name())) {
List<Expression> children = function.getOperands();
Map<Expression, Set<Expression>> valuesMap = new HashMap<>();
List<Expression> newChildren = new ArrayList<>();
boolean recreateFilter = false;
// Key is the lhs of the EQ/IN predicate, value is the map from string representation of the value to the value
Map<Expression, Map<String, Expression>> valuesMap = new HashMap<>();
List<Expression> newChildren = new ArrayList<>(children.size());
boolean[] recreateFilter = new boolean[1];

// Iterate over all the child filters to merge EQ and IN predicates
for (Expression child : children) {
Expand All @@ -80,52 +82,62 @@ private Expression optimize(Expression filterExpression) {
List<Expression> operands = childFunction.getOperands();
Expression lhs = operands.get(0);
Expression value = operands.get(1);
Set<Expression> values = valuesMap.get(lhs);
if (values == null) {
values = new HashSet<>();
values.add(value);
valuesMap.put(lhs, values);
} else {
values.add(value);
// Recreate filter when multiple predicates can be merged
recreateFilter = true;
}
// Use string value to de-duplicate the values to prevent the overhead of Expression.hashCode(). This is
// consistent with how server handles predicates.
String stringValue = RequestContextUtils.getStringValue(value);
valuesMap.compute(lhs, (k, v) -> {
if (v == null) {
Map<String, Expression> values = new HashMap<>();
values.put(stringValue, value);
return values;
} else {
v.put(stringValue, value);
// Recreate filter when multiple predicates can be merged
recreateFilter[0] = true;
return v;
}
});
} else if (childOperator.equals(FilterKind.IN.name())) {
List<Expression> operands = childFunction.getOperands();
Expression lhs = operands.get(0);
Set<Expression> inPredicateValuesSet = new HashSet<>();
int numOperands = operands.size();
for (int i = 1; i < numOperands; i++) {
inPredicateValuesSet.add(operands.get(i));
}
int numUniqueValues = inPredicateValuesSet.size();
if (numUniqueValues == 1 || numUniqueValues != numOperands - 1) {
// Recreate filter when the IN predicate contains only 1 value (can be rewritten to EQ predicate),
// or values can be de-duplicated
recreateFilter = true;
}
Set<Expression> values = valuesMap.get(lhs);
if (values == null) {
valuesMap.put(lhs, inPredicateValuesSet);
} else {
values.addAll(inPredicateValuesSet);
// Recreate filter when multiple predicates can be merged
recreateFilter = true;
}
valuesMap.compute(lhs, (k, v) -> {
if (v == null) {
Map<String, Expression> values = getInValues(operands);
int numUniqueValues = values.size();
if (numUniqueValues == 1 || numUniqueValues != operands.size() - 1) {
// Recreate filter when the IN predicate contains only 1 value (can be rewritten to EQ predicate), or
// values can be de-duplicated
recreateFilter[0] = true;
}
return values;
} else {
int numOperands = operands.size();
for (int i = 1; i < numOperands; i++) {
Expression value = operands.get(i);
// Use string value to de-duplicate the values to prevent the overhead of Expression.hashCode(). This
// is consistent with how server handles predicates.
String stringValue = RequestContextUtils.getStringValue(value);
v.put(stringValue, value);
}
// Recreate filter when multiple predicates can be merged
recreateFilter[0] = true;
return v;
}
});
} else {
newChildren.add(child);
}
}
}

if (recreateFilter) {
if (recreateFilter[0]) {
if (newChildren.isEmpty() && valuesMap.size() == 1) {
// Single range without other filters
Map.Entry<Expression, Set<Expression>> entry = valuesMap.entrySet().iterator().next();
return getFilterExpression(entry.getKey(), entry.getValue());
Map.Entry<Expression, Map<String, Expression>> entry = valuesMap.entrySet().iterator().next();
return getFilterExpression(entry.getKey(), entry.getValue().values());
} else {
for (Map.Entry<Expression, Set<Expression>> entry : valuesMap.entrySet()) {
newChildren.add(getFilterExpression(entry.getKey(), entry.getValue()));
for (Map.Entry<Expression, Map<String, Expression>> entry : valuesMap.entrySet()) {
newChildren.add(getFilterExpression(entry.getKey(), entry.getValue().values()));
}
function.setOperands(newChildren);
return filterExpression;
Expand All @@ -138,17 +150,12 @@ private Expression optimize(Expression filterExpression) {
return filterExpression;
} else if (operator.equals(FilterKind.IN.name())) {
List<Expression> operands = function.getOperands();
Expression lhs = operands.get(0);
Set<Expression> values = new HashSet<>();
int numOperands = operands.size();
for (int i = 1; i < numOperands; i++) {
values.add(operands.get(i));
}
Map<String, Expression> values = getInValues(operands);
int numUniqueValues = values.size();
if (numUniqueValues == 1 || numUniqueValues != numOperands - 1) {
// Recreate filter when the IN predicate contains only 1 value (can be rewritten to EQ predicate), or values
// can be de-duplicated
return getFilterExpression(lhs, values);
if (numUniqueValues == 1 || numUniqueValues != operands.size() - 1) {
// Recreate filter when the IN predicate contains only 1 value (can be rewritten to EQ predicate), or values can
// be de-duplicated
return getFilterExpression(operands.get(0), values.values());
} else {
return filterExpression;
}
Expand All @@ -157,10 +164,27 @@ private Expression optimize(Expression filterExpression) {
}
}

/**
* Helper method to get the values from the IN predicate. Returns a map from string representation of the value to the
* value.
*/
private Map<String, Expression> getInValues(List<Expression> operands) {
int numOperands = operands.size();
Map<String, Expression> values = Maps.newHashMapWithExpectedSize(numOperands - 1);
for (int i = 1; i < numOperands; i++) {
Expression value = operands.get(i);
// Use string value to de-duplicate the values to prevent the overhead of Expression.hashCode(). This is
// consistent with how server handles predicates.
String stringValue = RequestContextUtils.getStringValue(value);
values.put(stringValue, value);
}
return values;
}

/**
* Helper method to construct a EQ or IN predicate filter Expression from the given lhs and values.
*/
private static Expression getFilterExpression(Expression lhs, Set<Expression> values) {
private static Expression getFilterExpression(Expression lhs, Collection<Expression> values) {
int numValues = values.size();
if (numValues == 1) {
return RequestUtils.getFunctionExpression(FilterKind.EQUALS.name(), lhs, values.iterator().next());
Expand Down

0 comments on commit 9b96068

Please sign in to comment.