Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
yashmayya committed Oct 9, 2024
1 parent e9a575f commit f05689d
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.apache.pinot.common.request.Expression;
import org.apache.pinot.common.request.ExpressionType;
import org.apache.pinot.common.request.Function;
import org.apache.pinot.common.request.Literal;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.data.Schema;
Expand Down Expand Up @@ -84,39 +83,37 @@ boolean canBeOptimized(Expression filterExpression, @Nullable Schema schema) {
Expression optimizeChild(Expression filterExpression, @Nullable Schema schema) {
Function function = filterExpression.getFunctionCall();
FilterKind kind = FilterKind.valueOf(function.getOperator());

if (!kind.isRange() && kind != FilterKind.EQUALS && kind != FilterKind.NOT_EQUALS) {
return filterExpression;
}

List<Expression> operands = function.getOperands();
// Verify that LHS is a numeric column and RHS is a literal before rewriting.
Expression lhs = operands.get(0);
Expression rhs = operands.get(1);

DataType dataType = getDataType(lhs, schema);
if (dataType == null || !dataType.isNumeric() || !rhs.isSetLiteral()) {
// No rewrite here
return filterExpression;
}

switch (kind) {
case BETWEEN: {
// Verify that value is a numeric column before rewriting.
List<Expression> operands = function.getOperands();
Expression value = operands.get(0);
DataType dataType = getDataType(value, schema);
if (dataType != null && dataType.isNumeric()) {
return rewriteBetweenExpression(filterExpression, dataType);
}
break;
return rewriteBetweenExpression(filterExpression, dataType);
}
case EQUALS:
case NOT_EQUALS:
case GREATER_THAN:
case GREATER_THAN_OR_EQUAL:
case LESS_THAN:
case LESS_THAN_OR_EQUAL: {
List<Expression> operands = function.getOperands();
// Verify that LHS is a numeric column and RHS is a numeric literal before rewriting.
Expression lhs = operands.get(0);
Expression rhs = operands.get(1);

if (isNumericLiteral(rhs)) {
DataType dataType = getDataType(lhs, schema);
if (dataType != null && dataType.isNumeric()) {
if (kind.isRange()) {
return rewriteRangeExpression(filterExpression, kind, dataType, rhs);
} else {
return rewriteEqualsExpression(filterExpression, kind, dataType, rhs);
}
}
if (kind.isRange()) {
return rewriteRangeExpression(filterExpression, kind, dataType, rhs);
} else {
return rewriteEqualsExpression(filterExpression, kind, dataType, rhs);
}
break;
}
default:
break;
Expand Down Expand Up @@ -373,6 +370,8 @@ private static Expression rewriteBetweenExpression(Expression between, DataType
Expression lower = operands.get(1);
Expression upper = operands.get(2);

// The BETWEEN filter predicate currently only supports literals as lower and upper bounds, but we're still checking
// here just in case.
if (lower.isSetLiteral()) {
switch (lower.getLiteral().getSetField()) {
case LONG_VALUE: {
Expand Down Expand Up @@ -524,9 +523,9 @@ private static void rewriteRangeOperator(Expression range, FilterKind kind, int
if (comparison > 0) {
// Literal value is greater than the converted value, so rewrite:
// "column > literal" to "column > converted"
// "column >= literal" to "column >= converted"
// "column >= literal" to "column > converted"
// "column < literal" to "column <= converted"
// "column <= literal" to "column < converted"
// "column <= literal" to "column <= converted"
if (kind == FilterKind.GREATER_THAN || kind == FilterKind.GREATER_THAN_OR_EQUAL) {
range.getFunctionCall().setOperator(FilterKind.GREATER_THAN.name());
} else if (kind == FilterKind.LESS_THAN || kind == FilterKind.LESS_THAN_OR_EQUAL) {
Expand Down Expand Up @@ -581,21 +580,4 @@ private static DataType getDataType(Expression expression, Schema schema) {
}
return null;
}

/** @return true if expression is a numeric literal; otherwise, false. */
private static boolean isNumericLiteral(Expression expression) {
if (expression.getType() == ExpressionType.LITERAL) {
Literal._Fields type = expression.getLiteral().getSetField();
switch (type) {
case INT_VALUE:
case LONG_VALUE:
case FLOAT_VALUE:
case DOUBLE_VALUE:
return true;
default:
break;
}
}
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,17 @@ public void testBetweenRewrites() {
// Test INT column with DOUBLE upper bound lesser than Integer.MIN_VALUE.
Assert.assertEquals(rewrite("SELECT * FROM testTable WHERE intColumn BETWEEN -4000000000.0 AND -3000000000.0"),
"Expression(type:LITERAL, literal:<Literal boolValue:false>)");
// Test INT column with LONG lower bound lesser than Integer.MIN_VALUE.
Assert.assertEquals(rewrite("SELECT * FROM testTable WHERE intColumn BETWEEN -4000000000 AND 0"),
"Expression(type:FUNCTION, functionCall:Function(operator:BETWEEN, operands:[Expression(type:IDENTIFIER, "
+ "identifier:Identifier(name:intColumn)), Expression(type:LITERAL, literal:<Literal "
+ "intValue:-2147483648>), Expression(type:LITERAL, literal:<Literal intValue:0>)]))");
// Test INT column with LONG upper bound greater than Integer.MAX_VALUE.
Assert.assertEquals(rewrite("SELECT * FROM testTable WHERE intColumn BETWEEN 0 AND 4000000000"),
"Expression(type:FUNCTION, functionCall:Function(operator:BETWEEN, operands:[Expression(type:IDENTIFIER, "
+ "identifier:Identifier(name:intColumn)), Expression(type:LITERAL, literal:<Literal "
+ "intValue:0>), Expression(type:LITERAL, literal:<Literal intValue:2147483647>)]))");

// Test LONG column with DOUBLE lower bound greater than Long.MAX_VALUE.
Assert.assertEquals(
rewrite("SELECT * FROM testTable WHERE longColumn BETWEEN 9323372036854775808.0 AND 9323372036854775809.0"),
Expand All @@ -324,6 +335,16 @@ public void testBetweenRewrites() {
Assert.assertEquals(
rewrite("SELECT * FROM testTable WHERE longColumn BETWEEN -9323372036854775809.0 AND -9323372036854775808.0"),
"Expression(type:LITERAL, literal:<Literal boolValue:false>)");
// Test LONG column with DOUBLE lower bound lesser than Long.MIN_VALUE.
Assert.assertEquals(rewrite("SELECT * FROM testTable WHERE longColumn BETWEEN -9323372036854775809.0 AND 0"),
"Expression(type:FUNCTION, functionCall:Function(operator:BETWEEN, operands:[Expression(type:IDENTIFIER, "
+ "identifier:Identifier(name:longColumn)), Expression(type:LITERAL, literal:<Literal "
+ "longValue:-9223372036854775808>), Expression(type:LITERAL, literal:<Literal intValue:0>)]))");
// Test LONG column with DOUBLE upper bound greater than Long.MAX_VALUE.
Assert.assertEquals(rewrite("SELECT * FROM testTable WHERE longColumn BETWEEN 0 AND 9323372036854775808.0"),
"Expression(type:FUNCTION, functionCall:Function(operator:BETWEEN, operands:[Expression(type:IDENTIFIER, "
+ "identifier:Identifier(name:longColumn)), Expression(type:LITERAL, literal:<Literal intValue:0>), "
+ "Expression(type:LITERAL, literal:<Literal longValue:9223372036854775807>)]))");

// Test DOUBLE literal rewrite for INT and LONG columns.
Assert.assertEquals(rewrite("SELECT * FROM testTable WHERE intColumn BETWEEN 2.5 AND 7.5"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1005,16 +1005,6 @@ public QueryFragment generatePredicate(String columnName, boolean useMultistageE
List<String> columnValues = _columnToValueList.get(columnName);
String leftValue = pickRandom(columnValues);
String rightValue = pickRandom(columnValues);

if (_singleValueNumericalColumnNames.contains(columnName)) {
// For numerical columns, make sure leftValue < rightValue.
if (Double.parseDouble(leftValue) > Double.parseDouble(rightValue)) {
String temp = leftValue;
leftValue = rightValue;
rightValue = temp;
}
}

return new StringQueryFragment(
String.format("%s BETWEEN %s AND %s", columnName, leftValue, rightValue),
String.format("`%s` BETWEEN %s AND %s", columnName, leftValue, rightValue));
Expand Down

0 comments on commit f05689d

Please sign in to comment.