Skip to content

Commit

Permalink
Add missing numeric precisions in Hive type system (#501)
Browse files Browse the repository at this point in the history
* add precision for int and bigint for hive type system

* use static final variables for precisions

* spotless check

* fix converting type to typespec bug

* add UT

* add tinyint and smallint precisions

* spotless

* spotless

* spotless

* new UTs

* add tableInt to rel to trino tests

* spotless

* remove redundant test
  • Loading branch information
KevinGe00 authored Jul 25, 2024
1 parent f817412 commit d1d5b1e
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 13 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand All @@ -12,7 +12,7 @@
import org.apache.calcite.sql.type.SqlTypeUtil;


// Copied from Hive source code
// Precision and scale values copied from Hive source code
public class HiveTypeSystem extends RelDataTypeSystemImpl {
// TODO: This should come from type system; Currently there is no definition
// in type system for this.
Expand All @@ -29,6 +29,10 @@ public class HiveTypeSystem extends RelDataTypeSystemImpl {
private static final int DEFAULT_CHAR_PRECISION = 255;
private static final int MAX_BINARY_PRECISION = Integer.MAX_VALUE;
private static final int MAX_TIMESTAMP_PRECISION = 9;
private static final int DEFAULT_TINYINT_PRECISION = 3;
private static final int DEFAULT_SMALLINT_PRECISION = 5;
private static final int DEFAULT_INTEGER_PRECISION = 10;
private static final int DEFAULT_BIGINT_PRECISION = 19;

@Override
public int getMaxScale(SqlTypeName typeName) {
Expand Down Expand Up @@ -84,6 +88,14 @@ public int getDefaultPrecision(SqlTypeName typeName) {
case INTERVAL_MINUTE_SECOND:
case INTERVAL_SECOND:
return SqlTypeName.DEFAULT_INTERVAL_START_PRECISION;
case TINYINT:
return DEFAULT_TINYINT_PRECISION;
case SMALLINT:
return DEFAULT_SMALLINT_PRECISION;
case INTEGER:
return DEFAULT_INTEGER_PRECISION;
case BIGINT:
return DEFAULT_BIGINT_PRECISION;
default:
return -1;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -622,6 +622,44 @@ public void testNameSakeColumnNamesShouldGetUniqueIdentifiers() {
assertEquals(generated, expected);
}

@Test
public void testUnionIntAndBigInt() {
final String sql = "SELECT a FROM test.tableOne UNION ALL SELECT bigint_col FROM test.tableInt";
RelNode rel = converter.convertSql(sql);
assertEquals(rel.getRowType().getFieldCount(), 1);
// Should be BIGINT since it is a less restrictive type than INT
assertEquals(rel.getRowType().getFieldList().get(0).getType().getSqlTypeName(), SqlTypeName.BIGINT);
}

@Test
public void testUnionIntAndSmallInt() {
final String sql = "SELECT smallint_col FROM test.tableInt UNION ALL SELECT a FROM test.tableOne";
RelNode rel = converter.convertSql(sql);
assertEquals(rel.getRowType().getFieldCount(), 1);
// Should be INT since it is a less restrictive type than SMALLINT
assertEquals(rel.getRowType().getFieldList().get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER);
}

@Test
public void testUnionIntAndTinyInt() {
final String sql = "SELECT tinyint_col FROM test.tableInt UNION ALL SELECT a FROM test.tableOne";
RelNode rel = converter.convertSql(sql);
assertEquals(rel.getRowType().getFieldCount(), 1);
// Should be INT since it is a less restrictive type than TINYINT
assertEquals(rel.getRowType().getFieldList().get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER);
}

@Test
public void testIntCastToBigIntDuringComparison() {
// We're testing that a comparison between INT and BIGINT sees a cast on the more restrictive type to the
// less restrictive type and not the other way around. In other words, the INT is cast to BIGINT.
final String sql = "SELECT CASE WHEN int_col = bigint_col THEN 'abc' ELSE 'def' END FROM test.tableInt";
String expected = "LogicalProject(EXPR$0=[CASE(=(CAST($2):BIGINT, $3), 'abc', 'def')])\n"
+ " LogicalTableScan(table=[[hive, test, tableint]])\n";

assertEquals(relToString(sql), expected);
}

private String relToString(String sql) {
return RelOptUtil.toString(converter.convertSql(sql));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ public static TestHive setupDefaultHive(HiveConf conf) throws IOException {
driver.run("CREATE DATABASE IF NOT EXISTS test");
driver.run("CREATE TABLE IF NOT EXISTS test.tableOne(a int, b varchar(30), c double, d timestamp)");
driver.run("CREATE TABLE IF NOT EXISTS test.tableTwo(x int, y double)");
driver.run(
"CREATE TABLE IF NOT EXISTS test.tableInt(tinyint_col tinyint, smallint_col smallint, int_col int, bigint_col bigint)");

driver.run("CREATE DATABASE IF NOT EXISTS fuzzy_union");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,13 @@
import java.util.Set;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.sql.SqlBasicTypeNameSpec;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlDataTypeSpec;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.SqlTypeNameSpec;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.BasicSqlType;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.type.SqlTypeUtil;

import com.linkedin.coral.common.HiveTypeSystem;
import com.linkedin.coral.common.calcite.CalciteUtil;
Expand Down Expand Up @@ -122,12 +120,6 @@ protected SqlCall transform(SqlCall sqlCall) {
}

private SqlCall castOperand(SqlNode operand, RelDataType relDataType) {
return SqlStdOperatorTable.CAST.createCall(ZERO, operand, getSqlDataTypeSpecForCasting(relDataType));
}

private SqlDataTypeSpec getSqlDataTypeSpecForCasting(RelDataType relDataType) {
final SqlTypeNameSpec typeNameSpec = new SqlBasicTypeNameSpec(relDataType.getSqlTypeName(),
relDataType.getPrecision(), relDataType.getScale(), null, ZERO);
return new SqlDataTypeSpec(typeNameSpec, ZERO);
return SqlStdOperatorTable.CAST.createCall(ZERO, operand, SqlTypeUtil.convertTypeToSpec(relDataType));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,9 @@ public static void initializeTablesAndViews(HiveConf conf) throws HiveException,
run(driver,
"CREATE TABLE IF NOT EXISTS test.tableFour(icol int, scol string, acol array<string>, mcol map<string, string>)");

run(driver,
"CREATE TABLE IF NOT EXISTS test.tableInt(tinyint_col tinyint, smallint_col smallint, int_col int, bigint_col bigint)");

}

public static HiveConf loadResourceHiveConf() {
Expand Down

0 comments on commit d1d5b1e

Please sign in to comment.