Skip to content

Commit

Permalink
[CALCITE-5807] Add SUBSTRING_INDEX function (enabled in Spark library)
Browse files Browse the repository at this point in the history
  • Loading branch information
hujianhong committed Sep 6, 2024
1 parent 15f4ef9 commit 13ecfb0
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.STRCMP;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.STR_TO_MAP;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TAND;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.SUBSTRING_INDEX;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TANH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TIME;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TIMESTAMP;
Expand Down Expand Up @@ -923,6 +924,7 @@ Builder populate2() {
defineMethod(MAP_FROM_ARRAYS, BuiltInMethod.MAP_FROM_ARRAYS.method, NullPolicy.ANY);
defineMethod(MAP_FROM_ENTRIES, BuiltInMethod.MAP_FROM_ENTRIES.method, NullPolicy.STRICT);
map.put(STR_TO_MAP, new StringToMapImplementor());
defineMethod(SUBSTRING_INDEX, BuiltInMethod.SUBSTRING_INDEX.method, NullPolicy.STRICT);
map.put(ARRAY_CONCAT, new ArrayConcatImplementor());
map.put(SORT_ARRAY, new SortArrayImplementor());
final MethodImplementor isEmptyImplementor =
Expand Down
49 changes: 49 additions & 0 deletions core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -5893,6 +5893,55 @@ public static Map strToMap(String string, String stringDelimiter, String keyValu
return map;
}

/** Support the SUBSTRING_INDEX function (Spark semantics).
 *
 * <p>If {@code count} is positive, returns everything to the left of the
 * {@code count}-th occurrence of {@code delimiter}, counting from the left.
 * If {@code count} is negative, returns everything to the right of the
 * {@code |count|}-th occurrence, counting from the right. The match is
 * case-sensitive.
 *
 * @param string    String to search
 * @param delimiter Delimiter to look for
 * @param count     Number of delimiter occurrences to skip; the sign selects
 *                  the direction of the search
 * @return the selected substring; the empty string if {@code string} or
 *     {@code delimiter} is empty or {@code count} is 0; {@code string} itself
 *     if it contains fewer than {@code |count|} occurrences of the delimiter
 */
public static String substringIndex(String string, String delimiter, int count) {
  // An empty delimiter can never delimit anything; like Spark, return "".
  if (string.isEmpty() || delimiter.isEmpty() || count == 0) {
    return "";
  }
  // Widen before taking the absolute value: -Integer.MIN_VALUE overflows int.
  long remaining = Math.abs((long) count);
  if (count > 0) {
    int idx = -1;
    while (remaining > 0) {
      idx = string.indexOf(delimiter, idx + 1);
      if (idx < 0) {
        // can not find enough delimiters
        return string;
      }
      remaining--;
    }
    // substring(0, 0) already yields "", so no special case for idx == 0.
    return string.substring(0, idx);
  }
  // Start one past the right-most position at which the delimiter could begin.
  int idx = string.length() - delimiter.length() + 1;
  while (remaining > 0) {
    idx = rfind(string, delimiter, idx - 1);
    if (idx < 0) {
      // can not find enough delimiters
      return string;
    }
    remaining--;
  }
  // substring(length) already yields "", so no special case for a trailing match.
  return string.substring(idx + delimiter.length());
}

/** Finds the right-most occurrence of {@code delim} in {@code string} that
 * begins at or before index {@code start}.
 *
 * @return the index at which that occurrence begins, or -1 if there is none
 *     (including when {@code start} is negative)
 */
private static int rfind(String string, String delim, int start) {
  // lastIndexOf reports the actual match position; scanning forward with
  // indexOf would accept a match that begins after 'start'.
  return string.lastIndexOf(delim, start);
}

/** Support the SLICE function. */
public static List slice(List list) {
List result = new ArrayList(list.size());
Expand Down
3 changes: 3 additions & 0 deletions core/src/main/java/org/apache/calcite/sql/SqlKind.java
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,9 @@ public enum SqlKind {
/** {@code STR_TO_MAP} function (Spark semantics). */
STR_TO_MAP,

/** {@code SUBSTRING_INDEX} function (Spark semantics). */
SUBSTRING_INDEX,

/** {@code REVERSE} function (SQL Server, MySQL). */
REVERSE,

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1755,6 +1755,14 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding
ReturnTypes.IDENTITY_TO_MAP_NULLABLE,
OperandTypes.STRING_OPTIONAL_STRING_OPTIONAL_STRING);

/** The "SUBSTRING_INDEX(string, delimiter, count)" function; enabled in the
 * Spark library.
 *
 * <p>Takes two string operands and an integer operand
 * ({@code OperandTypes.STRING_STRING_INTEGER}). The return type is inferred
 * by {@code ReturnTypes.ARG0_NULLABLE_VARYING}; presumably the type of the
 * first operand made variable-length, and nullable if any operand is
 * nullable (TODO confirm against {@code ReturnTypes}). */
@LibraryOperator(libraries = {SPARK})
public static final SqlFunction SUBSTRING_INDEX =
SqlBasicFunction.create(SqlKind.SUBSTRING_INDEX,
ReturnTypes.ARG0_NULLABLE_VARYING,
OperandTypes.STRING_STRING_INTEGER)
.withFunctionType(SqlFunctionCategory.STRING);

@LibraryOperator(libraries = {BIG_QUERY, MYSQL})
public static final SqlFunction REVERSE =
SqlBasicFunction.create(SqlKind.REVERSE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,7 @@ public enum BuiltInMethod {
MAP_FROM_ARRAYS(SqlFunctions.class, "mapFromArrays", List.class, List.class),
MAP_FROM_ENTRIES(SqlFunctions.class, "mapFromEntries", List.class),
STR_TO_MAP(SqlFunctions.class, "strToMap", String.class, String.class, String.class),
SUBSTRING_INDEX(SqlFunctions.class, "substringIndex", String.class, String.class, int.class),
SELECTIVITY(Selectivity.class, "getSelectivity", RexNode.class),
UNIQUE_KEYS(UniqueKeys.class, "getUniqueKeys", boolean.class),
AVERAGE_ROW_SIZE(Size.class, "averageRowSize"),
Expand Down
1 change: 1 addition & 0 deletions site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2864,6 +2864,7 @@ In the following:
| s | MAP_FROM_ARRAYS(array1, array2) | Returns a map created from an *array1* and *array2*. Note that the lengths of two arrays should be the same and calcite is using the LAST_WIN strategy
| s | MAP_FROM_ENTRIES(arrayOfRows) | Returns a map created from an array of rows with two fields. Note that the number of fields in a row must be 2. Note that Calcite is using the LAST_WIN strategy
| s | STR_TO_MAP(string [, stringDelimiter [, keyValueDelimiter]]) | Returns a map after splitting the *string* into key/value pairs using delimiters. Default delimiters are ',' for *stringDelimiter* and ':' for *keyValueDelimiter*. Note that calcite is using the LAST_WIN strategy
| s | SUBSTRING_INDEX(string, delim, count) | Returns the substring of *string* before *count* occurrences of the delimiter *delim*. If *count* is positive, returns everything to the left of the *count*-th delimiter (counting from the left). If *count* is negative, returns everything to the right of the abs(*count*)-th delimiter (counting from the right). Returns *string* unchanged if it contains fewer than abs(*count*) occurrences of *delim*, and an empty string if *count* is 0. The search for *delim* is case-sensitive.
| b m p r s | MD5(string) | Calculates an MD5 128-bit checksum of *string* and returns it as a hex string
| m | MONTHNAME(date) | Returns the name, in the connection's locale, of the month in *datetime*; for example, it returns '二月' for both DATE '2020-02-10' and TIMESTAMP '2020-02-10 10:10:10'
| o r s | NVL(value1, value2) | Returns *value1* if *value1* is not null, otherwise *value2*
Expand Down
43 changes: 43 additions & 0 deletions testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8725,6 +8725,49 @@ void checkArrayReverseFunc(SqlOperatorFixture f0, SqlFunction function,
f.checkNull("str_to_map('a:1,b:2,c:3', ',',null)");
}

/** Test case for
 * <a href="https://issues.apache.org/jira/browse/CALCITE-5807">[CALCITE-5807]
 * Add SUBSTRING_INDEX function (enabled in Spark library)</a>. */
@Test void testSubstringIndexFunc() {
  // Without the Spark library the function must not resolve.
  final SqlOperatorFixture base = fixture();
  base.setFor(SqlLibraryOperators.SUBSTRING_INDEX);
  base.checkFails("^substring_index('a', ',')^",
      "No match found for function signature SUBSTRING_INDEX\\("
          + "<CHARACTER>, <CHARACTER>\\)", false);

  final SqlOperatorFixture spark = base.withLibrary(SqlLibrary.SPARK);
  final String varchar14 = "VARCHAR(14) NOT NULL";
  final String varchar3 = "VARCHAR(3) NOT NULL";

  // Positive and negative counts, including a count equal to the number of
  // segments.
  spark.checkString("substring_index('www.apache.org', '.', 2)",
      "www.apache", varchar14);
  spark.checkString("substring_index('www.apache.org', '.', 1)",
      "www", varchar14);
  spark.checkString("substring_index('www.apache.org', '.', 3)",
      "www.apache.org", varchar14);
  spark.checkString("substring_index('www.apache.org', '.', -1)",
      "org", varchar14);

  // Delimiter matching is case-sensitive.
  spark.checkString("substring_index('aBc', 'B', -1)",
      "c", varchar3);
  spark.checkString("substring_index('aBc', 'b', -1)",
      "aBc", varchar3);

  // A count of zero yields the empty string.
  spark.checkString("substring_index('aBc', 'B', 0)",
      "", varchar3);
  spark.checkString("substring_index('aBc', 'b', 0)",
      "", varchar3);

  // Any null operand yields null.
  spark.checkNull("substring_index(cast(null as varchar(1)),"
      + " cast(null as varchar(1)), cast(null as integer))");
  spark.checkNull("substring_index(cast(null as varchar(1)),"
      + " cast(null as varchar(1)), 2)");
  spark.checkNull("substring_index('abc', cast(null as varchar(1)),"
      + " cast(null as integer))");
  spark.checkNull("substring_index(cast(null as varchar(1)), '.',"
      + " cast(null as integer))");
  spark.checkNull("substring_index('abc', '.', cast(null as integer))");
  spark.checkNull("substring_index('abc', cast(null as varchar(1)), 2)");
}

/** Tests {@code UNIX_SECONDS} and other datetime functions from BigQuery. */
@Test void testUnixSecondsFunc() {
SqlOperatorFixture f = fixture()
Expand Down

0 comments on commit 13ecfb0

Please sign in to comment.