Skip to content

Commit

Permalink
refactor: use tryFindOrCompile instead of findOrCompile for regex [1/…
Browse files Browse the repository at this point in the history
…n] (facebookincubator#12234)

Summary:

When a regex throws during evaluation, we can simply capture the error inside a status instead of throwing an exception, which is extremely expensive. This change just uses the pre-existing expected-style API (tryFindOrCompile) for error handling. This allows a query that timed out after 2 hours on an operator to finish in 47 minutes.

Differential Revision: D68983392
  • Loading branch information
yuandagits authored and facebook-github-bot committed Feb 1, 2025
1 parent 8a3aa63 commit c9920bc
Showing 1 changed file with 32 additions and 5 deletions.
37 changes: 32 additions & 5 deletions velox/functions/lib/Re2Functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,12 @@ class Re2Match final : public exec::VectorFunction {
exec::LocalDecodedVector toSearch(context, *args[0], rows);
exec::LocalDecodedVector pattern(context, *args[1], rows);
context.applyToSelectedNoThrow(rows, [&](vector_size_t row) {
auto& re = *cache_.findOrCompile(pattern->valueAt<StringView>(row));
auto tryRe = cache_.tryFindOrCompile(pattern->valueAt<StringView>(row));
if (tryRe.hasError()) {
context.setStatus(row, tryRe.error());
return;
}
const auto& re = *tryRe.value();
result.set(row, Fn(toSearch->valueAt<StringView>(row), re));
});
}
Expand Down Expand Up @@ -394,15 +399,27 @@ class Re2SearchAndExtract final : public exec::VectorFunction {
if (args.size() == 2) {
groups.resize(1);
context.applyToSelectedNoThrow(rows, [&](vector_size_t i) {
auto& re = *cache_.findOrCompile(pattern->valueAt<StringView>(i));
auto tryRe = cache_.tryFindOrCompile(pattern->valueAt<StringView>(i));
if (tryRe.hasError()) {
context.setStatus(i, tryRe.error());
return;
}
const auto& re = *tryRe.value();

mustRefSourceStrings |=
re2Extract(result, i, re, toSearch, groups, 0, emptyNoMatch_);
});
} else {
exec::LocalDecodedVector groupIds(context, *args[2], rows);
context.applyToSelectedNoThrow(rows, [&](vector_size_t i) {
const auto groupId = groupIds->valueAt<T>(i);
auto& re = *cache_.findOrCompile(pattern->valueAt<StringView>(i));
auto tryRe = cache_.tryFindOrCompile(pattern->valueAt<StringView>(i));
if (tryRe.hasError()) {
context.setStatus(i, tryRe.error());
return;
}

const auto& re = *tryRe.value();
checkForBadGroupId(groupId, re);
groups.resize(groupId + 1);
mustRefSourceStrings |=
Expand Down Expand Up @@ -1195,7 +1212,12 @@ class Re2ExtractAll final : public exec::VectorFunction {
//
groups.resize(1);
context.applyToSelectedNoThrow(rows, [&](vector_size_t row) {
auto& re = *cache_.findOrCompile(pattern->valueAt<StringView>(row));
auto tryRe = cache_.tryFindOrCompile(pattern->valueAt<StringView>(row));
if (tryRe.hasError()) {
context.setStatus(row, tryRe.error());
return;
}
const auto& re = *tryRe.value();
re2ExtractAll(resultWriter, re, inputStrs, row, groups, 0);
});
} else {
Expand All @@ -1204,7 +1226,12 @@ class Re2ExtractAll final : public exec::VectorFunction {
exec::LocalDecodedVector groupIds(context, *args[2], rows);
context.applyToSelectedNoThrow(rows, [&](vector_size_t row) {
const T groupId = groupIds->valueAt<T>(row);
auto& re = *cache_.findOrCompile(pattern->valueAt<StringView>(row));
auto tryRe = cache_.tryFindOrCompile(pattern->valueAt<StringView>(row));
if (tryRe.hasError()) {
context.setStatus(row, tryRe.error());
return;
}
const auto& re = *tryRe.value();
checkForBadGroupId(groupId, re);
groups.resize(groupId + 1);
re2ExtractAll(resultWriter, re, inputStrs, row, groups, groupId);
Expand Down

0 comments on commit c9920bc

Please sign in to comment.