Skip to content

Commit

Permalink
HPCC-30735 Check regular expressions are valid at compile time
Browse files Browse the repository at this point in the history
Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
  • Loading branch information
ghalliday committed Nov 7, 2023
1 parent c852637 commit 2d88fa2
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ecl/hql/hqlerrors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@
#define HQLWRN_MergeInputIncompatible 3163
#define HQLWRN_MergeInputLastMissing 3164
#define HQLERR_MissingDelayedMember 3165
#define HQLERR_InvalidRegex 3166

#define HQLERR_DedupFieldNotFound_Text "Field removed from dedup could not be found"
#define HQLERR_CycleWithModuleDefinition_Text "Module definition contains an illegal cycle/recursive definition %s"
Expand Down Expand Up @@ -579,6 +580,7 @@
#define HQLERR_AlienUseData_Text "More efficient to use a DATA field than this custom alien type"
#define HQLERR_LibraryParamNoFunctions_Text "Library parameter '%s' cannot be a function"
#define HQLERR_MissingDelayedMember_Text "Module %s does not contain a member %s"
#define HQLERR_InvalidRegex_Text "%s"

/* parser error */
#define ERR_PARSER_CANNOTRECOVER 3005 /* The parser can not recover from previous error(s) */
Expand Down
1 change: 1 addition & 0 deletions ecl/hql/hqlgram.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ class HqlGram : implements IErrorReceiver, public CInterface
IHqlExpression * getTargetPlatformExpr();
void normalizeExpression(attribute & expr);
void normalizeExpression(attribute & expr, type_t expectedType, bool isConstant, bool callAllowed=true);
void checkRegex(const attribute & pattern);

IHqlExpression * createListFromExprArray(const attribute & errpos, HqlExprArray & args);
IHqlExpression * normalizeExprList(const attribute & errpos, const HqlExprArray & values);
Expand Down
3 changes: 3 additions & 0 deletions ecl/hql/hqlgram.y
Original file line number Diff line number Diff line change
Expand Up @@ -6423,6 +6423,7 @@ primexpr1
| REGEXFIND '(' expression ',' expression regexOpt ')'
{
parser->normalizeExpression($3, type_stringorunicode, false);
parser->checkRegex($3);
if(isUnicodeType($3.queryExprType()))
parser->normalizeExpression($5, type_unicode, false);
else
Expand All @@ -6432,6 +6433,7 @@ primexpr1
| REGEXFIND '(' expression ',' expression ',' expression regexOpt ')'
{
parser->normalizeExpression($3, type_stringorunicode, false);
parser->checkRegex($3);
Owned<ITypeInfo> subType;
if(isUnicodeType($3.queryExprType()))
{
Expand All @@ -6449,6 +6451,7 @@ primexpr1
| REGEXFINDSET '(' expression ',' expression regexOpt ')'
{
parser->normalizeExpression($3, type_stringorunicode, false);
parser->checkRegex($3);
Owned<ITypeInfo> retType;
if(isUnicodeType($3.queryExprType()))
{
Expand Down
11 changes: 11 additions & 0 deletions ecl/hql/hqlgram2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4070,6 +4070,17 @@ void HqlGram::checkMaxCompatible(IHqlExpression * sortOrder, IHqlExpression * va
}


void HqlGram::checkRegex(const attribute & pattern)
{
Owned<IException> e = checkRegexSyntax(pattern.queryExpr());
if (e)
{
StringBuffer msg;
e->errorMessage(msg);
reportError(HQLERR_InvalidRegex, pattern.pos, HQLERR_InvalidRegex_Text, msg.str());
}
}

static void extractExtraImports(HqlExprCopyArray & extra, HqlLookupContext & lookupCtx, IHqlScope * scope, IHqlScope * baseScope)
{
HqlExprArray symbols;
Expand Down
34 changes: 34 additions & 0 deletions ecl/hql/hqlutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10698,3 +10698,37 @@ IHqlExpression * queryAttributeModifier(ITypeInfo * type, IAtom * name)
}
return nullptr;
}


//Check whether a constant regular expression pattern can be compiled.
//
//  expr    - the pattern expression; may be null.
//  returns - nullptr if expr is null, has no constant value, or the pattern compiles cleanly;
//            otherwise the IException that was caught while casting/compiling the pattern.
//            The callers visible in this change take ownership of the result via Owned<IException>.
IException * checkRegexSyntax(IHqlExpression * expr)
{
    if (!expr)
        return nullptr;

    //Only patterns with a constant value can be checked ahead of time.
    IValue * constant = expr->queryValue();
    if (!constant)
        return nullptr;

    try
    {
        if (!isUnicodeType(expr->queryType()))
        {
            //Non-unicode pattern: cast to a var-string of unknown length and compile it.
            Owned<ITypeInfo> varStringType = makeVarStringType(UNKNOWN_LENGTH);
            Owned<IValue> patternValue = constant->castTo(varStringType);
            const char * patternText = (const char *)patternValue->queryValue();
            ICompiledStrRegExpr * regex = rtlCreateCompiledStrRegExpr(patternText, false);
            rtlDestroyCompiledStrRegExpr(regex);
        }
        else
        {
            //Unicode pattern: cast to a var-unicode of unknown length and compile it.
            Owned<ITypeInfo> varUnicodeType = makeVarUnicodeType(UNKNOWN_LENGTH, nullptr);
            Owned<IValue> patternValue = constant->castTo(varUnicodeType);
            const UChar * patternText = (const UChar *)patternValue->queryValue();
            ICompiledUStrRegExpr * regex = rtlCreateCompiledUStrRegExpr(patternText, false);
            rtlDestroyCompiledUStrRegExpr(regex);
        }
    }
    catch (IException * e)
    {
        //Hand the failure back to the caller instead of letting it propagate.
        return e;
    }

    return nullptr;
}
3 changes: 3 additions & 0 deletions ecl/hql/hqlutil.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,9 @@ extern HQL_API bool splitResultValue(SharedHqlExpr & dataset, SharedHqlExpr & at
extern HQL_API bool isDependentOnParameter(IHqlExpression * expr);
extern HQL_API bool isTimed(IHqlExpression * expr);

//Check the syntax of a regular expression; returns an exception if it is invalid, otherwise nullptr
extern HQL_API IException * checkRegexSyntax(IHqlExpression * expr);

inline bool isInternalEmbedAttr(IAtom *name)
{
return name == languageAtom || name == projectedAtom || name == streamedAtom || name == _linkCounted_Atom ||
Expand Down
11 changes: 11 additions & 0 deletions ecl/hqlcpp/hqlhtcpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18454,6 +18454,17 @@ IHqlExpression * HqlCppTranslator::doBuildRegexCompileInstance(BuildCtx & ctx, I
match = declareCtx->queryMatchExpr(searchKey);
if (match)
return match->queryExpr();

//Most regexes will have been checked in the parser, but not if it is the result of a constant fold.
//Check again because an error thrown in a static constructor of a dynamically loaded dll can cause
//the process to abort HPCC-30735
Owned<IException> e = checkRegexSyntax(pattern);
if (e)
{
StringBuffer msg;
e->errorMessage(msg);
reportError(queryLocation(pattern), ECODETEXT(HQLERR_InvalidRegex), msg.str());
}
}
}
else
Expand Down
22 changes: 22 additions & 0 deletions ecl/regress/badregexfind.ecl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*##############################################################################

HPCC SYSTEMS software Copyright (C) 2023 HPCC Systems®.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
############################################################################## */

// Regression test for HPCC-30735: an invalid regular expression literal passed to regexfind.
// NOTE(review): the search text is a stored value, presumably so it cannot be constant folded - confirm.
string searchString := '' : stored('searchString');
unicode searchUnicode := U'' : stored('searchUnicode');

// '[dolly' opens a character class that is never closed, so the pattern is invalid.
// Both the string and the unicode form of the pattern are exercised.
regexfind('[dolly', searchString);
regexfind(U'[dolly', searchUnicode);
23 changes: 23 additions & 0 deletions ecl/regress/badregexfind2.ecl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*##############################################################################

HPCC SYSTEMS software Copyright (C) 2023 HPCC Systems®.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
############################################################################## */

// Regression test for HPCC-30735: the invalid pattern is supplied through a function argument.
string searchString := '' : stored('searchString');
// searchUnicode is declared but not referenced by the statements below.
unicode searchUnicode := U'' : stored('searchUnicode');

// The pattern is the parameter p rather than a literal at the regexfind call.
// NOTE(review): presumably this covers patterns that only become constant after the call is
// expanded/folded, i.e. the check made outside the parser - confirm.
f(string p) := regexfind(p, searchString);

// '[dolly' opens a character class that is never closed, so the pattern is invalid.
f('[dolly');

0 comments on commit 2d88fa2

Please sign in to comment.