Skip to content

Commit

Permalink
OF-2628: Avoid polynomial regular expression used on uncontrolled data
Browse files Browse the repository at this point in the history
Prevent possibility of regular expression execution depending on user-provided values taking exceptionally long / many resources to complete

This commit replaces the regular expression with a solution that doesn't use regular expressions. Arguably, the complexity of this code does not change much.
  • Loading branch information
guusdk committed Jul 18, 2023
1 parent 62be095 commit f290e0c
Showing 1 changed file with 31 additions and 31 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2005-2008 Jive Software. All rights reserved.
* Copyright (C) 2005-2008 Jive Software, 2023 Ignite Realtime Foundation. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,8 +23,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.mina.core.buffer.IoBuffer;
import org.apache.mina.filter.codec.ProtocolDecoderException;
Expand All @@ -44,9 +42,6 @@
* @author Gaston Dombiak
*/
class XMLLightweightParser {

private static final Pattern XML_HAS_CHARREF = Pattern.compile("&#(0*([0-9]+)|[xX]0*([0-9a-fA-F]+));");

private static final String MAX_PROPERTY_NAME = "xmpp.parser.buffer.size";
private static int maxBufferSize;
// Chars that rappresent CDATA section start
Expand Down Expand Up @@ -400,37 +395,41 @@ else if (ch == '/' && head.length() > 0) {
* The input string
* @return {@code true} if the input string contains an invalid numeric character reference, {@code false}
* otherwise.
* @see http://www.w3.org/TR/2008/REC-xml-20081126/#dt-charref
* @see <a href="https://www.w3.org/TR/2008/REC-xml-20081126/#dt-charref">Definition of a character reference</a>
*/
public static boolean hasIllegalCharacterReferences(String string) {
// If there's no character reference, don't bother to do more specific checking.
final Matcher matcher = XML_HAS_CHARREF.matcher(string);

while (matcher.find()) {
final String decValue = matcher.group(2);
if (decValue != null) {
final int value = Integer.parseInt(decValue);
if (!isLegalXmlCharacter(value)) {
return true;
} else {
continue;
}
int needle = 0;
while (needle < string.length()) {
final int start = string.indexOf("&#", needle);
if (start == -1) {
return false;
}
final int end = string.indexOf(";", start + 2);
if (end == -1) {
return false;
}
needle = end;

final boolean isHex = string.charAt(start + 2) == 'x' || string.charAt(start + 2) == 'X';

final String candidate;
final int radix;
if (isHex) {
candidate = string.substring(start + 3, end);
radix = 16;
} else {
candidate = string.substring(start + 2, end);
radix = 10;
}

final String hexValue = matcher.group(3);
if (hexValue != null) {
final int value = Integer.parseInt(hexValue, 16);
try {
final int value = Integer.parseInt(candidate, radix);
if (!isLegalXmlCharacter(value)) {
return true;
} else {
continue;
}
} catch (NumberFormatException e) {
// The 'candidate' value wasn't a numeric character reference.
}

// This is bad. The XML_HAS_CHARREF expression should have a hit for either the decimal
// or the heximal notation.
throw new IllegalStateException(
"An error occurred while searching for illegal character references in the value [" + string + "].");
}

return false;
Expand All @@ -442,8 +441,9 @@ public static boolean hasIllegalCharacterReferences(String string) {
*
* @param value
* the codepoint
* @return {@code true} if the codepoint is a valid charater per XML 1.0 definition, {@code false} otherwise.
* @see http://www.w3.org/TR/2008/REC-xml-20081126/#NT-Char
* @return {@code true} if the codepoint is a valid character per XML 1.0 definition, {@code false} otherwise.
*
* @see <a href="https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Char">Definition of a characters range</a>
*/
public static boolean isLegalXmlCharacter(int value) {
return value == 0x9 || value == 0xA || value == 0xD || (value >= 0x20 && value <= 0xD7FF)
Expand Down

0 comments on commit f290e0c

Please sign in to comment.