Skip to content

Commit

Permalink
Handle invalid XML chars (#838)
Browse files Browse the repository at this point in the history
  • Loading branch information
martinothamar authored Oct 21, 2024
1 parent 531a4f3 commit 209f36f
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 9 deletions.
35 changes: 27 additions & 8 deletions src/Altinn.App.Core/Helpers/ObjectUtils.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
using System.Collections;
using System.Reflection;
using System.Text.RegularExpressions;
using System.Xml.Serialization;

namespace Altinn.App.Core.Helpers;

/// <summary>
/// Utilities for working with model instances
/// </summary>
public static class ObjectUtils
public static partial class ObjectUtils
{
/// <summary>
/// Set empty Guid properties named "AltinnRowId" to a new random guid
Expand Down Expand Up @@ -137,14 +138,25 @@ public static void PrepareModelForXmlStorage(object model, int depth = 64)
SetToDefaultIfShouldSerializeFalse(model, prop, methodInfos);

// Set string properties with [XmlText] attribute to null if they are empty or whitespace
if (
value is string s
&& string.IsNullOrWhiteSpace(s)
&& prop.GetCustomAttribute<XmlTextAttribute>() is not null
)
if (value is string s)
{
// Ensure empty strings are set to null
prop.SetValue(model, null);
if (string.IsNullOrWhiteSpace(s) && prop.GetCustomAttribute<XmlTextAttribute>() is not null)
{
// Ensure empty strings are set to null
prop.SetValue(model, null);
}
else
{
if (prop.SetMethod is not null)
{
// If a property doesn't have a setter, it hopefully doesn't contain user input,
// and is therefore far less likely to have invalid XML chars. If it does contain them anyway,
// we will still just error out when serializing to XML

// Remove invalid xml characters
prop.SetValue(model, XmlInvalidCharsRegex().Replace(s, "\uFFFD")); // \uFFFD is the unicode replacement character �
}
}
}

// continue recursion over all properties that are NOT null or value types
Expand All @@ -155,6 +167,13 @@ value is string s
}
}

// Source-generated regex that matches the characters this class treats as invalid in XML text.
// Regex copied from: https://stackoverflow.com/a/961504
// Which is based on spec: https://www.w3.org/TR/xml/#charsets
//
// The pattern has three alternatives:
//   1. a low surrogate (U+DC00-U+DFFF) that is not preceded by a high surrogate,
//   2. a high surrogate (U+D800-U+DBFF) that is not followed by a low surrogate,
//   3. control characters U+0000-U+0008, U+000B, U+000C, U+000E-U+001F (tab, LF and CR are
//      deliberately left out of the class), U+007F-U+009F, U+FEFF, U+FFFE and U+FFFF.
//
// NOTE(review): the XML 1.0 Char production appears to permit U+007F-U+009F and U+FEFF
// (most of the former range is only "discouraged"), so this pattern looks stricter than the
// spec requires - confirm that replacing those characters as well is intended.
[GeneratedRegex(
@"(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F\uFEFF\uFFFE\uFFFF]"
)]
private static partial Regex XmlInvalidCharsRegex();

private static void SetToDefaultIfShouldSerializeFalse(object model, PropertyInfo prop, MethodInfo[] methodInfos)
{
string methodName = $"ShouldSerialize{prop.Name}";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public class StringMedORID
{ "\n\n", null },
{ "\n\na", "\n\na" },
{ "a\n\n", "a\n\n" },
{ "a\nb", "a\nb" }
{ "a\nb", "a\nb" },
};

[Theory]
Expand All @@ -132,6 +132,19 @@ public void TestPrepareForStorage(string? value, string? storedValue)
AssertObject(test, value, storedValue);
}

[Fact]
public void TestInvalidXmlCharsAreHandled()
{
    // Arrange: a quoted string holding U+0002 (start of text), and the same string
    // with that character swapped for U+FFFD (the unicode replacement character).
    const string withControlChar = "'\u0002'";
    const string withReplacementChar = "'\uFFFD'";
    var model = new YttersteObjekt { NormalString = withControlChar };

    // Act: prepare the model for XML storage, mutating it in place.
    ObjectUtils.PrepareModelForXmlStorage(model);

    // Assert: the control character has been replaced, the surrounding quotes are untouched.
    model.NormalString.Should().Be(withReplacementChar);
}

[Theory]
[MemberData(nameof(StringTests))]
public void TestSerializeDeserializeAsStorage(string? value, string? storedValue)
Expand Down

0 comments on commit 209f36f

Please sign in to comment.