Skip to content

Commit

Permalink
Merge pull request #255 from mhutch/csharp-type-validation
Browse files Browse the repository at this point in the history
Fix validation of Using items with generics and escaping
  • Loading branch information
mhutch authored Sep 30, 2024
2 parents 4bd136a + 9720806 commit 2375548
Show file tree
Hide file tree
Showing 12 changed files with 332 additions and 86 deletions.
28 changes: 28 additions & 0 deletions MonoDevelop.MSBuild.Tests/Analyzers/CoreDiagnosticTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -656,5 +656,33 @@ public void SdkNameExpressionWithProperty ()
ignoreDiagnostics: [CoreDiagnostics.UnwrittenProperty]
);
}

[Test]
public void InvalidCSharpType ()
{
var source = TextWithMarkers.Parse (@"<Project>
<PropertyGroup>
<SomeType>|Foo-Bar|</SomeType>
<SomeType>System.Collections.Generic.Dictionary%3CMicrosoft.NET.Sdk.WorkloadManifestReader.WorkloadId, Microsoft.NET.Sdk.WorkloadManifestReader.WorkloadDefinition%3E</SomeType>
</PropertyGroup>
</Project>", '|');

var schema = new MSBuildSchema {
new PropertyInfo ("SomeType", "", MSBuildValueKind.CSharpType)
};

var spans = source.GetMarkedSpans ('|');

var expected = new MSBuildDiagnostic (
CoreDiagnostics.InvalidCSharpType,
spans[0],
messageArgs: [ "Foo-Bar" ]
);

VerifyDiagnostics (source.Text, out _,
includeCoreDiagnostics: true,
expectedDiagnostics: [ expected ],
schema: schema);
}
}
}
9 changes: 8 additions & 1 deletion MonoDevelop.MSBuild/Language/CoreDiagnostics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -511,10 +511,17 @@ class CoreDiagnostics
public const string InvalidClrType_Id = nameof (InvalidClrType);
public static readonly MSBuildDiagnosticDescriptor InvalidClrType = new (
InvalidClrType_Id,
"Invalid qualified .NET type name",
"Invalid .NET type name",
"The value `{0}` is not a valid qualified .NET type name string",
MSBuildDiagnosticSeverity.Error);

public const string InvalidCSharpType_Id = nameof (InvalidCSharpType);
public static readonly MSBuildDiagnosticDescriptor InvalidCSharpType = new (
InvalidCSharpType_Id,
"Invalid C# type name",
"The value `{0}` is not a valid qualified C# type name string",
MSBuildDiagnosticSeverity.Error);

public const string InvalidClrTypeName_Id = nameof (InvalidClrTypeName);
public static readonly MSBuildDiagnosticDescriptor InvalidClrTypeName = new (
InvalidClrTypeName_Id,
Expand Down
18 changes: 14 additions & 4 deletions MonoDevelop.MSBuild/Language/Expressions/ExpressionText.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@ namespace MonoDevelop.MSBuild.Language.Expressions
[DebuggerDisplay ("{Value} (IsPure: {IsPure})")]
public class ExpressionText : ExpressionNode
{
/// <summary>
/// The raw value of this text, including any whitespace and XML entities and MSBuild escape sequences.
/// </summary>
// TODO: audit access to this and make sure they should not be calling GetUnescapedValue instead
public string Value { get; }

/// <summary>
/// Gets the unescaped value of this text, optionally trimming whitespace.
/// Gets the unescaped value of this text, optionally trimming whitespace. This unescapes
/// both XML entities and MSBuild %XX escape sequences.
/// </summary>
/// <param name="trim">Whether to trim leading and trailing whitespace.</param>
/// <param name="trimmedOffset">The offset of the text, taking optional trimming into account.</param>
Expand Down Expand Up @@ -41,17 +46,22 @@ public string GetUnescapedValue (bool trim, out int trimmedOffset, out int escap
trimmedOffset = start + Offset;
}

return XmlEscaping.UnescapeEntities (value);
// TODO: technically there could be escaped whitespace, should we trim it too?
var xmlUnescaped = XmlEscaping.UnescapeEntities (value);
var msbuildUnescaped = Util.MSBuildEscaping.UnescapeString (xmlUnescaped);

return msbuildUnescaped;
}

/// <summary>
/// Indicates whether this exists by itself, as opposed to being concatenated with other values.
/// </summary>
public bool IsPure { get; }

public ExpressionText (int offset, string value, bool isPure) : base (offset, value.Length)
// TODO: add ctor that takes unescaped value and audit callers
public ExpressionText (int offset, string rawValue, bool isPure) : base (offset, rawValue.Length)
{
Value = value;
Value = rawValue;
IsPure = isPure;
}

Expand Down
24 changes: 9 additions & 15 deletions MonoDevelop.MSBuild/Language/MSBuildDocumentValidator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -857,22 +857,28 @@ void VisitPureLiteral (MSBuildElementSyntax elementSyntax, MSBuildAttributeSynta
break;
}
case MSBuildValueKind.ClrNamespace:
if (!IsValidTypeOrNamespace (value, out _)) {
if (!TypeNameValidation.IsValidClrNamespace (value)) {
AddErrorWithArgs (CoreDiagnostics.InvalidClrNamespace, value);
}
break;

case MSBuildValueKind.ClrType:
if (!IsValidTypeOrNamespace (value, out _)) {
if (!TypeNameValidation.IsValidClrTypeOrNamespace (value, out _, out _)) {
AddErrorWithArgs (CoreDiagnostics.InvalidClrType, value);
}
break;

case MSBuildValueKind.ClrTypeName:
if (!(IsValidTypeOrNamespace (value, out int componentCount) && componentCount == 1)) {
if (!TypeNameValidation.IsValidClrTypeName (value)) {
AddErrorWithArgs (CoreDiagnostics.InvalidClrTypeName, value);
}
break;

case MSBuildValueKind.CSharpType:
if (!TypeNameValidation.IsValidCSharpTypeOrNamespace (value)) {
AddErrorWithArgs (CoreDiagnostics.InvalidCSharpType, value);
}
break;
}

void AddErrorWithArgs (MSBuildDiagnosticDescriptor d, params object[] args) => Diagnostics.Add (d, new TextSpan (trimmedOffset, escapedLength), args);
Expand All @@ -889,18 +895,6 @@ void AddMisspelledValueError (MSBuildDiagnosticDescriptor d, params object[] arg
args
);
}

static bool IsValidTypeOrNamespace (string value, out int componentCount)
{
string[] components = value.Split ('.');
componentCount = components.Length;
foreach (var component in components) {
if (!System.CodeDom.Compiler.CodeGenerator.IsValidLanguageIndependentIdentifier (component)) {
return false;
}
}
return true;
}
}

void CheckPropertyWrite (PropertyInfo resolvedProperty, TextSpan span)
Expand Down
230 changes: 230 additions & 0 deletions MonoDevelop.MSBuild/Language/TypeNameValidation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#if NETCOREAPP
#nullable enable
#endif

using System.Globalization;

namespace MonoDevelop.MSBuild.Language;

static class TypeNameValidation
{
public static bool IsValidCSharpTypeOrNamespace (string value) => IsValidCSharpTypeOrNamespace(value, out _, out _);

public static bool IsValidCSharpTypeOrNamespace (string value, out int componentCount, out bool isGenericType)
{
int offset = 0;
if (!ConsumeCSharpGenericTypeOrNamespace(value, ref offset, out componentCount, out isGenericType)) {
return false;
}
return offset == value.Length;
}


static bool ConsumeCSharpGenericTypeOrNamespace (string value, ref int offset, out int componentCount, out bool isGenericType)
{
isGenericType = false;

componentCount = ConsumeIdentifiers(value, ref offset);

if (componentCount == 0) {
return false;
}

if (offset == value.Length) {
return true;
}

// try to consume generics in the form SomeType<SomeOtherType,AnotherType>
if (value[offset] != '<') {
// complete but not a generic type, we are done consuming, caller decides what to do with next char
return true;
}
offset++;

isGenericType = true;
do {
// Ignore whitespace after a comma as that's pretty common e.g. SomeType<SomeOtherType, AnotherType>
// and is valid if this is going to be injected verbatim into a C# file.
// We could be more strict and disallow whitespace, but that will likely trip people up.
// We could also be more liberal and allow general whitespace around the angle brackets and commas,
// but that's a bit of a rabbit hole and not likely to be very useful.
if (value[offset] == ',') {
offset++;
ConsumeSpaces(value, ref offset);
}
if (!ConsumeCSharpGenericTypeOrNamespace(value, ref offset, out int consumedComponents, out _)) {
return false;
}
} while (value[offset] == ',');

if (offset < value.Length && value[offset] == '>') {
offset++;
return true;
}

return false;
}

static void ConsumeSpaces(string value, ref int offset)
{
while (value[offset] == ' ') {
offset++;
}
}

public static bool IsValidClrNamespace (string value) => IsValidClrTypeOrNamespace(value, out _, out var hasGenerics) && !hasGenerics;

public static bool IsValidClrTypeName (string value) => IsValidClrTypeOrNamespace(value, out var componentCount, out var hasGenerics) && componentCount == 1 && !hasGenerics;

public static bool IsValidClrTypeOrNamespace (string value) => IsValidClrTypeOrNamespace(value, out _, out _);

public static bool IsValidClrTypeOrNamespace (string value, out int componentCount, out bool isGenericType)
{
int offset = 0;
if (!ConsumeClrGenericTypeOrNamespace(value, ref offset, out componentCount, out isGenericType)) {
return false;
}
return offset == value.Length;
}

static bool ConsumeClrGenericTypeOrNamespace (string value, ref int offset, out int componentCount, out bool isGenericType)
{
isGenericType = false;

componentCount = ConsumeIdentifiers(value, ref offset);

if (componentCount == 0) {
return false;
}

if (offset == value.Length) {
return true;
}

// try to consume generics in the form SomeType`2[SomeOtherType,AnotherType]
if (value[offset] != '`') {
// complete but not a generic type, we are done consuming, caller decides what to do with next char
return true;
}
offset++;

int argCountOffset = offset;
if(!ConsumeNumbers(value, ref offset) || !int.TryParse(value.Substring(argCountOffset, offset - argCountOffset), out int genericArgCount)) {
return false;
}

isGenericType = true;
if (value[offset++] != '[') {
return false;
}

for (int i = 0; i < genericArgCount; i++) {
// recursively consume the generic type arguments
if (!ConsumeClrGenericTypeOrNamespace(value, ref offset, out int consumedComponents, out _)) {
return false;
}
if (i < genericArgCount - 1) {
if (offset < value.Length && value[offset] == ',') {
offset++;
} else {
return false;
}
}
}

if (offset < value.Length && value[offset] == ']') {
offset++;
return true;
}

return false;
}

static bool ConsumeNumbers (string value, ref int offset)
{
int start = offset;
while(char.IsDigit(value[offset])) {
offset++;
}
return offset > start;
}

static int ConsumeIdentifiers (string value, ref int offset)
{
int componentCount = 0;

while(ConsumeIdentifier(value, ref offset)) {
componentCount++;
if (offset >= value.Length || value[offset] != '.') {
return componentCount;
}
offset++;
}

if (componentCount > 0 && value[offset - 1] == '.') {
// we consumed a dot but no identifier followed it, so walk it back
offset--;
}

return componentCount;
}

static bool ConsumeIdentifier (string value, ref int offset)
{
bool nextMustBeStartChar = true;

while (offset < value.Length) {
char ch = value[offset];
// based on https://github.com/dotnet/runtime/blob/96be510135829ff199c6c341ad461c36bab4809e/src/libraries/Common/src/System/CSharpHelpers.cs#L141
UnicodeCategory uc = CharUnicodeInfo.GetUnicodeCategory(ch);
switch (uc) {
case UnicodeCategory.UppercaseLetter: // Lu
case UnicodeCategory.LowercaseLetter: // Ll
case UnicodeCategory.TitlecaseLetter: // Lt
case UnicodeCategory.ModifierLetter: // Lm
case UnicodeCategory.LetterNumber: // Lm
case UnicodeCategory.OtherLetter: // Lo
nextMustBeStartChar = false;
offset++;
continue;

case UnicodeCategory.NonSpacingMark: // Mn
case UnicodeCategory.SpacingCombiningMark: // Mc
case UnicodeCategory.ConnectorPunctuation: // Pc
case UnicodeCategory.DecimalDigitNumber: // Nd
// Underscore is a valid starting character, even though it is a ConnectorPunctuation.
if (nextMustBeStartChar && ch != '_')
return false;
offset++;
continue;

default:
// unlike CSharpHelpers.cs, don't check IsSpecialTypeChar here because we're only looking
// for identifier components that are valid for all languages, and generic syntax is
// handled separately.
//
break;
}
break;
}

// return true if we have been able to consume a valid identifier. it need
// not be the entire string.
return !nextMustBeStartChar;
}

public static bool IsValidCSharpType (string value, out int componentCount)
{
string[] components = value.Split ('.');
componentCount = components.Length;
foreach (var component in components) {
if (!System.CodeDom.Compiler.CodeGenerator.IsValidLanguageIndependentIdentifier (component)) {
return false;
}
}
return true;
}
}
Loading

0 comments on commit 2375548

Please sign in to comment.