Skip to content

Commit

Permalink
Validate the header before reading a large grammar file.
Browse files Browse the repository at this point in the history
Saves some allocations if the file to load is not a grammar file.
This is done only on modern frameworks.
  • Loading branch information
teo-tsirpanis committed Oct 31, 2023
1 parent 1d20533 commit 1f802ea
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 3 deletions.
46 changes: 46 additions & 0 deletions src/FarkleNeo/Compatibility/StreamCompat.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright © Theodore Tsirpanis and Contributors.
// SPDX-License-Identifier: MIT

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#if (NETCOREAPP || NETSTANDARD2_1_OR_GREATER) && !NET7_0_OR_GREATER
namespace System.IO
{
    /// <summary>
    /// Polyfills for the <c>ReadAtLeast</c> and <c>ReadExactly</c> helpers of
    /// <see cref="Stream"/> on frameworks that do not provide them as instance methods.
    /// </summary>
    internal static class StreamCompat
    {
        /// <summary>
        /// Reads from <paramref name="stream"/> into <paramref name="buffer"/> until at least
        /// <paramref name="minimumLength"/> bytes have been read or the stream ends.
        /// </summary>
        /// <param name="stream">The stream to read from.</param>
        /// <param name="buffer">The destination of the bytes that get read.</param>
        /// <param name="minimumLength">The minimum number of bytes to read.
        /// Must be between zero and the length of <paramref name="buffer"/>.</param>
        /// <param name="throwOnEndOfStream">Whether to throw when the stream ends before
        /// <paramref name="minimumLength"/> bytes were read. Defaults to <see langword="true"/>,
        /// mirroring the framework-provided method of the same name.</param>
        /// <returns>The total number of bytes written to <paramref name="buffer"/>. It is smaller than
        /// <paramref name="minimumLength"/> only if the stream ended early and
        /// <paramref name="throwOnEndOfStream"/> is <see langword="false"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="minimumLength"/>
        /// is negative or greater than the length of <paramref name="buffer"/>.</exception>
        /// <exception cref="EndOfStreamException">The stream ended before <paramref name="minimumLength"/>
        /// bytes were read and <paramref name="throwOnEndOfStream"/> is <see langword="true"/>.</exception>
        public static int ReadAtLeast(this Stream stream, Span<byte> buffer, int minimumLength, bool throwOnEndOfStream = true)
        {
            // The unsigned comparison rejects both negative values and values past the end of the buffer.
            if ((uint)minimumLength > (uint)buffer.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(minimumLength));
            }

            int totalRead = 0;
            while (totalRead < minimumLength)
            {
                int n = stream.Read(buffer[totalRead..]);
                if (n == 0)
                {
                    // A zero-length read with a non-empty destination means the stream has ended.
                    if (throwOnEndOfStream)
                    {
                        throw new EndOfStreamException();
                    }
                    break;
                }
                totalRead += n;
            }
            return totalRead;
        }

        /// <summary>
        /// Reads from <paramref name="stream"/> until <paramref name="buffer"/> is completely filled.
        /// </summary>
        /// <param name="stream">The stream to read from.</param>
        /// <param name="buffer">The buffer to fill.</param>
        /// <exception cref="EndOfStreamException">The stream ended
        /// before <paramref name="buffer"/> was filled.</exception>
        public static void ReadExactly(this Stream stream, Span<byte> buffer)
        {
            // Filling the buffer is the same as reading at least as many bytes as it can hold.
            _ = ReadAtLeast(stream, buffer, buffer.Length, throwOnEndOfStream: true);
        }
    }
}
#endif
24 changes: 21 additions & 3 deletions src/FarkleNeo/Grammars/Grammar.cs
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,27 @@ internal static Grammar CreateUnsafe(ImmutableArray<byte> grammarData)
public static Grammar CreateFromFile(string path)
{
    // Loads the whole file at the given path into memory and creates a grammar from its bytes.
    ArgumentNullExceptionCompat.ThrowIfNull(path);
    ImmutableArray<byte> data;
#if NETCOREAPP || NETSTANDARD2_1_OR_GREATER
    // If the file is very big, read only a part of it to make
    // sure it has a valid header, before reading the entire file.
    using (Stream file = File.OpenRead(path))
    {
        // Query the length once so that the size check and the allocation below agree.
        long length = file.Length;
        if (length > 4096)
        {
            Span<byte> buffer = stackalloc byte[GrammarHeader.MinHeaderDisambiguatorSize];
            // ReadExactly throws if the file ends before the buffer is filled, so
            // the header is never validated against a partially written buffer.
            file.ReadExactly(buffer);
            GrammarHeader header = GrammarHeader.Read(buffer);
            ValidateHeader(header);
            // Rewind so that the full read below starts from the beginning of the file.
            file.Position = 0;
        }
        byte[] dataArray = new byte[length];
        file.ReadExactly(dataArray);
        // The array is not used again after this point, so it can be wrapped without copying.
        data = ImmutableCollectionsMarshal.AsImmutableArray(dataArray);
    }
#else
    data = ImmutableCollectionsMarshal.AsImmutableArray(File.ReadAllBytes(path));
#endif
    return Create(data);
}

Expand Down
17 changes: 17 additions & 0 deletions src/FarkleNeo/Grammars/GrammarHeader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: MIT

using Farkle.Buffers;
using System.Diagnostics;
using System.Runtime.InteropServices;
using static Farkle.Grammars.GrammarConstants;

Expand All @@ -15,6 +16,22 @@ internal readonly struct GrammarHeader(ushort versionMajor, ushort versionMinor,
/// </summary>
private const int VersionIndependentHeaderSize = sizeof(ulong) + 2 * sizeof(ushort);

/// <summary>
/// The minimum number of bytes that must be read from the beginning of
/// a Farkle grammar file in order to tell which type of file it is.
/// </summary>
public static int MinHeaderDisambiguatorSize
{
    get { return EgtNeoHeader.Length; }
}

#if DEBUG
// Debug-only check that MinHeaderDisambiguatorSize is at least as large as the
// version-independent header size and as each of the header constants below.
static GrammarHeader()
{
    int disambiguatorSize = MinHeaderDisambiguatorSize;
    Debug.Assert(VersionIndependentHeaderSize <= disambiguatorSize);
    Debug.Assert(CgtHeader.Length <= disambiguatorSize);
    Debug.Assert(EgtHeader.Length <= disambiguatorSize);
    Debug.Assert(EgtNeoHeader.Length <= disambiguatorSize);
}
#endif

public ushort VersionMajor { get; private init; } = versionMajor;
public ushort VersionMinor { get; private init; } = versionMinor;
public uint StreamCount { get; private init; } = streamCount;
Expand Down

0 comments on commit 1f802ea

Please sign in to comment.