Skip to content

Commit

Permalink
Always provide an meaningful exception message when CSV parsing fails
Browse files Browse the repository at this point in the history
When parsing failed the exception message was previous the generic "Exception of type 'Sylvan.Data.Csv.CsvFormatException' was thrown", which gave no clue as to what was wrong. Provide a good exception message in each case where this exception is thrown, including the position where parsing failed, a snippet of the input (which can be searched for to locate the problematic input easily) and what was wrong with it. Update unit tests to check for the new exception messages.
  • Loading branch information
loop-evgeny committed Jan 7, 2025
1 parent c14be86 commit 1864087
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 31 deletions.
49 changes: 33 additions & 16 deletions source/Sylvan.Data.Csv.Tests/CsvDataReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,8 @@ public void Quoted2()
[Fact]
public void MissingHeaders()
{
Assert.Throws<CsvMissingHeadersException>(() => CsvDataReader.Create(new StringReader("")));
var ex = Assert.Throws<CsvMissingHeadersException>(() => CsvDataReader.Create(new StringReader("")));
Assert.StartsWith("The CSV file does not have headers", ex.Message);
}

[Fact]
Expand Down Expand Up @@ -491,7 +492,8 @@ public void BufferTooSmall()
using var tr = File.OpenText("Data/Binary.csv");
var csv = CsvDataReader.Create(tr, opts);
csv.Read();
Assert.Throws<CsvRecordTooLargeException>(() => csv.Read());
var ex = Assert.Throws<CsvRecordTooLargeException>(() => csv.Read());
Assert.StartsWith("Row 2 was too large.", ex.Message);
}

[Fact]
Expand Down Expand Up @@ -871,6 +873,8 @@ public void BadQuoteFirstRow()
var ex = Assert.Throws<CsvFormatException>(() => csv.Read());

Assert.Equal(1, ex.RowNumber);
Assert.StartsWith("Found unexpected character 'C' in field 1 on row 1: C\n" + Environment.NewLine
+ "A delimiter (,), newline or EOF was expected", ex.Message);
}

[Fact]
Expand All @@ -879,6 +883,8 @@ public void BadQuoteHeader()
using var tr = new StringReader("Name,\"Va\"lue\nA,\"B\"C\n");
var ex = Assert.Throws<CsvFormatException>(() => CsvDataReader.Create(tr, new CsvDataReaderOptions { Schema = CsvSchema.Nullable }));
Assert.Equal(0, ex.RowNumber);
Assert.StartsWith("Found unexpected character 'l' in field 1 on row 0: lue\nA,\"B\"C\n" + Environment.NewLine
+ "A delimiter (,), newline or EOF was expected after a closing quote", ex.Message);
}

[Fact]
Expand All @@ -888,7 +894,9 @@ public void BadQuote()
var csv = CsvDataReader.Create(tr, new CsvDataReaderOptions { Schema = CsvSchema.Nullable });
Assert.True(csv.Read());
Assert.Equal("A\"\"B", csv.GetString(0));
Assert.Throws<CsvFormatException>(() => csv.Read());
var ex = Assert.Throws<CsvFormatException>(() => csv.Read());
Assert.StartsWith("Found unexpected character 'm' in field 1 on row 2: more,\n" + Environment.NewLine
+ "A delimiter (,), newline or EOF was expected", ex.Message);
}

[Fact]
Expand Down Expand Up @@ -1094,7 +1102,9 @@ public void QuoteHandling()
using var reader = new StringReader("Name\r\n\b\r\n\"quoted\"field,");
var csv = CsvDataReader.Create(reader);
Assert.True(csv.Read());
Assert.Throws<CsvFormatException>(() => csv.Read());
var ex = Assert.Throws<CsvFormatException>(() => csv.Read());
Assert.StartsWith("Found unexpected character 'f' in field 0 on row 2: field," + Environment.NewLine
+ "A delimiter (,), newline or EOF was expected after a closing quote", ex.Message);
}

[Fact]
Expand Down Expand Up @@ -1321,7 +1331,8 @@ public void TooLongHeaderThrows()
sw.Write(i);
}
var data = sw.ToString();
Assert.Throws<CsvRecordTooLargeException>(() => CsvDataReader.Create(new StringReader(data), new CsvDataReaderOptions { BufferSize = 0x1000 }));
var ex = Assert.Throws<CsvRecordTooLargeException>(() => CsvDataReader.Create(new StringReader(data), new CsvDataReaderOptions { BufferSize = 0x1000 }));
Assert.StartsWith("Row 0 was too large", ex.Message);
}


Expand All @@ -1343,7 +1354,8 @@ public void TooLongRecordThrows()
var data = sw.ToString();
var csv = CsvDataReader.Create(new StringReader(data), new CsvDataReaderOptions { BufferSize = 0x1000 });
csv.Read();
Assert.Throws<CsvRecordTooLargeException>(() => csv.Read());
var ex = Assert.Throws<CsvRecordTooLargeException>(() => csv.Read());
Assert.StartsWith("Row 2 was too large", ex.Message);
}

[Fact]
Expand Down Expand Up @@ -1440,7 +1452,8 @@ public void Issue_79_NoHeaders()
[Fact]
public void EmptyHeader()
{
Assert.Throws<CsvMissingHeadersException>(() => CsvDataReader.Create(new StringReader("")));
var ex = Assert.Throws<CsvMissingHeadersException>(() => CsvDataReader.Create(new StringReader("")));
Assert.StartsWith("The CSV file does not have headers", ex.Message);
}

[Fact]
Expand All @@ -1454,7 +1467,8 @@ public void EmptyNoHeader()
[Fact]
public async Task EmptyHeaderAsync()
{
await Assert.ThrowsAsync<CsvMissingHeadersException>(async () => await CsvDataReader.CreateAsync(new StringReader("")));
var ex = await Assert.ThrowsAsync<CsvMissingHeadersException>(async () => await CsvDataReader.CreateAsync(new StringReader("")));
Assert.StartsWith("The CSV file does not have headers", ex.Message);
}

[Fact]
Expand Down Expand Up @@ -1892,7 +1906,9 @@ public void EscapeEOF()
HasHeaders = false,
Escape = '\\',
});
Assert.Throws<CsvFormatException>(() => csv.Read());
var ex = Assert.Throws<CsvFormatException>(() => csv.Read());
Assert.Equal("Found unexpected character '\\' in field 0 on row 0: \\" + Environment.NewLine
+ "Escape character \\ was encountered at the end of the text.", ex.Message);
}

[Fact]
Expand Down Expand Up @@ -1968,13 +1984,13 @@ public void MacOSLineEndDetect()
[InlineData("a\"a\"a", true, "a\"a\"a")]
[InlineData("a\"\"\"a", true, "a\"\"\"a")]

[InlineData("\"a\"\"\"a\"", false, null)]
[InlineData("\"a\"a", false, null)]
[InlineData("\"a\"a\"a\"", false, null)]
[InlineData("\"\"a", false, null)]
[InlineData("\"\"a\"", false, null)]
[InlineData("\"\"\"", false, null)]
[InlineData("\"\"\"\"\"", false, null)]
[InlineData("\"a\"\"\"a\"", false, "Found unexpected character 'a' in field 0 on row 1: a\"")]
[InlineData("\"a\"a", false, "Found unexpected character 'a' in field 0 on row 1: a")]
[InlineData("\"a\"a\"a\"", false, "Found unexpected character 'a' in field 0 on row 1: a\"")]
[InlineData("\"\"a", false, "Found unexpected character 'a' in field 0 on row 1: a")]
[InlineData("\"\"a\"", false, "Found unexpected character 'a' in field 0 on row 1: a\"")]
[InlineData("\"\"\"", false, "A quoted field at the end of the input ends without a closing quote.")]
[InlineData("\"\"\"\"\"", false, "A quoted field at the end of the input ends without a closing quote.")]

public void Quotes(string data, bool valid, string expected)
{
Expand All @@ -1991,6 +2007,7 @@ public void Quotes(string data, bool valid, string expected)
{
var ex = Assert.Throws<CsvFormatException>(() => csv.Read());
Assert.Equal(1, ex.RowNumber);
Assert.StartsWith(expected, ex.Message);
}
}

Expand Down
3 changes: 2 additions & 1 deletion source/Sylvan.Data.Csv.Tests/CsvDataWriterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ static void BinaryBig(BinaryEncoding encoding, int bufferSize, int? maxBufferSiz
}
else
{
Assert.Throws<CsvRecordTooLargeException>(writeFunc);
var ex = Assert.Throws<CsvRecordTooLargeException>(writeFunc);
Assert.StartsWith("Row 1 was too large", ex.Message);
return;
}

Expand Down
4 changes: 2 additions & 2 deletions source/Sylvan.Data.Csv/CsvDataReader+Async.cs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ async Task<bool> NextRecordAsync(CancellationToken cancel = default)
{
if (!GrowBuffer())
{
throw new CsvRecordTooLargeException(this.RowNumber, 0, null, null);
throw new CsvRecordTooLargeException(this.RowNumber, 0);
}
}
await FillBufferAsync(cancel).ConfigureAwait(false);
Expand Down Expand Up @@ -263,7 +263,7 @@ async Task<bool> NextRecordAsync(CancellationToken cancel = default)
{
// if we consumed the entire buffer reading this record, then this is an exceptional situation
// we expect a record to be able to fit entirely within the buffer.
throw new CsvRecordTooLargeException(this.RowNumber, fieldIdx, null, null);
throw new CsvRecordTooLargeException(this.RowNumber, fieldIdx);
}
}
await FillBufferAsync(cancel).ConfigureAwait(false);
Expand Down
4 changes: 2 additions & 2 deletions source/Sylvan.Data.Csv/CsvDataReader+Sync.cs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ bool NextRecord()
{
if (!GrowBuffer())
{
throw new CsvRecordTooLargeException(this.RowNumber, 0, null, null);
throw new CsvRecordTooLargeException(this.RowNumber, 0);
}
}
FillBuffer();
Expand Down Expand Up @@ -131,7 +131,7 @@ bool NextRecord()
{
// if we consumed the entire buffer reading this record, then this is an exceptional situation
// we expect a record to be able to fit entirely within the buffer.
throw new CsvRecordTooLargeException(this.RowNumber, fieldIdx, null, null);
throw new CsvRecordTooLargeException(this.RowNumber, fieldIdx);

}
}
Expand Down
22 changes: 17 additions & 5 deletions source/Sylvan.Data.Csv/CsvDataReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,14 @@ ReadResult ReadField(int fieldIdx)
var idx = this.idx;
var buffer = this.buffer;

void SetUnexpectedCharacterException(string reason)
{
string preview = new string(buffer.Skip(idx - 1).Take(Math.Min(this.bufferEnd - idx + 1, 50)).ToArray());
this.pendingException = new CsvFormatException(rowNumber, fieldIdx,
$"Found unexpected character '{c}' in field {fieldIdx} on row {rowNumber}: {preview}"
+ Environment.NewLine + reason);
}

// this will remain -1 if it is unquoted.
// Otherwise we use it to determine if the quotes were "clean".
var closeQuoteIdx = -1;
Expand Down Expand Up @@ -710,7 +718,7 @@ ReadResult ReadField(int fieldIdx)
if (atEndOfText)
{
// there was nothing to escape
pendingException = new CsvFormatException(rowNumber, fieldIdx);
SetUnexpectedCharacterException($"Escape character {this.escape} was encountered at the end of the text.");
return ReadResult.False;
}
return ReadResult.Incomplete;
Expand Down Expand Up @@ -826,7 +834,7 @@ ReadResult ReadField(int fieldIdx)
}
else
{
this.pendingException = new CsvFormatException(rowNumber, fieldIdx);
SetUnexpectedCharacterException($"An unescaped quote character ({quote}) was found inside a quoted field.");
return ReadResult.False;
}
}
Expand Down Expand Up @@ -864,7 +872,9 @@ ReadResult ReadField(int fieldIdx)
{
// if the field is quoted, we shouldn't be here.
// the only valid characters would be a delimiter, a new line, or EOF.
this.pendingException = new CsvFormatException(rowNumber, fieldIdx);
SetUnexpectedCharacterException($"A delimiter ({this.delimiter}), newline or EOF"
+ $" was expected after a closing quote. If the preceding quote is part of the field it"
+ $" should be escaped with {this.escape}.");
return ReadResult.False;
}
}
Expand Down Expand Up @@ -907,7 +917,8 @@ ReadResult ReadField(int fieldIdx)
if (style != CsvStyle.Lax)
{
var rowNumber = this.rowNumber == 0 && this.state == State.Initialized ? 1 : this.rowNumber;
this.pendingException = new CsvFormatException(rowNumber, fieldIdx);
this.pendingException = new CsvFormatException(rowNumber, fieldIdx,
"A quoted field at the end of the input ends without a closing quote.");
return ReadResult.False;
}
}
Expand Down Expand Up @@ -1716,7 +1727,8 @@ CharSpan PrepareField(int offset, int len, int escapeCount)
{
// we should never get here. Invalid fields should always be
// handled in ReadField and end up in PrepareInvalidField
throw new CsvFormatException(rowNumber, -1);
throw new CsvFormatException(rowNumber, -1,
$"An quote escape character ({escape}) was the last character of a quoted field.");
}
}
else
Expand Down
11 changes: 6 additions & 5 deletions source/Sylvan.Data.Csv/Exceptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ namespace Sylvan.Data.Csv;
/// </summary>
public class CsvFormatException : FormatException
{
internal CsvFormatException(int row, int ordinal, string? msg = null, Exception? inner = null)
: base(msg, inner)
internal CsvFormatException(int row, int ordinal, string message, Exception? inner = null)
: base(message, inner)
{
this.RowNumber = row;
this.FieldOrdinal = ordinal;
Expand Down Expand Up @@ -43,16 +43,17 @@ internal CsvConfigurationException() { }
/// </remarks>
public sealed class CsvRecordTooLargeException : CsvFormatException
{
internal CsvRecordTooLargeException(int row, int ordinal, string? msg = null, Exception? inner = null)
: base(row, ordinal, msg, inner) { }
internal CsvRecordTooLargeException(int row, int ordinal)
: base(row, ordinal, $"Row {row} was too large. Try increasing the MaxBufferSize setting.") { }
}

/// <summary>
/// The exception that is thrown when reading an empty CSV when headers are expected.
/// </summary>
public sealed class CsvMissingHeadersException : CsvFormatException
{
internal CsvMissingHeadersException() : base(0, 0, null, null) { }
internal CsvMissingHeadersException()
: base(0, 0, "The CSV file does not have headers, but the HasHeaders option was set to true.") { }
}

/// <summary>
Expand Down

0 comments on commit 1864087

Please sign in to comment.