Skip to content

Commit

Permalink
Fix issue when final character in field is escaped. (#221)
Browse files Browse the repository at this point in the history
  • Loading branch information
MarkPflug authored Nov 3, 2023
1 parent bd4acdd commit 382df36
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 14 deletions.
3 changes: 3 additions & 0 deletions docs/Csv/Sylvan.Data.Csv.Releases.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Sylvan.Data.Csv Release Notes

_1.3.5_
- Fixes a bug where a field could be read incorrectly if its final character was escaped when reading with `CsvStyle.Escaped`.

_1.3.4_
- Adds `CsvSchema.Dynamic` which treats CSV data as having "variant" type. This can be useful when processing
CSV data where the field type might change from row to row.
Expand Down
51 changes: 51 additions & 0 deletions source/Sylvan.Data.Csv.Tests/CsvDataReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1856,6 +1856,57 @@ public void QuotedHeader()
Assert.Equal("3", csv.GetString(2));
}

[Fact]
public void FinalCharInCellIsEscaped()
{
    // Regression test: when the last character of a field is an escaped
    // delimiter, the field must be unescaped completely. Before the fix the
    // first field was returned as "a, \" instead of "a, ,".
    //
    // Raw input (verbatim string, so each backslash is literal):
    //   field 0: a\, \,   -> "a, ,"
    //   field 1: b        -> "b"
    //   field 2: \,       -> ","
    using var reader = new StringReader(@"a\, \,,b,\,");
    using var csvReader = CsvDataReader.Create(reader, new CsvDataReaderOptions
    {
        CsvStyle = CsvStyle.Escaped,
        HasHeaders = false,
        Escape = '\\',
    });

    // Fail with a clear assertion if no row is produced, rather than with an
    // unrelated error from the GetString calls below.
    Assert.True(csvReader.Read());

    var value0 = csvReader.GetString(0);
    var value1 = csvReader.GetString(1);
    var value2 = csvReader.GetString(2);

    Assert.Equal("a, ,", value0);
    Assert.Equal("b", value1);
    Assert.Equal(",", value2);
}

[Fact]
public void EscapeEOF()
{
    // The entire input is a single escape character, so there is nothing
    // after it to escape. Creating the reader is expected to reject this.
    using var reader = new StringReader("\\");

    Assert.Throws<InvalidDataException>(() =>
    {
        var options = new CsvDataReaderOptions
        {
            CsvStyle = CsvStyle.Escaped,
            HasHeaders = false,
            Escape = '\\',
        };

        using var csvReader = CsvDataReader.Create(reader, options);
    });
}

[Fact]
public void FinalCharInCellIsEscapeError()
{
    // The C# literal below is four characters: backslash, backslash,
    // backslash, newline. Read as escape pairs that is an escaped backslash
    // followed by an escaped newline, so the single field is expected to
    // contain a backslash followed by a newline.
    var options = new CsvDataReaderOptions
    {
        CsvStyle = CsvStyle.Escaped,
        HasHeaders = false,
        Escape = '\\',
    };

    using var input = new StringReader("\\\\\\\n");
    using var csv = CsvDataReader.Create(input, options);

    csv.Read();

    Assert.Equal("\\\n", csv.GetString(0));
}


#if NET6_0_OR_GREATER

Expand Down
32 changes: 19 additions & 13 deletions source/Sylvan.Data.Csv/CsvDataReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,7 @@ ReadResult ReadField(int fieldIdx)
c = buffer[idx++]; // the escaped char
if (IsEndOfLine(c))
{
idx--;// "unconsume" the newline character, so that ConsumeLineEnd can process it.
// if the escape precede an EOL, we might have to consume 2 chars
var r = ConsumeLineEnd(buffer, ref idx);
if (r == ReadResult.Incomplete)
Expand All @@ -693,9 +694,8 @@ ReadResult ReadField(int fieldIdx)
{
if (atEndOfText)
{
// TODO: not sure what to do here.
escapeCount++;
break;
// there was nothing to escape
throw new InvalidDataException();
}
return ReadResult.Incomplete;
}
Expand Down Expand Up @@ -1591,7 +1591,6 @@ CharSpan GetFieldUnsafe(int ordinal)

CharSpan PrepareField(int offset, int len, int escapeCount)
{

bool inQuote = true; // we start inside the quotes

var eLen = len - escapeCount;
Expand All @@ -1606,29 +1605,36 @@ CharSpan PrepareField(int offset, int len, int escapeCount)

int i = 0;
int d = 0;
while (d < eLen)
while (i < len)
{
var c = buffer[offset + i++];
if (inQuote)
{
if (c == escape && i + 1 < len)
if (c == escape)
{
c = buffer[offset + i++];
if (c != quote && c != escape)
if (i < len)
{
if (quote == escape)
c = buffer[offset + i++];
if (c != quote && c != escape)
{
// the escape we just saw was actually the closing quote
// the remainder of the field will be added verbatim
inQuote = false;
if (quote == escape)
{
// the escape we just saw was actually the closing quote
// the remainder of the field will be added verbatim
inQuote = false;
}
}
}
else
{
throw new InvalidDataException();
}
}
else
if (c == quote)
{
// we've found the broken closing quote
// skip it.
// skip it.
inQuote = false;
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion source/Sylvan.Data.Csv/Sylvan.Data.Csv.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<TargetFrameworks>net6.0;netstandard2.1;netstandard2.0</TargetFrameworks>
<VersionPrefix>1.3.4</VersionPrefix>
<VersionPrefix>1.3.5</VersionPrefix>
<Description>A .NET library for reading and writing delimited CSV data.</Description>
<PackageTags>csv;delimited;data;datareader;datawriter;simd</PackageTags>
<Nullable>enable</Nullable>
Expand Down

0 comments on commit 382df36

Please sign in to comment.