diff --git a/Directory.Build.props b/Directory.Build.props
index 938f2f3..2745d77 100644
--- a/Directory.Build.props
+++ b/Directory.Build.props
@@ -6,9 +6,9 @@
$(Owner)
https://markpflug.github.io/Sylvan.png
Sylvan.png
- © 2023 $(Owner)
+ © 2024 $(Owner)
license.txt
- 11.0
+ 12.0
true
CS1030;CA1835;$(WarningsNotAsErrors)
diff --git a/docs/Csv/Options.md b/docs/Csv/Options.md
index 7bc8e42..d08baf1 100644
--- a/docs/Csv/Options.md
+++ b/docs/Csv/Options.md
@@ -26,12 +26,34 @@ These defaults to `null`, which attempt to parse the values as the default "true
If either `TrueString` or `FalseString` are non-null, then that value is the singular, case-insensitive string that will be interpreted as the associated boolean value. If only one of the two is assigned it causes all other values to be interpreted as the negation. If both are assigned any value that is not one or the other will result in a `FormatException` being thrown.
-__DateFormat__
-
+__DateTimeFormat__
The format string used to parse `DateTime` values. This defaults to null, which will result in values being parsed using the provide `CultureInfo`.
Some CSV data sources use a compact date format like `"yyyyMMdd"` which cannot be parsed by default date parsing behavior, in which case this option allows parsing such values.
+__DateTimeOffsetFormat__
+The format string used to parse `DateTimeOffset` values.
+This defaults to null, which will result in values being parsed using the provided `CultureInfo`.
+
+__TimeSpanFormat__
+The format string used to parse `TimeSpan` values. This defaults to null, which will result in values being parsed using the provided `CultureInfo`.
+
+__TimeOnlyFormat__
+The format string used to parse `TimeOnly` values. This option is only available when using .NET 6 or greater.
+This defaults to null, which will result in values being parsed using the provided `CultureInfo`.
+
+__DateOnlyFormat__
+The format string used to parse `DateOnly` values. This option is only available when using .NET 6 or greater.
+This defaults to null, which will result in values being parsed using the provided `CultureInfo`.
+
+__DateFormat__
+
+**Obsolete**. Use `DateTimeFormat` instead.
+
+__TimeFormat__
+
+**Obsolete**. Use `TimeOnlyFormat` instead.
+
__BinaryEncoding__
The encoding format used to interpret binary data, either Base64 or Hexadecimal. Hexadecimal values can optionally be prefixed with "0x".
@@ -61,6 +83,20 @@ __Culture__
The `CultureInfo` used when parsing primitive values. Defaults to
`InvariantCulture`.
+__Style__
+
+Specifies the parsing mode to be used when reading a CSV file.
+
+*Standard*: This mode uses slightly modified RFC 4180 parsing that allows non-comma delimiters to be used.
+Valid RFC 4180 files should parse as expected in this mode.
+
+*Escaped*: This mode uses escaping instead of quoting fields.
+Any field delimiter, record delimiter (newline), or escape character in a field value will be escaped by a preceding escape character.
+
+*Lax*: This mode uses a more lenient parsing mode that will parse malformed fields and avoid throwing an exception.
+This mode starts by parsing using the `Standard` style, and upon finding a closing quote will parse the remainder of the field
+as if it were unquoted.
+
__OwnsReader__
Indicates if the `CsvDataReader` owns the TextReader and should dispose it when complete. Defaults to true.
@@ -122,4 +158,4 @@ static string Pool(char[] buf, int offset, int length)
// anything else just construct normally (or call a nested factory)
return new string(buf, offset, length);
}
-```
\ No newline at end of file
+```
diff --git a/docs/Csv/Sylvan.Data.Csv.Releases.md b/docs/Csv/Sylvan.Data.Csv.Releases.md
index 4523d5b..3af43c0 100644
--- a/docs/Csv/Sylvan.Data.Csv.Releases.md
+++ b/docs/Csv/Sylvan.Data.Csv.Releases.md
@@ -1,5 +1,9 @@
# Sylvan.Data.Csv Release Notes
+_1.3.6_
+- Adds `CsvStyle.Lax` which allows parsing CSV files with invalid fields. In this mode, the parser will not produce exceptions, but will
+ make a "best effort" attempt to parse invalid fields.
+
_1.3.5_
- Fixes a bug where fields could be incorrectly read when the final character was escaped when reading with `CsvStyle.Escaped`.
diff --git a/license.txt b/license.txt
index 55fe1f3..2c936d1 100644
--- a/license.txt
+++ b/license.txt
@@ -1,6 +1,6 @@
MIT License
-Copyright (c) 2022 Mark Pflug
+Copyright (c) 2024 Mark Pflug
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/source/Sylvan.Data.Csv.Tests/CsvDataReaderTests.cs b/source/Sylvan.Data.Csv.Tests/CsvDataReaderTests.cs
index c0be13e..2869c16 100644
--- a/source/Sylvan.Data.Csv.Tests/CsvDataReaderTests.cs
+++ b/source/Sylvan.Data.Csv.Tests/CsvDataReaderTests.cs
@@ -5,6 +5,7 @@
using System.Globalization;
using System.IO;
using System.Linq;
+using System.Numerics;
using System.Text;
using System.Threading.Tasks;
using Xunit;
@@ -949,7 +950,7 @@ public void Binary2()
[InlineData("N,V\na\\\nb,c\n", "a\nb", "c")]
[InlineData("N,V\na\\\r\nb\n", "a\r\nb", "")]
[InlineData("N,V\na\\\r\nb", "a\r\nb", "")]
- public void ImpliedQuote(string input, string a, string b)
+ public void EscapedStyle(string input, string a, string b)
{
using var reader = new StringReader(input);
var options =
@@ -1884,14 +1885,14 @@ public void FinalCharInCellIsEscaped()
public void EscapeEOF()
{
using var reader = new StringReader("\\");
-
+
using var csv = CsvDataReader.Create(reader, new CsvDataReaderOptions
{
CsvStyle = CsvStyle.Escaped,
HasHeaders = false,
Escape = '\\',
});
- Assert.Throws(() => csv.Read());
+ Assert.Throws(() => csv.Read());
}
[Fact]
@@ -1921,7 +1922,7 @@ public void FinalCharInCellIsEscapeError()
[InlineData("\"a\"\"a\"\"a\"", true, "a\"a\"a")]
[InlineData("a\"a\"a", true, "a\"a\"a")]
[InlineData("a\"\"\"a", true, "a\"\"\"a")]
-
+
[InlineData("\"a\"\"\"a\"", false, null)]
[InlineData("\"a\"a", false, null)]
[InlineData("\"a\"a\"a\"", false, null)]
@@ -1935,11 +1936,12 @@ public void Quotes(string data, bool valid, string expected)
var r = new StringReader("a,b,c\n" + data);
var csv = CsvDataReader.Create(r);
- if (valid) {
+ if (valid)
+ {
csv.Read();
var value = csv.GetString(0);
Assert.Equal(expected, value);
- }
+ }
else
{
var ex = Assert.Throws(() => csv.Read());
@@ -1947,6 +1949,67 @@ public void Quotes(string data, bool valid, string expected)
}
}
+ [Theory]
+ // these are valid, and parse the same as the non-lax test
+ [InlineData("a", "a")]
+ [InlineData("\"\"", "")]
+ [InlineData("\"\"\"\"", "\"")]
+ [InlineData("\"\"\"\"\"\"", "\"\"")]
+ [InlineData("\"a\"", "a")]
+ [InlineData("\"a\"\"a\"", "a\"a")]
+ [InlineData("\"a\"\"a\"\"a\"", "a\"a\"a")]
+ [InlineData("a\"a\"a", "a\"a\"a")]
+ [InlineData("a\"\"\"a", "a\"\"\"a")]
+ // these are invalid, but will still produce a string in lax mode.
+ [InlineData("\"a\"\"\"a\"", "a\"a\"")]
+ [InlineData("\"a\"a", "aa")]
+ [InlineData("\"a\"a\"a\"", "aa\"a\"")]
+ [InlineData("\"\"a", "a")]
+ [InlineData("\"\"a\"", "a\"")]
+ //[InlineData("\"\"\"", "\"")]
+ [InlineData("\"\"\"\"\"", "\"\"")]
+ // test when invalid fields exist at the end of a file.
+ public void LaxQuotesEnd(string data, string expected)
+ {
+ var r = new StringReader("a,b,c\n1,2,3\n" + data);
+ var opts = new CsvDataReaderOptions { CsvStyle = CsvStyle.Lax };
+ var csv = CsvDataReader.Create(r, opts);
+ csv.Read(); // skip the 1,2,3
+ csv.Read();
+ var value = csv.GetString(0);
+ Assert.Equal(expected, value);
+ }
+
+ [Theory]
+ // these are valid, and parse the same as the non-lax test
+ [InlineData("a", "a")]
+ [InlineData("\"\"", "")]
+ [InlineData("\"\"\"\"", "\"")]
+ [InlineData("\"\"\"\"\"\"", "\"\"")]
+ [InlineData("\"a\"", "a")]
+ [InlineData("\"a\"\"a\"", "a\"a")]
+ [InlineData("\"a\"\"a\"\"a\"", "a\"a\"a")]
+ [InlineData("a\"a\"a", "a\"a\"a")]
+ [InlineData("a\"\"\"a", "a\"\"\"a")]
+ // these are invalid, but will still produce a string in lax mode.
+ [InlineData("\"a\"\"\"a\"", "a\"a\"")]
+ [InlineData("\"a\"a", "aa")]
+ [InlineData("\"a\"a\"a\"", "aa\"a\"")]
+ [InlineData("\"\"a", "a")]
+ [InlineData("\"\"a\"", "a\"")]
+ [InlineData("\"\"\"", "\"\n4,5,6\n")]
+ [InlineData("\"\"\"\"\"", "\"\"\n4,5,6\n")]
+ // test when invalid fields exist in the middle of a file.
+ public void LaxQuotesMid(string data, string expected)
+ {
+ var r = new StringReader("a,b,c\n1,2,3\n" + data + "\n4,5,6\n");
+ var opts = new CsvDataReaderOptions { CsvStyle = CsvStyle.Lax };
+ var csv = CsvDataReader.Create(r, opts);
+ csv.Read(); // skip the 1,2,3
+ csv.Read();
+ var value = csv.GetString(0);
+ Assert.Equal(expected, value);
+ }
#if NET6_0_OR_GREATER
diff --git a/source/Sylvan.Data.Csv.Tests/Sylvan.Data.Csv.Tests.csproj b/source/Sylvan.Data.Csv.Tests/Sylvan.Data.Csv.Tests.csproj
index 4eb084a..4ab443c 100644
--- a/source/Sylvan.Data.Csv.Tests/Sylvan.Data.Csv.Tests.csproj
+++ b/source/Sylvan.Data.Csv.Tests/Sylvan.Data.Csv.Tests.csproj
@@ -13,7 +13,7 @@
-
+
diff --git a/source/Sylvan.Data.Csv/CsvDataReader.cs b/source/Sylvan.Data.Csv/CsvDataReader.cs
index 0eb8886..e1b327f 100644
--- a/source/Sylvan.Data.Csv/CsvDataReader.cs
+++ b/source/Sylvan.Data.Csv/CsvDataReader.cs
@@ -54,6 +54,7 @@ enum QuoteState
Unquoted = 0,
Quoted = 1,
ImplicitQuotes = 3,
+ InvalidQuotes = 4,
}
struct FieldInfo
@@ -669,6 +670,14 @@ ReadResult ReadField(int fieldIdx)
int fieldEnd = 0;
bool last = false;
bool complete = false;
+
+ if (fieldIdx >= fieldInfos.Length)
+ {
+ // this resize is constrained by the fact that the record has to fit in one row
+ Array.Resize(ref fieldInfos, fieldInfos.Length * 2);
+ }
+ ref var fi = ref fieldInfos[fieldIdx];
+
if (style == CsvStyle.Escaped)
{
// consume quoted field.
@@ -716,85 +725,78 @@ ReadResult ReadField(int fieldIdx)
{
if (idx < bufferEnd)
{
- c = buffer[idx++];
+ c = buffer[idx];
- if (c <= minSafe)
+ if (c == quote)
{
- if (c == quote)
- {
- closeQuoteIdx = idx;
+ idx++; // consume the quote we just read
+ closeQuoteIdx = idx;
- // consume quoted field.
- while (idx < bufferEnd)
+ // consume quoted field.
+ while (idx < bufferEnd)
+ {
+ c = buffer[idx++];
+ if (c == escape)
{
- c = buffer[idx++];
- if (c == escape)
+ if (idx < bufferEnd)
{
- if (idx < bufferEnd)
+ c = buffer[idx++]; // the escaped char
+ if (c == escape || c == quote)
+ {
+ escapeCount++;
+ continue;
+ }
+ else
+ if (escape == quote)
+ {
+ idx--;
+ closeQuoteIdx = idx;
+ fieldEnd = closeQuoteIdx;
+ // the quote (escape) we just saw was the closing quote
+ break;
+ }
+ }
+ else
+ {
+ if (atEndOfText)
{
- c = buffer[idx++]; // the escaped char
- if (c == escape || c == quote)
- {
- escapeCount++;
- continue;
- }
- else
if (escape == quote)
{
- idx--;
+ complete = true;
+ last = true;
closeQuoteIdx = idx;
fieldEnd = closeQuoteIdx;
// the quote (escape) we just saw was a the closing quote
- break;
- }
- }
- else
- {
- if (atEndOfText)
- {
- if (escape == quote)
- {
- complete = true;
- last = true;
- closeQuoteIdx = idx;
- fieldEnd = closeQuoteIdx;
- // the quote (escape) we just saw was a the closing quote
- }
- break;
}
- return ReadResult.Incomplete;
+ break;
}
+ return ReadResult.Incomplete;
}
+ }
- if (c == quote)
+ if (c == quote)
+ {
+ // immediately after the quote should be a delimiter, eol, or eof, but...
+ // we can simply treat the remainder of the record like a normal unquoted field
+ // we are currently positioned on the quote, the next while loop will consume it
+ closeQuoteIdx = idx;
+ fieldEnd = closeQuoteIdx;
+ break;
+ }
+ if (IsEndOfLine(c))
+ {
+ idx--;
+ var r = ConsumeLineEnd(buffer, ref idx);
+ if (r == ReadResult.Incomplete)
{
- // immediately after the quote should be a delimiter, eol, or eof, but...
- // we can simply treat the remainder of the record like a normal unquoted field
- // we are currently positioned on the quote, the next while loop will consume it
- closeQuoteIdx = idx;
- fieldEnd = closeQuoteIdx;
- break;
+ return ReadResult.Incomplete;
}
- if (IsEndOfLine(c))
+ else
{
- idx--;
- var r = ConsumeLineEnd(buffer, ref idx);
- if (r == ReadResult.Incomplete)
- {
- return ReadResult.Incomplete;
- }
- else
- {
- // continue on. We are inside a quoted string, so the newline is part of the value.
- }
+ // continue on. We are inside a quoted string, so the newline is part of the value.
}
- } // we exit this loop when we reach the closing quote.
- }
- else
- {
- // "unread" the last character and let the next loop handle it.
- idx--;
- }
+ }
+ } // we exit this loop when we reach the closing quote.
}
}
}
@@ -815,8 +817,15 @@ ReadResult ReadField(int fieldIdx)
// this handles the case where we had a quoted field
if (c == quote && closeQuoteIdx >= 0)
{
- this.pendingException = new CsvFormatException(rowNumber, fieldIdx);
- return ReadResult.False;
+ if (style == CsvStyle.Lax)
+ {
+ fi.quoteState = QuoteState.InvalidQuotes;
+ }
+ else
+ {
+ this.pendingException = new CsvFormatException(rowNumber, fieldIdx);
+ return ReadResult.False;
+ }
}
else
if (IsEndOfLine(c))
@@ -837,30 +846,38 @@ ReadResult ReadField(int fieldIdx)
last = true;
break;
}
- }
+ }
else
{
if (closeQuoteIdx >= 0)
{
- // if the field is quoted, we shouldn't be here.
- // the only valid characters would be a delimiter, a new line, or EOF.
- this.pendingException = new CsvFormatException(rowNumber, fieldIdx);
- return ReadResult.False;
+ if (style == CsvStyle.Lax)
+ {
+ // in lax mode, we'll continue reading the remainder of the field
+ // after the closing quote
+ fi.quoteState = QuoteState.InvalidQuotes;
+ }
+ else
+ {
+ // if the field is quoted, we shouldn't be here.
+ // the only valid characters would be a delimiter, a new line, or EOF.
+ this.pendingException = new CsvFormatException(rowNumber, fieldIdx);
+ return ReadResult.False;
+ }
}
}
}
if (complete || atEndOfText)
{
- if (fieldIdx >= fieldInfos.Length)
+
+ if (atEndOfText && !complete)
{
- // this resize is constrained by the fact that the record has to fit in one row
- Array.Resize(ref fieldInfos, fieldInfos.Length * 2);
+ fieldEnd = idx;
}
curFieldCount++;
- ref var fi = ref fieldInfos[fieldIdx];
if (style == CsvStyle.Escaped)
{
@@ -883,9 +900,13 @@ ReadResult ReadField(int fieldIdx)
}
else
{
- var rowNumber = this.rowNumber == 0 && this.state == State.Initialized ? 1 : this.rowNumber;
- this.pendingException = new CsvFormatException(rowNumber, fieldIdx);
- return ReadResult.False;
+ fi.quoteState = QuoteState.InvalidQuotes;
+ if (style != CsvStyle.Lax)
+ {
+ var rowNumber = this.rowNumber == 0 && this.state == State.Initialized ? 1 : this.rowNumber;
+ this.pendingException = new CsvFormatException(rowNumber, fieldIdx);
+ return ReadResult.False;
+ }
}
}
}
@@ -1521,6 +1542,12 @@ internal readonly struct CharSpan
public CharSpan(char[] buffer, int offset, int length)
{
+#if DEBUG
+ if (offset < 0 || length < 0)
+ {
+ throw new Exception();
+ }
+#endif
Debug.Assert(offset >= 0);
Debug.Assert(length >= 0);
this.buffer = buffer;
@@ -1585,6 +1612,9 @@ internal CharSpan GetField(int ordinal)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
CharSpan GetFieldUnsafe(int ordinal)
{
+ // "Unsafe" meaning this should only be called
+ // in contexts where ordinal is already validated to be in-range
+
ref var fi = ref this.fieldInfos[ordinal];
var startIdx = recordStart + (ordinal == 0 ? 0 : this.fieldInfos[ordinal - 1].endIdx + 1);
var endIdx = recordStart + fi.endIdx;
@@ -1593,20 +1623,21 @@ CharSpan GetFieldUnsafe(int ordinal)
var buffer = this.buffer;
if (fi.quoteState != QuoteState.Unquoted)
{
- // if there are no escapes, we can just "trim" the quotes off
- if (fi.quoteState != QuoteState.ImplicitQuotes)
+ switch (fi.quoteState)
{
- offset += 1;
- len -= 2;
- }
-
- if (fi.quoteState == QuoteState.Quoted && fi.escapeCount == 0)
- {
- // happy path, nothing else to do
- }
- else
- {
- return PrepareField(offset, len, fi.escapeCount);
+ case QuoteState.InvalidQuotes:
+ return PrepareInvalidField(offset, len);
+ case QuoteState.Quoted:
+ // trim the quotes
+ offset += 1;
+ len -= 2;
+ if (fi.escapeCount > 0)
+ {
+ goto case QuoteState.ImplicitQuotes;
+ }
+ break;
+ case QuoteState.ImplicitQuotes: // escaped
+ return PrepareField(offset, len, fi.escapeCount);
}
}
return new CharSpan(buffer, offset, len);
@@ -1619,11 +1650,10 @@ CharSpan PrepareField(int offset, int len, int escapeCount)
var eLen = len - escapeCount;
// if there is room in the buffer before the current record
// we'll use that as scratch space to unescape the value
- var temp = buffer;
- if (recordStart < eLen)
+ if (scratchStr.Length < len)
{
// otherwise we'll allocate a buffer
- temp = new char[eLen];
+ scratchStr = new char[len];
}
int i = 0;
@@ -1650,8 +1680,8 @@ CharSpan PrepareField(int offset, int len, int escapeCount)
}
else
{
- // we should never get here. Bad fields should always be
- // handled in "read"
+ // we should never get here. Invalid fields should always be
+ // handled in ReadField and end up in PrepareInvalidField
throw new CsvFormatException(rowNumber, -1);
}
}
@@ -1664,9 +1694,66 @@ CharSpan PrepareField(int offset, int len, int escapeCount)
continue;
}
}
- temp[d++] = c;
+ scratchStr[d++] = c;
+ }
+ return new CharSpan(scratchStr, 0, eLen);
+ }
+
+ char[] scratchStr = Array.Empty();
+
+ // this should only be called in Lax mode, otherwise an exception
+ // would have been thrown in ReadField.
+ CharSpan PrepareInvalidField(int offset, int len)
+ {
+ bool inQuote = false;
+
+ // increase the scratch space if needed.
+ if (scratchStr.Length < len)
+ {
+ scratchStr = new char[len];
+ }
+
+ int i = 0;
+ if (buffer[offset + i] == quote)
+ {
+ i++;
+ inQuote = true;
+ }
+
+ int d = 0;
+ while (i < len)
+ {
+ var c = buffer[offset + i++];
+ if (inQuote)
+ {
+ if (c == escape)
+ {
+ if (i < len)
+ {
+ c = buffer[offset + i++];
+ if (c != quote && c != escape)
+ {
+ if (quote == escape)
+ {
+ // the escape we just saw was actually the closing quote
+ // the remainder of the field will be added verbatim
+ inQuote = false;
+ }
+ }
+ }
+ }
+ else
+ if (c == quote)
+ {
+ // we've found the broken closing quote
+ // skip it.
+ inQuote = false;
+ continue;
+ }
+ }
+ scratchStr[d++] = c;
}
- return new CharSpan(temp, 0, eLen);
+ return new CharSpan(scratchStr, 0, d);
}
///
diff --git a/source/Sylvan.Data.Csv/CsvDataWriter+FieldWriter.cs b/source/Sylvan.Data.Csv/CsvDataWriter+FieldWriter.cs
index e7cf626..697dc4b 100644
--- a/source/Sylvan.Data.Csv/CsvDataWriter+FieldWriter.cs
+++ b/source/Sylvan.Data.Csv/CsvDataWriter+FieldWriter.cs
@@ -193,12 +193,12 @@ public override int Write(WriterContext context, int ordinal, char[] buffer, int
public override byte[] GetValue(DbDataReader reader, int ordinal)
{
- throw new InvalidOperationException();
+ throw new NotSupportedException();
}
public override int WriteValue(WriterContext context, byte[] value, char[] buffer, int offset)
{
- throw new NotImplementedException();
+ throw new NotSupportedException();
}
}
@@ -260,12 +260,12 @@ static int ToHexCharArray(byte[] dataBuffer, int offset, int length, char[] outp
public override byte[] GetValue(DbDataReader reader, int ordinal)
{
- throw new InvalidOperationException();
+ throw new NotSupportedException();
}
public override int WriteValue(WriterContext context, byte[] value, char[] buffer, int offset)
{
- throw new NotImplementedException();
+ throw new NotSupportedException();
}
}
diff --git a/source/Sylvan.Data.Csv/CsvDataWriterOptions.cs b/source/Sylvan.Data.Csv/CsvDataWriterOptions.cs
index 4603a8d..459b3db 100644
--- a/source/Sylvan.Data.Csv/CsvDataWriterOptions.cs
+++ b/source/Sylvan.Data.Csv/CsvDataWriterOptions.cs
@@ -142,7 +142,7 @@ public string? TimeFormat {
public char Comment { get; set; }
///
- /// The string to use for line breaks separating records. The default is Environment.NewLine.
+ /// The string to use for line breaks separating records. The default is "\n".
/// Must be one of "\r", "\n", or "\r\n".
///
public string NewLine { get; set; }
diff --git a/source/Sylvan.Data.Csv/CsvStyle.cs b/source/Sylvan.Data.Csv/CsvStyle.cs
index 55ba3bb..9f17e9d 100644
--- a/source/Sylvan.Data.Csv/CsvStyle.cs
+++ b/source/Sylvan.Data.Csv/CsvStyle.cs
@@ -9,6 +9,7 @@ public enum CsvStyle
{
///
/// Parses using the standard RFC4180 mode.
+ /// Malformed fields will produce a <see cref="CsvFormatException"/> during calls to <see cref="CsvDataReader.Read"/>.
///
Standard = 1,
@@ -22,4 +23,11 @@ public enum CsvStyle
/// Interprets fields as if they are implicitly quoted. Delimiters and new lines within fields are preceded by an escape character.
///
Escaped = 2,
+
+ ///
+ /// Parses CSV using lax quote handling where incorrectly quoted fields don't produce an error.
+ /// In this mode a field will be parsed using the <see cref="Standard"/> mode, and when an (unescaped) closing quote is found, the remainder
+ /// of the field will be parsed as if it were unquoted.
+ ///
+ Lax = 3,
}
diff --git a/source/Sylvan.Data.Csv/Sylvan.Data.Csv.csproj b/source/Sylvan.Data.Csv/Sylvan.Data.Csv.csproj
index f13f8cb..45ad86f 100644
--- a/source/Sylvan.Data.Csv/Sylvan.Data.Csv.csproj
+++ b/source/Sylvan.Data.Csv/Sylvan.Data.Csv.csproj
@@ -2,7 +2,8 @@
net6.0;netstandard2.1;netstandard2.0
- 1.3.5
+ 1.3.6
+ b0001
A .NET library for reading and writing delimited CSV data.
csv;delimited;data;datareader;datawriter;simd
enable
diff --git a/source/Sylvan.Data.Tests/ExtendedDataReaderTests.cs b/source/Sylvan.Data.Tests/ExtendedDataReaderTests.cs
index 449a04f..def6928 100644
--- a/source/Sylvan.Data.Tests/ExtendedDataReaderTests.cs
+++ b/source/Sylvan.Data.Tests/ExtendedDataReaderTests.cs
@@ -42,6 +42,23 @@ public void Test2()
}
+ [Fact]
+ public void TestGetData()
+ {
+ var data = "a\n1";
+ var csv = CsvDataReader.Create(new StringReader(data));
+ var r = csv.WithColumns(new CustomDataColumn("b", r => new byte[] { 1, 2, 3 }));
+
+ Assert.True(r.Read());
+ Assert.Equal(1, r.GetInt32(0));
+
+ Assert.Equal(3, r.GetBytes(1, 0, null, 0, 0));
+ var buf = new byte[3];
+ Assert.Equal(3, r.GetBytes(1, 0, buf, 0, buf.Length));
+ Assert.Equal(new byte[] { 1, 2, 3 }, buf);
+ Assert.False(r.Read());
+ }
+
static bool IsNullString(DbDataReader r, int idx)
{
var s = r.GetString(idx);
diff --git a/source/Sylvan.Data/DataExtensions.cs b/source/Sylvan.Data/DataExtensions.cs
index d08f974..9d74aa3 100644
--- a/source/Sylvan.Data/DataExtensions.cs
+++ b/source/Sylvan.Data/DataExtensions.cs
@@ -36,7 +36,7 @@ public static DataTable ToSchemaTable(this System.Collections.ObjectModel.ReadOn
///
/// The base data reader.
/// The extra columns to attach.
- /// A Db
+ /// A DbDataReader.
public static DbDataReader WithColumns(this DbDataReader reader, params IDataColumn[] columns)
{
return new ExtendedDataReader(reader, columns);
@@ -127,29 +127,26 @@ public static async IAsyncEnumerable GetRecordsAsync(this DbDataReader rea
}
}
-#endif
-
///
/// var reader = seq.AsDataReader()
///
- public static DbDataReader AsDataReader(this IEnumerable seq)
+ public static DbDataReader AsDataReader(this IAsyncEnumerable seq, CancellationToken cancel = default)
where T : class
{
- return new SyncObjectDataReader(seq);
+ return new AsyncObjectDataReader(seq, cancel);
}
-#if IAsyncEnumerable
+#endif
///
/// var reader = seq.AsDataReader()
///
- public static DbDataReader AsDataReader(this IAsyncEnumerable seq, CancellationToken cancel = default)
+ public static DbDataReader AsDataReader(this IEnumerable seq)
where T : class
{
- return new AsyncObjectDataReader(seq, cancel);
+ return new SyncObjectDataReader(seq);
}
-#endif
///
/// Selects a subset of columns for a DbDataReader.
///
diff --git a/source/Sylvan.Data/DataReaderAdapter.cs b/source/Sylvan.Data/DataReaderAdapter.cs
index 9add23b..7dd0405 100644
--- a/source/Sylvan.Data/DataReaderAdapter.cs
+++ b/source/Sylvan.Data/DataReaderAdapter.cs
@@ -326,7 +326,7 @@ public virtual ReadOnlyCollection GetColumnSchema()
return new ReadOnlyCollection(cols);
}
- class Col : DbColumn
+ sealed class Col : DbColumn
{
public Col(int ordinal, string name, Type type)
{
diff --git a/source/Sylvan.Data/ExtendedDataReader.cs b/source/Sylvan.Data/ExtendedDataReader.cs
index 1ee96c9..4b8a49e 100644
--- a/source/Sylvan.Data/ExtendedDataReader.cs
+++ b/source/Sylvan.Data/ExtendedDataReader.cs
@@ -47,14 +47,14 @@ public interface IDataColumn
///
/// Gets a range of data from the column.
///
- int GetData(DbDataReader reader, T[] buffer, long dataOffset, int bufferOffset, int length);
+ int GetData(DbDataReader reader, T[]? buffer, long dataOffset, int bufferOffset, int length);
}
///
/// Defines a custom data column.
///
-///
-public class CustomDataColumn : IDataColumn
+/// The data type of the column.
+public sealed class CustomDataColumn : IDataColumn
{
///
public string Name { get; }
@@ -101,14 +101,28 @@ public object GetValue(DbDataReader reader)
}
///
- public int GetData(DbDataReader reader, TData[] buffer, long dataOffset, int bufferOffset, int length)
+ public int GetData(DbDataReader reader, TData[]? buffer, long dataOffset, int bufferOffset, int length)
{
var t = valueSource(reader);
if (t is TData[] data)
{
- Array.Copy(data, dataOffset, buffer, bufferOffset, length);
+ var len = 0;
+ if (buffer == null)
+ {
+ // passing a null buffer allows querying the length.
+ len = data.Length;
+ }
+ else
+ {
+ len = Math.Min(data.Length - (int)dataOffset, length);
+ Array.Copy(data, dataOffset, buffer, bufferOffset, len);
+ }
+ return len;
+ }
+ else
+ {
+ throw new InvalidCastException();
}
- throw new InvalidCastException();
}
readonly Func valueSource;
@@ -146,9 +160,9 @@ public DataReaderColumn(DbDataReader reader, int ordinal, bool allowNull)
public bool AllowNull => allowNull;
- public int GetData(DbDataReader reader, TData[] buffer, long dataOffset, int bufferOffset, int length)
+ public int GetData(DbDataReader reader, TData[]? buffer, long dataOffset, int bufferOffset, int length)
{
- throw new NotImplementedException();
+ throw new NotSupportedException();
}
public object GetValue(DbDataReader reader)
@@ -234,7 +248,7 @@ public override byte GetByte(int ordinal)
public override long GetBytes(int ordinal, long dataOffset, byte[]? buffer, int bufferOffset, int length)
{
- if (buffer == null) throw new ArgumentNullException(nameof(buffer));
+ //if (buffer == null) throw new ArgumentNullException(nameof(buffer));
return GetColumn(ordinal).GetData(this, buffer, dataOffset, bufferOffset, length);
}
@@ -421,4 +435,24 @@ public ReadOnlyCollection GetColumnSchema()
{
return schema;
}
+
+ public override void Close()
+ {
+ this.dr.Close();
+ }
+
+#if ASYNC
+ public override Task CloseAsync()
+ {
+ return dr.CloseAsync();
+ }
+#endif
+
+ protected override void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ dr.Dispose();
+ }
+ }
}
diff --git a/source/Sylvan.Data/ReflectionDataBinder.cs b/source/Sylvan.Data/ReflectionDataBinder.cs
deleted file mode 100644
index 866b6d9..0000000
--- a/source/Sylvan.Data/ReflectionDataBinder.cs
+++ /dev/null
@@ -1,156 +0,0 @@
-
-//#if DEBUG
-
-//using System;
-//using System.Collections.Generic;
-//using System.Collections.ObjectModel;
-//using System.Data;
-//using System.Data.Common;
-//using System.Linq;
-//using System.Reflection;
-//using System.Runtime.Serialization;
-
-//namespace Sylvan.Data
-//{
-// // A DataBinder implementation that uses reflection.
-// // This was created merely to compare performance with CompiledDataBinder.
-// sealed class ReflectionDataBinder : IDataBinder
-// {
-// readonly ReadOnlyCollection schema;
-// readonly DbColumn[] columns;
-
-// readonly object?[] args = new object?[1];
-// readonly Type type;
-
-// readonly Action[] propBinders;
-
-// public ReflectionDataBinder(ReadOnlyCollection schema)
-// {
-// this.type = typeof(T);
-// this.schema = schema;
-
-// this.columns = schema.ToArray();
-
-// var ordinalMap =
-// schema
-// .Where(c => !string.IsNullOrEmpty(c.ColumnName))
-// .Select((c, i) => new { Column = c, Idx = c.ColumnOrdinal ?? throw new ArgumentException() })
-// .ToDictionary(p => p.Column.ColumnName, p => new { p.Column, p.Idx });
-
-// DbColumn? GetCol(int? idx, string? name)
-// {
-// if (!string.IsNullOrEmpty(name))
-// {
-// // interesting that this needs to be annotated with not-null
-// if (ordinalMap!.TryGetValue(name!, out var c))
-// {
-// return c.Column;
-// }
-// }
-// if (idx != null)
-// {
-// return schema[idx.Value];
-// }
-// return null;
-// }
-
-// var propBinderList = new List>();
-
-// foreach (var property in type.GetProperties(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance))
-// {
-// args[0] = null;
-
-// var dataMemberAttr = property.GetCustomAttribute();
-// var columnOrdinal = dataMemberAttr?.Order;
-// var columnName = dataMemberAttr?.Name ?? property.Name;
-
-// var setter = property.GetSetMethod(true)!;
-
-// var paramType = setter.GetParameters()[0].ParameterType;
-
-// var col = GetCol(columnOrdinal, columnName);
-// if (col == null)
-// {
-// // TODO: potentially add an argument to throw if there is an unbound property?
-// continue;
-// }
-
-// var ordinal = col.ColumnOrdinal ?? columnOrdinal ?? -1;
-
-// if (ordinal < 0)
-// {
-// // this means the column didn't know it's own ordinal, and neither did the property.
-// continue;
-// }
-
-// var type = col.DataType;
-// var typeCode = Type.GetTypeCode(type);
-// Func selector;
-
-// switch (typeCode)
-// {
-// case TypeCode.Int32:
-// selector = r => r.GetInt32(ordinal);
-// break;
-// case TypeCode.DateTime:
-// selector = r => r.GetDateTime(ordinal);
-// break;
-// case TypeCode.String:
-// selector = r => r.GetString(ordinal);
-// break;
-// case TypeCode.Double:
-// selector = r => r.GetDouble(ordinal);
-// break;
-// default:
-// if (col.DataType == typeof(Guid))
-// {
-// selector = r => r.GetGuid(ordinal);
-// break;
-// }
-// continue;
-// }
-
-// Action? propBinder = null;
-// if (col.AllowDBNull != false)
-// {
-// propBinder = (r, i) =>
-// {
-// if (r.IsDBNull(ordinal) == false)
-// {
-// var val = selector(r);
-// args[0] = val;
-// setter.Invoke(i, args);
-// }
-// };
-// }
-// else
-// {
-// propBinder = (r, i) =>
-// {
-// var val = selector(r);
-// args[0] = val;
-// setter.Invoke(i, args);
-// };
-// }
-// propBinderList.Add(propBinder);
-// }
-// this.propBinders = propBinderList.ToArray();
-// }
-
-// public void Bind(DbDataReader record, object item)
-// {
-// throw new NotImplementedException();
-// }
-
-// void IDataBinder.Bind(DbDataReader record, T item)
-// {
-// foreach (var pb in propBinders)
-// {
-// pb(record, item);
-// }
-// }
-// }
-//}
-
-
-//#endif
\ No newline at end of file