From 12b8941fe32539070f4b4cc9b8e0780b6f307e52 Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Sun, 5 Jan 2025 20:37:54 -0700 Subject: [PATCH] BREAKING: Use BOM-less UTF-8 encoding for writes, #1027 (#1075) * SWEEP: Use BOM-less UTF-8 encoding for writes, #1027 * Remove OfflineSorter.DEFAULT_ENCODING field and replace with IOUtils.CHARSET_UTF_8 * Rename IOUtils.CHARSET_UTF_8 to ENCODING_UTF_8_NO_BOM --- .../ByTask/Tasks/WriteEnwikiLineDocTask.cs | 7 ++-- .../ByTask/Tasks/WriteLineDocTask.cs | 9 ++--- .../Quality/Trec/QueryDriver.cs | 2 +- .../Utils/ExtractReuters.cs | 5 +-- .../Utils/ExtractWikipedia.cs | 3 +- .../Util/Fst/FSTTester.cs | 22 ++++++------- src/Lucene.Net.TestFramework/Util/TestUtil.cs | 4 +-- .../Analysis/Hunspell/Test64kAffixes.cs | 33 ++++++++++--------- .../Util/TestFilesystemResourceLoader.cs | 5 +-- .../ByTask/Feeds/DocMakerTest.cs | 3 +- .../ByTask/Feeds/LineDocSourceTest.cs | 13 ++++---- .../ByTask/Utils/StreamUtilsTest.cs | 19 ++++++----- src/Lucene.Net.Tests.Demo/TestDemo.cs | 6 ++-- .../Taxonomy/TestTaxonomyFacetCounts.cs | 2 +- src/Lucene.Net.Tests/Index/TestCheckIndex.cs | 3 +- .../Index/TestDocInverterPerFieldErrorInfo.cs | 5 +-- .../Index/TestIndexWriterDelete.cs | 4 +-- src/Lucene.Net.Tests/Index/TestPayloads.cs | 6 ++-- .../Search/Spans/TestBasics.cs | 22 ++++++------- src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs | 3 +- .../Util/TestOfflineSorter.cs | 4 +-- .../CompressingStoredFieldsReader.cs | 2 +- .../Lucene3x/Lucene3xStoredFieldsReader.cs | 2 +- .../Lucene40/Lucene40StoredFieldsReader.cs | 2 +- src/Lucene.Net/Index/Term.cs | 4 ++- src/Lucene.Net/Support/StandardCharsets.cs | 6 +++- src/Lucene.Net/Util/IOUtils.cs | 11 +++++-- src/Lucene.Net/Util/OfflineSorter.cs | 28 +++++++--------- 28 files changed, 126 insertions(+), 109 deletions(-) diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs index 7e61359b05..a12280f212 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs @@ -2,6 +2,7 @@ using Lucene.Net.Benchmarks.ByTask.Utils; using Lucene.Net.Documents; using Lucene.Net.Index; +using Lucene.Net.Support; using System; using System.IO; using System.Text; @@ -26,9 +27,9 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks */ /// - /// A which for Wikipedia input, will write category pages + /// A which for Wikipedia input, will write category pages /// to another file, while remaining pages will be written to the original file. - /// The categories file is derived from the original file, by adding a prefix "categories-". + /// The categories file is derived from the original file, by adding a prefix "categories-". /// public class WriteEnwikiLineDocTask : WriteLineDocTask { @@ -38,7 +39,7 @@ public WriteEnwikiLineDocTask(PerfRunData runData) : base(runData) { Stream @out = StreamUtils.GetOutputStream(CategoriesLineFile(new FileInfo(m_fname))); - categoryLineFileOut = new StreamWriter(@out, Encoding.UTF8); + categoryLineFileOut = new StreamWriter(@out, StandardCharsets.UTF_8); WriteHeader(categoryLineFileOut); } diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs index d724cfc516..ddb8727970 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs @@ -3,6 +3,7 @@ using Lucene.Net.Benchmarks.ByTask.Utils; using Lucene.Net.Documents; using Lucene.Net.Index; +using Lucene.Net.Support; using Lucene.Net.Support.Threading; using Lucene.Net.Util; using System; @@ -49,8 +50,8 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks /// line.file.outthe name of the file to write the output to. That parameter is mandatory. NOTE: the file is re-created. /// line.fieldswhich fields should be written in each line. (optional, default: ). /// sufficient.fields - /// list of field names, separated by comma, which, - /// if all of them are missing, the document will be skipped. For example, to require + /// list of field names, separated by comma, which, + /// if all of them are missing, the document will be skipped. For example, to require /// that at least one of f1,f2 is not empty, specify: "f1,f2" in this field. To specify /// that no field is required, i.e. that even empty docs should be emitted, specify "," /// (optional, default: ). @@ -112,10 +113,10 @@ public WriteLineDocTask(PerfRunData runData, bool performWriteHeader) throw new ArgumentException("line.file.out must be set"); } Stream @out = StreamUtils.GetOutputStream(new FileInfo(m_fname)); - m_lineFileOut = new StreamWriter(@out, Encoding.UTF8); + m_lineFileOut = new StreamWriter(@out, StandardCharsets.UTF_8); docMaker = runData.DocMaker; - // init fields + // init fields string f2r = config.Get("line.fields", null); if (f2r is null) { diff --git a/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs index b75eb55b50..d08dab75a5 100644 --- a/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs +++ b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs @@ -71,7 +71,7 @@ public static void Main(string[] args) FileInfo topicsFile = new FileInfo(args[0]); FileInfo qrelsFile = new FileInfo(args[1]); - SubmissionReport submitLog = new SubmissionReport(new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), Encoding.UTF8 /* huh, no nio.Charset ctor? */), "lucene"); + SubmissionReport submitLog = new SubmissionReport(new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), IOUtils.ENCODING_UTF_8_NO_BOM /* huh, no nio.Charset ctor? */), "lucene"); using Store.FSDirectory dir = Store.FSDirectory.Open(new DirectoryInfo(args[3])); using IndexReader reader = DirectoryReader.Open(dir); string fieldSpec = args.Length == 5 ? args[4] : "T"; // default to Title-only if not specified. diff --git a/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs b/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs index be1f79aa2f..8d04403261 100644 --- a/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs +++ b/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs @@ -1,4 +1,5 @@ -using System; +using Lucene.Net.Support; +using System; using System.IO; using System.Text; using System.Text.RegularExpressions; @@ -118,7 +119,7 @@ protected virtual void ExtractFile(FileInfo sgmFile) string outFile = System.IO.Path.Combine(outputDir.FullName, sgmFile.Name + "-" + (docNumber++) + ".txt"); // System.out.println("Writing " + outFile); - StreamWriter writer = new StreamWriter(new FileStream(outFile, FileMode.Create, FileAccess.Write), Encoding.UTF8); + StreamWriter writer = new StreamWriter(new FileStream(outFile, FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8); writer.Write(@out); writer.Dispose(); outBuffer.Length = 0; diff --git a/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs b/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs index 5504248a3d..7a50f3fb5f 100644 --- a/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs +++ b/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs @@ -1,6 +1,7 @@ using Lucene.Net.Benchmarks.ByTask.Feeds; using Lucene.Net.Benchmarks.ByTask.Utils; using Lucene.Net.Documents; +using Lucene.Net.Support; using System; using System.Collections.Generic; using System.Globalization; @@ -88,7 +89,7 @@ public virtual void Create(string id, string title, string time, string body) try { - using TextWriter writer = new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8); + using TextWriter writer = new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8); writer.Write(contents.ToString()); } catch (Exception ioe) when (ioe.IsIOException()) diff --git a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs index e173c6f72b..cc4917321b 100644 --- a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs +++ b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs @@ -318,16 +318,16 @@ internal virtual FST DoTest(int prune1, int prune2, bool allowRandomSuffixSha bool willRewrite = random.NextBoolean(); - Builder builder = new Builder(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, - prune1, prune2, - prune1 == 0 && prune2 == 0, - allowRandomSuffixSharing ? random.NextBoolean() : true, - allowRandomSuffixSharing ? TestUtil.NextInt32(random, 1, 10) : int.MaxValue, - outputs, - null, - willRewrite, - PackedInt32s.DEFAULT, - true, + Builder builder = new Builder(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, + prune1, prune2, + prune1 == 0 && prune2 == 0, + allowRandomSuffixSharing ? random.NextBoolean() : true, + allowRandomSuffixSharing ? TestUtil.NextInt32(random, 1, 10) : int.MaxValue, + outputs, + null, + willRewrite, + PackedInt32s.DEFAULT, + true, 15); if (LuceneTestCase.Verbose) { @@ -386,7 +386,7 @@ internal virtual FST DoTest(int prune1, int prune2, bool allowRandomSuffixSha if (LuceneTestCase.Verbose && pairs.Count <= 20 && fst != null) { - using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.OpenOrCreate), Encoding.UTF8)) + using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.OpenOrCreate), StandardCharsets.UTF_8)) { Util.ToDot(fst, w, false, false); } diff --git a/src/Lucene.Net.TestFramework/Util/TestUtil.cs b/src/Lucene.Net.TestFramework/Util/TestUtil.cs index d270d069e0..ad25bf4186 100644 --- a/src/Lucene.Net.TestFramework/Util/TestUtil.cs +++ b/src/Lucene.Net.TestFramework/Util/TestUtil.cs @@ -167,7 +167,7 @@ public static CheckIndex.Status CheckIndex(Directory dir, bool crossCheckTermVec ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new CheckIndex(dir); checker.CrossCheckTermVectors = crossCheckTermVectors; - checker.InfoStream = new StreamWriter(bos, Encoding.UTF8); + checker.InfoStream = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM); CheckIndex.Status indexStatus = checker.DoCheckIndex(null); if (indexStatus is null || indexStatus.Clean == false) { @@ -203,7 +203,7 @@ public static void CheckReader(AtomicReader reader, bool crossCheckTermVectors) { // LUCENENET: dispose the StreamWriter and ByteArrayOutputStream when done using ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); - using StreamWriter infoStream = new StreamWriter(bos, Encoding.UTF8, leaveOpen: true, bufferSize: 1024); + using StreamWriter infoStream = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: true, bufferSize: 1024); reader.CheckIntegrity(); CheckIndex.Status.FieldNormStatus fieldNormStatus = Index.CheckIndex.TestFieldNorms(reader, infoStream); diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs index e210ee9fda..ec4824a7c4 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.10.4 using J2N; +using Lucene.Net.Support; using Lucene.Net.Util; using NUnit.Framework; using System.Collections.Generic; @@ -35,25 +36,25 @@ public void Test() FileInfo affix = new FileInfo(System.IO.Path.Combine(tempDir.FullName, "64kaffixes.aff")); FileInfo dict = new FileInfo(System.IO.Path.Combine(tempDir.FullName, "64kaffixes.dic")); - using var affixWriter = new StreamWriter( - new FileStream(affix.FullName, FileMode.OpenOrCreate), Encoding.UTF8); - - // 65k affixes with flag 1, then an affix with flag 2 - affixWriter.Write("SET UTF-8\nFLAG num\nSFX 1 Y 65536\n"); - for (int i = 0; i < 65536; i++) + using (var affixWriter = new StreamWriter( + new FileStream(affix.FullName, FileMode.OpenOrCreate), StandardCharsets.UTF_8)) { - affixWriter.Write("SFX 1 0 " + i.ToHexString() + " .\n"); - } - affixWriter.Write("SFX 2 Y 1\nSFX 2 0 s\n"); - affixWriter.Dispose(); - - using var dictWriter = new StreamWriter( - new FileStream(dict.FullName, FileMode.OpenOrCreate), Encoding.UTF8); + // 65k affixes with flag 1, then an affix with flag 2 + affixWriter.Write("SET UTF-8\nFLAG num\nSFX 1 Y 65536\n"); + for (int i = 0; i < 65536; i++) + { + affixWriter.Write("SFX 1 0 " + i.ToHexString() + " .\n"); + } + affixWriter.Write("SFX 2 Y 1\nSFX 2 0 s\n"); + } // affixWriter.Dispose(); - // drink signed with affix 2 (takes -s) - dictWriter.Write("1\ndrink/2\n"); - dictWriter.Dispose(); + using (var dictWriter = new StreamWriter( + new FileStream(dict.FullName, FileMode.OpenOrCreate), StandardCharsets.UTF_8)) + { + // drink signed with affix 2 (takes -s) + dictWriter.Write("1\ndrink/2\n"); + } // dictWriter.Dispose(); using Stream affStream = new FileStream(affix.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite); using Stream dictStream = new FileStream(dict.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite); diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs index 06db2c28ef..d8166892cc 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using J2N; +using Lucene.Net.Support; using Lucene.Net.Util; using NUnit.Framework; using System; @@ -78,7 +79,7 @@ public virtual void TestBaseDir() DirectoryInfo @base = CreateTempDir("fsResourceLoaderBase"); try { - TextWriter os = new StreamWriter(new FileStream(System.IO.Path.Combine(@base.FullName, "template.txt"), FileMode.Create, FileAccess.Write), Encoding.UTF8); + TextWriter os = new StreamWriter(new FileStream(System.IO.Path.Combine(@base.FullName, "template.txt"), FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8); try { os.Write("foobar\n"); @@ -120,4 +121,4 @@ public virtual void TestDelegation() assertEquals("foobar", WordlistLoader.GetLines(rl.OpenResource("template.txt"), Encoding.UTF8).First()); } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs index d8acb4f66f..acd70715bb 100644 --- a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs @@ -5,6 +5,7 @@ using Lucene.Net.Index; using Lucene.Net.Search; using Lucene.Net.Support; +using Lucene.Net.Util; using NUnit.Framework; using System.Collections.Generic; using System.IO; @@ -170,7 +171,7 @@ public void TestDocMakerLeak() // DocMaker did not close its ContentSource if resetInputs was called twice, // leading to a file handle leak. FileInfo f = new FileInfo(Path.Combine(getWorkDir().FullName, "docMakerLeak.txt")); - TextWriter ps = new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8); + TextWriter ps = new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), IOUtils.ENCODING_UTF_8_NO_BOM); ps.WriteLine("one title\t" + (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + "\tsome content"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results ps.Dispose(); diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs index c9b0cad902..ad909a49c0 100644 --- a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs @@ -4,6 +4,7 @@ using Lucene.Net.Benchmarks.ByTask.Utils; using Lucene.Net.Index; using Lucene.Net.Search; +using Lucene.Net.Support; using Lucene.Net.Util; using NUnit.Framework; using System; @@ -41,7 +42,7 @@ private void createBZ2LineFile(FileInfo file, bool addHeader) { Stream @out = new FileStream(file.FullName, FileMode.Create, FileAccess.Write); @out = new BZip2OutputStream(@out); // csFactory.createCompressorOutputStream("bzip2", @out); - TextWriter writer = new StreamWriter(@out, Encoding.UTF8); + TextWriter writer = new StreamWriter(@out, StandardCharsets.UTF_8); writeDocsToFile(writer, addHeader, null); writer.Dispose(); } @@ -59,7 +60,7 @@ private void writeDocsToFile(TextWriter writer, bool addHeader, IDictionary p = new Dictionary(); foreach (String f in extraFields) { @@ -231,7 +232,7 @@ public void TestInvalidFormat() for (int i = 0; i < testCases.Length; i++) { FileInfo file = new FileInfo(Path.Combine(getWorkDir().FullName, "one-line")); - TextWriter writer = new StreamWriter(new FileStream(file.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8); + TextWriter writer = new StreamWriter(new FileStream(file.FullName, FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8); writer.Write(testCases[i]); writer.WriteLine(); writer.Dispose(); diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs index eb2aaec78f..d8e175f743 100644 --- a/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs @@ -1,4 +1,5 @@ using ICSharpCode.SharpZipLib.BZip2; +using Lucene.Net.Support; using Lucene.Net.Util; using NUnit.Framework; using System; @@ -27,7 +28,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils public class StreamUtilsTest : BenchmarkTestCase { - private static readonly String TEXT = "Some-Text..."; + private static readonly string TEXT = "Some-Text..."; private DirectoryInfo testDir; [Test] @@ -82,15 +83,15 @@ public void TestGetOutputStreamPlain() assertReadText(autoOutFile("TEXT")); } - private FileInfo rawTextFile(String ext) + private FileInfo rawTextFile(string ext) { FileInfo f = new FileInfo(Path.Combine(testDir.FullName, "testfile." + ext)); - using (TextWriter w = new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8)) + using (TextWriter w = new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8)) w.WriteLine(TEXT); return f; } - private FileInfo rawGzipFile(String ext) + private FileInfo rawGzipFile(string ext) { FileInfo f = new FileInfo(Path.Combine(testDir.FullName, "testfile." + ext)); using (Stream os = new GZipStream(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), CompressionMode.Compress)) //new CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.GZIP, new FileOutputStream(f)); @@ -98,7 +99,7 @@ private FileInfo rawGzipFile(String ext) return f; } - private FileInfo rawBzip2File(String ext) + private FileInfo rawBzip2File(string ext) { FileInfo f = new FileInfo(Path.Combine(testDir.FullName, "testfile." + ext)); Stream os = new BZip2OutputStream(new FileStream(f.FullName, FileMode.Create, FileAccess.Write)); // new CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.BZIP2, new FileOutputStream(f)); @@ -106,7 +107,7 @@ private FileInfo rawBzip2File(String ext) return f; } - private FileInfo autoOutFile(String ext) + private FileInfo autoOutFile(string ext) { FileInfo f = new FileInfo(Path.Combine(testDir.FullName, "testfile." + ext)); Stream os = StreamUtils.GetOutputStream(f); @@ -116,7 +117,7 @@ private FileInfo autoOutFile(String ext) private void writeText(Stream os) { - TextWriter w = new StreamWriter(os, Encoding.UTF8); + TextWriter w = new StreamWriter(os, StandardCharsets.UTF_8); w.WriteLine(TEXT); w.Dispose(); } @@ -124,8 +125,8 @@ private void writeText(Stream os) private void assertReadText(FileInfo f) { Stream ir = StreamUtils.GetInputStream(f); - TextReader r = new StreamReader(ir, Encoding.UTF8); - String line = r.ReadLine(); + TextReader r = new StreamReader(ir, StandardCharsets.UTF_8); + string line = r.ReadLine(); assertEquals("Wrong text found in " + f.Name, TEXT, line); r.Dispose(); } diff --git a/src/Lucene.Net.Tests.Demo/TestDemo.cs b/src/Lucene.Net.Tests.Demo/TestDemo.cs index 356405bc0d..16b2379a53 100644 --- a/src/Lucene.Net.Tests.Demo/TestDemo.cs +++ b/src/Lucene.Net.Tests.Demo/TestDemo.cs @@ -37,7 +37,7 @@ private void TestOneSearch(DirectoryInfo indexPath, string query, int expectedHi var fakeSystemOut = new StreamWriter(bytes, Encoding.GetEncoding(0)); Console.SetOut(fakeSystemOut); // LUCENENET specific: changed the arguments to act more like the dotnet.exe commands. - // * only optional arguments start with - + // * only optional arguments start with - // * options have a long form that starts with -- // * required arguments must be supplied without - or -- and in a specific order // Since the demo is meant to be seen by end users, these changes were necessary to make @@ -80,13 +80,13 @@ public void TestIndexSearch() DirectoryInfo indexDir = CreateTempDir("DemoTest"); // LUCENENET specific: changed the arguments to act more like the dotnet.exe commands. - // * only optional arguments start with - + // * only optional arguments start with - // * options have a long form that starts with -- // * required arguments must be supplied without - or -- and in a specific order // Since the demo is meant to be seen by end users, these changes were necessary to make // it consistent with the lucene-cli utility. // NOTE: There is no -create in lucene, but it has the same effect as if --update were left out - IndexFiles.Main(new string[] { indexDir.FullName, filesDir.FullName }); + IndexFiles.Main(new string[] { indexDir.FullName, filesDir.FullName }); //IndexFiles.Main(new string[] { "-create", "-docs", filesDir.FullName, "-index", indexDir.FullName }); TestOneSearch(indexDir, "apache", 3); TestOneSearch(indexDir, "patent", 8); diff --git a/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs b/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs index 206ef6f1b8..02f8764f7b 100644 --- a/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs +++ b/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs @@ -134,7 +134,7 @@ public virtual void TestBasic() string result; using (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { - using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8, 2048, true) { AutoFlush = true }) + using (StreamWriter w = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM, 2048, true) { AutoFlush = true }) { PrintTaxonomyStats.PrintStats(taxoReader, w, true); } diff --git a/src/Lucene.Net.Tests/Index/TestCheckIndex.cs b/src/Lucene.Net.Tests/Index/TestCheckIndex.cs index bb1c3a7ad0..443ea738c0 100644 --- a/src/Lucene.Net.Tests/Index/TestCheckIndex.cs +++ b/src/Lucene.Net.Tests/Index/TestCheckIndex.cs @@ -1,6 +1,7 @@ using Lucene.Net.Documents; using Lucene.Net.Index.Extensions; using Lucene.Net.Support.IO; +using Lucene.Net.Util; using NUnit.Framework; using System.Collections.Generic; using System.IO; @@ -63,7 +64,7 @@ public virtual void TestDeletedDocs() ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); CheckIndex checker = new CheckIndex(dir); - checker.InfoStream = new StreamWriter(bos, Encoding.UTF8); + checker.InfoStream = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM); if (Verbose) { checker.InfoStream = Console.Out; diff --git a/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs b/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs index bca4cfb3a9..130b175f90 100644 --- a/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs +++ b/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs @@ -1,6 +1,7 @@ using Lucene.Net.Analysis; using Lucene.Net.Documents; using Lucene.Net.Support.IO; +using Lucene.Net.Util; using NUnit.Framework; using System; using System.IO; @@ -90,7 +91,7 @@ public virtual void TestInfoStreamGetsFieldName() IndexWriter writer; IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer()); ByteArrayOutputStream infoBytes = new ByteArrayOutputStream(); - StreamWriter infoPrintStream = new StreamWriter(infoBytes, Encoding.UTF8); + StreamWriter infoPrintStream = new StreamWriter(infoBytes, IOUtils.ENCODING_UTF_8_NO_BOM); TextWriterInfoStream printStreamInfoStream = new TextWriterInfoStream(infoPrintStream); c.SetInfoStream(printStreamInfoStream); writer = new IndexWriter(dir, c); @@ -119,7 +120,7 @@ public virtual void TestNoExtraNoise() IndexWriter writer; IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer()); ByteArrayOutputStream infoBytes = new ByteArrayOutputStream(); - StreamWriter infoPrintStream = new StreamWriter(infoBytes, Encoding.UTF8); + StreamWriter infoPrintStream = new StreamWriter(infoBytes, IOUtils.ENCODING_UTF_8_NO_BOM); TextWriterInfoStream printStreamInfoStream = new TextWriterInfoStream(infoPrintStream); c.SetInfoStream(printStreamInfoStream); writer = new IndexWriter(dir, c); diff --git a/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs b/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs index a64e9af0dc..525750e988 100644 --- a/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs +++ b/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs @@ -1400,7 +1400,7 @@ public virtual void TestDeletesCheckIndexOutput() ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); //MemoryStream bos = new MemoryStream(1024); CheckIndex checker = new CheckIndex(dir); - checker.InfoStream = new StreamWriter(bos, Encoding.UTF8); + checker.InfoStream = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM); CheckIndex.Status indexStatus = checker.DoCheckIndex(null); Assert.IsTrue(indexStatus.Clean); checker.FlushInfoStream(); @@ -1413,7 +1413,7 @@ public virtual void TestDeletesCheckIndexOutput() w.Dispose(); bos = new ByteArrayOutputStream(1024); - checker.InfoStream = new StreamWriter(bos, Encoding.UTF8); + checker.InfoStream = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM); indexStatus = checker.DoCheckIndex(null); Assert.IsTrue(indexStatus.Clean); checker.FlushInfoStream(); diff --git a/src/Lucene.Net.Tests/Index/TestPayloads.cs b/src/Lucene.Net.Tests/Index/TestPayloads.cs index e35cc6aa5f..1affe6ba57 100644 --- a/src/Lucene.Net.Tests/Index/TestPayloads.cs +++ b/src/Lucene.Net.Tests/Index/TestPayloads.cs @@ -84,7 +84,7 @@ public virtual void TestPayloadFieldBit() // enabled in only some documents d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO)); // only add payload data for field f2 - analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1); + analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 1); writer.AddDocument(d); // flush writer.Dispose(); @@ -106,8 +106,8 @@ public virtual void TestPayloadFieldBit() d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO)); d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO)); // add payload data for field f2 and f3 - analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1); - analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3); + analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 1); + analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 3); writer.AddDocument(d); // force merge diff --git a/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs b/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs index d54678ae93..0e08ed67f5 100644 --- a/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs +++ b/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs @@ -77,7 +77,7 @@ public override bool IncrementToken() if (m_input.IncrementToken()) { #pragma warning disable 612, 618 - payloadAttr.Payload = new BytesRef(("pos: " + pos).GetBytes(IOUtils.CHARSET_UTF_8)); + payloadAttr.Payload = new BytesRef(("pos: " + pos).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); #pragma warning restore 612, 618 pos++; return true; @@ -533,7 +533,7 @@ public virtual void TestSpanPayloadCheck() { SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five")); #pragma warning disable 612, 618 - BytesRef pay = new BytesRef(("pos: " + 5).GetBytes(IOUtils.CHARSET_UTF_8)); + BytesRef pay = new BytesRef(("pos: " + 5).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); #pragma warning restore 612, 618 SpanQuery query = new SpanPayloadCheckQuery(term1, new JCG.List() { pay.Bytes }); CheckHits(query, new int[] { 1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995 }); @@ -549,8 +549,8 @@ public virtual void TestSpanPayloadCheck() clauses[1] = term2; snq = new SpanNearQuery(clauses, 0, true); #pragma warning disable 612, 618 - pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8)); - pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8)); + pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); + pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); #pragma warning restore 612, 618 list = new JCG.List(); list.Add(pay.Bytes); @@ -563,9 +563,9 @@ public virtual void TestSpanPayloadCheck() clauses[2] = new SpanTermQuery(new Term("field", "five")); snq = new SpanNearQuery(clauses, 0, true); #pragma warning disable 612, 618 - pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8)); - pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8)); - BytesRef pay3 = new BytesRef(("pos: " + 2).GetBytes(IOUtils.CHARSET_UTF_8)); + pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); + pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); + BytesRef pay3 = new BytesRef(("pos: " + 2).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); #pragma warning restore 612, 618 list = new JCG.List(); list.Add(pay.Bytes); @@ -597,10 +597,10 @@ public virtual void TestComplexSpanChecks() var payloads = new JCG.List(); #pragma warning disable 612, 618 - BytesRef pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8)); - BytesRef pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8)); - BytesRef pay3 = new BytesRef(("pos: " + 3).GetBytes(IOUtils.CHARSET_UTF_8)); - BytesRef pay4 = new BytesRef(("pos: " + 4).GetBytes(IOUtils.CHARSET_UTF_8)); + BytesRef pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); + BytesRef pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); + BytesRef pay3 = new BytesRef(("pos: " + 3).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); + BytesRef pay4 = new BytesRef(("pos: " + 4).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM)); #pragma warning restore 612, 618 payloads.Add(pay.Bytes); payloads.Add(pay2.Bytes); diff --git a/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs b/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs index f21e68457d..e9de419c30 100644 --- a/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs +++ b/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs @@ -2,6 +2,7 @@ using J2N.Threading.Atomic; using Lucene.Net.Diagnostics; using Lucene.Net.Index.Extensions; +using Lucene.Net.Support; using Lucene.Net.Util.Automaton; using NUnit.Framework; using RandomizedTesting.Generators; @@ -589,7 +590,7 @@ public virtual void Run(int limit, bool verify, bool verifyByOutput) Console.WriteLine(ord + " terms; " + fst.NodeCount + " nodes; " + fst.ArcCount + " arcs; " + fst.ArcWithOutputCount + " arcs w/ output; tot size " + fst.GetSizeInBytes()); if (fst.NodeCount < 100) { - TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.Create), Encoding.UTF8); + TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.Create), StandardCharsets.UTF_8); Util.ToDot(fst, w, false, false); w.Dispose(); Console.WriteLine("Wrote FST to out.dot"); diff --git a/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs b/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs index 3f9ae8929f..19a3cc20d3 100644 --- a/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs +++ b/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs @@ -86,7 +86,7 @@ public virtual void TestEmpty_AsStream() public virtual void TestSingleLine() { #pragma warning disable 612, 618 - CheckSort(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) }); + CheckSort(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM) }); #pragma warning restore 612, 618 } @@ -95,7 +95,7 @@ public virtual void TestSingleLine() public virtual void TestSingleLine_AsStream() { #pragma warning disable 612, 618 - CheckSortAsStream(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) }); + CheckSortAsStream(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM) }); #pragma warning restore 612, 618 } diff --git a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs index 14fd869e2c..16b053f4d2 100644 --- a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs +++ b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs @@ -205,7 +205,7 @@ private static void ReadField(DataInput @in, StoredFieldVisitor visitor, FieldIn data = new byte[length]; @in.ReadBytes(data, 0, length); #pragma warning disable 612, 618 - visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(data)); + visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(data)); #pragma warning restore 612, 618 break; diff --git a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs index 442a08e718..8615bb283a 100644 --- a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs +++ b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs @@ -334,7 +334,7 @@ private void ReadField(StoredFieldVisitor visitor, FieldInfo info, int bits) } else { - visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes)); + visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(bytes)); } } } diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs b/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs index b6736feb28..d21f358714 100644 --- a/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs +++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs @@ -239,7 +239,7 @@ private void ReadField(StoredFieldVisitor visitor, FieldInfo info, int bits) else { #pragma warning disable 612, 618 - visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes)); + visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(bytes)); #pragma warning restore 612, 618 } } diff --git a/src/Lucene.Net/Index/Term.cs b/src/Lucene.Net/Index/Term.cs index 6e245af2a6..38eda37a33 100644 --- a/src/Lucene.Net/Index/Term.cs +++ b/src/Lucene.Net/Index/Term.cs @@ -1,4 +1,5 @@ using J2N.Text; +using Lucene.Net.Support; using System; using System.Text; @@ -90,7 +91,8 @@ public Term(string fld) public static string ToString(BytesRef termText) { // the term might not be text, but usually is. so we make a best effort - Encoding decoder = new UTF8Encoding(false, true); + // LUCENENET TODO: determine if we should use DecoderFallback.ExceptionFallback here + Encoding decoder = StandardCharsets.UTF_8; try { return decoder.GetString(termText.Bytes, termText.Offset, termText.Length); diff --git a/src/Lucene.Net/Support/StandardCharsets.cs b/src/Lucene.Net/Support/StandardCharsets.cs index 781f3b3aea..73ef0f9096 100644 --- a/src/Lucene.Net/Support/StandardCharsets.cs +++ b/src/Lucene.Net/Support/StandardCharsets.cs @@ -22,5 +22,9 @@ namespace Lucene.Net.Support; internal static class StandardCharsets { - public static readonly Encoding UTF_8 = IOUtils.CHARSET_UTF_8; + /// + /// + /// This is a convenience reference to . + /// + public static readonly Encoding UTF_8 = IOUtils.ENCODING_UTF_8_NO_BOM; } diff --git a/src/Lucene.Net/Util/IOUtils.cs b/src/Lucene.Net/Util/IOUtils.cs index dc17cdbdd9..c3141b00da 100644 --- a/src/Lucene.Net/Util/IOUtils.cs +++ b/src/Lucene.Net/Util/IOUtils.cs @@ -45,16 +45,21 @@ public static class IOUtils // LUCENENET specific - made static /// UTF-8 instance to prevent repeated /// lookups and match Java's behavior /// with respect to a lack of a byte-order mark (BOM). + /// + /// It is important to use this encoding over + /// particularly when writing data, to ensure that the BOM is not written. + /// For reading data, either this or can be used, + /// as both will correctly interpret data with or without a BOM. /// - public static readonly Encoding CHARSET_UTF_8 = new UTF8Encoding( + public static readonly Encoding ENCODING_UTF_8_NO_BOM = new UTF8Encoding( encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true); /// /// UTF-8 charset string. - /// Where possible, use instead, + /// Where possible, use instead, /// as using the constant may slow things down. - /// + /// public static readonly string UTF_8 = "UTF-8"; /// diff --git a/src/Lucene.Net/Util/OfflineSorter.cs b/src/Lucene.Net/Util/OfflineSorter.cs index c1114b18cb..968368ca25 100644 --- a/src/Lucene.Net/Util/OfflineSorter.cs +++ b/src/Lucene.Net/Util/OfflineSorter.cs @@ -41,12 +41,6 @@ namespace Lucene.Net.Util /// public sealed class OfflineSorter { - /// - /// The default encoding (UTF-8 without a byte order mark) used by and . - /// This encoding should always be used when calling the constructor overloads that accept or . - /// - public static readonly Encoding DEFAULT_ENCODING = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); - /// /// The recommended buffer size to use on or when creating a /// and . @@ -196,10 +190,10 @@ public SortInfo(OfflineSorter offlineSorter) /// public override string ToString() { - return string.Format(CultureInfo.InvariantCulture, - "time={0:0.00} sec. total ({1:0.00} reading, {2:0.00} sorting, {3:0.00} merging), lines={4}, temp files={5}, merges={6}, soft ram limit={7:0.00} MB", - TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime / 1000.0d, MergeTime / 1000.0d, - Lines, TempMergeFiles, MergeRounds, + return string.Format(CultureInfo.InvariantCulture, + "time={0:0.00} sec. total ({1:0.00} reading, {2:0.00} sorting, {3:0.00} merging), lines={4}, temp files={5}, merges={6}, soft ram limit={7:0.00} MB", + TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime / 1000.0d, MergeTime / 1000.0d, + Lines, TempMergeFiles, MergeRounds, (double)BufferSize / MB); } } @@ -606,7 +600,7 @@ public class ByteSequencesWriter : IDisposable /// Constructs a to the provided . /// is null. public ByteSequencesWriter(FileStream stream) - : this(new BinaryWriter(stream, DEFAULT_ENCODING, leaveOpen: false)) + : this(new BinaryWriter(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: false)) { } @@ -614,7 +608,7 @@ public ByteSequencesWriter(FileStream stream) /// Constructs a to the provided . /// is null. public ByteSequencesWriter(FileStream stream, bool leaveOpen) - : this(new BinaryWriter(stream, DEFAULT_ENCODING, leaveOpen)) + : this(new BinaryWriter(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen)) { } @@ -638,7 +632,7 @@ public ByteSequencesWriter(FileInfo file) /// /// Constructs a to the provided . /// NOTE: To match Lucene, pass the 's constructor the - /// , which is UTF-8 without a byte order mark. + /// , which is UTF-8 without a byte order mark. /// /// is null. public ByteSequencesWriter(BinaryWriter writer) @@ -728,7 +722,7 @@ public class ByteSequencesReader : IDisposable /// Constructs a from the provided . /// is null. public ByteSequencesReader(FileStream stream) - : this(new BinaryReader(stream, DEFAULT_ENCODING, leaveOpen: false)) + : this(new BinaryReader(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: false)) { } @@ -736,7 +730,7 @@ public ByteSequencesReader(FileStream stream) /// Constructs a from the provided . /// is null. public ByteSequencesReader(FileStream stream, bool leaveOpen) - : this(new BinaryReader(stream, DEFAULT_ENCODING, leaveOpen)) + : this(new BinaryReader(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen)) { } @@ -762,7 +756,7 @@ public ByteSequencesReader(FileInfo file) /// Constructs a from the provided . /// /// NOTE: To match Lucene, pass the 's constructor the - /// , which is UTF-8 without a byte order mark. + /// , which is UTF-8 without a byte order mark. /// /// is null. public ByteSequencesReader(BinaryReader reader) @@ -848,4 +842,4 @@ protected virtual void Dispose(bool disposing) // LUCENENET specific - implement /// Returns the comparer in use to sort entries public IComparer Comparer => comparer; } -} \ No newline at end of file +}