From 6868c38bf18d80f2696343905c7a5aced4f16a48 Mon Sep 17 00:00:00 2001
From: Shad Storhaug
Date: Sat, 20 Jan 2024 20:52:51 +0700
Subject: [PATCH] BUG:
 Lucene.Net.Analysis.Miscellaneous.TestStemmerOverrideFilter::TestRandomRealisticWhiteSpace():
 Ported patch from
 https://github.com/apache/lucene/commit/bce10efeb40c11271cb398c37b859408818b8a00
 to fix rare random test failures. Fixes #896.

---
Review note: the lower-cased de-duplication key is built with
`new string(buffer)`. Calling ToString() on a char[] returns the type name
"System.Char[]" rather than the characters, which would give every input the
same key whenever ignoreCase is true and leave only the first term in the map.

 .../TestStemmerOverrideFilter.cs              | 34 +++++++++++++++++--
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestStemmerOverrideFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestStemmerOverrideFilter.cs
index 699d86fa82..fc5e47f825 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestStemmerOverrideFilter.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestStemmerOverrideFilter.cs
@@ -4,6 +4,7 @@
 using J2N.Text;
 using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.En;
+using Lucene.Net.Analysis.Util;
 using Lucene.Net.Attributes;
 using Lucene.Net.Util;
 using NUnit.Framework;
@@ -99,7 +100,14 @@ public virtual void TestNoOverrides()
         public virtual void TestRandomRealisticWhiteSpace()
         {
             IDictionary<string, string> map = new Dictionary<string, string>();
+            // LUCENENET: Ported the patch from https://github.com/apache/lucene/commit/bce10efeb40c11271cb398c37b859408818b8a00
+            // so we don't have random failures.
+            ISet<string> seen = new HashSet<string>();
             int numTerms = AtLeast(50);
+            bool ignoreCase = Random.nextBoolean();
+
+            CharacterUtils charUtils = CharacterUtils.GetInstance(TEST_VERSION_CURRENT);
+
             for (int i = 0; i < numTerms; i++)
             {
                 string randomRealisticUnicodeString = TestUtil.RandomRealisticUnicodeString(Random);
@@ -116,16 +124,36 @@ public virtual void TestRandomRealisticWhiteSpace()
                 }
                 if (sb.Length > 0)
                 {
-                    string value = TestUtil.RandomSimpleString(Random);
-                    map[sb.ToString()] = value.Length == 0 ? "a" : value;
+                    string inputValue = sb.ToString();
+                    // Make sure we don't try to add two inputs that vary only by case:
+                    string seenInputValue;
+                    if (ignoreCase)
+                    {
+                        // TODO: can we simply use inputValue.toLowerCase(Locale.ROOT)???
+                        char[] buffer = inputValue.ToCharArray();
+                        charUtils.ToLower(buffer, 0, buffer.Length);
+                        seenInputValue = new string(buffer); // not buffer.ToString(): on a char[] that yields "System.Char[]"
+                    }
+                    else
+                    {
+                        seenInputValue = inputValue;
+                    }
+
+                    if (seen.Contains(seenInputValue) == false)
+                    {
+                        seen.Add(seenInputValue);
+                        string value = TestUtil.RandomSimpleString(Random);
+                        map[inputValue] =
+                            value == string.Empty ? "a" : value;
+                    }
 
                 }
             }
             if (map.Count == 0)
             {
                 map["booked"] = "books";
             }
-            StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(Random.nextBoolean());
+            StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
             IDictionary<string, string> entrySet = map;
             StringBuilder input = new StringBuilder();
             IList<string> output = new JCG.List<string>();