From 5b7d0ac1ab4e6763f9fe11a6cf2582843b2b5c8c Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Tue, 3 Dec 2024 09:23:15 -0700 Subject: [PATCH] IAttribute and ICharTermAttribute method changes, #1038 (#1049) * Remove SetLength and SetEmpty from ICharTermAttribute, add extension methods * Remove CopyTo from IAttribute, add Clear to ICharTermAttribute --- .../Analysis/Br/BrazilianStemFilter.cs | 5 +- .../Compound/CompoundWordTokenFilterBase.cs | 1 + .../Analysis/De/GermanStemFilter.cs | 5 +- .../Analysis/En/KStemFilter.cs | 9 +- .../Analysis/Fr/FrenchStemFilter.cs | 9 +- .../Analysis/Hunspell/HunspellStemFilter.cs | 13 +- .../Analysis/Miscellaneous/PatternAnalyzer.cs | 31 ++--- .../Analysis/Miscellaneous/TrimFilter.cs | 1 + .../Miscellaneous/TypeAsSynonymFilter.cs | 1 + .../NGram/Lucene43EdgeNGramTokenizer.cs | 3 +- .../Analysis/NGram/Lucene43NGramTokenizer.cs | 3 +- .../Analysis/NGram/NGramTokenFilter.cs | 8 +- .../Analysis/Nl/DutchStemFilter.cs | 13 +- .../Analysis/Pattern/PatternReplaceFilter.cs | 5 +- .../Analysis/Pattern/PatternTokenizer.cs | 9 +- .../Analysis/Shingle/ShingleFilter.cs | 1 + .../Analysis/Wikipedia/WikipediaTokenizer.cs | 3 +- .../Analysis/Icu/ICUNormalizer2Filter.cs | 3 +- .../Collation/ICUCollationKeyFilter.cs | 8 +- .../JapaneseBaseFormFilter.cs | 1 + .../JapaneseKatakanaStemFilter.cs | 2 +- .../JapaneseReadingFormFilter.cs | 1 + .../Morfologik/MorfologikFilter.cs | 1 + .../OpenNLPLemmatizerFilter.cs | 1 + .../BeiderMorseFilter.cs | 7 +- .../DoubleMetaphoneFilter.cs | 1 + .../PhoneticFilter.cs | 1 + .../SentenceTokenizer.cs | 1 + .../Stempel/StempelFilter.cs | 3 +- .../Directory/DirectoryTaxonomyWriter.cs | 5 +- .../Highlight/TokenGroup.cs | 1 + .../Highlight/TokenSources.cs | 25 ++-- .../TokenStreamFromTermPositionVector.cs | 1 + src/Lucene.Net.Memory/MemoryIndex.cs | 127 +++++++++--------- .../Analysis/BaseTokenStreamTestCase.cs | 9 +- .../Analysis/CannedTokenStream.cs | 4 +- .../Index/BaseTermVectorsFormatTestCase.cs | 3 +- .../Analysis/Core/TestStopFilter.cs | 3 +- .../Miscellaneous/TestKeywordMarkerFilter.cs | 3 +- .../TestRemoveDuplicatesTokenFilter.cs | 3 +- .../Analysis/Miscellaneous/TestTrimFilter.cs | 5 +- .../TestTypeAsSynonymFilterFactory.cs | 2 +- .../Analysis/Pattern/TestPatternTokenizer.cs | 5 +- .../Analysis/Position/PositionFilterTest.cs | 9 +- .../Analysis/Snowball/TestSnowball.cs | 5 +- .../Analysis/Synonym/TestSlowSynonymFilter.cs | 9 +- .../Highlight/HighlighterPhraseTest.cs | 1 + .../Highlight/HighlighterTest.cs | 18 +-- .../Highlight/TokenSourcesTest.cs | 1 + .../VectorHighlight/AbstractTestCase.cs | 1 + .../Index/Memory/MemoryIndexTest.cs | 6 +- .../Classic/TestMultiAnalyzer.cs | 5 +- .../Classic/TestMultiPhraseQueryParsing.cs | 2 +- .../Classic/TestQueryParser.cs | 3 +- .../Precedence/TestPrecedenceQueryParser.cs | 1 + .../Standard/TestMultiAnalyzerQPHelper.cs | 7 +- .../Flexible/Standard/TestQPHelper.cs | 1 + .../Util/QueryParserTestBase.cs | 1 + .../Analysis/TrivialLookaheadFilter.cs | 2 +- src/Lucene.Net.Tests/Analysis/TestToken.cs | 1 + .../TestCharTermAttributeImpl.cs | 1 + .../Analysis/TrivialLookaheadFilter.cs | 4 +- .../Index/TestDocumentWriter.cs | 1 + src/Lucene.Net.Tests/Index/TestIndexWriter.cs | 2 +- .../Index/TestSameTokenSamePosition.cs | 1 + .../Util/TestAttributeSource.cs | 1 + src/Lucene.Net.Tests/Util/TestQueryBuilder.cs | 1 + src/Lucene.Net/Analysis/Token.cs | 16 +-- .../TokenAttributes/CharTermAttribute.cs | 48 ++++--- .../TokenAttributes/CharTermAttributeImpl.cs | 47 ++----- .../Extensions/CharTermAttributeExtensions.cs | 75 +++++++++++ src/Lucene.Net/Util/Attribute.cs | 3 +- src/Lucene.Net/Util/AttributeImpl.cs | 15 ++- 73 files changed, 364 insertions(+), 270 deletions(-) create mode 100644 src/Lucene.Net/Support/Analysis/TokenAttributes/Extensions/CharTermAttributeExtensions.cs diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs index 30e8e39b93..790000bf37 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System; namespace Lucene.Net.Analysis.Br @@ -41,7 +42,7 @@ public sealed class BrazilianStemFilter : TokenFilter private readonly IKeywordAttribute keywordAttr; /// - /// Creates a new + /// Creates a new /// /// the source public BrazilianStemFilter(TokenStream @in) @@ -74,4 +75,4 @@ public override bool IncrementToken() } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs index def2884f05..ee9c9d6526 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs @@ -1,6 +1,7 @@ // Lucene version compatibility level 4.8.1 using J2N.Text; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Analysis.Util; using Lucene.Net.Diagnostics; using Lucene.Net.Util; diff --git a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilter.cs index 8223f9c510..e2fbba8fd0 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/De/GermanStemFilter.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System; namespace Lucene.Net.Analysis.De @@ -22,7 +23,7 @@ namespace Lucene.Net.Analysis.De */ /// - /// A that stems German words. + /// A that stems German words. /// /// It supports a table of words that should /// not be stemmed at all. The stemmer used can be changed at runtime after the @@ -93,4 +94,4 @@ public GermanStemmer Stemmer } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs index 64cb187acb..86b70480a5 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System.IO; namespace Lucene.Net.Analysis.En @@ -30,13 +31,13 @@ namespace Lucene.Net.Analysis.En /// Conference on Research and Development in Information Retrieval, 191-203, 1993). /// /// All terms must already be lowercased for this filter to work correctly. - /// + /// /// /// Note: This filter is aware of the . To prevent /// certain terms from being passed to the stemmer /// should be set to true /// in a previous . - /// + /// /// Note: For including the original term as well as the stemmed version, see /// /// @@ -47,7 +48,7 @@ public sealed class KStemFilter : TokenFilter private readonly ICharTermAttribute termAttribute; private readonly IKeywordAttribute keywordAtt; - public KStemFilter(TokenStream @in) + public KStemFilter(TokenStream @in) : base(@in) { termAttribute = AddAttribute(); @@ -75,4 +76,4 @@ public override bool IncrementToken() return true; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemFilter.cs index 55ae3f46b0..54fa1b0bca 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Fr/FrenchStemFilter.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System; namespace Lucene.Net.Analysis.Fr @@ -22,7 +23,7 @@ namespace Lucene.Net.Analysis.Fr */ /// - /// A that stems french words. + /// A that stems french words. /// /// The used stemmer can be changed at runtime after the /// filter object is created (as long as it is a ). @@ -33,9 +34,9 @@ namespace Lucene.Net.Analysis.Fr /// the before this . /// /// - /// @deprecated (3.1) Use with + /// @deprecated (3.1) Use with /// instead, which has the - /// same functionality. This filter will be removed in Lucene 5.0 + /// same functionality. This filter will be removed in Lucene 5.0 [Obsolete("(3.1) Use SnowballFilter with FrenchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")] public sealed class FrenchStemFilter : TokenFilter { @@ -93,4 +94,4 @@ public FrenchStemmer Stemmer } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs index 23cba01175..0c7579fc18 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.10.4 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Util; using System.Collections.Generic; using JCG = J2N.Collections.Generic; @@ -24,20 +25,20 @@ namespace Lucene.Net.Analysis.Hunspell */ /// - /// that uses hunspell affix rules and words to stem tokens. - /// Since hunspell supports a word having multiple stems, this filter can emit + /// that uses hunspell affix rules and words to stem tokens. + /// Since hunspell supports a word having multiple stems, this filter can emit /// multiple tokens for each consumed token - /// + /// /// /// Note: This filter is aware of the . To prevent /// certain terms from being passed to the stemmer /// should be set to true /// in a previous . - /// + /// /// Note: For including the original term as well as the stemmed version, see /// /// - /// + /// /// @lucene.experimental /// public sealed class HunspellStemFilter : TokenFilter @@ -160,4 +161,4 @@ public override void Reset() } }); } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs index d19da155d9..8ae425af4e 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs @@ -1,6 +1,7 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.Core; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Analysis.Util; using Lucene.Net.Support; using Lucene.Net.Util; @@ -38,26 +39,26 @@ namespace Lucene.Net.Analysis.Miscellaneous /// into a single efficient /// multi-purpose class. /// - /// If you are unsure how exactly a regular expression should look like, consider + /// If you are unsure how exactly a regular expression should look like, consider /// prototyping by simply trying various expressions on some test texts via - /// . Once you are satisfied, give that regex to - /// . Also see . Once you are satisfied, give that regex to + /// . Also see Regular Expression Tutorial. /// /// - /// This class can be considerably faster than the "normal" Lucene tokenizers. + /// This class can be considerably faster than the "normal" Lucene tokenizers. /// It can also serve as a building block in a compound Lucene - /// chain. For example as in this + /// chain. For example as in this /// stemming example: /// /// PatternAnalyzer pat = ... /// TokenStream tokenStream = new SnowballFilter( - /// pat.GetTokenStream("content", "James is running round in the woods"), + /// pat.GetTokenStream("content", "James is running round in the woods"), /// "English")); /// /// /// - /// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead. + /// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead. [Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")] public sealed class PatternAnalyzer : Analyzer { @@ -196,8 +197,8 @@ public PatternAnalyzer(LuceneVersion matchVersion, Regex pattern, bool toLowerCa /// a new token stream public TokenStreamComponents CreateComponents(string fieldName, TextReader reader, string text) { - // Ideally the Analyzer superclass should have a method with the same signature, - // with a default impl that simply delegates to the StringReader flavour. + // Ideally the Analyzer superclass should have a method with the same signature, + // with a default impl that simply delegates to the StringReader flavour. if (reader is null) { reader = new FastStringReader(text); @@ -448,9 +449,9 @@ public override void Reset() // LUCENENET: Since we need to "reset" the Match // object, we also need an "isReset" flag to indicate - // whether we are at the head of the match and to - // take the appropriate measures to ensure we don't - // overwrite our matcher variable with + // whether we are at the head of the match and to + // take the appropriate measures to ensure we don't + // overwrite our matcher variable with // matcher = matcher.NextMatch(); // before it is time. A string could potentially // match on index 0, so we need another variable to @@ -528,10 +529,10 @@ public override bool IncrementToken() { text = text.ToLower(); // LUCENENET: Since this class is obsolete, we aren't going to bother with passing culture in the constructor. } - // if (toLowerCase) { + // if (toLowerCase) { //// use next line once JDK 1.5 String.toLowerCase() performance regression is fixed //// see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809 - // text = s.substring(start, i).toLowerCase(); + // text = s.substring(start, i).toLowerCase(); //// char[] chars = new char[i-start]; //// for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j)); //// text = new String(chars); @@ -607,4 +608,4 @@ internal FastStringReader(string s) internal string String => s; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs index b90e137f67..cef8a525f1 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TrimFilter.cs @@ -1,6 +1,7 @@ // Lucene version compatibility level 4.8.1 using System; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Util; namespace Lucene.Net.Analysis.Miscellaneous diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs index b72ec02e0d..d9624a31b0 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TypeAsSynonymFilter.cs @@ -1,6 +1,7 @@ // Lucene version compatibility level 8.2.0 // LUCENENET NOTE: Ported because Lucene.Net.Analysis.OpenNLP requires this to be useful. using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Util; #nullable enable diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43EdgeNGramTokenizer.cs index 11f4a5c936..aaf7aa90d2 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43EdgeNGramTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43EdgeNGramTokenizer.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Util; using System; using System.IO; @@ -305,4 +306,4 @@ internal static bool IsDefined(this Lucene43EdgeNGramTokenizer.Side side) #pragma warning restore CS0612 // Type or member is obsolete } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43NGramTokenizer.cs index 2c10821d80..2dc21781ec 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43NGramTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/Lucene43NGramTokenizer.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System; using System.IO; @@ -171,4 +172,4 @@ public override void Reset() pos = 0; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs index e4749a4896..ec8824af5c 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/NGram/NGramTokenFilter.cs @@ -121,9 +121,6 @@ public int PositionIncrement get => 0; set => _ = value; } - - // LUCENENET specific - The interface requires this to be implemented, since we added it to avoid casts. - public void CopyTo(IAttribute target) => _ = target; } private sealed class PositionLengthAttributeAnonymousClass : IPositionLengthAttribute @@ -133,9 +130,6 @@ public int PositionLength get => 0; set => _ = value; } - - // LUCENENET specific - The interface requires this to be implemented, since we added it to avoid casts. - public void CopyTo(IAttribute target) => _ = target; } /// @@ -233,4 +227,4 @@ public override void Reset() curTermBuffer = null; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs index 27404c0725..8435427cd6 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Analysis.Util; using System; using System.Collections.Generic; @@ -24,7 +25,7 @@ namespace Lucene.Net.Analysis.Nl */ /// - /// A that stems Dutch words. + /// A that stems Dutch words. /// /// It supports a table of words that should /// not be stemmed at all. The stemmer used can be changed at runtime after the @@ -34,12 +35,12 @@ namespace Lucene.Net.Analysis.Nl /// To prevent terms from being stemmed use an instance of /// or a custom that sets /// the before this . - /// + /// /// /// - /// @deprecated (3.1) Use with + /// @deprecated (3.1) Use with /// instead, which has the - /// same functionality. This filter will be removed in Lucene 5.0 + /// same functionality. This filter will be removed in Lucene 5.0 [Obsolete("(3.1) Use Snowball.SnowballFilter with Tartarus.Snowball.Ext.DutchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")] public sealed class DutchStemFilter : TokenFilter { @@ -61,7 +62,7 @@ public DutchStemFilter(TokenStream @in) /// Input /// Dictionary of word stem pairs, that overrule the algorithm - public DutchStemFilter(TokenStream @in, IDictionary stemdictionary) + public DutchStemFilter(TokenStream @in, IDictionary stemdictionary) : this(@in) { stemmer.StemDictionary = stemdictionary; @@ -132,4 +133,4 @@ public CharArrayDictionary StemDictionary } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilter.cs index 5327dbc24d..a139077d1f 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceFilter.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System.Text.RegularExpressions; namespace Lucene.Net.Analysis.Pattern @@ -24,7 +25,7 @@ namespace Lucene.Net.Analysis.Pattern /// /// A TokenFilter which applies a to each token in the stream, /// replacing match occurances with the specified replacement string. - /// + /// /// /// Note: Depending on the input and the pattern used and the input /// , this may produce s whose text is the empty @@ -73,4 +74,4 @@ public override bool IncrementToken() return true; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizer.cs index 1f95a70642..764052e777 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternTokenizer.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System; using System.IO; using System.Text; @@ -185,9 +186,9 @@ public override void Reset() // LUCENENET: Since we need to "reset" the Match // object, we also need an "isReset" flag to indicate - // whether we are at the head of the match and to - // take the appropriate measures to ensure we don't - // overwrite our matcher variable with + // whether we are at the head of the match and to + // take the appropriate measures to ensure we don't + // overwrite our matcher variable with // matcher = matcher.NextMatch(); // before it is time. A string could potentially // match on index 0, so we need another variable to @@ -211,4 +212,4 @@ private void FillBuffer(StringBuilder sb, TextReader input) } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs index f36f9b8a74..7affe08642 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Util; using Lucene.Net.Support; using System; diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs index a5e0b04458..82f7ac9186 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Support; using Lucene.Net.Util; using System; @@ -347,4 +348,4 @@ public override void End() this.offsetAtt.SetOffset(finalOffset, finalOffset); } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2Filter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2Filter.cs index b081c51ccc..2d69ea6992 100644 --- a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2Filter.cs +++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUNormalizer2Filter.cs @@ -1,6 +1,7 @@ // Lucene version compatibility level 7.1.0 using ICU4N.Text; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Analysis.Util; using Lucene.Net.Support; using System; @@ -45,7 +46,7 @@ namespace Lucene.Net.Analysis.Icu /// "Μάϊος" and "ΜΆΪΟΣ" will match correctly. /// /// - /// The normalization will standardizes different forms of the same + /// The normalization will standardizes different forms of the same /// character in Unicode. For example, CJK full-width numbers will be standardized /// to their ASCII forms. /// diff --git a/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilter.cs b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilter.cs index 527df4402c..d65d15d5ef 100644 --- a/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilter.cs +++ b/src/Lucene.Net.Analysis.ICU/Collation/ICUCollationKeyFilter.cs @@ -33,7 +33,7 @@ namespace Lucene.Net.Collation /// /// WARNING: Make sure you use exactly the same at /// index and query time -- CollationKeys are only comparable when produced by - /// the same . s are + /// the same . s are /// independently versioned, so it is safe to search against stored /// s if the following are exactly the same (best practice is /// to store this information with the index and check that they remain the @@ -44,8 +44,8 @@ namespace Lucene.Net.Collation /// /// /// s generated by ICU Collators are not compatible with those - /// generated by java.text.Collators. Specifically, if you use - /// to generate index terms, do not use + /// generated by java.text.Collators. Specifically, if you use + /// to generate index terms, do not use /// CollationKeyAnalyzer on the query side, or vice versa. /// /// ICUCollationKeyAnalyzer is significantly faster and generates significantly @@ -89,7 +89,7 @@ public override bool IncrementToken() { termAtt.ResizeBuffer(encodedLength); } - termAtt.SetLength(encodedLength); + termAtt.Length = encodedLength; IndexableBinaryStringTools.Encode(reusableKey.Bytes, 0, reusableKey.Length, termAtt.Buffer, 0, encodedLength); return true; diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilter.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilter.cs index 191de77ed5..fdf2ace7fa 100644 --- a/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilter.cs +++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseBaseFormFilter.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis.Ja.TokenAttributes; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; namespace Lucene.Net.Analysis.Ja { diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilter.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilter.cs index b250fdbfef..54ad4b2393 100644 --- a/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilter.cs +++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseKatakanaStemFilter.cs @@ -65,7 +65,7 @@ public override bool IncrementToken() { if (!keywordAttr.IsKeyword) { - termAttr.SetLength(Stem(termAttr.Buffer, termAttr.Length)); + termAttr.Length = Stem(termAttr.Buffer, termAttr.Length); } return true; } diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilter.cs b/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilter.cs index 57ee016d9e..6d4751757f 100644 --- a/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilter.cs +++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseReadingFormFilter.cs @@ -1,6 +1,7 @@ using Lucene.Net.Analysis.Ja.TokenAttributes; using Lucene.Net.Analysis.Ja.Util; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System.Text; namespace Lucene.Net.Analysis.Ja diff --git a/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikFilter.cs b/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikFilter.cs index 01d6d92776..fb38d78f32 100644 --- a/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikFilter.cs +++ b/src/Lucene.Net.Analysis.Morfologik/Morfologik/MorfologikFilter.cs @@ -2,6 +2,7 @@ using J2N; using Lucene.Net.Analysis.Morfologik.TokenAttributes; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Support; using Lucene.Net.Util; using Morfologik.Stemming; diff --git a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPLemmatizerFilter.cs b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPLemmatizerFilter.cs index 42593286a4..2b03cd420d 100644 --- a/src/Lucene.Net.Analysis.OpenNLP/OpenNLPLemmatizerFilter.cs +++ b/src/Lucene.Net.Analysis.OpenNLP/OpenNLPLemmatizerFilter.cs @@ -1,6 +1,7 @@ // Lucene version compatibility level 8.2.0 using Lucene.Net.Analysis.OpenNlp.Tools; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Util; using System.Collections.Generic; using JCG = J2N.Collections.Generic; diff --git a/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs b/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs index 57e04dffee..4c55a1a580 100644 --- a/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs +++ b/src/Lucene.Net.Analysis.Phonetic/BeiderMorseFilter.cs @@ -1,6 +1,7 @@ // lucene version compatibility level: 4.8.1 using Lucene.Net.Analysis.Phonetic.Language.Bm; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Diagnostics; using System.Text.RegularExpressions; @@ -123,9 +124,9 @@ public override void Reset() // LUCENENET: Since we need to "reset" the Match // object, we also need an "isReset" flag to indicate - // whether we are at the head of the match and to - // take the appropriate measures to ensure we don't - // overwrite our matcher variable with + // whether we are at the head of the match and to + // take the appropriate measures to ensure we don't + // overwrite our matcher variable with // matcher = matcher.NextMatch(); // before it is time. A string could potentially // match on index 0, so we need another variable to diff --git a/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs b/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs index bd1d89d34d..7a11fb21c4 100644 --- a/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs +++ b/src/Lucene.Net.Analysis.Phonetic/DoubleMetaphoneFilter.cs @@ -1,6 +1,7 @@ // lucene version compatibility level: 4.8.1 using Lucene.Net.Analysis.Phonetic.Language; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Support; using System; using System.Collections.Generic; diff --git a/src/Lucene.Net.Analysis.Phonetic/PhoneticFilter.cs b/src/Lucene.Net.Analysis.Phonetic/PhoneticFilter.cs index c4a27e4200..9263ca84b3 100644 --- a/src/Lucene.Net.Analysis.Phonetic/PhoneticFilter.cs +++ b/src/Lucene.Net.Analysis.Phonetic/PhoneticFilter.cs @@ -1,6 +1,7 @@ // lucene version compatibility level: 4.8.1 using Lucene.Net.Analysis.Phonetic.Language; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System; namespace Lucene.Net.Analysis.Phonetic diff --git a/src/Lucene.Net.Analysis.SmartCn/SentenceTokenizer.cs b/src/Lucene.Net.Analysis.SmartCn/SentenceTokenizer.cs index 791cd1dbba..3308907972 100644 --- a/src/Lucene.Net.Analysis.SmartCn/SentenceTokenizer.cs +++ b/src/Lucene.Net.Analysis.SmartCn/SentenceTokenizer.cs @@ -1,5 +1,6 @@ // lucene version compatibility level: 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System; using System.IO; using System.Text; diff --git a/src/Lucene.Net.Analysis.Stempel/Stempel/StempelFilter.cs b/src/Lucene.Net.Analysis.Stempel/Stempel/StempelFilter.cs index 8ad4f2655c..8e3ec4dbc0 100644 --- a/src/Lucene.Net.Analysis.Stempel/Stempel/StempelFilter.cs +++ b/src/Lucene.Net.Analysis.Stempel/Stempel/StempelFilter.cs @@ -1,4 +1,5 @@ using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using System.Text; namespace Lucene.Net.Analysis.Stempel @@ -56,7 +57,7 @@ public StempelFilter(TokenStream @in, StempelStemmer stemmer) /// /// input token stream /// stemmer - /// For performance reasons words shorter than minLength + /// For performance reasons words shorter than minLength /// characters are not processed, but simply returned. public StempelFilter(TokenStream @in, StempelStemmer stemmer, int minLength) : base(@in) diff --git a/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyWriter.cs b/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyWriter.cs index 12b76dea12..857a30b96f 100644 --- a/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyWriter.cs +++ b/src/Lucene.Net.Facet/Taxonomy/Directory/DirectoryTaxonomyWriter.cs @@ -1,6 +1,7 @@ // Lucene version compatibility level 4.8.1 using J2N.Threading.Atomic; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Diagnostics; using Lucene.Net.Index; using Lucene.Net.Index.Extensions; @@ -387,7 +388,7 @@ public void Dispose() /// /// A hook for extending classes to close additional resources that were used. - /// The default implementation closes the as well as the + /// The default implementation closes the as well as the /// instances that were used. /// /// NOTE: if you override this method, you should include a @@ -419,7 +420,7 @@ private void DoClose() } /// - /// Closes the as well as the + /// Closes the as well as the /// instances that were used. /// private void CloseResources() // LUCENENET: Made private, since this has the same purpose as Dispose(bool). diff --git a/src/Lucene.Net.Highlighter/Highlight/TokenGroup.cs b/src/Lucene.Net.Highlighter/Highlight/TokenGroup.cs index 21d276088a..aa3aa82d92 100644 --- a/src/Lucene.Net.Highlighter/Highlight/TokenGroup.cs +++ b/src/Lucene.Net.Highlighter/Highlight/TokenGroup.cs @@ -1,6 +1,7 @@ using System; using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; namespace Lucene.Net.Search.Highlight { diff --git a/src/Lucene.Net.Highlighter/Highlight/TokenSources.cs b/src/Lucene.Net.Highlighter/Highlight/TokenSources.cs index d09460abe7..24c2154690 100644 --- a/src/Lucene.Net.Highlighter/Highlight/TokenSources.cs +++ b/src/Lucene.Net.Highlighter/Highlight/TokenSources.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Documents; using Lucene.Net.Index; using Lucene.Net.Util; @@ -28,7 +29,7 @@ namespace Lucene.Net.Search.Highlight */ /// - /// Hides implementation issues associated with obtaining a for use with + /// Hides implementation issues associated with obtaining a for use with /// the - can obtain from /// term vectors with offsets and positions or from an Analyzer re-parsing the stored content. /// see TokenStreamFromTermVector @@ -61,7 +62,7 @@ internal sealed class StoredTokenStream : TokenStream internal ICharTermAttribute termAtt; internal IOffsetAttribute offsetAtt; internal IPositionIncrementAttribute posincAtt; - internal IPayloadAttribute payloadAtt; + internal IPayloadAttribute payloadAtt; internal StoredTokenStream(Token[] tokens) { @@ -87,9 +88,9 @@ public override bool IncrementToken() { payloadAtt.Payload = payload; } - posincAtt.PositionIncrement = - (currentToken <= 1 || - tokens[currentToken - 1].StartOffset > tokens[currentToken - 2].StartOffset + posincAtt.PositionIncrement = + (currentToken <= 1 || + tokens[currentToken - 1].StartOffset > tokens[currentToken - 2].StartOffset ? 1 : 0); return true; } @@ -126,7 +127,7 @@ public static TokenStream GetAnyTokenStream(IndexReader reader, int docId, /// /// A convenience method that tries a number of approaches to getting a token stream. - /// The cost of finding there are no termVectors in the index is minimal (1000 invocations still + /// The cost of finding there are no termVectors in the index is minimal (1000 invocations still /// registers 0 ms). So this "lazy" (flexible?) approach to coding is probably acceptable /// /// null if field not stored correctly @@ -161,7 +162,7 @@ public static TokenStream GetTokenStream(Terms vector) /// In my tests the speeds to recreate 1000 token streams using this method are: /// /// - /// with TermVector offset only data stored - 420 milliseconds + /// with TermVector offset only data stored - 420 milliseconds /// /// /// with TermVector offset AND position data stored - 271 milliseconds @@ -174,7 +175,7 @@ public static TokenStream GetTokenStream(Terms vector) /// - reanalyzing the original content - 980 milliseconds /// /// - /// + /// /// The re-analyze timings will typically vary depending on - /// /// @@ -182,7 +183,7 @@ public static TokenStream GetTokenStream(Terms vector) /// stemmer/lowercaser/stopword combo) /// /// - /// The number of other fields (Lucene reads ALL fields off the disk + /// The number of other fields (Lucene reads ALL fields off the disk /// when accessing just one document field - can cost dear!) /// /// @@ -290,7 +291,7 @@ public static TokenStream GetTokenStream(Terms tpv, /// /// Returns a with positions and offsets constructed from /// field termvectors. If the field has no termvectors or offsets - /// are not included in the termvector, return null. See + /// are not included in the termvector, return null. See /// /// for an explanation of what happens when positions aren't present. /// @@ -299,7 +300,7 @@ public static TokenStream GetTokenStream(Terms tpv, /// the field to retrieve term vectors for /// a , or null if offsets are not available /// If there is a low-level I/O error - public static TokenStream GetTokenStreamWithOffsets(IndexReader reader, int docId, string field) + public static TokenStream GetTokenStreamWithOffsets(IndexReader reader, int docId, string field) { Fields vectors = reader.GetTermVectors(docId); if (vectors is null) { @@ -314,7 +315,7 @@ public static TokenStream GetTokenStreamWithOffsets(IndexReader reader, int docI if (!vector.HasPositions || !vector.HasOffsets) { return null; } - + return GetTokenStream(vector); } diff --git a/src/Lucene.Net.Highlighter/Highlight/TokenStreamFromTermPositionVector.cs b/src/Lucene.Net.Highlighter/Highlight/TokenStreamFromTermPositionVector.cs index cf7b1873f3..50f1551684 100644 --- a/src/Lucene.Net.Highlighter/Highlight/TokenStreamFromTermPositionVector.cs +++ b/src/Lucene.Net.Highlighter/Highlight/TokenStreamFromTermPositionVector.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Index; using Lucene.Net.Util; using System.Collections.Generic; diff --git a/src/Lucene.Net.Memory/MemoryIndex.cs b/src/Lucene.Net.Memory/MemoryIndex.cs index d595ba679f..b72da0eb9c 100644 --- a/src/Lucene.Net.Memory/MemoryIndex.cs +++ b/src/Lucene.Net.Memory/MemoryIndex.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Diagnostics; using Lucene.Net.Search; using Lucene.Net.Util; @@ -29,59 +30,59 @@ namespace Lucene.Net.Index.Memory */ /// - /// High-performance single-document main memory Apache Lucene fulltext search index. - /// + /// High-performance single-document main memory Apache Lucene fulltext search index. + /// ///

Overview

- /// + /// /// This class is a replacement/substitute for a large subset of /// functionality. It is designed to - /// enable maximum efficiency for on-the-fly matchmaking combining structured and - /// fuzzy fulltext search in realtime streaming applications such as Nux XQuery based XML - /// message queues, publish-subscribe systems for Blogs/newsfeeds, text chat, data acquisition and - /// distribution systems, application level routers, firewalls, classifiers, etc. - /// Rather than targeting fulltext search of infrequent queries over huge persistent - /// data archives (historic search), this class targets fulltext search of huge - /// numbers of queries over comparatively small transient realtime data (prospective - /// search). - /// For example as in + /// enable maximum efficiency for on-the-fly matchmaking combining structured and + /// fuzzy fulltext search in realtime streaming applications such as Nux XQuery based XML + /// message queues, publish-subscribe systems for Blogs/newsfeeds, text chat, data acquisition and + /// distribution systems, application level routers, firewalls, classifiers, etc. + /// Rather than targeting fulltext search of infrequent queries over huge persistent + /// data archives (historic search), this class targets fulltext search of huge + /// numbers of queries over comparatively small transient realtime data (prospective + /// search). + /// For example as in /// /// float score = Search(string text, Query query) /// /// /// Each instance can hold at most one Lucene "document", with a document containing /// zero or more "fields", each field having a name and a fulltext value. The - /// fulltext value is tokenized (split and transformed) into zero or more index terms + /// fulltext value is tokenized (split and transformed) into zero or more index terms /// (aka words) on AddField(), according to the policy implemented by an /// Analyzer. For example, Lucene analyzers can split on whitespace, normalize to lower case /// for case insensitivity, ignore common terms with little discriminatory value such as "he", "in", "and" (stop /// words), reduce the terms to their natural linguistic root form such as "fishing" - /// being reduced to "fish" (stemming), resolve synonyms/inflexions/thesauri + /// being reduced to "fish" (stemming), resolve synonyms/inflexions/thesauri /// (upon indexing and/or querying), etc. For details, see /// Lucene Analyzer Intro. /// /// - /// Arbitrary Lucene queries can be run against this class - see /// Lucene Query Syntax - /// as well as Query Parser Rules. - /// Note that a Lucene query selects on the field names and associated (indexed) - /// tokenized terms, not on the original fulltext(s) - the latter are not stored + /// Note that a Lucene query selects on the field names and associated (indexed) + /// tokenized terms, not on the original fulltext(s) - the latter are not stored /// but rather thrown away immediately after tokenization. /// /// /// For some interesting background information on search technology, see Bob Wyman's - /// Prospective Search, + /// Prospective Search, /// Jim Gray's /// /// A Call to Arms - Custom subscriptions, and Tim Bray's - /// On Search, the Series. - /// - /// - ///

Example Usage

- /// + /// + /// + ///

Example Usage

+ /// /// /// Analyzer analyzer = new SimpleAnalyzer(version); /// MemoryIndex index = new MemoryIndex(); @@ -96,52 +97,52 @@ namespace Lucene.Net.Index.Memory /// } /// Console.WriteLine("indexData=" + index.toString()); /// - /// - /// - ///

Example XQuery Usage

- /// + /// + /// + ///

Example XQuery Usage

+ /// /// /// (: An XQuery that finds all books authored by James that have something to do with "salmon fishing manuals", sorted by relevance :) /// declare namespace lucene = "java:nux.xom.pool.FullTextUtil"; /// declare variable $query := "+salmon~ +fish* manual~"; (: any arbitrary Lucene query can go here :) - /// + /// /// for $book in /books/book[author="James" and lucene:match(abstract, $query) > 0.0] /// let $score := lucene:match($book/abstract, $query) /// order by $score descending /// return $book /// - /// - /// + /// + /// ///

No thread safety guarantees

- /// + /// /// An instance can be queried multiple times with the same or different queries, /// but an instance is not thread-safe. If desired use idioms such as: /// /// MemoryIndex index = ... /// lock (index) { /// // read and/or write index (i.e. add fields and/or query) - /// } + /// } /// - /// - /// + /// + /// ///

Performance Notes

- /// - /// Internally there's a new data structure geared towards efficient indexing - /// and searching, plus the necessary support code to seamlessly plug into the Lucene + /// + /// Internally there's a new data structure geared towards efficient indexing + /// and searching, plus the necessary support code to seamlessly plug into the Lucene /// framework. ///
/// - /// This class performs very well for very small texts (e.g. 10 chars) - /// as well as for large texts (e.g. 10 MB) and everything in between. + /// This class performs very well for very small texts (e.g. 10 chars) + /// as well as for large texts (e.g. 10 MB) and everything in between. /// Typically, it is about 10-100 times faster than . - /// Note that has particularly + /// Note that has particularly /// large efficiency overheads for small to medium sized texts, both in time and space. - /// Indexing a field with N tokens takes O(N) in the best case, and O(N logN) in the worst + /// Indexing a field with N tokens takes O(N) in the best case, and O(N logN) in the worst /// case. Memory consumption is probably larger than for . /// /// - /// Example throughput of many simple term queries over a single MemoryIndex: - /// ~500000 queries/sec on a MacBook Pro, jdk 1.5.0_06, server VM. + /// Example throughput of many simple term queries over a single MemoryIndex: + /// ~500000 queries/sec on a MacBook Pro, jdk 1.5.0_06, server VM. /// As always, your mileage may vary. /// /// @@ -152,7 +153,7 @@ namespace Lucene.Net.Index.Memory /// target="_blank" /// href="http://java.sun.com/developer/technicalArticles/Programming/HPROF.html"> /// hprof tracing ). - /// + /// /// ///
public partial class MemoryIndex @@ -349,7 +350,7 @@ public virtual void AddField(string fieldName, TokenStream stream) /// Iterates over the given token stream and adds the resulting terms to the index; /// Equivalent to adding a tokenized, indexed, termVectorStored, unstored, /// Lucene . - /// Finally closes the token stream. Note that untokenized keywords can be added with this method via + /// Finally closes the token stream. Note that untokenized keywords can be added with this method via /// )"/>, the Lucene KeywordTokenizer or similar utilities. ///
/// a name to be associated with the text @@ -372,7 +373,7 @@ public virtual void AddField(string fieldName, TokenStream stream, float boost) /// a name to be associated with the text /// the token stream to retrieve tokens from. /// the boost factor for hits for this field - /// + /// /// the position increment gap if fields with the same name are added more than once /// /// @@ -385,9 +386,9 @@ public virtual void AddField(string fieldName, TokenStream stream, float boost, /// Iterates over the given token stream and adds the resulting terms to the index; /// Equivalent to adding a tokenized, indexed, termVectorStored, unstored, /// Lucene . - /// Finally closes the token stream. Note that untokenized keywords can be added with this method via + /// Finally closes the token stream. Note that untokenized keywords can be added with this method via /// )"/>, the Lucene KeywordTokenizer or similar utilities. - /// + /// ///
/// a name to be associated with the text /// the token stream to retrieve tokens from. @@ -437,15 +438,15 @@ public virtual void AddField(string fieldName, TokenStream stream, float boost, if (!fieldInfos.ContainsKey(fieldName)) { - fieldInfos[fieldName] = new FieldInfo(fieldName, - true, - fieldInfos.Count, - false, - false, - false, - this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, - DocValuesType.NONE, - DocValuesType.NONE, + fieldInfos[fieldName] = new FieldInfo(fieldName, + true, + fieldInfos.Count, + false, + false, + false, + this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, + DocValuesType.NONE, + DocValuesType.NONE, null); } ITermToBytesRefAttribute termAtt = stream.GetAttribute(); @@ -568,11 +569,11 @@ public virtual float Search(Query query) * unnecessary baggage and locking in the Lucene IndexReader * superclass, all of which is completely unnecessary for this main * memory index data structure without thread-safety claims. - * + * * Wishing IndexReader would be an interface... - * + * * Actually with the new tight createSearcher() API auto-closing is now - * made impossible, hence searcher.close() would be harmless and also + * made impossible, hence searcher.close() would be harmless and also * would not degrade performance... */ } @@ -781,4 +782,4 @@ public override int[] Clear() } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs b/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs index 5e331ab5a7..6ea882ec7a 100644 --- a/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs +++ b/src/Lucene.Net.TestFramework/Analysis/BaseTokenStreamTestCase.cs @@ -1,6 +1,7 @@ using J2N.Collections.Generic.Extensions; using J2N.Threading; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Documents; using Lucene.Net.Index; using Lucene.Net.Support; @@ -118,7 +119,7 @@ public abstract class BaseTokenStreamTestCase : LuceneTestCase // lastStartOffset) public static void AssertTokenStreamContents(TokenStream ts, string[] output, int[] startOffsets, int[] endOffsets, string[] types, int[] posIncrements, int[] posLengths, int? finalOffset, int? finalPosInc, bool[] keywordAtts, bool offsetsAreCorrect, byte[][] payloads) { - // LUCENENET: Bug fix: NUnit throws an exception when something fails. + // LUCENENET: Bug fix: NUnit throws an exception when something fails. // This causes Dispose() to be skipped and it pollutes other tests indicating false negatives. // Added this try-finally block to fix this. try @@ -631,7 +632,7 @@ internal class AnalysisThread : ThreadJob public bool Failed { get; set; } public Exception FirstException { get; set; } = null; - internal AnalysisThread(long seed, CountdownEvent latch, Analyzer a, int iterations, int maxWordLength, + internal AnalysisThread(long seed, CountdownEvent latch, Analyzer a, int iterations, int maxWordLength, bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw) { this.seed = seed; @@ -689,7 +690,7 @@ public static void CheckRandomData(Random random, Analyzer a, int iterations, in RandomIndexWriter iw = null; string postingsFormat = TestUtil.GetPostingsFormat("dummy"); bool codecOk = iterations * maxWordLength < 100000 - || !(postingsFormat.Equals("Memory", StringComparison.Ordinal) + || !(postingsFormat.Equals("Memory", StringComparison.Ordinal) || postingsFormat.Equals("SimpleText", StringComparison.Ordinal)); if (Rarely(random) && codecOk) { @@ -1253,4 +1254,4 @@ public static AttributeFactory NewAttributeFactory() // *********** End From Lucene 8.2.0 ************** } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs b/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs index 824eaec95d..815036eca3 100644 --- a/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs +++ b/src/Lucene.Net.TestFramework/Analysis/CannedTokenStream.cs @@ -72,7 +72,7 @@ public override bool IncrementToken() // TODO: can we just capture/restoreState so // we get all attrs...? ClearAttributes(); - termAtt.SetEmpty(); + termAtt.Clear(); termAtt.Append(token.ToString()); posIncrAtt.PositionIncrement = token.PositionIncrement; posLengthAtt.PositionLength = token.PositionLength; @@ -92,4 +92,4 @@ public override bool IncrementToken() } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs b/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs index 4a28cb3aab..4e99b00efa 100644 --- a/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs +++ b/src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs @@ -2,6 +2,7 @@ using J2N.Threading.Atomic; using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Codecs; using Lucene.Net.Documents; using Lucene.Net.Search; @@ -913,4 +914,4 @@ public override void Run() } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs index 409f5d7725..c010241176 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilter.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Analysis.Util; using Lucene.Net.Util; using NUnit.Framework; @@ -224,4 +225,4 @@ public virtual void TestFirstPosInc() AssertAnalyzesTo(analyzer, "the quick brown fox", new string[] { "hte", "quick", "brown", "fox" }, new int[] { 1, 1, 1, 1 }); } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordMarkerFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordMarkerFilter.cs index 30ec85377b..a47878a612 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordMarkerFilter.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestKeywordMarkerFilter.cs @@ -1,5 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Analysis.Util; using NUnit.Framework; using System.Globalization; @@ -100,4 +101,4 @@ public override bool IncrementToken() } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestRemoveDuplicatesTokenFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestRemoveDuplicatesTokenFilter.cs index aa138a95f9..17886b38a7 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestRemoveDuplicatesTokenFilter.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestRemoveDuplicatesTokenFilter.cs @@ -3,6 +3,7 @@ using Lucene.Net.Analysis.Core; using Lucene.Net.Analysis.Synonym; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Util; using NUnit.Framework; using System; @@ -168,4 +169,4 @@ public virtual void TestEmptyTerm() CheckOneTerm(a, "", ""); } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTrimFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTrimFilter.cs index c04d28d9ff..741a44f624 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTrimFilter.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTrimFilter.cs @@ -2,6 +2,7 @@ using J2N.Collections.Generic.Extensions; using Lucene.Net.Analysis.Core; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Util; using NUnit.Framework; using System; @@ -54,7 +55,7 @@ public virtual void TestTrim() AssertTokenStreamContents(ts, new string[] { "a", "b", "c", "" }, new int[] { 1, 0, 1, 3 }, new int[] { 2, 1, 2, 3 }, null, new int[] { 1, 1, 1, 1 }, null, null, false); } - /// @deprecated (3.0) does not support custom attributes + /// @deprecated (3.0) does not support custom attributes [Obsolete("(3.0) does not support custom attributes")] private sealed class IterTokenStream : TokenStream { @@ -139,4 +140,4 @@ public virtual void TestEmptyTerm() } } #pragma warning restore 612, 618 -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs index 8f363f4f9b..995c0ba5cb 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Miscellaneous/TestTypeAsSynonymFilterFactory.cs @@ -47,7 +47,7 @@ public void TestPrefix() private static Token token(string term, string type) { Token token = new Token(); - token.SetEmpty(); + token.Clear(); token.Append(term); token.Type = type; return token; diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Pattern/TestPatternTokenizer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Pattern/TestPatternTokenizer.cs index ed4391dc8b..f7a0a939d6 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Pattern/TestPatternTokenizer.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Pattern/TestPatternTokenizer.cs @@ -1,6 +1,7 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.CharFilters; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using NUnit.Framework; using System.Collections.Generic; using System.Globalization; @@ -60,7 +61,7 @@ public virtual void TestSplitting() String[] split = test[2].split( test[1] ); stream = tokenizer.create( new StringReader( test[2] ) ); int i=0; - for( Token t = stream.next(); null != t; t = stream.next() ) + for( Token t = stream.next(); null != t; t = stream.next() ) { assertEquals( "split: "+test[1] + " "+i, split[i++], new String(t.termBuffer(), 0, t.termLength()) ); } @@ -137,4 +138,4 @@ public virtual void TestRandomStrings() CheckRandomData(Random, b, 1000 * RandomMultiplier); } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Position/PositionFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Position/PositionFilterTest.cs index 38c2b256f1..b7835aa8e4 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Position/PositionFilterTest.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Position/PositionFilterTest.cs @@ -1,7 +1,9 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.Shingle; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using NUnit.Framework; +using System.IO; namespace Lucene.Net.Analysis.Position { @@ -104,12 +106,11 @@ public virtual void TestReset() /// /// Tests ShingleFilter up to six shingles against six terms. - /// Tests PositionFilter setting all but the first positionIncrement to zero. - /// + /// Tests PositionFilter setting all but the first positionIncrement to zero. + ///
[Test] public virtual void Test6GramFilterNoPositions() { - ShingleFilter filter = new ShingleFilter(new TestTokenStream(this, TEST_TOKEN), 6); AssertTokenStreamContents #pragma warning disable 612, 618 @@ -118,4 +119,4 @@ public virtual void Test6GramFilterNoPositions() SIX_GRAM_NO_POSITIONS_TOKENS, SIX_GRAM_NO_POSITIONS_INCREMENTS); } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowball.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowball.cs index ebcb724605..cb7fc91595 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowball.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowball.cs @@ -2,6 +2,7 @@ using Lucene.Net.Analysis.Core; using Lucene.Net.Analysis.Standard; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Attributes; using Lucene.Net.Tartarus.Snowball.Ext; using Lucene.Net.Util; @@ -74,7 +75,7 @@ public virtual void TestTurkish() /// /// Test turkish lowercasing (old buggy behavior) - /// @deprecated (3.1) Remove this when support for 3.0 indexes is no longer required (5.0) + /// @deprecated (3.1) Remove this when support for 3.0 indexes is no longer required (5.0) [Test] [Obsolete("(3.1) Remove this when support for 3.0 indexes is no longer required (5.0)")] public virtual void TestTurkishBWComp() @@ -197,4 +198,4 @@ public virtual void CheckRandomStrings(string snowballLanguage) } } #pragma warning restore 612, 618 -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSlowSynonymFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSlowSynonymFilter.cs index fc3b52de88..dbe538f859 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSlowSynonymFilter.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSlowSynonymFilter.cs @@ -2,6 +2,7 @@ using J2N.Collections.Generic.Extensions; using J2N.Text; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using NUnit.Framework; using System; using System.Collections.Generic; @@ -32,7 +33,7 @@ namespace Lucene.Net.Analysis.Synonym //using org.apache.lucene.analysis.tokenattributes; - /// @deprecated Remove this test in Lucene 5.0 + /// @deprecated Remove this test in Lucene 5.0 [Obsolete("Remove this test in Lucene 5.0")] public class TestSlowSynonymFilter : BaseTokenStreamTestCase { @@ -274,7 +275,7 @@ public virtual void TestOffsetBug() /// a/b => tokens a and b share the same spot (b.positionIncrement=0) /// a,3/b/c => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0) /// a,1,10,11 => "a" with positionIncrement=1, startOffset=10, endOffset=11 - /// @deprecated (3.0) does not support attributes api + /// @deprecated (3.0) does not support attributes api [Obsolete("(3.0) does not support attributes api")] private IList Tokens(string str) { @@ -330,7 +331,7 @@ private IList Tokens(string str) return result; } - /// @deprecated (3.0) does not support custom attributes + /// @deprecated (3.0) does not support custom attributes [Obsolete("(3.0) does not support custom attributes")] private sealed class IterTokenStream : TokenStream { @@ -379,4 +380,4 @@ public override sealed bool IncrementToken() } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Highlighter/Highlight/HighlighterPhraseTest.cs b/src/Lucene.Net.Tests.Highlighter/Highlight/HighlighterPhraseTest.cs index 0d0d476851..f94e4cfb1b 100644 --- a/src/Lucene.Net.Tests.Highlighter/Highlight/HighlighterPhraseTest.cs +++ b/src/Lucene.Net.Tests.Highlighter/Highlight/HighlighterPhraseTest.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Documents; using Lucene.Net.Index; using Lucene.Net.Search.Spans; diff --git a/src/Lucene.Net.Tests.Highlighter/Highlight/HighlighterTest.cs b/src/Lucene.Net.Tests.Highlighter/Highlight/HighlighterTest.cs index dac690483c..23b608f53f 100644 --- a/src/Lucene.Net.Tests.Highlighter/Highlight/HighlighterTest.cs +++ b/src/Lucene.Net.Tests.Highlighter/Highlight/HighlighterTest.cs @@ -1,6 +1,7 @@ using J2N.Text; using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Attributes; using Lucene.Net.Documents; using Lucene.Net.Index; @@ -522,7 +523,7 @@ public void TestNumericRangeQuery() highlighter.TextFragmenter = (new SimpleFragmenter(40)); - // String result = + // String result = highlighter.GetBestFragments(tokenStream, text, maxNumFragmentsRequired, "..."); //if (VERBOSE) Console.WriteLine("\t" + result); } @@ -1335,7 +1336,7 @@ public void TestGetTextFragments() public void TestMaxSizeHighlight() { MockAnalyzer analyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET); - // we disable MockTokenizer checks because we will forcefully limit the + // we disable MockTokenizer checks because we will forcefully limit the // tokenstream and call end() before incrementToken() returns false. analyzer.EnableChecks = (false); @@ -1365,7 +1366,7 @@ public void TestMaxSizeHighlightTruncates() { String goodWord = "goodtoken"; CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.MakeString("stoppedtoken")); - // we disable MockTokenizer checks because we will forcefully limit the + // we disable MockTokenizer checks because we will forcefully limit the // tokenstream and call end() before incrementToken() returns false. MockAnalyzer analyzer = new MockAnalyzer(Random, MockTokenizer.SIMPLE, true, stopWords); analyzer.EnableChecks = (false); @@ -1887,7 +1888,7 @@ private void searchIndex() } /* - * + * * public void TestBigramAnalyzer() throws IOException, ParseException { * //test to ensure analyzers with none-consecutive start/end offsets //dont * double-highlight text //setup index 1 RAMDirectory ramDir = new @@ -1896,15 +1897,15 @@ private void searchIndex() * Document(); Field f = new Field(FIELD_NAME, "java abc def", true, true, * true); d.Add(f); writer.addDocument(d); writer.close(); IndexReader reader = * DirectoryReader.open(ramDir); - * + * * IndexSearcher searcher=new IndexSearcher(reader); query = * QueryParser.parse("abc", FIELD_NAME, bigramAnalyzer); * Console.WriteLine("Searching for: " + query.toString(FIELD_NAME)); hits = * searcher.Search(query); - * + * * Highlighter highlighter = new Highlighter(this,new * QueryFragmentScorer(query)); - * + * * for (int i = 0; i < hits.TotalHits; i++) { String text = * searcher.doc2(hits.ScoreDocs[i].doc).Get(FIELD_NAME); TokenStream * tokenStream=bigramAnalyzer.TokenStream(FIELD_NAME,text); @@ -2053,7 +2054,7 @@ public SynonymAnalyzer(IDictionary synonyms) /* * (non-Javadoc) - * + * * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, * java.io.Reader) */ @@ -2294,4 +2295,3 @@ public void Run() } } } - diff --git a/src/Lucene.Net.Tests.Highlighter/Highlight/TokenSourcesTest.cs b/src/Lucene.Net.Tests.Highlighter/Highlight/TokenSourcesTest.cs index 24b729a46a..c232e40294 100644 --- a/src/Lucene.Net.Tests.Highlighter/Highlight/TokenSourcesTest.cs +++ b/src/Lucene.Net.Tests.Highlighter/Highlight/TokenSourcesTest.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Documents; using Lucene.Net.Index; using Lucene.Net.Search.Spans; diff --git a/src/Lucene.Net.Tests.Highlighter/VectorHighlight/AbstractTestCase.cs b/src/Lucene.Net.Tests.Highlighter/VectorHighlight/AbstractTestCase.cs index b1f7f209db..80d9fbb5e5 100644 --- a/src/Lucene.Net.Tests.Highlighter/VectorHighlight/AbstractTestCase.cs +++ b/src/Lucene.Net.Tests.Highlighter/VectorHighlight/AbstractTestCase.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Documents; using Lucene.Net.Index; using Lucene.Net.Index.Extensions; diff --git a/src/Lucene.Net.Tests.Memory/Index/Memory/MemoryIndexTest.cs b/src/Lucene.Net.Tests.Memory/Index/Memory/MemoryIndexTest.cs index 15bec38cb9..f80fd19589 100644 --- a/src/Lucene.Net.Tests.Memory/Index/Memory/MemoryIndexTest.cs +++ b/src/Lucene.Net.Tests.Memory/Index/Memory/MemoryIndexTest.cs @@ -306,7 +306,7 @@ public override bool IncrementToken() { if (termAtt.Length > 0 && termAtt.Buffer[0] == 't') { - termAtt.SetLength(0); + termAtt.Length = 0; } return true; } @@ -318,8 +318,8 @@ public override bool IncrementToken() }; /** - * Some terms to be indexed, in addition to random words. - * These terms are commonly used in the queries. + * Some terms to be indexed, in addition to random words. + * These terms are commonly used in the queries. */ private static readonly string[] TEST_TERMS = {"term", "Term", "tErm", "TERM", "telm", "stop", "drop", "roll", "phrase", "a", "c", "bar", "blar", diff --git a/src/Lucene.Net.Tests.QueryParser/Classic/TestMultiAnalyzer.cs b/src/Lucene.Net.Tests.QueryParser/Classic/TestMultiAnalyzer.cs index a9d5c5542f..ebb42e9f7b 100644 --- a/src/Lucene.Net.Tests.QueryParser/Classic/TestMultiAnalyzer.cs +++ b/src/Lucene.Net.Tests.QueryParser/Classic/TestMultiAnalyzer.cs @@ -27,6 +27,7 @@ using NUnit.Framework; using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Search; using Lucene.Net.Util; @@ -128,7 +129,7 @@ public virtual void TestPosIncrementAnalyzer() /// /// Expands "multi" to "multi" and "multi2", both at the same position, - /// and expands "triplemulti" to "triplemulti", "multi3", and "multi2". + /// and expands "triplemulti" to "triplemulti", "multi3", and "multi2". /// private class MultiAnalyzer : Analyzer { @@ -266,7 +267,7 @@ public DumbQueryParser(string f, Analyzer a) { } - // expose super's version + // expose super's version public Query GetSuperFieldQuery(string f, string t, bool quoted) { return base.GetFieldQuery(f, t, quoted); diff --git a/src/Lucene.Net.Tests.QueryParser/Classic/TestMultiPhraseQueryParsing.cs b/src/Lucene.Net.Tests.QueryParser/Classic/TestMultiPhraseQueryParsing.cs index 7e0a4999d7..5b1ff75c64 100644 --- a/src/Lucene.Net.Tests.QueryParser/Classic/TestMultiPhraseQueryParsing.cs +++ b/src/Lucene.Net.Tests.QueryParser/Classic/TestMultiPhraseQueryParsing.cs @@ -75,7 +75,7 @@ public override sealed bool IncrementToken() if (upto < tokens.Length) { TokenAndPos token = tokens[upto++]; - termAtt.SetEmpty(); + termAtt.Clear(); termAtt.Append(token.token); posIncrAtt.PositionIncrement = (token.pos - lastPos); lastPos = token.pos; diff --git a/src/Lucene.Net.Tests.QueryParser/Classic/TestQueryParser.cs b/src/Lucene.Net.Tests.QueryParser/Classic/TestQueryParser.cs index fa749f87e0..3601e84b0b 100644 --- a/src/Lucene.Net.Tests.QueryParser/Classic/TestQueryParser.cs +++ b/src/Lucene.Net.Tests.QueryParser/Classic/TestQueryParser.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Diagnostics; using Lucene.Net.Documents; using Lucene.Net.QueryParsers.Flexible.Standard; @@ -460,7 +461,7 @@ public virtual void TestCJKSynonym() } /// - /// synonyms with default OR operator + /// synonyms with default OR operator /// [Test] public virtual void TestCJKSynonymsOR() diff --git a/src/Lucene.Net.Tests.QueryParser/Flexible/Precedence/TestPrecedenceQueryParser.cs b/src/Lucene.Net.Tests.QueryParser/Flexible/Precedence/TestPrecedenceQueryParser.cs index 9ddce61c08..5c11069a64 100644 --- a/src/Lucene.Net.Tests.QueryParser/Flexible/Precedence/TestPrecedenceQueryParser.cs +++ b/src/Lucene.Net.Tests.QueryParser/Flexible/Precedence/TestPrecedenceQueryParser.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Documents; using Lucene.Net.QueryParsers.Flexible.Core; using Lucene.Net.QueryParsers.Flexible.Standard.Config; diff --git a/src/Lucene.Net.Tests.QueryParser/Flexible/Standard/TestMultiAnalyzerQPHelper.cs b/src/Lucene.Net.Tests.QueryParser/Flexible/Standard/TestMultiAnalyzerQPHelper.cs index 7487944c9c..2526b32407 100644 --- a/src/Lucene.Net.Tests.QueryParser/Flexible/Standard/TestMultiAnalyzerQPHelper.cs +++ b/src/Lucene.Net.Tests.QueryParser/Flexible/Standard/TestMultiAnalyzerQPHelper.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.QueryParsers.Flexible.Standard.Config; using Lucene.Net.Util; using NUnit.Framework; @@ -28,7 +29,7 @@ namespace Lucene.Net.QueryParsers.Flexible.Standard /// /// This test case is a copy of the core Lucene query parser test, it was adapted /// to use new QueryParserHelper instead of the old query parser. - /// + /// /// Test QueryParser's ability to deal with Analyzers that return more than one /// token per position or that return tokens with a position increment > 1. /// @@ -120,11 +121,11 @@ public void TestMultiAnalyzer() // assertEquals("\"(multi multi2) bar\"~99", // qp.getSuperFieldQuery("","multi bar").toString()); // - // + // // // ask sublcass to parse phrase with modified default slop // assertEquals("\"(multi multi2) foo\"~99 bar", // qp.parse("\"multi foo\" bar").toString()); - // + // // } [Test] diff --git a/src/Lucene.Net.Tests.QueryParser/Flexible/Standard/TestQPHelper.cs b/src/Lucene.Net.Tests.QueryParser/Flexible/Standard/TestQPHelper.cs index 13448ef3df..2a20ef5611 100644 --- a/src/Lucene.Net.Tests.QueryParser/Flexible/Standard/TestQPHelper.cs +++ b/src/Lucene.Net.Tests.QueryParser/Flexible/Standard/TestQPHelper.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Documents; using Lucene.Net.Index; using Lucene.Net.QueryParsers.Flexible.Core; diff --git a/src/Lucene.Net.Tests.QueryParser/Util/QueryParserTestBase.cs b/src/Lucene.Net.Tests.QueryParser/Util/QueryParserTestBase.cs index 0b7c025d16..142367c183 100644 --- a/src/Lucene.Net.Tests.QueryParser/Util/QueryParserTestBase.cs +++ b/src/Lucene.Net.Tests.QueryParser/Util/QueryParserTestBase.cs @@ -21,6 +21,7 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Documents; using Lucene.Net.Index; using Lucene.Net.QueryParsers.Classic; diff --git a/src/Lucene.Net.Tests.TestFramework/Analysis/TrivialLookaheadFilter.cs b/src/Lucene.Net.Tests.TestFramework/Analysis/TrivialLookaheadFilter.cs index f3e8db4f65..c51615c960 100644 --- a/src/Lucene.Net.Tests.TestFramework/Analysis/TrivialLookaheadFilter.cs +++ b/src/Lucene.Net.Tests.TestFramework/Analysis/TrivialLookaheadFilter.cs @@ -69,7 +69,7 @@ protected override void AfterPosition() InsertToken(); // replace term with 'improved' term. ClearAttributes(); - termAtt.SetEmpty(); + termAtt.Clear(); posIncAtt.PositionIncrement = (0); termAtt.Append(m_positions.Get(m_outputPos).Fact); offsetAtt.SetOffset(m_positions.Get(m_outputPos).StartOffset, diff --git a/src/Lucene.Net.Tests/Analysis/TestToken.cs b/src/Lucene.Net.Tests/Analysis/TestToken.cs index 4a83f3a9bf..03e0f2b4e2 100644 --- a/src/Lucene.Net.Tests/Analysis/TestToken.cs +++ b/src/Lucene.Net.Tests/Analysis/TestToken.cs @@ -1,4 +1,5 @@ using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using NUnit.Framework; using System.Collections.Generic; using System.IO; diff --git a/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs b/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs index 2df8210f4b..02961e29a6 100644 --- a/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs +++ b/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs @@ -1,5 +1,6 @@ using J2N.IO; using J2N.Text; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Attributes; using NUnit.Framework; using System; diff --git a/src/Lucene.Net.Tests/Analysis/TrivialLookaheadFilter.cs b/src/Lucene.Net.Tests/Analysis/TrivialLookaheadFilter.cs index d2c2a3118e..43562b0b82 100644 --- a/src/Lucene.Net.Tests/Analysis/TrivialLookaheadFilter.cs +++ b/src/Lucene.Net.Tests/Analysis/TrivialLookaheadFilter.cs @@ -68,7 +68,7 @@ protected override void AfterPosition() InsertToken(); // replace term with 'improved' term. ClearAttributes(); - termAtt.SetEmpty(); + termAtt.Clear(); posIncAtt.PositionIncrement = 0; termAtt.Append(((TestPosition)m_positions.Get(m_outputPos)).Fact); offsetAtt.SetOffset(m_positions.Get(m_outputPos).StartOffset, m_positions.Get(m_outputPos + 1).EndOffset); @@ -105,4 +105,4 @@ private void PeekSentence() } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests/Index/TestDocumentWriter.cs b/src/Lucene.Net.Tests/Index/TestDocumentWriter.cs index cc7a4a1b77..60056d5b42 100644 --- a/src/Lucene.Net.Tests/Index/TestDocumentWriter.cs +++ b/src/Lucene.Net.Tests/Index/TestDocumentWriter.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Documents; using NUnit.Framework; using System; diff --git a/src/Lucene.Net.Tests/Index/TestIndexWriter.cs b/src/Lucene.Net.Tests/Index/TestIndexWriter.cs index ef6c5ebadb..905e5256ee 100644 --- a/src/Lucene.Net.Tests/Index/TestIndexWriter.cs +++ b/src/Lucene.Net.Tests/Index/TestIndexWriter.cs @@ -2002,7 +2002,7 @@ public sealed override bool IncrementToken() ClearAttributes(); if (upto < tokens.Length) { - termAtt.SetEmpty(); + termAtt.Clear(); termAtt.Append(tokens[upto]); upto++; return true; diff --git a/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs b/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs index bcefea9011..d1beae32a7 100644 --- a/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs +++ b/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs @@ -1,4 +1,5 @@ using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Documents; using NUnit.Framework; diff --git a/src/Lucene.Net.Tests/Util/TestAttributeSource.cs b/src/Lucene.Net.Tests/Util/TestAttributeSource.cs index 36dd232533..8d710ac489 100644 --- a/src/Lucene.Net.Tests/Util/TestAttributeSource.cs +++ b/src/Lucene.Net.Tests/Util/TestAttributeSource.cs @@ -1,4 +1,5 @@ using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using NUnit.Framework; using System; using System.Collections.Generic; diff --git a/src/Lucene.Net.Tests/Util/TestQueryBuilder.cs b/src/Lucene.Net.Tests/Util/TestQueryBuilder.cs index 794d460d27..2c893cd3ab 100644 --- a/src/Lucene.Net.Tests/Util/TestQueryBuilder.cs +++ b/src/Lucene.Net.Tests/Util/TestQueryBuilder.cs @@ -1,5 +1,6 @@ using Lucene.Net.Analysis; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using NUnit.Framework; using System; using System.IO; diff --git a/src/Lucene.Net/Analysis/Token.cs b/src/Lucene.Net/Analysis/Token.cs index 006bd7c8a2..e4b5f4ecaa 100644 --- a/src/Lucene.Net/Analysis/Token.cs +++ b/src/Lucene.Net/Analysis/Token.cs @@ -1,7 +1,8 @@ using J2N.Text; using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Analysis.TokenAttributes.Extensions; +using Lucene.Net.Index; using System; -using System.Reflection; using Attribute = Lucene.Net.Util.Attribute; using AttributeSource = Lucene.Net.Util.AttributeSource; using BytesRef = Lucene.Net.Util.BytesRef; @@ -45,7 +46,7 @@ namespace Lucene.Net.Analysis /// with type "eos". The default token type is "word". /// /// A Token can optionally have metadata (a.k.a. payload) in the form of a variable - /// length byte array. Use to retrieve the + /// length byte array. Use to retrieve the /// payloads from the index. /// /// @@ -64,17 +65,16 @@ namespace Lucene.Net.Analysis /// Failing that, to create a new you should first use /// one of the constructors that starts with null text. To load /// the token from a char[] use . - /// To load from a use followed by + /// To load from a use (or ) followed by /// or . /// Alternatively you can get the 's termBuffer by calling either , /// if you know that your text is shorter than the capacity of the termBuffer /// or , if there is any possibility /// that you may need to grow the buffer. Fill in the characters of your term into this /// buffer, with if loading from a string, - /// or with , - /// and finally call to - /// set the length of the term text. See LUCENE-969 + /// or with , + /// and finally set the of the term text. + /// See LUCENE-969 /// for details. /// Typical Token reuse patterns: /// @@ -567,7 +567,7 @@ public virtual void Reinit(Token prototype) /// new term text public virtual void Reinit(Token prototype, string newTerm) { - SetEmpty().Append(newTerm); + this.SetEmpty().Append(newTerm); positionIncrement = prototype.positionIncrement; flags = prototype.flags; startOffset = prototype.startOffset; diff --git a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs index fa3cede774..855c262fc8 100644 --- a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs +++ b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs @@ -1,4 +1,5 @@ using J2N.Text; +using Lucene.Net.Analysis.TokenAttributes.Extensions; using Lucene.Net.Util; using System; using System.Diagnostics.CodeAnalysis; @@ -37,13 +38,12 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable /// the number of characters to copy void CopyBuffer(char[] buffer, int offset, int length); - /// /// Returns the internal termBuffer character array which /// you can then directly alter. If the array is too /// small for your token, use /// to increase it. After - /// altering the buffer be sure to call + /// altering the buffer be sure to set /// to record the number of valid /// characters that were placed into the termBuffer. /// @@ -62,37 +62,35 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable char[] ResizeBuffer(int newSize); /// - /// Gets or Sets the number of valid characters (in + /// Gets or sets the number of valid characters (length of the term) in /// the termBuffer array. - /// - /// - new int Length { get; set; } // LUCENENET: To mimic StringBuilder, we allow this to be settable. - - // LUCENENET specific: Redefining this[] to make it settable - new char this[int index] { get; set; } - - /// - /// Set number of valid characters (length of the term) in - /// the termBuffer array. Use this to truncate the termBuffer + /// Use this setter to truncate the termBuffer /// or to synchronize with external manipulation of the termBuffer. /// Note: to grow the size of the array, /// use first. - /// NOTE: This is exactly the same operation as calling the setter, the primary - /// difference is that this method returns a reference to the current object so it can be chained. - /// - /// obj.SetLength(30).Append("hey you"); - /// /// - /// the truncated length - ICharTermAttribute SetLength(int length); + /// + /// LUCENENET: To mimic StringBuilder, we allow this to be settable. + /// The setter may be used as an alternative to + /// if + /// chaining is not required. + /// + /// + new int Length { get; set; } + + // LUCENENET specific: Redefining this[] to make it settable + new char this[int index] { get; set; } /// - /// Sets the length of the termBuffer to zero. - /// Use this method before appending contents. + /// Clears the values in this attribute and resets it to its + /// default value. /// - ICharTermAttribute SetEmpty(); - - // the following methods are redefined to get rid of IOException declaration: + /// + /// LUCENENET specific - This method is not part of the Java Lucene API. + /// This was added to be a more consistent way to clear attributes than SetEmpty(). + /// + /// + void Clear(); /// /// Appends the contents of the to this character sequence. diff --git a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs index 8ab9aca194..c8762e7d88 100644 --- a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs +++ b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs @@ -70,8 +70,6 @@ public void CopyBuffer(char[] buffer, int offset, int length) termLength = length; } - char[] ICharTermAttribute.Buffer => termBuffer; - [WritableArray] [SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")] public char[] Buffer => termBuffer; @@ -107,32 +105,19 @@ private void GrowTermBuffer(int newSize) } } - int ICharTermAttribute.Length { get => Length; set => SetLength(value); } - - int ICharSequence.Length => Length; - public int Length { get => termLength; - set => SetLength(value); - } - - public CharTermAttribute SetLength(int length) - { - // LUCENENET: added guard clause - if (length < 0) - throw new ArgumentOutOfRangeException(nameof(length), length, $"{nameof(length)} must not be negative."); - if (length > termBuffer.Length) - throw new ArgumentOutOfRangeException(nameof(length), length, "length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")"); - - termLength = length; - return this; - } + set + { + // LUCENENET: added guard clause + if (value < 0) + throw new ArgumentOutOfRangeException(nameof(value), value, $"{nameof(value)} must not be negative."); + if (value > termBuffer.Length) + throw new ArgumentOutOfRangeException(nameof(value), value, $"length {value} exceeds the size of the termBuffer ({termBuffer.Length})"); - public CharTermAttribute SetEmpty() - { - termLength = 0; - return this; + termLength = value; + } } // *** TermToBytesRefAttribute interface *** @@ -147,12 +132,6 @@ public virtual void FillBytesRef() // *** CharSequence interface *** - // LUCENENET specific: Replaced CharAt(int) with this[int] to .NETify - - char ICharSequence.this[int index] => this[index]; - - char ICharTermAttribute.this[int index] { get => this[index]; set => this[index] = value; } - // LUCENENET specific indexer to make CharTermAttribute act more like a .NET type public char this[int index] { @@ -485,14 +464,6 @@ public override void CopyTo(IAttribute target) // LUCENENET specific - intention #region ICharTermAttribute Members - void ICharTermAttribute.CopyBuffer(char[] buffer, int offset, int length) => CopyBuffer(buffer, offset, length); - - char[] ICharTermAttribute.ResizeBuffer(int newSize) => ResizeBuffer(newSize); - - ICharTermAttribute ICharTermAttribute.SetLength(int length) => SetLength(length); - - ICharTermAttribute ICharTermAttribute.SetEmpty() => SetEmpty(); - ICharTermAttribute ICharTermAttribute.Append(ICharSequence value) => Append(value); ICharTermAttribute ICharTermAttribute.Append(ICharSequence value, int startIndex, int count) => Append(value, startIndex, count); diff --git a/src/Lucene.Net/Support/Analysis/TokenAttributes/Extensions/CharTermAttributeExtensions.cs b/src/Lucene.Net/Support/Analysis/TokenAttributes/Extensions/CharTermAttributeExtensions.cs new file mode 100644 index 0000000000..72796eb520 --- /dev/null +++ b/src/Lucene.Net/Support/Analysis/TokenAttributes/Extensions/CharTermAttributeExtensions.cs @@ -0,0 +1,75 @@ +using System; + +namespace Lucene.Net.Analysis.TokenAttributes.Extensions +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// + /// Extension methods on . + /// + public static class CharTermAttributeExtensions + { + /// + /// Set number of valid characters (length of the term) in + /// the termBuffer array. Use this to truncate the termBuffer + /// or to synchronize with external manipulation of the termBuffer. + /// Note: to grow the size of the array, + /// use first. + /// + /// NOTE: This is exactly the same operation as calling the setter, the primary + /// difference is that this method returns a reference to the current object so it can be chained. + /// + /// obj.SetLength(30).Append("hey you"); + /// + /// + /// The truncated length + public static T SetLength(this T termAttr, int length) + where T : ICharTermAttribute + { + if (termAttr is null) + { + throw new ArgumentNullException(nameof(termAttr)); + } + + termAttr.Length = length; + return termAttr; + } + + /// + /// Sets the length of the termBuffer to zero. + /// Use this method before appending contents. + /// + /// NOTE: This is exactly the same operation as calling , the primary + /// difference is that this method returns a reference to the current object so it can be chained. + /// + /// obj.SetEmpty().Append("hey you"); + /// + /// + public static T SetEmpty(this T termAttr) + where T : ICharTermAttribute + { + if (termAttr is null) + { + throw new ArgumentNullException(nameof(termAttr)); + } + + termAttr.Clear(); + return termAttr; + } + } +} diff --git a/src/Lucene.Net/Util/Attribute.cs b/src/Lucene.Net/Util/Attribute.cs index 6c13724622..1932528494 100644 --- a/src/Lucene.Net/Util/Attribute.cs +++ b/src/Lucene.Net/Util/Attribute.cs @@ -20,6 +20,5 @@ /// Base interface for attributes. public interface IAttribute { - void CopyTo(IAttribute target); // LUCENENET specific - .NET doesn't recognize this method without a cast, so we define it here to ensure it is visible on all IAttribute interfaces } -} \ No newline at end of file +} diff --git a/src/Lucene.Net/Util/AttributeImpl.cs b/src/Lucene.Net/Util/AttributeImpl.cs index b2fe0d24e3..f136c7b5dd 100644 --- a/src/Lucene.Net/Util/AttributeImpl.cs +++ b/src/Lucene.Net/Util/AttributeImpl.cs @@ -31,7 +31,8 @@ namespace Lucene.Net.Util /// public abstract class Attribute : IAttribute // LUCENENET specific: Not implementing ICloneable per Microsoft's recommendation { - /// Clears the values in this and resets it to its + /// + /// Clears the values in this and resets it to its /// default value. If this implementation implements more than one interface /// it clears all. /// @@ -97,22 +98,22 @@ public string ReflectAsString(bool prependAttClass) /// /// This method is for introspection of attributes, it should simply /// add the key/values this attribute holds to the given . - /// + /// /// The default implementation calls for all /// non-static fields from the implementing class, using the field name as key /// and the field value as value. The class is also determined by Reflection. /// Please note that the default implementation can only handle single-Attribute /// implementations. - /// + /// /// Custom implementations look like this (e.g. for a combined attribute implementation): /// - /// public void ReflectWith(IAttributeReflector reflector) + /// public void ReflectWith(IAttributeReflector reflector) /// { /// reflector.Reflect(typeof(ICharTermAttribute), "term", GetTerm()); /// reflector.Reflect(typeof(IPositionIncrementAttribute), "positionIncrement", GetPositionIncrement()); /// } /// - /// + /// /// If you implement this method, make sure that for each invocation, the same set of /// interfaces and keys are passed to in the same order, but possibly /// different values. So don't automatically exclude e.g. null properties! @@ -154,7 +155,7 @@ public virtual void ReflectWith(IAttributeReflector reflector) // LUCENENET NOTE /// fields of this object and prints the values in the following syntax: /// /// - /// public String ToString() + /// public String ToString() /// { /// return "start=" + startOffset + ",end=" + endOffset; /// } @@ -205,4 +206,4 @@ public virtual object Clone() return base.MemberwiseClone(); } } -} \ No newline at end of file +}