Skip to content

Commit

Permalink
IAttribute and ICharTermAttribute method changes, #1038 (#1049)
Browse files Browse the repository at this point in the history
* Remove SetLength and SetEmpty from ICharTermAttribute, add extension methods

* Remove CopyTo from IAttribute, add Clear to ICharTermAttribute
  • Loading branch information
paulirwin authored Dec 3, 2024
1 parent d1386d5 commit 5b7d0ac
Show file tree
Hide file tree
Showing 73 changed files with 364 additions and 270 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System;

namespace Lucene.Net.Analysis.Br
Expand Down Expand Up @@ -41,7 +42,7 @@ public sealed class BrazilianStemFilter : TokenFilter
private readonly IKeywordAttribute keywordAttr;

/// <summary>
/// Creates a new <see cref="BrazilianStemFilter"/>
/// Creates a new <see cref="BrazilianStemFilter"/>
/// </summary>
/// <param name="in"> the source <see cref="TokenStream"/> </param>
public BrazilianStemFilter(TokenStream @in)
Expand Down Expand Up @@ -74,4 +75,4 @@ public override bool IncrementToken()
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Lucene version compatibility level 4.8.1
using J2N.Text;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Diagnostics;
using Lucene.Net.Util;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System;

namespace Lucene.Net.Analysis.De
Expand All @@ -22,7 +23,7 @@ namespace Lucene.Net.Analysis.De
*/

/// <summary>
/// A <see cref="TokenFilter"/> that stems German words.
/// A <see cref="TokenFilter"/> that stems German words.
/// <para>
/// It supports a table of words that should
/// not be stemmed at all. The stemmer used can be changed at runtime after the
Expand Down Expand Up @@ -93,4 +94,4 @@ public GermanStemmer Stemmer
}
}
}
}
}
9 changes: 5 additions & 4 deletions src/Lucene.Net.Analysis.Common/Analysis/En/KStemFilter.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System.IO;

namespace Lucene.Net.Analysis.En
Expand Down Expand Up @@ -30,13 +31,13 @@ namespace Lucene.Net.Analysis.En
/// Conference on Research and Development in Information Retrieval, 191-203, 1993).
/// <para/>
/// All terms must already be lowercased for this filter to work correctly.
///
///
/// <para>
/// Note: This filter is aware of the <see cref="IKeywordAttribute"/>. To prevent
/// certain terms from being passed to the stemmer
/// <see cref="IKeywordAttribute.IsKeyword"/> should be set to <c>true</c>
/// in a previous <see cref="TokenStream"/>.
///
///
/// Note: For including the original term as well as the stemmed version, see
/// <see cref="Miscellaneous.KeywordRepeatFilterFactory"/>
/// </para>
Expand All @@ -47,7 +48,7 @@ public sealed class KStemFilter : TokenFilter
private readonly ICharTermAttribute termAttribute;
private readonly IKeywordAttribute keywordAtt;

public KStemFilter(TokenStream @in)
public KStemFilter(TokenStream @in)
: base(@in)
{
termAttribute = AddAttribute<ICharTermAttribute>();
Expand Down Expand Up @@ -75,4 +76,4 @@ public override bool IncrementToken()
return true;
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System;

namespace Lucene.Net.Analysis.Fr
Expand All @@ -22,7 +23,7 @@ namespace Lucene.Net.Analysis.Fr
*/

/// <summary>
/// A <see cref="TokenFilter"/> that stems French words.
/// A <see cref="TokenFilter"/> that stems French words.
/// <para>
/// The used stemmer can be changed at runtime after the
/// filter object is created (as long as it is a <see cref="FrenchStemmer"/>).
Expand All @@ -33,9 +34,9 @@ namespace Lucene.Net.Analysis.Fr
/// the <see cref="IKeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para> </summary>
/// <seealso cref="Miscellaneous.KeywordMarkerFilter"/>
/// @deprecated (3.1) Use <see cref="Snowball.SnowballFilter"/> with
/// @deprecated (3.1) Use <see cref="Snowball.SnowballFilter"/> with
/// <see cref="Tartarus.Snowball.Ext.FrenchStemmer"/> instead, which has the
/// same functionality. This filter will be removed in Lucene 5.0
/// same functionality. This filter will be removed in Lucene 5.0
[Obsolete("(3.1) Use SnowballFilter with FrenchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
public sealed class FrenchStemFilter : TokenFilter
{
Expand Down Expand Up @@ -93,4 +94,4 @@ public FrenchStemmer Stemmer
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.10.4
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Util;
using System.Collections.Generic;
using JCG = J2N.Collections.Generic;
Expand All @@ -24,20 +25,20 @@ namespace Lucene.Net.Analysis.Hunspell
*/

/// <summary>
/// <see cref="TokenFilter"/> that uses hunspell affix rules and words to stem tokens.
/// Since hunspell supports a word having multiple stems, this filter can emit
/// <see cref="TokenFilter"/> that uses hunspell affix rules and words to stem tokens.
/// Since hunspell supports a word having multiple stems, this filter can emit
/// multiple tokens for each consumed token
///
///
/// <para>
/// Note: This filter is aware of the <see cref="IKeywordAttribute"/>. To prevent
/// certain terms from being passed to the stemmer
/// <see cref="IKeywordAttribute.IsKeyword"/> should be set to <c>true</c>
/// in a previous <see cref="TokenStream"/>.
///
///
/// Note: For including the original term as well as the stemmed version, see
/// <see cref="Miscellaneous.KeywordRepeatFilterFactory"/>
/// </para>
///
///
/// @lucene.experimental
/// </summary>
public sealed class HunspellStemFilter : TokenFilter
Expand Down Expand Up @@ -160,4 +161,4 @@ public override void Reset()
}
});
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Support;
using Lucene.Net.Util;
Expand Down Expand Up @@ -38,26 +39,26 @@ namespace Lucene.Net.Analysis.Miscellaneous
/// <see cref="StopFilter"/> into a single efficient
/// multi-purpose class.
/// <para>
/// If you are unsure what exactly a regular expression should look like, consider
/// If you are unsure what exactly a regular expression should look like, consider
/// prototyping by simply trying various expressions on some test texts via
/// <see cref="Regex.Split(string)"/>. Once you are satisfied, give that regex to
/// <see cref="PatternAnalyzer"/>. Also see <a target="_blank"
/// <see cref="Regex.Split(string)"/>. Once you are satisfied, give that regex to
/// <see cref="PatternAnalyzer"/>. Also see <a target="_blank"
/// href="http://www.regular-expressions.info/">Regular Expression Tutorial</a>.
/// </para>
/// <para>
/// This class can be considerably faster than the "normal" Lucene tokenizers.
/// This class can be considerably faster than the "normal" Lucene tokenizers.
/// It can also serve as a building block in a compound Lucene
/// <see cref="TokenFilter"/> chain. For example as in this
/// <see cref="TokenFilter"/> chain. For example as in this
/// stemming example:
/// <code>
/// PatternAnalyzer pat = ...
/// TokenStream tokenStream = new SnowballFilter(
/// pat.GetTokenStream("content", "James is running round in the woods"),
/// pat.GetTokenStream("content", "James is running round in the woods"),
/// "English"));
/// </code>
/// </para>
/// </summary>
/// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
/// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
[Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")]
public sealed class PatternAnalyzer : Analyzer
{
Expand Down Expand Up @@ -196,8 +197,8 @@ public PatternAnalyzer(LuceneVersion matchVersion, Regex pattern, bool toLowerCa
/// <returns> a new token stream </returns>
public TokenStreamComponents CreateComponents(string fieldName, TextReader reader, string text)
{
// Ideally the Analyzer superclass should have a method with the same signature,
// with a default impl that simply delegates to the StringReader flavour.
// Ideally the Analyzer superclass should have a method with the same signature,
// with a default impl that simply delegates to the StringReader flavour.
if (reader is null)
{
reader = new FastStringReader(text);
Expand Down Expand Up @@ -448,9 +449,9 @@ public override void Reset()

// LUCENENET: Since we need to "reset" the Match
// object, we also need an "isReset" flag to indicate
// whether we are at the head of the match and to
// take the appropriate measures to ensure we don't
// overwrite our matcher variable with
// whether we are at the head of the match and to
// take the appropriate measures to ensure we don't
// overwrite our matcher variable with
// matcher = matcher.NextMatch();
// before it is time. A string could potentially
// match on index 0, so we need another variable to
Expand Down Expand Up @@ -528,10 +529,10 @@ public override bool IncrementToken()
{
text = text.ToLower(); // LUCENENET: Since this class is obsolete, we aren't going to bother with passing culture in the constructor.
}
// if (toLowerCase) {
// if (toLowerCase) {
//// use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
//// see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809
// text = s.substring(start, i).toLowerCase();
// text = s.substring(start, i).toLowerCase();
//// char[] chars = new char[i-start];
//// for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));
//// text = new String(chars);
Expand Down Expand Up @@ -607,4 +608,4 @@ internal FastStringReader(string s)
internal string String => s;
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Lucene version compatibility level 4.8.1
using System;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Util;

namespace Lucene.Net.Analysis.Miscellaneous
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Lucene version compatibility level 8.2.0
// LUCENENET NOTE: Ported because Lucene.Net.Analysis.OpenNLP requires this to be useful.
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Util;
#nullable enable

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Util;
using System;
using System.IO;
Expand Down Expand Up @@ -305,4 +306,4 @@ internal static bool IsDefined(this Lucene43EdgeNGramTokenizer.Side side)
#pragma warning restore CS0612 // Type or member is obsolete

}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System;
using System.IO;

Expand Down Expand Up @@ -171,4 +172,4 @@ public override void Reset()
pos = 0;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,6 @@ public int PositionIncrement
get => 0;
set => _ = value;
}

// LUCENENET specific - The interface requires this to be implemented, since we added it to avoid casts.
public void CopyTo(IAttribute target) => _ = target;
}

private sealed class PositionLengthAttributeAnonymousClass : IPositionLengthAttribute
Expand All @@ -133,9 +130,6 @@ public int PositionLength
get => 0;
set => _ = value;
}

// LUCENENET specific - The interface requires this to be implemented, since we added it to avoid casts.
public void CopyTo(IAttribute target) => _ = target;
}

/// <summary>
Expand Down Expand Up @@ -233,4 +227,4 @@ public override void Reset()
curTermBuffer = null;
}
}
}
}
13 changes: 7 additions & 6 deletions src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemFilter.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using Lucene.Net.Analysis.Util;
using System;
using System.Collections.Generic;
Expand All @@ -24,7 +25,7 @@ namespace Lucene.Net.Analysis.Nl
*/

/// <summary>
/// A <see cref="TokenFilter"/> that stems Dutch words.
/// A <see cref="TokenFilter"/> that stems Dutch words.
/// <para>
/// It supports a table of words that should
/// not be stemmed at all. The stemmer used can be changed at runtime after the
Expand All @@ -34,12 +35,12 @@ namespace Lucene.Net.Analysis.Nl
/// To prevent terms from being stemmed use an instance of
/// <see cref="Miscellaneous.KeywordMarkerFilter"/> or a custom <see cref="TokenFilter"/> that sets
/// the <see cref="IKeywordAttribute"/> before this <see cref="TokenStream"/>.
/// </para>
/// </para>
/// </summary>
/// <seealso cref="Miscellaneous.KeywordMarkerFilter"/>
/// @deprecated (3.1) Use <see cref="Snowball.SnowballFilter"/> with
/// @deprecated (3.1) Use <see cref="Snowball.SnowballFilter"/> with
/// <see cref="Tartarus.Snowball.Ext.DutchStemmer"/> instead, which has the
/// same functionality. This filter will be removed in Lucene 5.0
/// same functionality. This filter will be removed in Lucene 5.0
[Obsolete("(3.1) Use Snowball.SnowballFilter with Tartarus.Snowball.Ext.DutchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
public sealed class DutchStemFilter : TokenFilter
{
Expand All @@ -61,7 +62,7 @@ public DutchStemFilter(TokenStream @in)

/// <param name="in"> Input <see cref="TokenStream"/> </param>
/// <param name="stemdictionary"> Dictionary of word stem pairs, that overrule the algorithm </param>
public DutchStemFilter(TokenStream @in, IDictionary<string, string> stemdictionary)
public DutchStemFilter(TokenStream @in, IDictionary<string, string> stemdictionary)
: this(@in)
{
stemmer.StemDictionary = stemdictionary;
Expand Down Expand Up @@ -132,4 +133,4 @@ public CharArrayDictionary<string> StemDictionary
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Analysis.TokenAttributes.Extensions;
using System.Text.RegularExpressions;

namespace Lucene.Net.Analysis.Pattern
Expand All @@ -24,7 +25,7 @@ namespace Lucene.Net.Analysis.Pattern
/// <summary>
/// A TokenFilter which applies a <see cref="Regex"/> to each token in the stream,
/// replacing match occurrences with the specified replacement string.
///
///
/// <para>
/// <b>Note:</b> Depending on the input and the pattern used and the input
/// <see cref="TokenStream"/>, this <see cref="TokenFilter"/> may produce <see cref="Token"/>s whose text is the empty
Expand Down Expand Up @@ -73,4 +74,4 @@ public override bool IncrementToken()
return true;
}
}
}
}
Loading

0 comments on commit 5b7d0ac

Please sign in to comment.