Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test review A-D, #259 #1018

Merged
merged 15 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions src/Lucene.Net.Tests/Analysis/TestCachingTokenFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,22 +85,17 @@ public virtual void TestCaching()

private sealed class TokenStreamAnonymousClass : TokenStream
{
private TestCachingTokenFilter outerInstance;
private readonly TestCachingTokenFilter outerInstance;

public TokenStreamAnonymousClass(TestCachingTokenFilter outerInstance)
{
InitMembers(outerInstance);
}

public void InitMembers(TestCachingTokenFilter outerInstance)
{
this.outerInstance = outerInstance;
index = 0;
// LUCENENET specific - AddAttribute must be called from the constructor
termAtt = AddAttribute<ICharTermAttribute>();
offsetAtt = AddAttribute<IOffsetAttribute>();
}

private int index;
private int index /* = 0 */;
private ICharTermAttribute termAtt;
private IOffsetAttribute offsetAtt;

Expand Down Expand Up @@ -135,4 +130,4 @@ private void CheckTokens(TokenStream stream)
Assert.AreEqual(tokens.Length, count);
}
}
}
}
4 changes: 3 additions & 1 deletion src/Lucene.Net.Tests/Analysis/TestCharFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ protected internal CharFilter1(TextReader @in)

/// <summary>
/// Reads up to <paramref name="len"/> characters from the wrapped input into
/// <paramref name="cbuf"/>, starting at index <paramref name="off"/>.
/// </summary>
/// <param name="cbuf">Destination buffer.</param>
/// <param name="off">Index in <paramref name="cbuf"/> at which to start writing.</param>
/// <param name="len">Maximum number of characters to read.</param>
/// <returns>
/// The count reported by the wrapped input, except that a count of 0 is
/// reported as -1.
/// </returns>
public override int Read(char[] cbuf, int off, int len)
{
    int charsRead = m_input.Read(cbuf, off, len);

    // LUCENENET specific: We need to return -1 when there are no more characters to read to match Java
    if (charsRead == 0)
    {
        return -1;
    }

    return charsRead;
}
Expand All @@ -82,6 +83,7 @@ protected internal CharFilter2(TextReader @in)

/// <summary>
/// Forwards the read to the wrapped input, filling <paramref name="cbuf"/>
/// from index <paramref name="off"/> with at most <paramref name="len"/> characters.
/// </summary>
/// <param name="cbuf">Destination buffer.</param>
/// <param name="off">Index in <paramref name="cbuf"/> at which to start writing.</param>
/// <param name="len">Maximum number of characters to read.</param>
/// <returns>
/// -1 when the wrapped input reports 0 characters read; otherwise the value
/// the wrapped input returned.
/// </returns>
public override int Read(char[] cbuf, int off, int len)
{
    int charsRead = m_input.Read(cbuf, off, len);
    if (charsRead != 0)
    {
        return charsRead;
    }

    // LUCENENET specific: We need to return -1 when there are no more characters to read to match Java
    return -1;
}
Expand All @@ -92,4 +94,4 @@ protected override int Correct(int currentOff)
}
}
}
}
}
141 changes: 108 additions & 33 deletions src/Lucene.Net.Tests/Analysis/TestGraphTokenizers.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using J2N.Text;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Attributes;
using Lucene.Net.Diagnostics;
using NUnit.Framework;
using System;
Expand Down Expand Up @@ -52,18 +53,19 @@ public class TestGraphTokenizers : BaseTokenStreamTestCase

private class GraphTokenizer : Tokenizer
{
internal IList<Token> tokens;
internal int upto;
internal int inputLength;
private IList<Token> tokens;
private int upto;
private int inputLength;

internal readonly ICharTermAttribute termAtt;
internal readonly IOffsetAttribute offsetAtt;
internal readonly IPositionIncrementAttribute posIncrAtt;
internal readonly IPositionLengthAttribute posLengthAtt;
private readonly ICharTermAttribute termAtt;
private readonly IOffsetAttribute offsetAtt;
private readonly IPositionIncrementAttribute posIncrAtt;
private readonly IPositionLengthAttribute posLengthAtt;

public GraphTokenizer(TextReader input)
: base(input)
{
// LUCENENET specific - AddAttribute must be called in the constructor
termAtt = AddAttribute<ICharTermAttribute>();
offsetAtt = AddAttribute<IOffsetAttribute>();
posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
Expand Down Expand Up @@ -124,7 +126,7 @@ internal virtual void FillTokens()
{
int count = m_input.Read(buffer, 0, buffer.Length);

//.NET TextReader.Read(buff, int, int) returns 0, not -1 on no chars
// LUCENENET specific - .NET TextReader.Read(buff, int, int) returns 0, not -1 on no chars
// but in some cases, such as MockCharFilter, it overloads read and returns -1
// so we should handle both 0 and -1 values
if (count <= 0)
Expand Down Expand Up @@ -229,14 +231,15 @@ public virtual void TestMockGraphTokenFilterOnGraphInput()
// Just deletes (leaving hole) token 'a':
private sealed class RemoveATokens : TokenFilter
{
internal int pendingPosInc;
private int pendingPosInc;

internal readonly ICharTermAttribute termAtt;
internal readonly IPositionIncrementAttribute posIncAtt;
private readonly ICharTermAttribute termAtt;
private readonly IPositionIncrementAttribute posIncAtt;

public RemoveATokens(TokenStream @in)
: base(@in)
{
// LUCENENET specific - AddAttribute must be called in the constructor
termAtt = AddAttribute<ICharTermAttribute>();
posIncAtt = AddAttribute<IPositionIncrementAttribute>();
}
Expand Down Expand Up @@ -383,13 +386,13 @@ public virtual void TestDoubleMockGraphTokenFilterRandom()
}

[Test]
[LuceneNetSpecific]
public void TestMockTokenizerCtor()
{
var sr = new StringReader("Hello");
var mt = new MockTokenizer(sr);
_ = new MockTokenizer(sr);
}


[Test]
public virtual void TestMockGraphTokenFilterBeforeHolesRandom()
{
Expand Down Expand Up @@ -459,7 +462,11 @@ private static Token Token(string term, int posInc, int posLength, int startOffs
[Test]
public virtual void TestSingleToken()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = BasicAutomata.MakeString("abc");
Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
Expand All @@ -468,7 +475,12 @@ public virtual void TestSingleToken()
[Test]
public virtual void TestMultipleHoles()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("a", 1, 1), Token("b", 3, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("a", 1, 1),
Token("b", 3, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = Join(S2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, S2a("b"));
Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
Expand All @@ -477,7 +489,13 @@ public virtual void TestMultipleHoles()
[Test]
public virtual void TestSynOverMultipleHoles()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("a", 1, 1), Token("x", 0, 3), Token("b", 3, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("a", 1, 1),
Token("x", 0, 3),
Token("b", 3, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton a1 = Join(S2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, S2a("b"));
Automaton a2 = Join(S2a("x"), SEP_A, S2a("b"));
Expand All @@ -499,7 +517,8 @@ private static void toDot(Automaton a) throws IOException {
private static readonly Automaton SEP_A = BasicAutomata.MakeChar(TokenStreamToAutomaton.POS_SEP);
private static readonly Automaton HOLE_A = BasicAutomata.MakeChar(TokenStreamToAutomaton.HOLE);

private Automaton Join(params string[] strings)
// LUCENENET specific - made static
private static Automaton Join(params string[] strings)
{
IList<Automaton> @as = new JCG.List<Automaton>();
foreach (string s in strings)
Expand All @@ -511,20 +530,27 @@ private Automaton Join(params string[] strings)
return BasicOperations.Concatenate(@as);
}

private Automaton Join(params Automaton[] @as)
// LUCENENET specific - made static
private static Automaton Join(params Automaton[] @as)
{
return BasicOperations.Concatenate(@as);
}

private Automaton S2a(string s)
// LUCENENET specific - made static
private static Automaton S2a(string s)
{
return BasicAutomata.MakeString(s);
}

[Test]
public virtual void TestTwoTokens()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1), Token("def", 1, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1),
Token("def", 1, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = Join("abc", "def");

Expand All @@ -535,7 +561,12 @@ public virtual void TestTwoTokens()
[Test]
public virtual void TestHole()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1), Token("def", 2, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1),
Token("def", 2, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);

Automaton expected = Join(S2a("abc"), SEP_A, HOLE_A, SEP_A, S2a("def"));
Expand All @@ -548,7 +579,12 @@ public virtual void TestHole()
public virtual void TestOverlappedTokensSausage()
{
// Two tokens on top of each other (sausage):
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1), Token("xyz", 0, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1),
Token("xyz", 0, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton a1 = BasicAutomata.MakeString("abc");
Automaton a2 = BasicAutomata.MakeString("xyz");
Expand All @@ -559,7 +595,13 @@ public virtual void TestOverlappedTokensSausage()
[Test]
public virtual void TestOverlappedTokensLattice()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1), Token("xyz", 0, 2), Token("def", 1, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1),
Token("xyz", 0, 2),
Token("def", 1, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton a1 = BasicAutomata.MakeString("xyz");
Automaton a2 = Join("abc", "def");
Expand All @@ -572,27 +614,51 @@ public virtual void TestOverlappedTokensLattice()
[Test]
public virtual void TestSynOverHole()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("a", 1, 1), Token("X", 0, 2), Token("b", 2, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("a", 1, 1),
Token("X", 0, 2),
Token("b", 2, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton a1 = BasicOperations.Union(Join(S2a("a"), SEP_A, HOLE_A), BasicAutomata.MakeString("X"));
Automaton expected = BasicOperations.Concatenate(a1, Join(SEP_A, S2a("b")));
Automaton a1 = BasicOperations.Union(
Join(S2a("a"), SEP_A, HOLE_A),
BasicAutomata.MakeString("X"));
Automaton expected = BasicOperations.Concatenate(a1,
Join(SEP_A, S2a("b")));
//toDot(actual);
Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
}

[Test]
public virtual void TestSynOverHole2()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("xyz", 1, 1), Token("abc", 0, 3), Token("def", 2, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("xyz", 1, 1),
Token("abc", 0, 3),
Token("def", 2, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = BasicOperations.Union(Join(S2a("xyz"), SEP_A, HOLE_A, SEP_A, S2a("def")), BasicAutomata.MakeString("abc"));
Automaton expected = BasicOperations.Union(
Join(S2a("xyz"), SEP_A, HOLE_A, SEP_A, S2a("def")),
BasicAutomata.MakeString("abc"));
Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
}

[Test]
public virtual void TestOverlappedTokensLattice2()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1), Token("xyz", 0, 3), Token("def", 1, 1), Token("ghi", 1, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1),
Token("xyz", 0, 3),
Token("def", 1, 1),
Token("ghi", 1, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton a1 = BasicAutomata.MakeString("xyz");
Automaton a2 = Join("abc", "def", "ghi");
Expand All @@ -606,14 +672,18 @@ public virtual void TestToDot()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1, 0, 4) });
StringWriter w = new StringWriter();
(new TokenStreamToDot("abcd", ts, (TextWriter)(w))).ToDot();
new TokenStreamToDot("abcd", ts, (TextWriter)(w)).ToDot();
Assert.IsTrue(w.ToString().IndexOf("abc / abcd", StringComparison.Ordinal) != -1);
}

[Test]
public virtual void TestStartsWithHole()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 2, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 2, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = Join(HOLE_A, SEP_A, S2a("abc"));
//toDot(actual);
Expand All @@ -625,10 +695,15 @@ public virtual void TestStartsWithHole()
[Test]
public virtual void TestSynHangingOverEnd()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("a", 1, 1), Token("X", 0, 10) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("a", 1, 1),
Token("X", 0, 10)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = BasicOperations.Union(BasicAutomata.MakeString("a"), BasicAutomata.MakeString("X"));
Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
}
}
}
}
22 changes: 20 additions & 2 deletions src/Lucene.Net.Tests/Analysis/TestLookaheadTokenFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,25 @@ public virtual void TestMissedFirstToken()
return new TokenStreamComponents(source, filter);
});

AssertAnalyzesTo(analyzer, "Only he who is running knows .", new string[] { "Only", "Only-huh?", "he", "he-huh?", "who", "who-huh?", "is", "is-huh?", "running", "running-huh?", "knows", "knows-huh?", ".", ".-huh?" });
AssertAnalyzesTo(analyzer,
"Only he who is running knows .",
new string[]
{
"Only",
"Only-huh?",
"he",
"he-huh?",
"who",
"who-huh?",
"is",
"is-huh?",
"running",
"running-huh?",
"knows",
"knows-huh?",
".",
".-huh?"
});
}
}
}
}
Loading
Loading