diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
index 8dd51fd25e..096c6f691b 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
@@ -58,7 +58,8 @@ public static int HashCodeOfSerialized(CharBlockArray charBlockArray, int offset
for (int i = 0; i < length; i++)
{
int len = charBlockArray[offset++];
- hash = hash * 31 + charBlockArray.Subsequence(offset, len).GetHashCode(); // LUCENENET: Corrected 2nd Subsequence parameter
+ // LUCENENET specific - calculate the hash code without the allocation caused by Subsequence
+ hash = hash * 31 + charBlockArray.GetHashCode(offset, len); // LUCENENET: Corrected 2nd parameter
offset += len;
}
return hash;
@@ -88,7 +89,8 @@ public static bool EqualsToSerialized(FacetLabel cp, CharBlockArray charBlockArr
return false;
}
- if (!cp.Components[i].Equals(charBlockArray.Subsequence(offset, len).ToString(), StringComparison.Ordinal)) // LUCENENET: Corrected 2nd Subsequence parameter
+ // LUCENENET specific - compare for equality without the allocation caused by Subsequence() and ToString()
+ if (!charBlockArray.Equals(offset, len, cp.Components[i].AsSpan())) // LUCENENET: Corrected 2nd parameter
{
return false;
}
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
index 8fa8b40975..3331091be9 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
@@ -5,6 +5,7 @@
using System;
using System.Collections.Generic;
using System.IO;
+using System.Runtime.CompilerServices;
using System.Text;
using JCG = J2N.Collections.Generic;
@@ -120,11 +121,13 @@ private void AddBlock()
this.blocks.Add(this.current);
}
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] // NOTE(review): BlockIndex is virtual, so the JIT can presumably only inline it where the call is devirtualized - confirm the attribute has the intended effect
internal virtual int BlockIndex(int index)
{
return index / blockSize;
}
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal virtual int IndexInBlock(int index)
{
return index % blockSize;
@@ -317,5 +320,74 @@ public static CharBlockArray Open(Stream @in)
{
return new CharBlockArray(@in);
}
+
+
+ // LUCENENET specific - Lucene allocated memory using Subsequence and
+ // then called hashCode(), which calculated based on the value of the subsequence.
+ // However, in .NET this uses the indexer of the StringBuilder that Subsequence returned,
+ // which is super slow
+ // (see: https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.chars).
+ // But this operation doesn't require an allocation at all if we simply calculate the
+ // value based off of the chars that are in the CharBlockArray.
+ //
+ // This is a combination of Subsequence(int, int) and the J2N.Text.CharSequenceComparer.Ordinal.GetHashCode()
+ // implementation. The hash code calculated must be kept in sync with the J2N implementation
+ // (which originated in Apache Harmony) in order to return the correct result.
+        internal int GetHashCode(int startIndex, int length)
+        {
+            // Nothing to hash: bail out before touching the block list.
+            if (length == 0)
+            {
+                return 0;
+            }
+
+            int result = 0;
+            int blockNumber = BlockIndex(startIndex);
+            int offsetInBlock = IndexInBlock(startIndex);
+            for (int left = length; left > 0; offsetInBlock = 0) // blocks after the first are read from their start
+            {
+                Block block = blocks[blockNumber++];
+                int count = Math.Min(left, block.length - offsetInBlock);
+                var buffer = block.chars;
+                for (int pos = offsetInBlock, stop = offsetInBlock + count; pos < stop; pos++)
+                {
+                    result = (result << 5) - result + buffer[pos]; // 31 * result + char, same formula as J2N/Apache Harmony
+                }
+                left -= count;
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Compares a slice of this <see cref="CharBlockArray"/> to <paramref name="other"/>
+        /// for binary (ordinal) equality. Does not allocate any memory.
+        /// <para/>
+        /// LUCENENET specific.
+        /// </summary>
+        /// <param name="startIndex">The start index of this <see cref="CharBlockArray"/>.</param>
+        /// <param name="length">The length of characters to compare.</param>
+        /// <param name="other">The other character sequence to check for equality.</param>
+        /// <returns><c>true</c> if the two character sequences are equal; otherwise <c>false</c></returns>
+        internal bool Equals(int startIndex, int length, ReadOnlySpan<char> other)
+        {
+            if (other.Length != length) return false; // different lengths can never be equal
+
+            int remaining = length;
+            int blockIdx = BlockIndex(startIndex);
+            int indexInBlock = IndexInBlock(startIndex);
+            int otherIndex = 0;
+            while (remaining > 0)
+            {
+                Block b = blocks[blockIdx++];
+                int numToCheck = Math.Min(remaining, b.length - indexInBlock); // stop at the end of this block
+                var charsToCheck = b.chars.AsSpan(indexInBlock, numToCheck);
+                if (!other.Slice(otherIndex, numToCheck).Equals(charsToCheck, StringComparison.Ordinal))
+                    return false;
+                remaining -= numToCheck;
+                otherIndex += numToCheck;
+                indexInBlock = 0; // 2nd+ iterations read from start of the block
+            }
+            return true;
+        }
}
}
\ No newline at end of file
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
index 4b766e93d8..2f6ed655bb 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
@@ -441,7 +441,8 @@ internal static CompactLabelToOrdinal Open(FileInfo file, float loadFactor, int
for (int i = 0; i < length; i++)
{
int len = (ushort)l2o.labelRepository[offset++];
- hash = hash * 31 + l2o.labelRepository.Subsequence(offset, len).GetHashCode(); // LUCENENET: Corrected 2nd Subsequence parameter
+ // LUCENENET specific - calculate the hash code without the allocation caused by Subsequence
+ hash = hash * 31 + l2o.labelRepository.GetHashCode(offset, len); // LUCENENET: Corrected 2nd parameter
offset += len;
}
}