diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
index 8dd51fd25e..096c6f691b 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
@@ -58,7 +58,8 @@ public static int HashCodeOfSerialized(CharBlockArray charBlockArray, int offset
             for (int i = 0; i < length; i++)
             {
                 int len = charBlockArray[offset++];
-                hash = hash * 31 + charBlockArray.Subsequence(offset, len).GetHashCode(); // LUCENENET: Corrected 2nd Subsequence parameter
+                // LUCENENET specific - calculate the hash code without the allocation caused by Subsequence
+                hash = hash * 31 + charBlockArray.GetHashCode(offset, len); // LUCENENET: Corrected 2nd parameter
                 offset += len;
             }
             return hash;
@@ -88,7 +89,8 @@ public static bool EqualsToSerialized(FacetLabel cp, CharBlockArray charBlockArr
                     return false;
                 }
 
-                if (!cp.Components[i].Equals(charBlockArray.Subsequence(offset, len).ToString(), StringComparison.Ordinal)) // LUCENENET: Corrected 2nd Subsequence parameter
+                // LUCENENET specific - compare for equality without the allocations caused by Subsequence() and ToString()
+                if (!charBlockArray.Equals(offset, len, cp.Components[i].AsSpan())) // LUCENENET: Corrected 2nd parameter
                 {
                     return false;
                 }
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
index 8fa8b40975..3331091be9 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
@@ -5,6 +5,7 @@
 using System;
 using System.Collections.Generic;
 using System.IO;
+using System.Runtime.CompilerServices;
 using System.Text;
 using JCG = J2N.Collections.Generic;
 
@@ -120,11 +121,13 @@ private void AddBlock()
             this.blocks.Add(this.current);
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal virtual int BlockIndex(int index)
         {
             return index / blockSize;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal virtual int IndexInBlock(int index)
         {
             return index % blockSize;
@@ -317,5 +320,74 @@ public static CharBlockArray Open(Stream @in)
         {
             return new CharBlockArray(@in);
         }
+
+
+        // LUCENENET specific - Lucene allocated memory using Subsequence and
+        // then called hashCode(), which calculated based on the value of the subsequence.
+        // However, in .NET this uses the indexer of the StringBuilder that Subsequence returned,
+        // which is super slow
+        // (see: https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.chars).
+        // But this operation doesn't require an allocation at all if we simply calculate the
+        // value based off of the chars that are in the CharBlockArray.
+        //
+        // This is a combination of Subsequence(int, int) and the J2N.Text.CharSequenceComparer.Ordinal.GetHashCode()
+        // implementation. The hash code calculated must be kept in sync with the J2N implementation
+        // (which originated in Apache Harmony) in order to return the correct result.
+        internal int GetHashCode(int startIndex, int length)
+        {
+            if (length == 0)
+                return 0;
+            int hash = 0;
+            int remaining = length;
+            int blockIdx = BlockIndex(startIndex);
+            int indexInBlock = IndexInBlock(startIndex);
+            while (remaining > 0)
+            {
+                Block b = blocks[blockIdx++];
+                int numToCheck = Math.Min(remaining, b.length - indexInBlock);
+                int end = indexInBlock + numToCheck;
+                var chars = b.chars;
+                for (int i = indexInBlock; i < end; i++)
+                {
+                    // Hash code calculation from J2N/Apache Harmony; (hash << 5) - hash == hash * 31
+                    hash = chars[i] + ((hash << 5) - hash);
+                }
+                remaining -= numToCheck;
+                indexInBlock = 0; // 2nd+ iterations read from start of the block
+            }
+            return hash;
+        }
+
+        /// <summary>
+        /// Compares a slice of this <see cref="CharBlockArray"/> to <paramref name="other"/>
+        /// for binary (ordinal) equality. Does not allocate any memory.
+        /// <para/>
+        /// LUCENENET specific.
+        /// </summary>
+        /// <param name="startIndex">The start index of this <see cref="CharBlockArray"/>.</param>
+        /// <param name="length">The length of characters to compare.</param>
+        /// <param name="other">The other character sequence to check for equality.</param>
+        /// <returns><c>true</c> if the two character sequences are equal; otherwise <c>false</c></returns>
+        internal bool Equals(int startIndex, int length, ReadOnlySpan<char> other)
+        {
+            if (other.Length != length) return false; // different lengths can never be equal
+
+            int remaining = length;
+            int blockIdx = BlockIndex(startIndex);
+            int indexInBlock = IndexInBlock(startIndex);
+            int otherIndex = 0;
+            while (remaining > 0)
+            {
+                Block b = blocks[blockIdx++];
+                int numToCheck = Math.Min(remaining, b.length - indexInBlock);
+                var charsToCheck = b.chars.AsSpan(indexInBlock, numToCheck);
+                if (!other.Slice(otherIndex, numToCheck).Equals(charsToCheck, StringComparison.Ordinal))
+                    return false;
+                remaining -= numToCheck;
+                otherIndex += numToCheck;
+                indexInBlock = 0; // 2nd+ iterations read from start of the block
+            }
+            return true;
+        }
     }
 }
\ No newline at end of file
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
index 4b766e93d8..2f6ed655bb 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
@@ -441,7 +441,8 @@ internal static CompactLabelToOrdinal Open(FileInfo file, float loadFactor, int
                     for (int i = 0; i < length; i++)
                     {
                         int len = (ushort)l2o.labelRepository[offset++];
-                        hash = hash * 31 + l2o.labelRepository.Subsequence(offset, len).GetHashCode(); // LUCENENET: Corrected 2nd Subsequence parameter
+                        // LUCENENET specific - calculate the hash code without the allocation caused by Subsequence
+                        hash = hash * 31 + l2o.labelRepository.GetHashCode(offset, len); // LUCENENET: Corrected 2nd parameter
                         offset += len;
                     }
                 }