diff --git a/src/AssociationRegistry.Admin.Api/DuplicateDetection/SearchDuplicateVerenigingDetectionService.cs b/src/AssociationRegistry.Admin.Api/DuplicateDetection/SearchDuplicateVerenigingDetectionService.cs index bb8aca8d4..2e0a42567 100644 --- a/src/AssociationRegistry.Admin.Api/DuplicateDetection/SearchDuplicateVerenigingDetectionService.cs +++ b/src/AssociationRegistry.Admin.Api/DuplicateDetection/SearchDuplicateVerenigingDetectionService.cs @@ -23,24 +23,32 @@ public SearchDuplicateVerenigingDetectionService(IElasticClient client) public async Task> GetDuplicates(VerenigingsNaam naam, Locatie[] locaties) { var locatiesMetAdres = locaties.Where(l => l.Adres is not null).ToArray(); + if (locatiesMetAdres.Length == 0) return Array.Empty(); var postcodes = locatiesMetAdres.Select(l => l.Adres!.Postcode).ToArray(); var gemeentes = locatiesMetAdres.Select(l => l.Adres!.Gemeente).ToArray(); + _client.Indices.Refresh(new RefreshRequest()); + var searchResponse = await _client .SearchAsync( - s => s.Query( - q => q.Bool( - b => b.Must(must => must.Match(m => FuzzyMatchOpNaam(m, f => f.Naam, naam))) - .Filter(f => f.Bool( - fb => fb.Should(MatchGemeente(gemeentes), - MatchPostcode(postcodes)) - .MinimumShouldMatch(1)))))); + s => s + .Size(50) + .Query( + q => q.Bool( + b => b.Must(must => must + .Match(m => FuzzyMatchOpNaam(m, path: f => f.Naam, naam)) + ) + .Filter(f => f.Bool( + fb => fb.Should( + MatchGemeente(gemeentes), + MatchPostcode(postcodes) + ) + .MinimumShouldMatch(1)))))); - return searchResponse.Documents.Select(ToDuplicateVereniging) - .ToArray(); + return searchResponse.Documents.Select(ToDuplicateVereniging).ToArray(); } private static Func, QueryContainer> MatchPostcode(string[] postcodes) @@ -67,9 +75,9 @@ private static Func, QueryC .Query(nq => nq .Match(m => FuzzyMatchOpNaam(m, - f => f.Locaties - .First() - .Gemeente, string.Join( + path: f => f.Locaties + .First() + .Gemeente, string.Join( separator: " ", gemeentes)) ) @@ -81,15 +89,12 @@ private static MatchQueryDescriptor FuzzyMatchOpNaam MatchQueryDescriptor m, Expression> path, string query) - { - return m - .Field(path) - .Query(query) - .Analyzer(DuplicateDetectionDocumentMapping - .DuplicateAnalyzer) - .Fuzziness(Fuzziness.Auto) // Assumes this analyzer applies lowercase and asciifolding - .MinimumShouldMatch("90%"); - } + => m + .Field(path) + .Query(query) + .Analyzer(DuplicateDetectionDocumentMapping.DuplicateAnalyzer) + .Fuzziness(Fuzziness.Auto) // Assumes this analyzer applies lowercase and asciifolding + .MinimumShouldMatch("70%"); private static DuplicaatVereniging ToDuplicateVereniging(DuplicateDetectionDocument document) => new( diff --git a/src/AssociationRegistry.Admin.ProjectionHost/Infrastructure/Extensions/ElasticClientExtensions.cs b/src/AssociationRegistry.Admin.ProjectionHost/Infrastructure/Extensions/ElasticClientExtensions.cs index 055c6956c..7768536e7 100644 --- a/src/AssociationRegistry.Admin.ProjectionHost/Infrastructure/Extensions/ElasticClientExtensions.cs +++ b/src/AssociationRegistry.Admin.ProjectionHost/Infrastructure/Extensions/ElasticClientExtensions.cs @@ -31,7 +31,7 @@ private static AnalyzersDescriptor AddDuplicateDetectionAnalyzer(AnalyzersDescri => ad.Custom(DuplicateDetectionDocumentMapping.DuplicateAnalyzer, selector: ca => ca - .Tokenizer("standard") - .Filters("lowercase", "asciifolding", "dutch_stop") + .Tokenizer("lowercase") + .Filters("asciifolding", "dutch_stop") ); }