From 67cc87309e804ebd0519ba4be332933483dd3ac9 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Thu, 4 Apr 2024 22:52:41 +0200 Subject: [PATCH 01/19] gather searchables independentyl and sum them up Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../conquery/apiv1/FilterTemplate.java | 2 +- .../com/bakdata/conquery/apiv1/LabelMap.java | 70 ++++++++++++++++++ .../conquery/models/datasets/Column.java | 2 +- .../models/datasets/concepts/Searchable.java | 19 +---- .../filters/specific/SelectFilter.java | 72 ++++--------------- .../models/jobs/UpdateFilterSearchJob.java | 13 ++-- .../conquery/models/query/FilterSearch.java | 38 +++++----- .../resources/api/ConceptsProcessor.java | 30 ++++---- 8 files changed, 125 insertions(+), 121 deletions(-) create mode 100644 backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java b/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java index 03cda498ba..df4925e718 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java @@ -40,7 +40,7 @@ @ToString @Slf4j @CPSType(id = "CSV_TEMPLATE", base = SearchIndex.class) -public class FilterTemplate extends IdentifiableImpl implements Searchable, SearchIndex { +public class FilterTemplate extends IdentifiableImpl implements Searchable, SearchIndex { private static final long serialVersionUID = 1L; diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java new file mode 100644 index 0000000000..45361026af --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java @@ -0,0 +1,70 @@ +package com.bakdata.conquery.apiv1; + +import java.util.List; +import java.util.stream.Collectors; + +import com.bakdata.conquery.apiv1.frontend.FrontendValue; +import com.bakdata.conquery.io.storage.NamespaceStorage; +import com.bakdata.conquery.models.config.IndexConfig; +import com.bakdata.conquery.models.datasets.concepts.Searchable; +import com.bakdata.conquery.models.identifiable.ids.specific.FilterId; +import com.bakdata.conquery.models.query.FilterSearch; +import com.bakdata.conquery.util.search.TrieSearch; +import com.google.common.collect.BiMap; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.experimental.Delegate; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.time.StopWatch; + +@Getter +@RequiredArgsConstructor +@Slf4j +public class LabelMap implements Searchable { + + private final FilterId id; + @Delegate + private final BiMap delegate; + private final int minSuffixLength; + private final boolean generateSearchSuffixes; + + @Override + public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { + + final TrieSearch search = config.createTrieSearch(true); + + final List collected = delegate.entrySet().stream() + .map(entry -> new FrontendValue(entry.getKey(), entry.getValue())) + .collect(Collectors.toList()); + + if (log.isTraceEnabled()) { + log.trace("Labels for {}: `{}`", getId(), collected.stream().map(FrontendValue::toString).collect(Collectors.toList())); + } + + StopWatch timer = StopWatch.createStarted(); + log.trace("START-SELECT ADDING_ITEMS for {}", getId()); + + collected.forEach(feValue -> search.addItem(feValue, FilterSearch.extractKeywords(feValue))); + + log.trace("DONE-SELECT ADDING_ITEMS for {} in {}", getId(), timer); + + timer.reset(); + log.trace("START-SELECT SHRINKING for {}", getId()); + + search.shrinkToFit(); + + log.trace("DONE-SELECT SHRINKING for {} in {}", getId(), timer); + + return search; + } + + @Override + public boolean isGenerateSuffixes() { + return generateSearchSuffixes; + } + + @Override + public boolean isSearchDisabled() { + return false; + } +} diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java index 6859976095..b0a167ce76 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java @@ -26,7 +26,7 @@ @Setter @NoArgsConstructor @Slf4j -public class Column extends Labeled implements NamespacedIdentifiable, Searchable { +public class Column extends Labeled implements NamespacedIdentifiable, Searchable { public static final int UNKNOWN_POSITION = -1; diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java index ee8c13d5ab..190d9047a2 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java @@ -7,38 +7,21 @@ import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.identifiable.Identifiable; -import com.bakdata.conquery.models.identifiable.ids.Id; import com.bakdata.conquery.models.query.FilterSearch; import com.bakdata.conquery.util.search.TrieSearch; -import com.fasterxml.jackson.annotation.JsonIgnore; /** * @implNote This class is tightly coupled with {@link FilterSearch} and {@link com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter}. *

* Searchable classes describe how a search should be constructed, and provide the values with getSearchValues. */ -public interface Searchable>> extends Identifiable { - - public Dataset getDataset(); +public interface Searchable { /** * All available {@link FrontendValue}s for searching in a {@link TrieSearch}. */ TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage); - /** - * The actual Searchables to use, if there is potential for deduplication/pooling. - * - * @implSpec The order of objects returned is used to also sort search results from different sources. - */ - @JsonIgnore - default List> getSearchReferences() { - //Hopefully the only candidate will be Column - return List.of(this); - } - /** * Parameter used in the construction of {@link com.bakdata.conquery.util.search.TrieSearch}, defining the shortest suffix to create. * Ignored if isGenerateSuffixes is true. diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java index a3aab711a1..3e5a155abe 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java @@ -7,20 +7,16 @@ import java.util.stream.Collectors; import com.bakdata.conquery.apiv1.FilterTemplate; +import com.bakdata.conquery.apiv1.LabelMap; import com.bakdata.conquery.apiv1.frontend.FrontendFilterConfiguration; import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.jackson.View; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.ConqueryConfig; -import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.datasets.concepts.filters.SingleColumnFilter; import com.bakdata.conquery.models.events.MajorTypeId; import com.bakdata.conquery.models.exceptions.ConceptConfigurationException; -import com.bakdata.conquery.models.identifiable.ids.specific.FilterId; -import com.bakdata.conquery.models.query.FilterSearch; -import com.bakdata.conquery.util.search.TrieSearch; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.google.common.collect.BiMap; @@ -30,7 +26,6 @@ import lombok.NoArgsConstructor; import lombok.Setter; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.time.StopWatch; import org.jetbrains.annotations.NotNull; @Setter @@ -38,7 +33,7 @@ @NoArgsConstructor @Slf4j @JsonIgnoreProperties({"searchType"}) -public abstract class SelectFilter extends SingleColumnFilter implements Searchable { +public abstract class SelectFilter extends SingleColumnFilter { /** * user given mapping from the values in the columns to shown labels @@ -71,19 +66,25 @@ public void configureFrontend(FrontendFilterConfiguration.Top f, ConqueryConfig @JsonIgnore public abstract String getFilterType(); - @Override - public List> getSearchReferences() { - final List> out = new ArrayList<>(); + + /** + * The actual Searchables to use, if there is potential for deduplication/pooling. + * + * @implSpec The order of objects returned is used to also sort search results from different sources. + */ + @JsonIgnore + public List getSearchReferences() { + final List out = new ArrayList<>(); if (getTemplate() != null) { out.add(getTemplate()); } if (!labels.isEmpty()) { - out.add(this); + out.add(new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes)); } - out.addAll(getColumn().getSearchReferences()); + out.add(getColumn()); return out; } @@ -105,51 +106,4 @@ public boolean isNotUsingTemplateAndLabels() { return (getTemplate() == null) != labels.isEmpty(); } - - @Override - @JsonIgnore - public boolean isGenerateSuffixes() { - return generateSearchSuffixes; - } - - @Override - @JsonIgnore - public int getMinSuffixLength() { - return searchMinSuffixLength; - } - - /** - * Does not make sense to distinguish at Filter level since it's only referenced when labels are set. - */ - @Override - @JsonIgnore - public boolean isSearchDisabled() { - return false; - } - - @Override - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { - - final TrieSearch search = config.createTrieSearch(true); - - if(log.isTraceEnabled()) { - log.trace("Labels for {}: `{}`", getId(), collectLabels().stream().map(FrontendValue::toString).collect(Collectors.toList())); - } - - StopWatch timer = StopWatch.createStarted(); - log.trace("START-SELECT ADDING_ITEMS for {}", getId()); - - collectLabels().forEach(feValue -> search.addItem(feValue, FilterSearch.extractKeywords(feValue))); - - log.trace("DONE-SELECT ADDING_ITEMS for {} in {}", getId(), timer); - - timer.reset(); - log.trace("START-SELECT SHRINKING for {}", getId()); - - search.shrinkToFit(); - - log.trace("DONE-SELECT SHRINKING for {} in {}", getId(), timer); - - return search; - } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java index c23d065b9b..9ec67f02eb 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java @@ -1,6 +1,5 @@ package com.bakdata.conquery.models.jobs; -import java.time.Duration; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -71,7 +70,7 @@ public void execute() throws Exception { // Unfortunately the is no ClassToInstanceMultimap yet - final Map, Set>> collectedSearchables = + final Map, Set> collectedSearchables = allSelectFilters.stream() .map(SelectFilter::getSearchReferences) .flatMap(Collection::stream) @@ -85,12 +84,12 @@ public void execute() throws Exception { // Most computations are cheap but data intensive: we fork here to use as many cores as possible. final ExecutorService service = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1); - final HashMap, TrieSearch> searchCache = new HashMap<>(); - final Map, TrieSearch> synchronizedResult = Collections.synchronizedMap(searchCache); + final HashMap> searchCache = new HashMap<>(); + final Map> synchronizedResult = Collections.synchronizedMap(searchCache); log.debug("Found {} searchable Objects.", collectedSearchables.values().stream().mapToLong(Set::size).sum()); - for (Searchable searchable : collectedSearchables.getOrDefault(Searchable.class, Collections.emptySet())) { + for (Searchable searchable : collectedSearchables.getOrDefault(Searchable.class, Collections.emptySet())) { if (searchable instanceof Column column) { throw new IllegalStateException("Columns should have been grouped out previously"); } @@ -99,7 +98,7 @@ public void execute() throws Exception { final StopWatch watch = StopWatch.createStarted(); - log.info("BEGIN collecting entries for `{}`", searchable.getId()); + log.info("BEGIN collecting entries for `{}`", searchable); try { final TrieSearch search = searchable.createTrieSearch(indexConfig, storage); @@ -109,7 +108,7 @@ public void execute() throws Exception { log.debug( "DONE collecting {} entries for `{}`, within {}", search.calculateSize(), - searchable.getId(), + searchable, watch ); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java index 8c6522eb3c..378403fb1b 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java @@ -40,8 +40,8 @@ public class FilterSearch { * In the code below, the keys of this map will usually be called "reference". */ @JsonIgnore - private Map, TrieSearch> searchCache = new HashMap<>(); - private Object2LongMap> totals = Object2LongMaps.emptyMap(); + private Map> searchCache = new HashMap<>(); + private Object2LongMap totals = Object2LongMaps.emptyMap(); /** * From a given {@link FrontendValue} extract all relevant keywords. @@ -62,11 +62,11 @@ public static List extractKeywords(FrontendValue value) { /** * For a {@link SelectFilter} collect all relevant {@link TrieSearch}. */ - public final List> getSearchesFor(Searchable searchable) { - final List> references = searchable.getSearchReferences(); + public final List> getSearchesFor(SelectFilter searchable) { + final List references = searchable.getSearchReferences(); if (log.isTraceEnabled()) { - log.trace("Got {} as searchables for {}", references.stream().map(Searchable::getId).collect(Collectors.toList()), searchable.getId()); + log.trace("Got {} as searchables for {}", references.stream().map(Searchable::toString).collect(Collectors.toList()), searchable.getId()); } return references.stream() @@ -75,15 +75,15 @@ public final List> getSearchesFor(Searchable search .collect(Collectors.toList()); } - public long getTotal(Searchable searchable) { - return totals.getOrDefault(searchable, 0); + public long getTotal(SelectFilter searchable) { + return searchable.getSearchReferences().stream().mapToLong(totals::getLong).sum(); } /** * Add ready searches to the cache. This assumes that the search already has been shrunken. */ - public synchronized void addSearches(Map, TrieSearch> searchCache) { + public synchronized void addSearches(Map> searchCache) { this.searchCache.putAll(searchCache); log.debug("BEGIN counting Search totals."); @@ -97,7 +97,7 @@ public synchronized void addSearches(Map, TrieSearch searchable, Collection values) { + public void registerValues(Searchable searchable, Collection values) { TrieSearch search = searchCache.computeIfAbsent(searchable, (ignored) -> searchable.createTrieSearch(indexConfig, storage)); synchronized (search) { @@ -110,8 +110,8 @@ public void registerValues(Searchable searchable, Collection values) } } - private void updateTotals(Map, TrieSearch> searchCache) { - final Object2LongOpenHashMap> partialTotals = new Object2LongOpenHashMap<>(searchCache.keySet().stream() + private void updateTotals(Map> searchCache) { + final Object2LongOpenHashMap partialTotals = new Object2LongOpenHashMap<>(searchCache.keySet().stream() .collect(Collectors.toMap( Functions.identity(), this::getTotalFor @@ -119,21 +119,17 @@ private void updateTotals(Map, TrieSearch> searchCa totals.putAll(partialTotals); } - private long getTotalFor(Searchable searchable) { - return searchable.getSearchReferences() - .stream() - .map(searchCache::get) - .filter(Objects::nonNull) // Failed or disabled searches are null - .flatMap(TrieSearch::stream) - .mapToInt(FrontendValue::hashCode) - .distinct() - .count(); + private long getTotalFor(Searchable searchable) { + return searchCache.get(searchable).stream() + .mapToInt(FrontendValue::hashCode) // TODO Use distinct directly? + .distinct() + .count(); } /** * Shrink the memory footprint of a search. After this action, no values can be registered anymore to a search. */ - public void shrinkSearch(Searchable searchable) { + public void shrinkSearch(Searchable searchable) { final TrieSearch search = getSearchCache().get(searchable); if (search == null) { diff --git a/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java b/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java index a1a16e1696..02b13d50e2 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java @@ -30,8 +30,6 @@ import com.bakdata.conquery.models.datasets.PreviewConfig; import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.datasets.concepts.FrontEndConceptBuilder; -import com.bakdata.conquery.models.datasets.concepts.Searchable; -import com.bakdata.conquery.models.datasets.concepts.filters.Filter; import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeChild; import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; @@ -79,14 +77,14 @@ public FrontendList load(Concept concept) { /** * Cache of all search results on SelectFilters. */ - private final LoadingCache, String>, List> + private final LoadingCache, String>, List> searchResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() { @Override - public List load(Pair, String> filterAndSearch) { + public List load(Pair, String> filterAndSearch) { final String searchTerm = filterAndSearch.getValue(); - final Searchable searchable = filterAndSearch.getKey(); + final SelectFilter searchable = filterAndSearch.getKey(); log.trace("Calculating a new search cache for the term \"{}\" on Searchable[{}]", searchTerm, searchable.getId()); @@ -98,9 +96,9 @@ public List load(Pair, String> filterAndSearch) { * Cache of raw listing of values on a filter. * We use Cursor here to reduce strain on memory and increase response time. */ - private final LoadingCache, CursorAndLength> listResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() { + private final LoadingCache, CursorAndLength> listResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() { @Override - public CursorAndLength load(Searchable searchable) { + public CursorAndLength load(SelectFilter searchable) { log.trace("Creating cursor for `{}`", searchable.getId()); return new CursorAndLength(listAllValues(searchable), countAllValues(searchable)); } @@ -160,7 +158,7 @@ public FrontendPreviewConfig getEntityPreviewFrontendConfig(Dataset dataset) { * Search for all search terms at once, with stricter scoring. * The user will upload a file and expect only well-corresponding resolutions. */ - public ResolvedFilterValues resolveFilterValues(Searchable searchable, List searchTerms) { + public ResolvedFilterValues resolveFilterValues(SelectFilter searchable, List searchTerms) { // search in the full text engine final Set openSearchTerms = new HashSet<>(searchTerms); @@ -184,13 +182,17 @@ public ResolvedFilterValues resolveFilterValues(Searchable searchable, List searchable, Optional maybeText, OptionalInt pageNumberOpt, OptionalInt itemsPerPageOpt) { + public AutoCompleteResult autocompleteTextFilter( + SelectFilter searchable, + Optional maybeText, + OptionalInt pageNumberOpt, + OptionalInt itemsPerPageOpt + ) { final int pageNumber = pageNumberOpt.orElse(0); final int itemsPerPage = itemsPerPageOpt.orElse(50); @@ -226,7 +228,7 @@ public AutoCompleteResult autocompleteTextFilter(Searchable searchable, Optio } } - private Cursor listAllValues(Searchable searchable) { + private Cursor listAllValues(SelectFilter searchable) { final Namespace namespace = namespaces.get(searchable.getDataset().getId()); /* Don't worry, I am as confused as you are! @@ -251,7 +253,7 @@ private Cursor listAllValues(Searchable searchable) { return new Cursor<>(Iterators.filter(iterators, seen::add)); } - private long countAllValues(Searchable searchable) { + private long countAllValues(SelectFilter searchable) { final Namespace namespace = namespaces.get(searchable.getDataset().getId()); return namespace.getFilterSearch().getTotal(searchable); @@ -261,7 +263,7 @@ private long countAllValues(Searchable searchable) { * Autocompletion for search terms. For values of {@link SelectFilter }. * Is used by the serach cache to load missing items */ - private List autocompleteTextFilter(Searchable searchable, String text) { + private List autocompleteTextFilter(SelectFilter searchable, String text) { final Namespace namespace = namespaces.get(searchable.getDataset().getId()); // Note that FEValues is equals/hashcode only on value: From bfd703a15322f6072bbeba3895ebdb5f836cef20 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 08:46:59 +0200 Subject: [PATCH 02/19] removes unecessary members from FilterSearch Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../java/com/bakdata/conquery/apiv1/FilterTemplate.java | 3 +-- .../main/java/com/bakdata/conquery/apiv1/LabelMap.java | 3 +-- .../java/com/bakdata/conquery/mode/NamespaceHandler.java | 2 +- .../java/com/bakdata/conquery/models/datasets/Column.java | 3 +-- .../conquery/models/datasets/concepts/Searchable.java | 3 +-- .../conquery/models/jobs/UpdateFilterSearchJob.java | 6 +++--- .../com/bakdata/conquery/models/query/FilterSearch.java | 8 +------- 7 files changed, 9 insertions(+), 19 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java b/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java index df4925e718..955b7cb42c 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java @@ -8,7 +8,6 @@ import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.concepts.Searchable; @@ -90,7 +89,7 @@ public boolean isSearchDisabled() { return false; } - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { + public TrieSearch createTrieSearch(IndexConfig config) { final URI resolvedURI = FileUtil.getResolvedUri(config.getBaseUrl(), getFilePath()); log.trace("Resolved filter template reference url for search '{}': {}", this.getId(), resolvedURI); diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java index 45361026af..44dff45a8c 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java @@ -4,7 +4,6 @@ import java.util.stream.Collectors; import com.bakdata.conquery.apiv1.frontend.FrontendValue; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.identifiable.ids.specific.FilterId; @@ -29,7 +28,7 @@ public class LabelMap implements Searchable { private final boolean generateSearchSuffixes; @Override - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { + public TrieSearch createTrieSearch(IndexConfig config) { final TrieSearch search = config.createTrieSearch(true); diff --git a/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java b/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java index 2d5740e4d1..8f3e4eb1e4 100644 --- a/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java +++ b/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java @@ -46,7 +46,7 @@ static NamespaceSetupData createNamespaceSetup(NamespaceStorage storage, final C JobManager jobManager = new JobManager(storage.getDataset().getName(), config.isFailOnError()); - FilterSearch filterSearch = new FilterSearch(storage, jobManager, config.getCsv(), config.getIndex()); + FilterSearch filterSearch = new FilterSearch(config.getIndex()); return new NamespaceSetupData(injectables, indexService, communicationMapper, preprocessMapper, jobManager, filterSearch); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java index b0a167ce76..5a7180976f 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java @@ -5,7 +5,6 @@ import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.events.MajorTypeId; @@ -75,7 +74,7 @@ public Dataset getDataset() { * We create only an empty search here, because the content is provided through {@link com.bakdata.conquery.models.messages.namespaces.specific.RegisterColumnValues} and filled by the caller. */ @Override - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { + public TrieSearch createTrieSearch(IndexConfig config) { return config.createTrieSearch(isGenerateSuffixes()); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java index 190d9047a2..931655fd94 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java @@ -5,7 +5,6 @@ import javax.validation.constraints.Min; import com.bakdata.conquery.apiv1.frontend.FrontendValue; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.query.FilterSearch; import com.bakdata.conquery.util.search.TrieSearch; @@ -20,7 +19,7 @@ public interface Searchable { /** * All available {@link FrontendValue}s for searching in a {@link TrieSearch}. */ - TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage); + TrieSearch createTrieSearch(IndexConfig config); /** * Parameter used in the construction of {@link com.bakdata.conquery.util.search.TrieSearch}, defining the shortest suffix to create. diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java index 9ec67f02eb..8313b4de2d 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java @@ -90,7 +90,7 @@ public void execute() throws Exception { log.debug("Found {} searchable Objects.", collectedSearchables.values().stream().mapToLong(Set::size).sum()); for (Searchable searchable : collectedSearchables.getOrDefault(Searchable.class, Collections.emptySet())) { - if (searchable instanceof Column column) { + if (searchable instanceof Column) { throw new IllegalStateException("Columns should have been grouped out previously"); } @@ -101,7 +101,7 @@ public void execute() throws Exception { log.info("BEGIN collecting entries for `{}`", searchable); try { - final TrieSearch search = searchable.createTrieSearch(indexConfig, storage); + final TrieSearch search = searchable.createTrieSearch(indexConfig); synchronizedResult.put(searchable, search); @@ -119,7 +119,7 @@ public void execute() throws Exception { }); } - // The following cast is save + // The following cast is safe final Set searchableColumns = (Set) collectedSearchables.getOrDefault(Column.class, Collections.emptySet()); log.debug("Start collecting column values: {}", Arrays.toString(searchableColumns.toArray())); registerColumnValuesInSearch.accept(searchableColumns); diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java index 378403fb1b..89a8311e7e 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java @@ -9,12 +9,9 @@ import java.util.stream.Collectors; import com.bakdata.conquery.apiv1.frontend.FrontendValue; -import com.bakdata.conquery.io.storage.NamespaceStorage; -import com.bakdata.conquery.models.config.CSVConfig; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; -import com.bakdata.conquery.models.jobs.JobManager; import com.bakdata.conquery.util.search.TrieSearch; import com.fasterxml.jackson.annotation.JsonIgnore; import com.google.common.base.Functions; @@ -29,9 +26,6 @@ @Data public class FilterSearch { - private final NamespaceStorage storage; - private final JobManager jobManager; - private final CSVConfig parserConfig; private final IndexConfig indexConfig; /** @@ -98,7 +92,7 @@ public synchronized void addSearches(Map> * prevents from adding new values. */ public void registerValues(Searchable searchable, Collection values) { - TrieSearch search = searchCache.computeIfAbsent(searchable, (ignored) -> searchable.createTrieSearch(indexConfig, storage)); + TrieSearch search = searchCache.computeIfAbsent(searchable, (ignored) -> searchable.createTrieSearch(indexConfig)); synchronized (search) { values.stream() From fff0bbab87d70be083094c51293e03046ae686ae Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:01:03 +0200 Subject: [PATCH 03/19] generate hashcode for LabelMap Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java index 44dff45a8c..51663d3dfd 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java @@ -10,6 +10,7 @@ import com.bakdata.conquery.models.query.FilterSearch; import com.bakdata.conquery.util.search.TrieSearch; import com.google.common.collect.BiMap; +import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.experimental.Delegate; @@ -19,6 +20,7 @@ @Getter @RequiredArgsConstructor @Slf4j +@EqualsAndHashCode public class LabelMap implements Searchable { private final FilterId id; From 85a86e39db7d9e333ce0e67c944142db376cb94c Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:03:23 +0200 Subject: [PATCH 04/19] adds FilterSearchTest Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../filters/specific/SelectFilter.java | 9 ++- .../models/jobs/UpdateFilterSearchJob.java | 2 +- .../conquery/models/query/FilterSearch.java | 13 ++-- .../conquery/service/FilterSearchTest.java | 69 +++++++++++++++++++ 4 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java index 3e5a155abe..6e192b13b1 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java @@ -47,6 +47,9 @@ public abstract class SelectFilter extends SingleColumnFilter private int searchMinSuffixLength = 3; private boolean generateSearchSuffixes = true; + @JsonIgnore + private transient LabelMap searchableLabels; + @Override public EnumSet getAcceptedColumnTypes() { return EnumSet.of(MajorTypeId.STRING); @@ -81,7 +84,11 @@ public List getSearchReferences() { } if (!labels.isEmpty()) { - out.add(new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes)); + if (searchableLabels == null) { + // No synchronization here should be fine LabelMap and its delegate BiMap implement hash code + searchableLabels = new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes); + } + out.add(searchableLabels); } out.add(getColumn()); diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java index 8313b4de2d..ef893d86ab 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java @@ -59,7 +59,7 @@ public void execute() throws Exception { log.info("BEGIN loading SourceSearch"); - // collect all SelectFilters to the create searches for them + // collect all SelectFilters to create searches for them final List> allSelectFilters = storage.getAllConcepts().stream() .flatMap(c -> c.getConnectors().stream()) diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java index 89a8311e7e..64fc5a77fb 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java @@ -16,16 +16,17 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.google.common.base.Functions; import it.unimi.dsi.fastutil.objects.Object2LongMap; -import it.unimi.dsi.fastutil.objects.Object2LongMaps; import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap; -import lombok.Data; +import lombok.Getter; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @Slf4j -@Data +@RequiredArgsConstructor public class FilterSearch { + @Getter private final IndexConfig indexConfig; /** @@ -35,7 +36,7 @@ public class FilterSearch { */ @JsonIgnore private Map> searchCache = new HashMap<>(); - private Object2LongMap totals = Object2LongMaps.emptyMap(); + private Object2LongMap totals = new Object2LongOpenHashMap<>(); /** * From a given {@link FrontendValue} extract all relevant keywords. @@ -124,7 +125,7 @@ private long getTotalFor(Searchable searchable) { * Shrink the memory footprint of a search. After this action, no values can be registered anymore to a search. */ public void shrinkSearch(Searchable searchable) { - final TrieSearch search = getSearchCache().get(searchable); + final TrieSearch search = searchCache.get(searchable); if (search == null) { log.warn("Searchable has no search associated: {}", searchable); @@ -135,6 +136,6 @@ public void shrinkSearch(Searchable searchable) { public synchronized void clearSearch() { totals = new Object2LongOpenHashMap<>(); - searchCache.clear(); + searchCache = new HashMap<>(); } } diff --git a/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java b/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java new file mode 100644 index 0000000000..cf58a8305a --- /dev/null +++ b/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java @@ -0,0 +1,69 @@ +package com.bakdata.conquery.service; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.List; +import java.util.Map; + +import com.bakdata.conquery.models.config.IndexConfig; +import com.bakdata.conquery.models.datasets.Column; +import com.bakdata.conquery.models.datasets.Dataset; +import com.bakdata.conquery.models.datasets.Table; +import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; +import com.bakdata.conquery.models.datasets.concepts.filters.specific.SingleSelectFilter; +import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector; +import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; +import com.bakdata.conquery.models.query.FilterSearch; +import com.google.common.collect.ImmutableBiMap; +import org.junit.jupiter.api.Test; + +public class FilterSearchTest { + + @Test + public void totals() { + final IndexConfig indexConfig = new IndexConfig(); + FilterSearch search = new FilterSearch(indexConfig); + + // Column Searchable + SelectFilter filter = new SingleSelectFilter(); + ConceptTreeConnector connector = new ConceptTreeConnector(); + TreeConcept concept = new TreeConcept(); + Column column = new Column(); + Table table = new Table(); + Dataset dataset = new Dataset("test_dataset"); + + table.setName("test_table"); + table.setDataset(dataset); + concept.setDataset(dataset); + concept.setName("test_concept"); + concept.setConnectors(List.of(connector)); + connector.setName("test_connector"); + connector.setFilters(List.of(filter)); + connector.setConcept(concept); + column.setTable(table); + column.setName("test_column"); + filter.setColumn(column); + filter.setConnector(connector); + + + // Map Searchable + filter.setLabels(ImmutableBiMap.of( + "mm", "MM", + "nn", "NN" + )); + + // Register + filter.getSearchReferences().forEach(searchable -> { + search.addSearches(Map.of(searchable, searchable.createTrieSearch(indexConfig))); + }); + + search.registerValues(column, List.of( + "a", + "bb", + "cc" + )); + search.shrinkSearch(column); + + assertThat(search.getTotal(filter)).isEqualTo(5); + } +} From 8d4c0271055b2d087865b8f00c1c009b31169d49 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:33:52 +0200 Subject: [PATCH 05/19] request all search totals after UpdateFilterSearchJob Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../models/jobs/UpdateFilterSearchJob.java | 18 ++++++---- .../specific/CollectColumnValuesJob.java | 6 ++++ .../specific/FinalizeReactionMessage.java | 7 +++- .../conquery/models/query/FilterSearch.java | 33 ++++--------------- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java index ef893d86ab..e980c21b1f 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java @@ -27,6 +27,7 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.time.StopWatch; +import org.jetbrains.annotations.NotNull; /** * Job that initializes the filter search for the frontend. @@ -61,12 +62,7 @@ public void execute() throws Exception { // collect all SelectFilters to create searches for them final List> allSelectFilters = - storage.getAllConcepts().stream() - .flatMap(c -> c.getConnectors().stream()) - .flatMap(co -> co.collectAllFilters().stream()) - .filter(SelectFilter.class::isInstance) - .map(f -> ((SelectFilter) f)) - .collect(Collectors.toList()); + getAllSelectFilters(storage); // Unfortunately the is no ClassToInstanceMultimap yet @@ -145,6 +141,16 @@ public void execute() throws Exception { } + @NotNull + public static List> getAllSelectFilters(NamespaceStorage storage) { + return storage.getAllConcepts().stream() + .flatMap(c -> c.getConnectors().stream()) + .flatMap(co -> co.collectAllFilters().stream()) + .filter(SelectFilter.class::isInstance) + .map(f -> ((SelectFilter) f)) + .collect(Collectors.toList()); + } + @Override public String getLabel() { return "UpdateFilterSearchJob"; diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java index 58058b92f9..84a9bdef53 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java @@ -16,6 +16,7 @@ import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.stores.root.StringStore; +import com.bakdata.conquery.models.jobs.UpdateFilterSearchJob; import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; import com.bakdata.conquery.models.messages.namespaces.WorkerMessage; @@ -103,5 +104,10 @@ public void afterAllReaction() { log.debug("{} shrinking searches", this); final FilterSearch filterSearch = namespace.getFilterSearch(); columns.forEach(filterSearch::shrinkSearch); + + + log.info("BEGIN counting Search totals."); + UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()).forEach(namespace.getFilterSearch()::getTotal); + log.debug("FINISHED counting Search totals."); } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java index fb383ccacc..e047a56963 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java @@ -5,6 +5,7 @@ import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; import com.bakdata.conquery.models.messages.ReactionMessage; +import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; import com.bakdata.conquery.models.messages.namespaces.NamespaceMessage; import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; import com.bakdata.conquery.models.worker.DistributedNamespace; @@ -14,12 +15,16 @@ import lombok.ToString; import lombok.extern.slf4j.Slf4j; + +/** + * Use {@link ActionReactionMessage#afterAllReaction()} to processing on initiator side after all reactions where collected. + */ @CPSType(id = "FINALIZE_REACTION_MESSAGE", base = NamespacedMessage.class) @AllArgsConstructor(onConstructor_ = @JsonCreator) @Getter @Slf4j @ToString -public class FinalizeReactionMessage extends NamespaceMessage implements ReactionMessage { +public final class FinalizeReactionMessage extends NamespaceMessage implements ReactionMessage { private UUID callerId; diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java index 64fc5a77fb..46273c979f 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java @@ -14,7 +14,6 @@ import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; import com.bakdata.conquery.util.search.TrieSearch; import com.fasterxml.jackson.annotation.JsonIgnore; -import com.google.common.base.Functions; import it.unimi.dsi.fastutil.objects.Object2LongMap; import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap; import lombok.Getter; @@ -36,7 +35,7 @@ public class FilterSearch { */ @JsonIgnore private Map> searchCache = new HashMap<>(); - private Object2LongMap totals = new Object2LongOpenHashMap<>(); + private Object2LongMap> totals = new Object2LongOpenHashMap<>(); /** * From a given {@link FrontendValue} extract all relevant keywords. @@ -70,8 +69,12 @@ public final List> getSearchesFor(SelectFilter sear .collect(Collectors.toList()); } - public long getTotal(SelectFilter searchable) { - return searchable.getSearchReferences().stream().mapToLong(totals::getLong).sum(); + public long getTotal(SelectFilter filter) { + return totals.computeIfAbsent(filter, (f) -> filter.getSearchReferences().stream() + .map(searchCache::get) + .flatMap(TrieSearch::stream) + .distinct() + .count()); } @@ -81,9 +84,6 @@ public long getTotal(SelectFilter searchable) { public synchronized void addSearches(Map> searchCache) { this.searchCache.putAll(searchCache); - log.debug("BEGIN counting Search totals."); - // Precompute totals as that can be slow when doing it on-demand. - updateTotals(searchCache); } @@ -99,28 +99,9 @@ public void registerValues(Searchable searchable, Collection values) { values.stream() .map(value -> new FrontendValue(value, value)) .forEach(value -> search.addItem(value, extractKeywords(value))); - - // Update totals for search - totals.put(searchable, getTotalFor(searchable)); } } - private void updateTotals(Map> searchCache) { - final Object2LongOpenHashMap partialTotals = new Object2LongOpenHashMap<>(searchCache.keySet().stream() - .collect(Collectors.toMap( - Functions.identity(), - this::getTotalFor - ))); - totals.putAll(partialTotals); - } - - private long getTotalFor(Searchable searchable) { - return searchCache.get(searchable).stream() - .mapToInt(FrontendValue::hashCode) // TODO Use distinct directly? - .distinct() - .count(); - } - /** * Shrink the memory footprint of a search. After this action, no values can be registered anymore to a search. */ From 1ec91d8d36ad5fec484f45255a63b8335ca92e5e Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:35:13 +0200 Subject: [PATCH 06/19] Update backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java --- .../java/com/bakdata/conquery/service/FilterSearchTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java b/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java index cf58a8305a..825c13df7c 100644 --- a/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java +++ b/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java @@ -60,7 +60,8 @@ public void totals() { search.registerValues(column, List.of( "a", "bb", - "cc" + "cc", + "mm" )); search.shrinkSearch(column); From ece2ced46d0215feaff8c129e51a6e71e3686430 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:40:32 +0200 Subject: [PATCH 07/19] don't cache label map in SelectFilter Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../datasets/concepts/filters/specific/SelectFilter.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java index 6e192b13b1..3e5a155abe 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java @@ -47,9 +47,6 @@ public abstract class SelectFilter extends SingleColumnFilter private int searchMinSuffixLength = 3; private boolean generateSearchSuffixes = true; - @JsonIgnore - private transient LabelMap searchableLabels; - @Override public EnumSet getAcceptedColumnTypes() { return EnumSet.of(MajorTypeId.STRING); @@ -84,11 +81,7 @@ public List getSearchReferences() { } if (!labels.isEmpty()) { - if (searchableLabels == null) { - // No synchronization here should be fine LabelMap and its delegate BiMap implement hash code - searchableLabels = new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes); - } - out.add(searchableLabels); + out.add(new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes)); } out.add(getColumn()); From 78de7950492e63b20ac9f350d2723d87f91f67d5 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:57:40 +0200 Subject: [PATCH 08/19] optimize imports Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../bakdata/conquery/models/datasets/concepts/Searchable.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java index 931655fd94..b6c53a1544 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java @@ -1,7 +1,5 @@ package com.bakdata.conquery.models.datasets.concepts; -import java.util.List; - import javax.validation.constraints.Min; import com.bakdata.conquery.apiv1.frontend.FrontendValue; From 2eb9ec60f34e81a196fb2017d0234f017bd302c9 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 12:43:45 +0200 Subject: [PATCH 09/19] puts search finalizer in job Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../specific/CollectColumnValuesJob.java | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java index 84a9bdef53..32f1f03b97 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java @@ -16,6 +16,7 @@ import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.stores.root.StringStore; +import com.bakdata.conquery.models.jobs.SimpleJob; import com.bakdata.conquery.models.jobs.UpdateFilterSearchJob; import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; @@ -94,20 +95,29 @@ public void react(Worker context) throws Exception { log.debug("Still waiting for jobs: {} of {} done", done.get(), futures.size()); } } - + log.info("Finished collecting values from these columns: {}", Arrays.toString(columns.toArray())); context.send(new FinalizeReactionMessage(getMessageId(), context.getInfo().getId())); } @Override public void afterAllReaction() { - log.debug("{} shrinking searches", this); - final FilterSearch filterSearch = namespace.getFilterSearch(); - columns.forEach(filterSearch::shrinkSearch); - - log.info("BEGIN counting Search totals."); - UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()).forEach(namespace.getFilterSearch()::getTotal); - log.debug("FINISHED counting Search totals."); + // Run this in a job, so it is definitely processed after UpdateFilterSearchJob + namespace.getJobManager().addSlowJob( + new SimpleJob( + "Finalize Search update", + () -> { + log.debug("{} shrinking searches", this); + final FilterSearch filterSearch = namespace.getFilterSearch(); + columns.forEach(filterSearch::shrinkSearch); + + + log.info("BEGIN counting Search totals."); + UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()).forEach(namespace.getFilterSearch()::getTotal); + log.debug("FINISHED counting Search totals."); + } + ) + ); } } From 9fc8a41f391066a715a8caf1c02d914e282d1846 Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Mon, 8 Apr 2024 13:44:46 +0200 Subject: [PATCH 10/19] fix casting of query result preview --- .../conquery/apiv1/QueryProcessor.java | 9 ++++--- .../forms/managed/ManagedInternalForm.java | 7 +++--- .../conquery/models/query/ManagedQuery.java | 2 +- .../models/query/SingleTableResult.java | 8 +++++-- .../query/preview/EntityPreviewExecution.java | 9 ++++--- .../query/statistics/ResultStatistics.java | 24 +++++++++++-------- .../conquery/resources/api/QueryResource.java | 3 +-- 7 files changed, 33 insertions(+), 29 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/QueryProcessor.java b/backend/src/main/java/com/bakdata/conquery/apiv1/QueryProcessor.java index 8ab5c1629a..77d97a265f 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/QueryProcessor.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/QueryProcessor.java @@ -555,15 +555,14 @@ public Stream> resolveEntities(Subject subject, List resultInfos = query.getResultInfos(); + public ResultStatistics getResultStatistics(SingleTableResult managedQuery) { + final List resultInfos = managedQuery.getResultInfos(); final Optional dateInfo = - query.getResultInfos().stream().filter(info -> info.getSemantics().contains(new SemanticType.EventDateT())).findFirst(); + resultInfos.stream().filter(info -> info.getSemantics().contains(new SemanticType.EventDateT())).findFirst(); - final int dateIndex = dateInfo.map(resultInfos::indexOf).orElse(0 /*Discarded if dateInfo is not present*/); + final Optional dateIndex = dateInfo.map(resultInfos::indexOf); final Locale locale = I18n.LOCALE.get(); final NumberFormat decimalFormat = NumberFormat.getNumberInstance(locale); diff --git a/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ManagedInternalForm.java b/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ManagedInternalForm.java index 5e4170944a..367fec4626 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ManagedInternalForm.java +++ b/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ManagedInternalForm.java @@ -31,7 +31,6 @@ import com.bakdata.conquery.models.query.results.EntityResult; import com.bakdata.conquery.models.query.results.FormShardResult; import com.bakdata.conquery.models.worker.DistributedNamespace; -import com.bakdata.conquery.models.worker.Namespace; import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.OptBoolean; @@ -111,8 +110,8 @@ public void start() { } @Override - public List generateColumnDescriptions(boolean isInitialized, Namespace namespace, ConqueryConfig config) { - return subQueries.values().iterator().next().generateColumnDescriptions(isInitialized, namespace, config); + public List generateColumnDescriptions(boolean isInitialized, ConqueryConfig config) { + return subQueries.values().iterator().next().generateColumnDescriptions(isInitialized, config); } @@ -131,7 +130,7 @@ protected void setAdditionalFieldsForStatusWithColumnDescription(Subject subject return; } ManagedQuery subQuery = subQueries.entrySet().iterator().next().getValue(); - status.setColumnDescriptions(subQuery.generateColumnDescriptions(isInitialized(), getNamespace(), getConfig())); + status.setColumnDescriptions(subQuery.generateColumnDescriptions(isInitialized(), getConfig())); } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/ManagedQuery.java b/backend/src/main/java/com/bakdata/conquery/models/query/ManagedQuery.java index 460fefa906..d1215721e5 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/ManagedQuery.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/ManagedQuery.java @@ -110,7 +110,7 @@ public void setStatusBase(@NonNull Subject subject, @NonNull ExecutionStatus sta protected void setAdditionalFieldsForStatusWithColumnDescription(Subject subject, FullExecutionStatus status) { if (columnDescriptions == null) { - columnDescriptions = generateColumnDescriptions(isInitialized(), getNamespace(), getConfig()); + columnDescriptions = generateColumnDescriptions(isInitialized(), getConfig()); } status.setColumnDescriptions(columnDescriptions); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/SingleTableResult.java b/backend/src/main/java/com/bakdata/conquery/models/query/SingleTableResult.java index 64fd65be1d..87f7c625d9 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/SingleTableResult.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/SingleTableResult.java @@ -18,13 +18,13 @@ public interface SingleTableResult { - default List generateColumnDescriptions(boolean isInitialized, Namespace namespace, ConqueryConfig config) { + default List generateColumnDescriptions(boolean isInitialized, ConqueryConfig config) { Preconditions.checkArgument(isInitialized, "The execution must have been initialized first"); List columnDescriptions = new ArrayList<>(); final Locale locale = I18n.LOCALE.get(); - PrintSettings settings = new PrintSettings(true, locale, namespace, config, null); + PrintSettings settings = new PrintSettings(true, locale, getNamespace(), config, null); UniqueNamer uniqNamer = new UniqueNamer(settings); @@ -53,4 +53,8 @@ default List generateColumnDescriptions(boolean isInitialized, @JsonIgnore long resultRowCount(); + @JsonIgnore + Namespace getNamespace(); + + } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/preview/EntityPreviewExecution.java b/backend/src/main/java/com/bakdata/conquery/models/query/preview/EntityPreviewExecution.java index 40eb592659..df62608dfb 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/preview/EntityPreviewExecution.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/preview/EntityPreviewExecution.java @@ -42,7 +42,6 @@ import com.bakdata.conquery.models.query.results.MultilineEntityResult; import com.bakdata.conquery.models.types.ResultType; import com.bakdata.conquery.models.types.SemanticType; -import com.bakdata.conquery.models.worker.Namespace; import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.OptBoolean; @@ -397,12 +396,12 @@ private Object[] getCompleteLine(EntityResult entityResult) { } protected void setAdditionalFieldsForStatusWithColumnDescription(Subject subject, FullExecutionStatus status) { - status.setColumnDescriptions(generateColumnDescriptions(isInitialized(), getNamespace(), getConfig())); + status.setColumnDescriptions(generateColumnDescriptions(isInitialized(), getConfig())); } @Override - public List generateColumnDescriptions(boolean isInitialized, Namespace namespace, ConqueryConfig config) { - final List descriptors = getValuesQuery().generateColumnDescriptions(isInitialized, namespace, config); + public List generateColumnDescriptions(boolean isInitialized, ConqueryConfig config) { + final List descriptors = getValuesQuery().generateColumnDescriptions(isInitialized, config); for (ColumnDescriptor descriptor : descriptors) { // Add grouping semantics to secondaryIds to group by @@ -432,7 +431,7 @@ private ManagedQuery getValuesQuery() { @Override protected void setAdditionalFieldsForStatusWithSource(Subject subject, FullExecutionStatus status) { - status.setColumnDescriptions(generateColumnDescriptions(isInitialized(), getNamespace(), getConfig())); + status.setColumnDescriptions(generateColumnDescriptions(isInitialized(), getConfig())); } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/ResultStatistics.java b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/ResultStatistics.java index ba6ea6e54d..2e6b4894ff 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/ResultStatistics.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/ResultStatistics.java @@ -14,7 +14,6 @@ import com.bakdata.conquery.models.common.Range; import com.bakdata.conquery.models.common.daterange.CDateRange; import com.bakdata.conquery.models.config.ConqueryConfig; -import com.bakdata.conquery.models.query.ManagedQuery; import com.bakdata.conquery.models.query.PrintSettings; import com.bakdata.conquery.models.query.SingleTableResult; import com.bakdata.conquery.models.query.resultinfo.ResultInfo; @@ -35,28 +34,30 @@ public record ResultStatistics(int entities, int total, List statistics, Range dateRange) { @SneakyThrows @NotNull - public static ResultStatistics collectResultStatistics(ManagedQuery managedQuery, List resultInfos, Optional dateInfo, int dateIndex, PrintSettings printSettings, UniqueNamer uniqueNamer, ConqueryConfig conqueryConfig) { - + public static ResultStatistics collectResultStatistics(SingleTableResult managedQuery, List resultInfos, Optional dateInfo, Optional dateIndex, PrintSettings printSettings, UniqueNamer uniqueNamer, ConqueryConfig conqueryConfig) { //TODO pull inner executor service from ManagerNode - final ListeningExecutorService executorService = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1)); + final ListeningExecutorService + executorService = + MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1)); // Yes, we are actually iterating the result for every job. // Span date-column final ListenableFuture> futureSpan; - if (managedQuery.isContainsDates()) { - futureSpan = executorService.submit(() -> calculateDateSpan(managedQuery, dateInfo, dateIndex)); + final boolean containsDates = dateInfo.isPresent(); + + if (containsDates) { + futureSpan = executorService.submit(() -> calculateDateSpan(managedQuery, dateInfo, dateIndex.get())); } else { futureSpan = Futures.immediateFuture(CDateRange.all().toSimpleRange()); } // Count result lines and entities (may differ in case of form or SecondaryIdQuery) - final ListenableFuture futureLines = - executorService.submit(() -> (int) managedQuery.getQuery().countResults(managedQuery.streamResults(OptionalLong.empty()))); + final ListenableFuture futureLines = executorService.submit(() -> (int) managedQuery.resultRowCount()); final ListenableFuture futureEntities = executorService.submit(() -> (int) managedQuery.streamResults(OptionalLong.empty()).count()); @@ -66,7 +67,7 @@ public static ResultStatistics collectResultStatistics(ManagedQuery managedQuery futureDescriptions = IntStream.range(0, resultInfos.size()) // If the query doesn't contain dates, we can skip the dates-column. - .filter(col -> !resultInfos.get(col).getSemantics().contains(new SemanticType.EventDateT()) || managedQuery.isContainsDates()) + .filter(col -> !resultInfos.get(col).getSemantics().contains(new SemanticType.EventDateT()) || containsDates) .mapToObj(col -> (Callable) () -> { final StopWatch started = StopWatch.createStarted(); @@ -76,7 +77,10 @@ public static ResultStatistics collectResultStatistics(ManagedQuery managedQuery log.trace("BEGIN stats collection for {}", info); - managedQuery.streamResults(OptionalLong.empty()).map(EntityResult::listResultLines).flatMap(List::stream).forEach(line -> statsCollector.consume(line[col])); + managedQuery.streamResults(OptionalLong.empty()) + .map(EntityResult::listResultLines) + .flatMap(List::stream) + .forEach(line -> statsCollector.consume(line[col])); log.trace("DONE collecting values for {}, in {}", info, started); diff --git a/backend/src/main/java/com/bakdata/conquery/resources/api/QueryResource.java b/backend/src/main/java/com/bakdata/conquery/resources/api/QueryResource.java index c9a21d9b7f..44790c6ee1 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/api/QueryResource.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/api/QueryResource.java @@ -29,7 +29,6 @@ import com.bakdata.conquery.models.auth.permissions.Ability; import com.bakdata.conquery.models.execution.ExecutionState; import com.bakdata.conquery.models.execution.ManagedExecution; -import com.bakdata.conquery.models.query.ManagedQuery; import com.bakdata.conquery.models.query.SingleTableResult; import io.dropwizard.auth.Auth; import io.dropwizard.jersey.PATCH; @@ -74,7 +73,7 @@ public Response getDescription(@Auth Subject subject, @PathParam(QUERY) ManagedE return Response.status(Response.Status.CONFLICT.getStatusCode(), "Query is still running.").build(); // Request was submitted too early. } - return Response.ok((processor.getResultStatistics(((ManagedQuery) query)))).build(); + return Response.ok((processor.getResultStatistics(((SingleTableResult) query)))).build(); } @PATCH From 50a392aefa14452bd270b57b1e428bbbce4eb134 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 22:14:00 +0200 Subject: [PATCH 11/19] move sorting out of disabled searchables from the job to the filter Before the search job finalizer tried to compute totals on disabled searchables which had no associated trie search, hence a NPE was thrown Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../concepts/filters/specific/SelectFilter.java | 6 ++++-- .../models/jobs/UpdateFilterSearchJob.java | 4 ---- .../specific/CollectColumnValuesJob.java | 14 +++++++++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java index 3e5a155abe..d8087f2aa6 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java @@ -76,7 +76,7 @@ public void configureFrontend(FrontendFilterConfiguration.Top f, ConqueryConfig public List getSearchReferences() { final List out = new ArrayList<>(); - if (getTemplate() != null) { + if (getTemplate() != null && !getTemplate().isSearchDisabled()) { out.add(getTemplate()); } @@ -84,7 +84,9 @@ public List getSearchReferences() { out.add(new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes)); } - out.add(getColumn()); + if (!getColumn().isSearchDisabled()) { + out.add(getColumn()); + } return out; } diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java index e980c21b1f..023633f288 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java @@ -11,7 +11,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; -import java.util.function.Predicate; import java.util.stream.Collectors; import com.bakdata.conquery.apiv1.frontend.FrontendValue; @@ -70,9 +69,6 @@ public void execute() throws Exception { allSelectFilters.stream() .map(SelectFilter::getSearchReferences) .flatMap(Collection::stream) - // Disabling search is only a last resort for when columns are too big to store in memory or process for indexing. - // TODO FK: We want no Searchable to be disabled, better scaling searches or mechanisms to fill search. - .filter(Predicate.not(Searchable::isSearchDisabled)) // Group Searchables into "Columns" and other "Searchables" .collect(Collectors.groupingBy(s -> s instanceof Column ? Column.class : Searchable.class, Collectors.toSet())); diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java index 32f1f03b97..820087e87a 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java @@ -14,6 +14,7 @@ import com.bakdata.conquery.io.jackson.serializer.NsIdRefCollection; import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.Table; +import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.stores.root.StringStore; import com.bakdata.conquery.models.jobs.SimpleJob; @@ -113,9 +114,16 @@ public void afterAllReaction() { columns.forEach(filterSearch::shrinkSearch); - log.info("BEGIN counting Search totals."); - UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()).forEach(namespace.getFilterSearch()::getTotal); - log.debug("FINISHED counting Search totals."); + log.info("BEGIN counting search totals on {}", namespace.getDataset().getId()); + for (SelectFilter filter : UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage())) { + try { + namespace.getFilterSearch().getTotal(filter); + } + catch (Exception e) { + log.warn("Unable to calculate totals for filter '{}'", filter.getId(), e); + } + } + log.debug("FINISHED counting search totals on {}", namespace.getDataset().getId()); } ) ); From c956935f3f8b73f7206dce833920c8d8c3eab08f Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Tue, 9 Apr 2024 11:33:19 +0200 Subject: [PATCH 12/19] drop String-interning in StringStoreString as it causes slow uploads and can be solved by `-XX:+UseStringDeduplication` --- .../models/events/stores/primitive/StringStoreString.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java b/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java index d4f885e098..05354ddd9d 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java @@ -27,10 +27,6 @@ public static StringStoreString create(int size) { @JsonCreator public static StringStoreString withInternedStrings(String[] values) { - for (int index = 0; index < values.length; index++) { - values[index] = values[index] != null ? values[index].intern() : null; - } - return new StringStoreString(values); } From 5819348789fcf5a681bab3da9e140084c707c596 Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Tue, 9 Apr 2024 14:23:18 +0200 Subject: [PATCH 13/19] use property to make interning configurable --- .../events/stores/primitive/StringStoreString.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java b/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java index 05354ddd9d..bca5009799 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java @@ -27,9 +27,20 @@ public static StringStoreString create(int size) { @JsonCreator public static StringStoreString withInternedStrings(String[] values) { + if(shouldIntern()) { + for (int index = 0; index < values.length; index++) { + values[index] = values[index] != null ? values[index].intern() : null; + } + } + return new StringStoreString(values); } + private static boolean shouldIntern() { + //TODO use mixin or properly wire this property + return "yes".equals(System.getProperty("cq.intern", "no")); + } + @Override public boolean has(int event) { return values[event] != null; From 30dc85528fb033a9f02f725a8e8a9081d2de8d29 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Tue, 9 Apr 2024 22:23:48 +0200 Subject: [PATCH 14/19] adds logs and progress reporter to search finalize job Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../specific/CollectColumnValuesJob.java | 57 ++++++++++++------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java index 820087e87a..40c16a06db 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java @@ -17,7 +17,7 @@ import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.stores.root.StringStore; -import com.bakdata.conquery.models.jobs.SimpleJob; +import com.bakdata.conquery.models.jobs.Job; import com.bakdata.conquery.models.jobs.UpdateFilterSearchJob; import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; @@ -106,26 +106,45 @@ public void afterAllReaction() { // Run this in a job, so it is definitely processed after UpdateFilterSearchJob namespace.getJobManager().addSlowJob( - new SimpleJob( - "Finalize Search update", - () -> { - log.debug("{} shrinking searches", this); - final FilterSearch filterSearch = namespace.getFilterSearch(); - columns.forEach(filterSearch::shrinkSearch); - - - log.info("BEGIN counting search totals on {}", namespace.getDataset().getId()); - for (SelectFilter filter : UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage())) { - try { - namespace.getFilterSearch().getTotal(filter); - } - catch (Exception e) { - log.warn("Unable to calculate totals for filter '{}'", filter.getId(), e); - } + new Job() { + + @Override + public void execute() { + + log.debug("{} shrinking searches", this); + final FilterSearch filterSearch = namespace.getFilterSearch(); + columns.forEach(filterSearch::shrinkSearch); + + + log.info("BEGIN counting search totals on {}", namespace.getDataset().getId()); + final List> allSelectFilters = UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()); + + getProgressReporter().setMax(allSelectFilters.size()); + + for (SelectFilter filter : allSelectFilters) { + log.trace("Calculate totals for filter: {}", filter.getId()); + try { + final long total = namespace.getFilterSearch().getTotal(filter); + log.trace("Filter '{}' totals: {}", filter, total); + } + catch (Exception e) { + log.warn("Unable to calculate totals for filter '{}'", filter.getId(), e); + } + finally { + getProgressReporter().report(1); } - log.debug("FINISHED counting search totals on {}", namespace.getDataset().getId()); } - ) + + getProgressReporter().done(); + log.debug("FINISHED counting search totals on {}", namespace.getDataset().getId()); + } + + @Override + public String getLabel() { + return "Finalize Search update"; + } + } + ); } } From 2a2b775ad609f5c401431b08c95917075cce4a92 Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:51:45 +0200 Subject: [PATCH 15/19] fix humonguous toString --- .../namespaces/specific/RegisterColumnValues.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/RegisterColumnValues.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/RegisterColumnValues.java index 86cdef9d2f..32587e4bdb 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/RegisterColumnValues.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/RegisterColumnValues.java @@ -13,6 +13,7 @@ import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; import com.bakdata.conquery.models.worker.DistributedNamespace; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.ToString; @@ -34,15 +35,23 @@ public class RegisterColumnValues extends NamespaceMessage implements ReactionMe @NsIdRef private final Column column; + + @ToString.Exclude private final Collection values; + @JsonIgnore + @ToString.Include + public int size() { + return values.size(); + } + @Override public void react(DistributedNamespace context) throws Exception { if (log.isTraceEnabled()) { - log.trace("Registering values for column '{}': {}", column.getId(), Arrays.toString(values.toArray())); + log.trace("Registering {} values for column '{}': {}", size(), column.getId(), Arrays.toString(values.toArray())); } else { - log.debug("Registering {} values for column '{}'", values.size(), column.getId()); + log.debug("Registering {} values for column '{}'", size(), column.getId()); } context.getFilterSearch().registerValues(column, values); From af64097ca46128ad1b5cdb2037d0b92e6cf9e60e Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Wed, 10 Apr 2024 10:31:49 +0200 Subject: [PATCH 16/19] shorten full message queue messages without trace --- .../conquery/io/mina/NetworkSession.java | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/io/mina/NetworkSession.java b/backend/src/main/java/com/bakdata/conquery/io/mina/NetworkSession.java index e5d4f91673..c82b8f5dfa 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/mina/NetworkSession.java +++ b/backend/src/main/java/com/bakdata/conquery/io/mina/NetworkSession.java @@ -13,25 +13,29 @@ import lombok.extern.slf4j.Slf4j; import org.apache.mina.core.future.WriteFuture; import org.apache.mina.core.session.IoSession; +import org.jetbrains.annotations.NotNull; @RequiredArgsConstructor @Slf4j public class NetworkSession implements MessageSender> { + public static final int MAX_MESSAGE_LENGTH = 30; + public static final int MAX_QUEUE_LENGTH = 20; @Getter private final IoSession session; - private final LinkedBlockingQueue> queuedMessages = new LinkedBlockingQueue<>(20); + private final LinkedBlockingQueue> queuedMessages = new LinkedBlockingQueue<>(MAX_QUEUE_LENGTH); @Override public WriteFuture send(final NetworkMessage message) { try { while (!queuedMessages.offer(message, 2, TimeUnit.MINUTES)) { - log.debug( - "Waiting for full writing queue for {}\n\tcurrently filled by: {}", - message, - new ArrayList<>(queuedMessages) - .stream() - .map(Objects::toString) - .collect(Collectors.joining("\n\t\t")) + log.debug("Waiting for full writing queue for {} currently filled by:\n\t- {}", + message, + log.isTraceEnabled() + ? new ArrayList<>(queuedMessages).stream() + .map(Objects::toString) + .map(NetworkSession::shorten) + .collect(Collectors.joining("\n\t\t- ")) + : "%s messages".formatted(queuedMessages.size()) ); } } @@ -45,6 +49,16 @@ public WriteFuture send(final NetworkMessage message) { return future; } + @NotNull + private static String shorten(String desc) { + if (desc.length() <= MAX_MESSAGE_LENGTH) { + return desc; + } + + return desc.substring(0, MAX_MESSAGE_LENGTH) + "…"; + + } + @Override public void trySend(final NetworkMessage message) { if (isConnected()) { From 1a5e3f5c3e28067794ed0acf1426ccbc79746beb Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Mon, 15 Apr 2024 11:07:48 +0200 Subject: [PATCH 17/19] extract inner class --- .../specific/CollectColumnValuesJob.java | 88 +++++++++++-------- 1 file changed, 49 insertions(+), 39 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java index 40c16a06db..3df723a45b 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java @@ -105,46 +105,56 @@ public void react(Worker context) throws Exception { public void afterAllReaction() { // Run this in a job, so it is definitely processed after UpdateFilterSearchJob - namespace.getJobManager().addSlowJob( - new Job() { - - @Override - public void execute() { - - log.debug("{} shrinking searches", this); - final FilterSearch filterSearch = namespace.getFilterSearch(); - columns.forEach(filterSearch::shrinkSearch); - - - log.info("BEGIN counting search totals on {}", namespace.getDataset().getId()); - final List> allSelectFilters = UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()); - - getProgressReporter().setMax(allSelectFilters.size()); - - for (SelectFilter filter : allSelectFilters) { - log.trace("Calculate totals for filter: {}", filter.getId()); - try { - final long total = namespace.getFilterSearch().getTotal(filter); - log.trace("Filter '{}' totals: {}", filter, total); - } - catch (Exception e) { - log.warn("Unable to calculate totals for filter '{}'", filter.getId(), e); - } - finally { - getProgressReporter().report(1); - } - } - - getProgressReporter().done(); - log.debug("FINISHED counting search totals on {}", namespace.getDataset().getId()); - } - - @Override - public String getLabel() { - return "Finalize Search update"; - } + namespace.getJobManager().addSlowJob(new SearchShrinker()); + } + + private class SearchShrinker extends Job { + + @Override + public void execute() { + + final List> allSelectFilters = UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()); + final FilterSearch filterSearch = namespace.getFilterSearch(); + + getProgressReporter().setMax(allSelectFilters.size() + columns.size()); + + log.debug("{} shrinking searches", this); + + for (Column column : columns) { + try { + filterSearch.shrinkSearch(column); + } + catch (Exception e) { + log.warn("Unable to shrink search for {}", column, e); } + finally { + getProgressReporter().report(1); + } + } - ); + log.info("BEGIN counting search totals on {}", namespace.getDataset().getId()); + + for (SelectFilter filter : allSelectFilters) { + log.trace("Calculate totals for filter: {}", filter.getId()); + try { + final long total = namespace.getFilterSearch().getTotal(filter); + log.trace("Filter '{}' totals: {}", filter, total); + } + catch (Exception e) { + log.warn("Unable to calculate totals for filter '{}'", filter.getId(), e); + } + finally { + getProgressReporter().report(1); + } + } + + getProgressReporter().done(); + log.debug("FINISHED counting search totals on {}", namespace.getDataset().getId()); + } + + @Override + public String getLabel() { + return "Finalize Search update"; + } } } From 75ff75b0380d7ea345c3ca279f4188faac7433e7 Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Mon, 15 Apr 2024 15:41:31 +0200 Subject: [PATCH 18/19] Manually initialise Column.position, as it was kept showing up in flamegraphs. --- .../io/storage/NamespacedStorage.java | 23 ++++++++++--------- .../conquery/models/datasets/Column.java | 9 +++++--- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/NamespacedStorage.java b/backend/src/main/java/com/bakdata/conquery/io/storage/NamespacedStorage.java index 3767ce4566..800aae6fa5 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/NamespacedStorage.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/NamespacedStorage.java @@ -5,9 +5,6 @@ import java.util.List; import java.util.Set; -import jakarta.validation.ConstraintViolation; -import jakarta.validation.Validator; - import com.bakdata.conquery.io.storage.xodus.stores.SingletonStore; import com.bakdata.conquery.models.config.StoreFactory; import com.bakdata.conquery.models.datasets.Column; @@ -26,6 +23,8 @@ import com.bakdata.conquery.models.identifiable.ids.specific.TableId; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; +import jakarta.validation.ConstraintViolation; +import jakarta.validation.Validator; import lombok.Getter; import lombok.SneakyThrows; import lombok.ToString; @@ -101,14 +100,16 @@ private void decorateSecondaryIdDescriptionStore(IdentifiableStore store) { store.onAdd(table -> { - for (Column c : table.getColumns()) { - getCentralRegistry().register(c); - } - }).onRemove(table -> { - for (Column c : table.getColumns()) { - getCentralRegistry().remove(c); - } - }); + for (Column column : table.getColumns()) { + column.init(); + getCentralRegistry().register(column); + } + }) + .onRemove(table -> { + for (Column c : table.getColumns()) { + getCentralRegistry().remove(c); + } + }); } private void decorateConceptStore(IdentifiableStore> store) { diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java index aa1236e410..b692a8e708 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java @@ -45,8 +45,8 @@ public class Column extends Labeled implements NamespacedIdentifiable< private boolean searchDisabled = false; @JsonIgnore - @Getter(lazy = true) - private final int position = ArrayUtils.indexOf(getTable().getColumns(), this); + private int position = -1; + /** * if this is set this column counts as the secondary id of the given name for this * table @@ -76,7 +76,10 @@ public Dataset getDataset() { */ @Override public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { - return config.createTrieSearch(isGenerateSuffixes()); } + + public void init() { + position = ArrayUtils.indexOf(getTable().getColumns(), this); + } } From a6d29ca87b5572545b1445c4a91beedcf16192d5 Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Mon, 15 Apr 2024 15:42:21 +0200 Subject: [PATCH 19/19] fix illegal use of getStores --- .../models/query/queryplan/aggregators/ColumnAggregator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/aggregators/ColumnAggregator.java b/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/aggregators/ColumnAggregator.java index d47703c3c9..9c4a2dbc02 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/aggregators/ColumnAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/aggregators/ColumnAggregator.java @@ -33,7 +33,7 @@ public void collectRequiredTables(Set out) { @Override public boolean isOfInterest(Bucket bucket) { for (Column column : getRequiredColumns()) { - if (!bucket.getStores()[column.getPosition()].isEmpty()) { + if (!bucket.getStore(column).isEmpty()) { return true; } }