Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reintegrate Master #3379

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
67cc873
gather searchables independently and sum them up
thoniTUB Apr 4, 2024
bfd703a
removes unnecessary members from FilterSearch
thoniTUB Apr 8, 2024
fff0bba
generate hashcode for LabelMap
thoniTUB Apr 8, 2024
85a86e3
adds FilterSearchTest
thoniTUB Apr 8, 2024
8d4c027
request all search totals after UpdateFilterSearchJob
thoniTUB Apr 8, 2024
1ec91d8
Update backend/src/test/java/com/bakdata/conquery/service/FilterSearc…
awildturtok Apr 8, 2024
ece2ced
don't cache label map in SelectFilter
thoniTUB Apr 8, 2024
78de795
optimize imports
thoniTUB Apr 8, 2024
2eb9ec6
puts search finalizer in job
thoniTUB Apr 8, 2024
1310f67
Merge pull request #3376 from ingef/fix/search-totals
awildturtok Apr 8, 2024
9fc8a41
fix casting of query result preview
awildturtok Apr 8, 2024
32d05c4
Merge pull request #3377 from ingef/fix/internal-form-cast
thoniTUB Apr 8, 2024
ca4576b
Merge pull request #3366 from ingef/release
awildturtok Apr 8, 2024
50a392a
move sorting out of disabled searchables from the job to the filter
thoniTUB Apr 8, 2024
c956935
drop String-interning in StringStoreString as it causes slow uploads …
awildturtok Apr 9, 2024
b2b6e47
Merge pull request #3380 from ingef/hotfix/skip-disabled-searchables
thoniTUB Apr 9, 2024
5819348
use property to make interning configurable
awildturtok Apr 9, 2024
c72a756
Merge pull request #3384 from ingef/fix/drop-interning
awildturtok Apr 9, 2024
4d8fec3
Merge pull request #3383 from ingef/release
awildturtok Apr 9, 2024
30dc855
adds logs and progress reporter to search finalize job
thoniTUB Apr 9, 2024
2a2b775
fix humonguous toString
awildturtok Apr 10, 2024
af64097
shorten full message queue messages without trace
awildturtok Apr 10, 2024
addbb71
Merge pull request #3386 from ingef/fix/humonguous-toString
awildturtok Apr 11, 2024
71624de
Merge pull request #3388 from ingef/fix/smaller-manager-backpressure
awildturtok Apr 11, 2024
1a5e3f5
extract inner class
awildturtok Apr 15, 2024
15d8215
Merge pull request #3385 from ingef/feature/progess-on-search-finalizer
awildturtok Apr 15, 2024
e397e32
Merge pull request #3390 from ingef/release
awildturtok Apr 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import com.bakdata.conquery.apiv1.frontend.FrontendValue;
import com.bakdata.conquery.io.cps.CPSType;
import com.bakdata.conquery.io.jackson.serializer.NsIdRef;
import com.bakdata.conquery.io.storage.NamespaceStorage;
import com.bakdata.conquery.models.config.IndexConfig;
import com.bakdata.conquery.models.datasets.Dataset;
import com.bakdata.conquery.models.datasets.concepts.Searchable;
Expand Down Expand Up @@ -40,7 +39,7 @@
@ToString
@Slf4j
@CPSType(id = "CSV_TEMPLATE", base = SearchIndex.class)
public class FilterTemplate extends IdentifiableImpl<SearchIndexId> implements Searchable<SearchIndexId>, SearchIndex {
public class FilterTemplate extends IdentifiableImpl<SearchIndexId> implements Searchable, SearchIndex {

private static final long serialVersionUID = 1L;

Expand Down Expand Up @@ -90,7 +89,7 @@ public boolean isSearchDisabled() {
return false;
}

public TrieSearch<FrontendValue> createTrieSearch(IndexConfig config, NamespaceStorage storage) {
public TrieSearch<FrontendValue> createTrieSearch(IndexConfig config) {

final URI resolvedURI = FileUtil.getResolvedUri(config.getBaseUrl(), getFilePath());
log.trace("Resolved filter template reference url for search '{}': {}", this.getId(), resolvedURI);
Expand Down
71 changes: 71 additions & 0 deletions backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package com.bakdata.conquery.apiv1;

import java.util.List;
import java.util.stream.Collectors;

import com.bakdata.conquery.apiv1.frontend.FrontendValue;
import com.bakdata.conquery.models.config.IndexConfig;
import com.bakdata.conquery.models.datasets.concepts.Searchable;
import com.bakdata.conquery.models.identifiable.ids.specific.FilterId;
import com.bakdata.conquery.models.query.FilterSearch;
import com.bakdata.conquery.util.search.TrieSearch;
import com.google.common.collect.BiMap;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.experimental.Delegate;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.time.StopWatch;

/**
 * {@link Searchable} that builds its search from a fixed {@link BiMap} of strings.
 * <p>
 * Every entry of the map is turned into one {@link FrontendValue} (constructed from the entry's key and value)
 * and indexed under the keywords produced by {@link FilterSearch#extractKeywords}.
 * <p>
 * All map operations are forwarded to the wrapped map via {@link Delegate}.
 */
@Getter
@RequiredArgsConstructor
@Slf4j
@EqualsAndHashCode
public class LabelMap implements Searchable {

	/**
	 * Id used to identify this map in log output.
	 */
	private final FilterId id;
	/**
	 * The wrapped mapping whose entries are made searchable.
	 */
	@Delegate
	private final BiMap<String, String> delegate;
	private final int minSuffixLength;
	private final boolean generateSearchSuffixes;

	/**
	 * Creates a new {@link TrieSearch}, fills it with one item per entry of the wrapped map and shrinks it afterwards.
	 *
	 * @param config the config used to create the empty search.
	 * @return the filled and shrunk search.
	 */
	@Override
	public TrieSearch<FrontendValue> createTrieSearch(IndexConfig config) {

		// NOTE(review): the flag is always true here, independent of generateSearchSuffixes - confirm this is intended.
		final TrieSearch<FrontendValue> search = config.createTrieSearch(true);

		final List<FrontendValue> collected = delegate.entrySet().stream()
													  .map(entry -> new FrontendValue(entry.getKey(), entry.getValue()))
													  .collect(Collectors.toList());

		// Guarded, so the string representations are only built when they are actually logged.
		if (log.isTraceEnabled()) {
			log.trace("Labels for {}: `{}`", getId(), collected.stream().map(FrontendValue::toString).collect(Collectors.toList()));
		}

		final StopWatch timer = StopWatch.createStarted();
		log.trace("START-SELECT ADDING_ITEMS for {}", getId());

		collected.forEach(feValue -> search.addItem(feValue, FilterSearch.extractKeywords(feValue)));

		log.trace("DONE-SELECT ADDING_ITEMS for {} in {}", getId(), timer);

		// reset() leaves the StopWatch unstarted, so it has to be started again to measure the shrinking phase.
		timer.reset();
		timer.start();
		log.trace("START-SELECT SHRINKING for {}", getId());

		search.shrinkToFit();

		log.trace("DONE-SELECT SHRINKING for {} in {}", getId(), timer);

		return search;
	}

	@Override
	public boolean isGenerateSuffixes() {
		return generateSearchSuffixes;
	}

	/**
	 * Search is never disabled for a label map.
	 */
	@Override
	public boolean isSearchDisabled() {
		return false;
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -555,15 +555,14 @@ public Stream<Map<String, String>> resolveEntities(Subject subject, List<FilterV
.filter(Predicate.not(Map::isEmpty));
}

public ResultStatistics getResultStatistics(ManagedQuery managedQuery) {
final Query query = managedQuery.getQuery();
final List<ResultInfo> resultInfos = query.getResultInfos();
public ResultStatistics getResultStatistics(SingleTableResult managedQuery) {
final List<ResultInfo> resultInfos = managedQuery.getResultInfos();

final Optional<ResultInfo>
dateInfo =
query.getResultInfos().stream().filter(info -> info.getSemantics().contains(new SemanticType.EventDateT())).findFirst();
resultInfos.stream().filter(info -> info.getSemantics().contains(new SemanticType.EventDateT())).findFirst();

final int dateIndex = dateInfo.map(resultInfos::indexOf).orElse(0 /*Discarded if dateInfo is not present*/);
final Optional<Integer> dateIndex = dateInfo.map(resultInfos::indexOf);

final Locale locale = I18n.LOCALE.get();
final NumberFormat decimalFormat = NumberFormat.getNumberInstance(locale);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,29 @@
import lombok.extern.slf4j.Slf4j;
import org.apache.mina.core.future.WriteFuture;
import org.apache.mina.core.session.IoSession;
import org.jetbrains.annotations.NotNull;

@RequiredArgsConstructor
@Slf4j
public class NetworkSession implements MessageSender<NetworkMessage<?>> {
public static final int MAX_MESSAGE_LENGTH = 30;
public static final int MAX_QUEUE_LENGTH = 20;
@Getter
private final IoSession session;
private final LinkedBlockingQueue<NetworkMessage<?>> queuedMessages = new LinkedBlockingQueue<>(20);
private final LinkedBlockingQueue<NetworkMessage<?>> queuedMessages = new LinkedBlockingQueue<>(MAX_QUEUE_LENGTH);

@Override
public WriteFuture send(final NetworkMessage<?> message) {
try {
while (!queuedMessages.offer(message, 2, TimeUnit.MINUTES)) {
log.debug(
"Waiting for full writing queue for {}\n\tcurrently filled by: {}",
message,
new ArrayList<>(queuedMessages)
.stream()
.map(Objects::toString)
.collect(Collectors.joining("\n\t\t"))
log.debug("Waiting for full writing queue for {} currently filled by:\n\t- {}",
message,
log.isTraceEnabled()
? new ArrayList<>(queuedMessages).stream()
.map(Objects::toString)
.map(NetworkSession::shorten)
.collect(Collectors.joining("\n\t\t- "))
: "%s messages".formatted(queuedMessages.size())
);
}
}
Expand All @@ -45,6 +49,16 @@ public WriteFuture send(final NetworkMessage<?> message) {
return future;
}

/**
 * Limits a description to {@link #MAX_MESSAGE_LENGTH} characters.
 *
 * @param desc the text to shorten.
 * @return {@code desc} unchanged if it is short enough, otherwise its first {@link #MAX_MESSAGE_LENGTH} characters followed by "…".
 */
@NotNull
private static String shorten(String desc) {
	final boolean fits = desc.length() <= MAX_MESSAGE_LENGTH;

	return fits ? desc : desc.substring(0, MAX_MESSAGE_LENGTH) + "…";
}

@Override
public void trySend(final NetworkMessage<?> message) {
if (isConnected()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ static NamespaceSetupData createNamespaceSetup(NamespaceStorage storage, final C

JobManager jobManager = new JobManager(storage.getDataset().getName(), config.isFailOnError());

FilterSearch filterSearch = new FilterSearch(storage, jobManager, config.getCsv(), config.getIndex());
FilterSearch filterSearch = new FilterSearch(config.getIndex());
return new NamespaceSetupData(injectables, indexService, communicationMapper, preprocessMapper, jobManager, filterSearch);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import com.bakdata.conquery.apiv1.frontend.FrontendValue;
import com.bakdata.conquery.io.jackson.serializer.NsIdRef;
import com.bakdata.conquery.io.storage.NamespaceStorage;
import com.bakdata.conquery.models.config.IndexConfig;
import com.bakdata.conquery.models.datasets.concepts.Searchable;
import com.bakdata.conquery.models.events.MajorTypeId;
Expand All @@ -26,7 +25,7 @@
@Setter
@NoArgsConstructor
@Slf4j
public class Column extends Labeled<ColumnId> implements NamespacedIdentifiable<ColumnId>, Searchable<ColumnId> {
public class Column extends Labeled<ColumnId> implements NamespacedIdentifiable<ColumnId>, Searchable {

public static final int UNKNOWN_POSITION = -1;

Expand Down Expand Up @@ -75,7 +74,7 @@ public Dataset getDataset() {
* We create only an empty search here, because the content is provided through {@link com.bakdata.conquery.models.messages.namespaces.specific.RegisterColumnValues} and filled by the caller.
*/
@Override
public TrieSearch<FrontendValue> createTrieSearch(IndexConfig config, NamespaceStorage storage) {
public TrieSearch<FrontendValue> createTrieSearch(IndexConfig config) {

return config.createTrieSearch(isGenerateSuffixes());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,43 +1,23 @@
package com.bakdata.conquery.models.datasets.concepts;

import java.util.List;

import javax.validation.constraints.Min;

import com.bakdata.conquery.apiv1.frontend.FrontendValue;
import com.bakdata.conquery.io.storage.NamespaceStorage;
import com.bakdata.conquery.models.config.IndexConfig;
import com.bakdata.conquery.models.datasets.Dataset;
import com.bakdata.conquery.models.identifiable.Identifiable;
import com.bakdata.conquery.models.identifiable.ids.Id;
import com.bakdata.conquery.models.query.FilterSearch;
import com.bakdata.conquery.util.search.TrieSearch;
import com.fasterxml.jackson.annotation.JsonIgnore;

/**
* @implNote This class is tightly coupled with {@link FilterSearch} and {@link com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter}.
* <p>
* Searchable classes describe how a search should be constructed, and provide the values with getSearchValues.
*/
public interface Searchable<ID extends Id<? extends Identifiable<? extends ID>>> extends Identifiable<ID> {

public Dataset getDataset();
public interface Searchable {

/**
* All available {@link FrontendValue}s for searching in a {@link TrieSearch}.
*/
TrieSearch<FrontendValue> createTrieSearch(IndexConfig config, NamespaceStorage storage);

/**
* The actual Searchables to use, if there is potential for deduplication/pooling.
*
* @implSpec The order of objects returned is used to also sort search results from different sources.
*/
@JsonIgnore
default List<Searchable<?>> getSearchReferences() {
//Hopefully the only candidate will be Column
return List.of(this);
}
TrieSearch<FrontendValue> createTrieSearch(IndexConfig config);

/**
* Parameter used in the construction of {@link com.bakdata.conquery.util.search.TrieSearch}, defining the shortest suffix to create.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,16 @@
import java.util.stream.Collectors;

import com.bakdata.conquery.apiv1.FilterTemplate;
import com.bakdata.conquery.apiv1.LabelMap;
import com.bakdata.conquery.apiv1.frontend.FrontendFilterConfiguration;
import com.bakdata.conquery.apiv1.frontend.FrontendValue;
import com.bakdata.conquery.io.jackson.View;
import com.bakdata.conquery.io.jackson.serializer.NsIdRef;
import com.bakdata.conquery.io.storage.NamespaceStorage;
import com.bakdata.conquery.models.config.ConqueryConfig;
import com.bakdata.conquery.models.config.IndexConfig;
import com.bakdata.conquery.models.datasets.concepts.Searchable;
import com.bakdata.conquery.models.datasets.concepts.filters.SingleColumnFilter;
import com.bakdata.conquery.models.events.MajorTypeId;
import com.bakdata.conquery.models.exceptions.ConceptConfigurationException;
import com.bakdata.conquery.models.identifiable.ids.specific.FilterId;
import com.bakdata.conquery.models.query.FilterSearch;
import com.bakdata.conquery.util.search.TrieSearch;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.google.common.collect.BiMap;
Expand All @@ -30,15 +26,14 @@
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.time.StopWatch;
import org.jetbrains.annotations.NotNull;

@Setter
@Getter
@NoArgsConstructor
@Slf4j
@JsonIgnoreProperties({"searchType"})
public abstract class SelectFilter<FE_TYPE> extends SingleColumnFilter<FE_TYPE> implements Searchable<FilterId> {
public abstract class SelectFilter<FE_TYPE> extends SingleColumnFilter<FE_TYPE> {

/**
* user given mapping from the values in the columns to shown labels
Expand Down Expand Up @@ -71,19 +66,27 @@ public void configureFrontend(FrontendFilterConfiguration.Top f, ConqueryConfig
@JsonIgnore
public abstract String getFilterType();

@Override
public List<Searchable<?>> getSearchReferences() {
final List<Searchable<?>> out = new ArrayList<>();

if (getTemplate() != null) {
/**
* The actual Searchables to use, if there is potential for deduplication/pooling.
*
* @implSpec The order of objects returned is used to also sort search results from different sources.
*/
@JsonIgnore
public List<Searchable> getSearchReferences() {
final List<Searchable> out = new ArrayList<>();

if (getTemplate() != null && !getTemplate().isSearchDisabled()) {
out.add(getTemplate());
}

if (!labels.isEmpty()) {
out.add(this);
out.add(new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes));
}

out.addAll(getColumn().getSearchReferences());
if (!getColumn().isSearchDisabled()) {
out.add(getColumn());
}

return out;
}
Expand All @@ -105,51 +108,4 @@ public boolean isNotUsingTemplateAndLabels() {

return (getTemplate() == null) != labels.isEmpty();
}

@Override
@JsonIgnore
public boolean isGenerateSuffixes() {
return generateSearchSuffixes;
}

@Override
@JsonIgnore
public int getMinSuffixLength() {
return searchMinSuffixLength;
}

/**
* Does not make sense to distinguish at Filter level since it's only referenced when labels are set.
*/
@Override
@JsonIgnore
public boolean isSearchDisabled() {
return false;
}

@Override
public TrieSearch<FrontendValue> createTrieSearch(IndexConfig config, NamespaceStorage storage) {

final TrieSearch<FrontendValue> search = config.createTrieSearch(true);

if(log.isTraceEnabled()) {
log.trace("Labels for {}: `{}`", getId(), collectLabels().stream().map(FrontendValue::toString).collect(Collectors.toList()));
}

StopWatch timer = StopWatch.createStarted();
log.trace("START-SELECT ADDING_ITEMS for {}", getId());

collectLabels().forEach(feValue -> search.addItem(feValue, FilterSearch.extractKeywords(feValue)));

log.trace("DONE-SELECT ADDING_ITEMS for {} in {}", getId(), timer);

timer.reset();
log.trace("START-SELECT SHRINKING for {}", getId());

search.shrinkToFit();

log.trace("DONE-SELECT SHRINKING for {} in {}", getId(), timer);

return search;
}
}
Loading
Loading