Skip to content

Commit

Permalink
Multi mapping string selects (#3613)
Browse files Browse the repository at this point in the history
Implementation of mappable Selects that produce multiple output values from a single intermediate value. Output values are deduplicated.

This feature is enabled by `com.bakdata.conquery.models.index.MapInternToExternMapper#allowMultiple` which will change the selects using the Mapping from `STRING` to `LIST[STRING]`.

You can use this feature to associate multiple values with a single key via CSV-mapping.
  • Loading branch information
awildturtok authored Nov 19, 2024
1 parent e0dc145 commit 3c49cfa
Show file tree
Hide file tree
Showing 44 changed files with 653 additions and 293 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public boolean isSearchDisabled() {
public TrieSearch<FrontendValue> createTrieSearch(IndexConfig config) throws IndexCreationException {

final URI resolvedURI = FileUtil.getResolvedUri(config.getBaseUrl(), getFilePath());
log.trace("Resolved filter template reference url for search '{}': {}", this.getId(), resolvedURI);
log.trace("Resolved filter template reference url for search '{}': {}", getId(), resolvedURI);

final FrontendValueIndex search = indexService.getIndex(new FrontendValueIndexKey(
resolvedURI,
Expand All @@ -101,7 +101,7 @@ public TrieSearch<FrontendValue> createTrieSearch(IndexConfig config) throws Ind
config.getSearchSplitChars()
));

return search;
return search.getDelegate();
}

@Override
Expand Down
64 changes: 31 additions & 33 deletions backend/src/main/java/com/bakdata/conquery/io/jackson/Jackson.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,48 +33,46 @@ public class Jackson {

/**
* Helper method that also creates a copy of the injected values to reduce side effects.
*
* @param om the {@link ObjectMapper} which is copied. Its {@link com.fasterxml.jackson.databind.InjectableValues} must be {@link MutableInjectableValues}
* @return A copy of the {@link ObjectMapper} along with a copy of its {@link MutableInjectableValues}.
* @throws ClassCastException if the injectable values of the copied mapper are not {@link MutableInjectableValues} (the cast below is unchecked)
*/
public static ObjectMapper copyMapperAndInjectables(ObjectMapper om) {
final ObjectMapper copy = om.copy();
// Replace the injectables on the copy with their own copy, so later changes are made on the copy's instance.
copy.setInjectableValues(((MutableInjectableValues)copy.getInjectableValues()).copy());
copy.setInjectableValues(((MutableInjectableValues) copy.getInjectableValues()).copy());
return copy;
}

/**
* Applies the shared Jackson configuration to the given mapper and returns that same instance.
*
* The chain below enables lenient parsing (unquoted field names, comments, unquoted control characters),
* turns on several fail-fast deserialization checks, registers the Java-time, parameter-names, Guava,
* Blackbird and Conquery serializer modules, and adds mix-ins for {@link Permission} and {@link Object2IntMap}.
* It also installs a new {@link MutableInjectableValues} on the mapper.
*
* @param objectMapper the mapper to configure; the configuration calls are made on this instance
* @param <T> the concrete mapper type, preserved in the return value
* @return the {@code objectMapper} that was passed in
*/
public static <T extends ObjectMapper> T configure(T objectMapper){
public static <T extends ObjectMapper> T configure(T objectMapper) {

objectMapper
.enable(MapperFeature.PROPAGATE_TRANSIENT_MARKER)
.enable(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY)
.enable(Feature.ALLOW_UNQUOTED_FIELD_NAMES)
.enable(Feature.ALLOW_COMMENTS)
.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS)
//TODO this is just a hotfix to avoid reimports
// .enable(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES)
.enable(DeserializationFeature.FAIL_ON_INVALID_SUBTYPE)
.enable(DeserializationFeature.FAIL_ON_NULL_FOR_PRIMITIVES)
.enable(DeserializationFeature.FAIL_ON_NUMBERS_FOR_ENUMS)
.enable(DeserializationFeature.FAIL_ON_READING_DUP_TREE_KEY)
.enable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
.enable(DeserializationFeature.FAIL_ON_UNRESOLVED_OBJECT_IDS)
.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS)
.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS)
.setLocale(Locale.ROOT)
.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET)
.enable(SerializationFeature.WRITE_EMPTY_JSON_ARRAYS)
.enable(SerializationFeature.WRITE_NULL_MAP_VALUES)
.registerModule(new JavaTimeModule())
.registerModule(new ParameterNamesModule())
.registerModule(new GuavaModule())
.registerModule(new BlackbirdModule())
.registerModule(ConquerySerializersModule.INSTANCE)
.setSerializationInclusion(Include.ALWAYS)
.setDefaultPropertyInclusion(Include.ALWAYS)
//.setAnnotationIntrospector(new RestrictingAnnotationIntrospector())
.setInjectableValues(new MutableInjectableValues())
.addMixIn(Permission.class, ConqueryPermission.class)
.addMixIn(Object2IntMap.class, Object2IntMapMixin.class);
objectMapper.enable(MapperFeature.PROPAGATE_TRANSIENT_MARKER)
.enable(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY)
.enable(Feature.ALLOW_UNQUOTED_FIELD_NAMES)
.enable(Feature.ALLOW_COMMENTS)
.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS)
.enable(DeserializationFeature.FAIL_ON_INVALID_SUBTYPE)
.enable(DeserializationFeature.FAIL_ON_NULL_FOR_PRIMITIVES)
.enable(DeserializationFeature.FAIL_ON_NUMBERS_FOR_ENUMS)
.enable(DeserializationFeature.FAIL_ON_READING_DUP_TREE_KEY)
.enable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
.enable(DeserializationFeature.FAIL_ON_UNRESOLVED_OBJECT_IDS)
.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS)
.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS)
.setLocale(Locale.ROOT)
.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET)
.enable(SerializationFeature.WRITE_EMPTY_JSON_ARRAYS)
.enable(SerializationFeature.WRITE_NULL_MAP_VALUES)
.registerModule(new JavaTimeModule())
.registerModule(new ParameterNamesModule())
.registerModule(new GuavaModule())
.registerModule(new BlackbirdModule())
.registerModule(ConquerySerializersModule.INSTANCE)
.setSerializationInclusion(Include.ALWAYS)
.setDefaultPropertyInclusion(Include.ALWAYS)
//.setAnnotationIntrospector(new RestrictingAnnotationIntrospector())
.setInjectableValues(new MutableInjectableValues())
.addMixIn(Permission.class, ConqueryPermission.class)
.addMixIn(Object2IntMap.class, Object2IntMapMixin.class);

return objectMapper;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package com.bakdata.conquery.models.datasets.concepts.select.connector;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import com.bakdata.conquery.io.cps.CPSType;
import com.bakdata.conquery.models.datasets.concepts.select.Select;
import com.bakdata.conquery.models.datasets.concepts.select.connector.specific.MappableSingleColumnSelect;
Expand All @@ -10,7 +14,7 @@
import com.bakdata.conquery.models.query.queryplan.aggregators.specific.value.AllValuesAggregator;
import com.bakdata.conquery.models.query.resultinfo.printers.Printer;
import com.bakdata.conquery.models.query.resultinfo.printers.PrinterFactory;
import com.bakdata.conquery.models.query.resultinfo.printers.common.MappedPrinter;
import com.bakdata.conquery.models.query.resultinfo.printers.common.OneToManyMappingPrinter;
import com.bakdata.conquery.models.types.ResultType;
import com.bakdata.conquery.sql.conversion.model.select.DistinctSelectConverter;
import com.bakdata.conquery.sql.conversion.model.select.SelectConverter;
Expand All @@ -20,8 +24,7 @@
public class DistinctSelect extends MappableSingleColumnSelect {

/**
* Creates the select from a column reference and a mapping reference; both are passed
* unchanged to the superclass constructor.
*
* @param column  id of the column this select reads
* @param mapping id of the mapping to apply; {@code createPrinter} treats a {@code null} mapping as "no mapping"
*/
@JsonCreator
public DistinctSelect(ColumnId column,
InternToExternMapperId mapping) {
public DistinctSelect(ColumnId column, InternToExternMapperId mapping) {
super(column, mapping);
}

Expand All @@ -37,15 +40,33 @@ public SelectConverter<DistinctSelect> createConverter() {

/**
* Creates the printer for this select's values.
*
* Without a mapping, printing is left to the superclass. With a mapping, the values are first
* run through the resolved mapping and the result is then passed to a list printer of strings.
*/
@Override
public Printer<?> createPrinter(PrinterFactory printerFactory, PrintSettings printSettings) {
if(getMapping() == null){
if (getMapping() == null) {
return super.createPrinter(printerFactory, printSettings);
}

return printerFactory.getListPrinter(new MappedPrinter(getMapping().resolve()), printSettings);
// Map first, then hand the mapped strings to the factory's list printer.
return new FlatMappingPrinter(new OneToManyMappingPrinter(getMapping().resolve()))
.andThen(printerFactory.getListPrinter(printerFactory.getStringPrinter(printSettings), printSettings));
}

/**
 * Reports a list type whose element type is the result type of the superclass.
 */
@Override
public ResultType getResultType() {
    final ResultType elementType = super.getResultType();
    return new ResultType.ListT<>(elementType);
}

/**
 * Runs every incoming value through the wrapped mapper and merges all results into one set,
 * so a mapped value produced by several inputs appears only once in the output.
 */
private record FlatMappingPrinter(OneToManyMappingPrinter mapper) implements Printer<Collection<String>> {

    @Override
    public Collection<String> apply(Collection<String> values) {
        final Set<String> distinct = new HashSet<>();

        // Collecting into a set drops duplicates across all inputs.
        values.forEach(value -> mapper.apply(value).forEach(distinct::add));

        return distinct;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ public Aggregator<?> createAggregator() {
// Returns a new FirstValueSelectConverter on every call.
public SelectConverter<FirstValueSelect> createConverter() {
return new FirstValueSelectConverter();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
import com.bakdata.conquery.models.datasets.concepts.select.connector.SingleColumnSelect;
import com.bakdata.conquery.models.identifiable.ids.specific.ColumnId;
import com.bakdata.conquery.models.identifiable.ids.specific.InternToExternMapperId;
import com.bakdata.conquery.models.index.InternToExternMapper;
import com.bakdata.conquery.models.query.PrintSettings;
import com.bakdata.conquery.models.query.resultinfo.SelectResultInfo;
import com.bakdata.conquery.models.query.resultinfo.printers.Printer;
import com.bakdata.conquery.models.query.resultinfo.printers.PrinterFactory;
import com.bakdata.conquery.models.query.resultinfo.printers.common.MappedPrinter;
import com.bakdata.conquery.models.types.ResultType;
import com.bakdata.conquery.models.types.SemanticType;
import lombok.Getter;
Expand Down Expand Up @@ -42,15 +42,9 @@ public Printer<?> createPrinter(PrinterFactory printerFactory, PrintSettings pri
return super.createPrinter(printerFactory, printSettings);
}

return new MappedPrinter(mapping.resolve());
}
final InternToExternMapper resolvedMapping = mapping.resolve();

@Override
public ResultType getResultType() {
if(mapping == null){
return ResultType.resolveResultType(getColumn().resolve().getType());
}
return ResultType.Primitive.STRING;
return resolvedMapping.createPrinter(printerFactory, printSettings);
}

@Override
Expand All @@ -63,6 +57,14 @@ public SelectResultInfo getResultInfo(CQConcept cqConcept) {
return new SelectResultInfo(this, cqConcept, Set.of(new SemanticType.CategoricalT()));
}

/**
 * Determines the result type of this select: STRING when a mapping is configured,
 * otherwise the type resolved from the underlying column.
 */
@Override
public ResultType getResultType() {
    if (mapping != null) {
        return ResultType.Primitive.STRING;
    }

    // No mapping configured: the type follows the column's own type.
    return ResultType.resolveResultType(getColumn().resolve().getType());
}

public void loadMapping() {
if (mapping != null) {
mapping.resolve().init();
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
package com.bakdata.conquery.models.index;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.bakdata.conquery.apiv1.FilterTemplate;
import com.bakdata.conquery.apiv1.frontend.FrontendValue;
import com.bakdata.conquery.models.query.FilterSearch;
import com.bakdata.conquery.util.search.TrieSearch;
import lombok.Getter;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.time.StopWatch;

@Slf4j
@ToString
public class FrontendValueIndex extends TrieSearch<FrontendValue> implements Index<FrontendValueIndexKey> {
public class FrontendValueIndex implements Index<FrontendValue> {


/**
Expand All @@ -28,11 +31,15 @@ public class FrontendValueIndex extends TrieSearch<FrontendValue> implements Ind
private final String optionValueTemplate;
private final String defaultEmptyLabel;

public FrontendValueIndex(int suffixCutoff, String split, String valueTemplate, String optionValueTemplate, String defaultEmptyLabel1) {
super(suffixCutoff, split);
// Trie that stores the indexed values; it is created in the constructor from suffixCutoff and split.
@Getter
private final TrieSearch<FrontendValue> delegate;

/**
* Stores the templates and the default empty label, and creates the backing trie.
*
* @param suffixCutoff        passed through to the {@link TrieSearch} constructor
* @param split               passed through to the {@link TrieSearch} constructor
* @param valueTemplate       stored in the field of the same name
* @param optionValueTemplate stored in the field of the same name
* @param defaultEmptyLabel   stored in the field of the same name
*/
public FrontendValueIndex(int suffixCutoff, String split, String valueTemplate, String optionValueTemplate, String defaultEmptyLabel) {
this.valueTemplate = valueTemplate;
this.optionValueTemplate = optionValueTemplate;
this.defaultEmptyLabel = defaultEmptyLabel1;
this.defaultEmptyLabel = defaultEmptyLabel;

delegate = new TrieSearch<>(suffixCutoff, split);
}

@Override
Expand All @@ -43,36 +50,55 @@ public void put(String internalValue, Map<String, String> templateToConcrete) {
templateToConcrete.get(optionValueTemplate)
);

addItem(feValue, FilterSearch.extractKeywords(feValue));
delegate.addItem(feValue, FilterSearch.extractKeywords(feValue));
}

/**
* Reports the trie's calculated size as an int, capped at {@link Integer#MAX_VALUE}.
*/
@Override
public int size() {
final long longSize = calculateSize();
final long longSize = delegate.calculateSize();
// The size is computed as a long; clamp it rather than overflow in the cast below.
if (longSize > Integer.MAX_VALUE) {
log.trace("Trie size was larger than an int. Reporting Integer.MAX_VALUE. Was actually: {}", longSize);
return Integer.MAX_VALUE;
}
return (int) longSize;
}

/**
 * Looks up all values stored under exactly the given key.
 *
 * @return the matches, or {@code null} when there are none
 */
@Override
public Collection<FrontendValue> externalMultiple(String key) {
    final List<FrontendValue> found = delegate.findExact(Set.of(key), Integer.MAX_VALUE);

    // An empty result is reported as null, not as an empty collection.
    return found.isEmpty() ? null : found;
}

/**
 * Looks up a single value stored under exactly the given key.
 *
 * @return the first match, or {@code null} when there is none
 */
@Override
public FrontendValue external(String key) {
    // Only one result is requested from the trie.
    final List<FrontendValue> found = delegate.findExact(Set.of(key), 1);

    if (!found.isEmpty()) {
        return found.get(0);
    }

    return null;
}

@Override
public void finalizer() {

StopWatch timer = StopWatch.createStarted();
final StopWatch timer = StopWatch.createStarted();

// If no empty label was provided by the mapping, we insert the configured default-label
if (findExact(List.of(""), 1).isEmpty()) {
addItem(new FrontendValue("", defaultEmptyLabel), List.of(defaultEmptyLabel));
if (delegate.findExact(List.of(""), 1).isEmpty()) {
delegate.addItem(new FrontendValue("", defaultEmptyLabel), List.of(defaultEmptyLabel));
}

log.trace("DONE-FINALIZER ADDING_ITEMS in {}", timer);

timer.reset();
log.trace("START-FV-FIN SHRINKING");

shrinkToFit();
delegate.shrinkToFit();

log.trace("DONE-FV-FIN SHRINKING in {}", timer);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@

import com.bakdata.conquery.apiv1.FilterTemplate;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;

@EqualsAndHashCode(callSuper = true)
@EqualsAndHashCode
@ToString
public class FrontendValueIndexKey extends AbstractIndexKey<FrontendValueIndex> {
public class FrontendValueIndexKey implements IndexKey {


private final int suffixCutoff;
Expand All @@ -27,15 +28,20 @@ public class FrontendValueIndexKey extends AbstractIndexKey<FrontendValueIndex>
* @see FilterTemplate#getOptionValue()
*/
private final String optionValueTemplate;
@Getter
private final URI csv;
@Getter
private final String internalColumn;


/**
* Stores all arguments in the fields of the same name.
*
* @param csv                 location of the CSV this key refers to
* @param internalColumn      NOTE(review): presumably the CSV column holding the internal value; confirm against the index loader
* @param valueTemplate       template stored for the value
* @param optionValueTemplate template stored for the option value
* @param suffixCutoff        stored as given
* @param splitPattern        stored as given
*/
public FrontendValueIndexKey(URI csv, String internalColumn, String valueTemplate, String optionValueTemplate, int suffixCutoff, String splitPattern) {
super(csv, internalColumn);
this.suffixCutoff = suffixCutoff;
this.splitPattern = splitPattern;

this.valueTemplate = valueTemplate;
this.optionValueTemplate = optionValueTemplate;
this.csv = csv;
this.internalColumn = internalColumn;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
package com.bakdata.conquery.models.index;

import java.util.Collection;
import java.util.Map;
import javax.annotation.CheckForNull;

/**
* Contract for an index that receives entries through {@link #put(String, Map)} and answers
* lookups by key.
*
* @param <V> the type of value returned by the lookup methods
*/
public interface Index<T extends IndexKey<? extends Index<T>>> {
public interface Index<V> {

/**
* Adds an entry for {@code key}.
* NOTE(review): {@code templateToConcrete} presumably maps each template string to its
* rendered value for this entry; confirm against the implementations.
*/
void put(String key, Map<String, String> templateToConcrete);

/**
* @return the size of the index as reported by the implementation
*/
int size();

/**
* Hook for implementations to complete their setup.
* NOTE(review): presumably invoked once after all {@code put} calls; confirm with the caller.
*/
void finalizer();

/**
* @return a single value for {@code key}; may be {@code null}
*/
@CheckForNull
V external(String key);

/**
* @return the values for {@code key}; may be {@code null}
*/
@CheckForNull
Collection<V> externalMultiple(String key);

}
Loading

0 comments on commit 3c49cfa

Please sign in to comment.