Skip to content

Commit

Permalink
Merge pull request #3634 from ingef/fix/entity-upload-NPE-slowdown
Browse files Browse the repository at this point in the history
short circuit empty date field and preventing time consuming and log chatty NPE
  • Loading branch information
thoniTUB authored Dec 9, 2024
2 parents d44859e + 7e67c01 commit 330fd61
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ public void readDates(String value, DateReader dateReader, CDateSet out) {
DATE_SET {
@Override
public void readDates(String value, DateReader dateReader, CDateSet out) {
out.addAll(dateReader.parseToCDateSet(value));
CDateSet parsed = dateReader.parseToCDateSet(value);
if (parsed == null ) {
return;
}
out.addAll(parsed);
}
},
ALL {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import com.bakdata.conquery.models.identifiable.mapping.ExternalId;
import com.bakdata.conquery.util.DateReader;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;

@Slf4j
public class EntityResolverUtil {
Expand All @@ -41,7 +42,7 @@ public static CDateSet[] readDates(String[][] values, List<String> format, DateR
but can also don't contribute to any date aggregation.
*/
if (dateFormats.stream().allMatch(Objects::isNull)) {
// Initialize empty
// Initialize empty, so all lines appear als resolved
for (int row = 0; row < values.length; row++) {
out[row] = CDateSet.createEmpty();
}
Expand All @@ -59,10 +60,19 @@ public static CDateSet[] readDates(String[][] values, List<String> format, DateR
if (dateFormat == null) {
continue;
}
dateFormat.readDates(values[row][col], dateReader, dates);
String value = values[row][col];

if (StringUtils.isBlank(value)) {
log.trace("Found blank/null value in {}/{} (row/col)", row,col);
continue;
}

dateFormat.readDates(value, dateReader, dates);
}

if (dates.isEmpty()) {
// Don't set an empty dateset here, because this flags the line as: unresolvedDate
// TODO It might be better to set an empty dateset nonetheless, because it seems to be intentionally empty, as we had no problem while parsing a value
continue;
}

Expand All @@ -73,7 +83,9 @@ public static CDateSet[] readDates(String[][] values, List<String> format, DateR
out[row].addAll(dates);
}
catch (Exception e) {
log.warn("Failed to parse Date from {}", row, e);
// If a value is not parsable, it is included in the exceptions cause message (see DateReader)
log.trace("Failed to parse Date in row {}", row, e);
// This catch causes `out[row]` to remain `null` which later flags this line as: unresolvedDate
}
}

Expand Down Expand Up @@ -142,6 +154,7 @@ public static String tryResolveId(String[] row, List<Function<String[], External
*/
public static Map<String, String>[] readExtras(String[][] values, List<String> format) {
final String[] names = values[0];
@SuppressWarnings("unchecked")
final Map<String, String>[] extrasByRow = new Map[values.length];


Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
package com.bakdata.conquery.mode.cluster;

import static com.bakdata.conquery.apiv1.query.concept.specific.external.EntityResolverUtil.collectExtraData;
import static com.bakdata.conquery.apiv1.query.concept.specific.external.EntityResolverUtil.readDates;
import static com.bakdata.conquery.apiv1.query.concept.specific.external.EntityResolverUtil.tryResolveId;
import static com.bakdata.conquery.apiv1.query.concept.specific.external.EntityResolverUtil.verifyOnlySingles;
import static com.bakdata.conquery.apiv1.query.concept.specific.external.EntityResolverUtil.*;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import jakarta.validation.constraints.NotEmpty;

import com.bakdata.conquery.apiv1.query.concept.specific.external.EntityResolver;
import com.bakdata.conquery.apiv1.query.concept.specific.external.EntityResolverUtil;
Expand All @@ -19,7 +17,6 @@
import com.bakdata.conquery.models.identifiable.mapping.ExternalId;
import com.bakdata.conquery.util.DateReader;
import com.bakdata.conquery.util.io.IdColumnUtil;
import jakarta.validation.constraints.NotEmpty;

public class ClusterEntityResolver implements EntityResolver {

Expand Down Expand Up @@ -58,18 +55,18 @@ public ResolveStatistic resolveEntities(

final String[] row = values[rowNum];

if (rowDates[rowNum] == null) {
unresolvedDate.add(row);
continue;
}

// Try to resolve the id first, because it has higher priority for the uploader than the dates
String resolvedId = tryResolveId(row, readers, mapping);

if (resolvedId == null) {
unresolvedId.add(row);
continue;
}

if (rowDates[rowNum] == null) {
unresolvedDate.add(row);
continue;
}

// read the dates from the row
resolved.put(resolvedId, rowDates[rowNum]);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nullable;

import com.bakdata.conquery.models.common.CDateSet;
import com.bakdata.conquery.models.common.daterange.CDateRange;
Expand Down Expand Up @@ -53,7 +54,7 @@ public class DateReader {
* All available formats for parsing.
*/
@JsonIgnore
private List<DateTimeFormatter> dateFormats;
private final List<DateTimeFormatter> dateFormats;
/**
* Parsed values cache.
*/
Expand All @@ -63,9 +64,9 @@ public class DateReader {
.concurrencyLevel(10)
.build(CacheLoader.from(this::tryParseDate));
@JsonIgnore
private List<String> rangeStartEndSeperators;
private final List<String> rangeStartEndSeperators;
@JsonIgnore
private List<LocaleConfig.ListFormat> dateSetLayouts;
private final List<LocaleConfig.ListFormat> dateSetLayouts;

@JsonCreator
public DateReader(Set<String> dateParsingFormats, List<String> rangeStartEndSeperators, List<LocaleConfig.ListFormat> dateSetLayouts) {
Expand Down Expand Up @@ -136,6 +137,7 @@ private CDateRange parseToCDateRange(String value, String sep) {
/**
* Try parsing the String value to a LocalDate.
*/
@Nullable
public LocalDate parseToLocalDate(String value) throws ParsingException {
if (Strings.isNullOrEmpty(value)) {
return null;
Expand All @@ -153,6 +155,7 @@ public LocalDate parseToLocalDate(String value) throws ParsingException {
/**
* Try and parse value to CDateSet using all available layouts, but starting at the last known successful one.
*/
@Nullable
public CDateSet parseToCDateSet(String value) {
if (Strings.isNullOrEmpty(value)) {
return null;
Expand Down

0 comments on commit 330fd61

Please sign in to comment.