Skip to content

Commit

Permalink
Merge pull request #3329 from ingef/feature/dont-use-atomics
Browse files Browse the repository at this point in the history
Cleanup BooleanStatsCollector and all other collectors for performance issues
  • Loading branch information
awildturtok authored Mar 6, 2024
2 parents 094bbe3 + 87f9a54 commit 88ba41b
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 32 deletions.
Original file line number Diff line number Diff line change
@@ -1,28 +1,50 @@
package com.bakdata.conquery.models.query.statistics;

import java.util.List;
import java.util.Map;

import c10n.C10N;
import com.bakdata.conquery.models.query.PrintSettings;
import com.bakdata.conquery.models.types.ResultType;
import lombok.Getter;

/**
 * Collects statistics for a boolean result column by counting how many consumed values were
 * {@code true}, {@code false} or missing ({@code null}).
 *
 * <p>The counters are plain {@code int} fields; no intermediate string rendering or delegate
 * collector is involved while consuming values. Labels are only printed once, in
 * {@link #describe()}.
 */
@Getter
public class BooleanColumnStatsCollector extends ColumnStatsCollector {

	/** Number of consumed values that were {@code true}. */
	private int trues;

	/** Number of consumed values that were {@code false}. */
	private int falses;

	/** Number of consumed values that were {@code null}. */
	private int missing;

	public BooleanColumnStatsCollector(String name, String label, String description, PrintSettings printSettings) {
		super(name, label, description, printSettings);
	}

	/**
	 * Registers a single column value.
	 *
	 * @param value a {@link Boolean}, or {@code null} for a missing value; any other type fails the
	 *              cast below with a {@link ClassCastException}
	 */
	@Override
	public void consume(Object value) {
		if (value == null) {
			missing++;
			return;
		}

		if ((Boolean) value) {
			trues++;
		}
		else {
			falses++;
		}
	}

	/**
	 * Builds the histogram description of everything consumed so far.
	 *
	 * @return a {@link HistogramColumnDescription} with one entry for the printed {@code true} label
	 *         and one for the printed {@code false} label (in that order), plus an extras map holding
	 *         the localized "missing" label mapped to the formatted count of {@code null} values
	 */
	@Override
	public ResultColumnStatistics describe() {
		final PrintSettings settings = getPrintSettings();

		return new HistogramColumnDescription(
				getName(), getLabel(), getDescription(),
				List.of(
						new HistogramColumnDescription.Entry(ResultType.BooleanT.INSTANCE.print(settings, true), trues),
						new HistogramColumnDescription.Entry(ResultType.BooleanT.INSTANCE.print(settings, false), falses)
				),
				Map.of(
						C10N.get(StatisticsLabels.class, settings.getLocale()).missing(),
						settings.getIntegerFormat().format(getMissing())
				)
		);
	}

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;

import com.bakdata.conquery.io.cps.CPSType;
Expand All @@ -24,8 +22,8 @@ public class DateColumnStatsCollector extends ColumnStatsCollector {
private final SortedMap<String, Integer> quarterCounts = new TreeMap<>();
private final SortedMap<String, Integer> monthCounts = new TreeMap<>();

private final AtomicInteger totalCount = new AtomicInteger();
private final AtomicLong nulls = new AtomicLong(0);
private int totalCount = 0;
private int nulls = 0;
private final Function<Object, CDateRange> dateExtractor;
private CDateRange span = null;

Expand All @@ -49,10 +47,10 @@ private static Function<Object, CDateRange> getDateExtractor(ResultType dateType

@Override
public void consume(Object value) {
totalCount.incrementAndGet();
totalCount++;

if (value == null) {
nulls.incrementAndGet();
nulls++;
return;
}

Expand Down Expand Up @@ -89,8 +87,8 @@ private void handleDay(int day) {
public ResultColumnStatistics describe() {

return new ColumnDescription(getName(), getLabel(), getDescription(),
totalCount.get(),
getNulls().intValue(),
totalCount,
nulls,
quarterCounts,
monthCounts,
span == null ? CDateRange.all().toSimpleRange() : span.toSimpleRange()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import c10n.C10N;
import com.bakdata.conquery.models.query.PrintSettings;
Expand All @@ -25,7 +24,7 @@ public class NumberColumnStatsCollector<TYPE extends Number & Comparable<TYPE>>

private final ResultType type;
private final DescriptiveStatistics statistics = new DescriptiveStatistics();
private final AtomicLong nulls = new AtomicLong(0);
private int nulls = 0;


private final Comparator<TYPE> comparator;
Expand Down Expand Up @@ -107,7 +106,7 @@ private static Range<Double> expandBounds(double lower, double upper, int expect
@Override
public void consume(Object value) {
if (value == null) {
nulls.incrementAndGet();
nulls++;
return;
}

Expand Down Expand Up @@ -185,7 +184,7 @@ private Map<String, String> getExtras() {

out.put(labels.sum(), printValue(getStatistics().getSum()));
out.put(labels.count(), getPrintSettings().getIntegerFormat().format(getStatistics().getN()));
out.put(labels.missing(), getPrintSettings().getIntegerFormat().format(getNulls().get()));
out.put(labels.missing(), getPrintSettings().getIntegerFormat().format(getNulls()));

return out;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
package com.bakdata.conquery.models.query.statistics;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.StreamSupport;

import c10n.C10N;
Expand All @@ -19,10 +18,9 @@
public class StringColumnStatsCollector extends ColumnStatsCollector {

private final Frequency frequencies = new Frequency();
private final AtomicLong nulls = new AtomicLong(0);
private final long limit;

private final ResultType.StringT type;
private int nulls = 0;

public StringColumnStatsCollector(String name, String label, String description, ResultType.StringT type, PrintSettings printSettings, long limit) {
super(name, label, description, printSettings);
Expand All @@ -33,7 +31,7 @@ public StringColumnStatsCollector(String name, String label, String description,
@Override
public void consume(Object value) {
if (value == null) {
nulls.incrementAndGet();
nulls++;
return;
}

Expand Down Expand Up @@ -66,13 +64,16 @@ public ResultColumnStatistics describe() {

final StatisticsLabels statisticsLabels = C10N.get(StatisticsLabels.class, getPrintSettings().getLocale());

final Map<String, String> extras =
entriesSorted.size() <= limit
? Collections.emptyMap()
: Map.of(
statisticsLabels.remainingValues(entriesSorted.size() - limit),
statisticsLabels.remainingEntries(frequencies.getSumFreq() - shownTotal)
);
final Map<String, String> extras = new HashMap<>();

if (entriesSorted.size() > limit) {
extras.put(
statisticsLabels.remainingValues(entriesSorted.size() - limit),
statisticsLabels.remainingEntries(frequencies.getSumFreq() - shownTotal)
);
}

extras.put(statisticsLabels.missing(), getPrintSettings().getIntegerFormat().format(getNulls()));

return new HistogramColumnDescription(getName(), getLabel(), getDescription(), head, extras);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,9 @@ public void execute(String name, TestConquery testConquery) throws Exception {
new HistogramColumnDescription.Entry("b", 1),
new HistogramColumnDescription.Entry("d", 1)
),
Map.of()
Map.of(
labels.missing(), "1"
)
),
new HistogramColumnDescription(
"concept mapped",
Expand All @@ -127,7 +129,9 @@ public void execute(String name, TestConquery testConquery) throws Exception {
new HistogramColumnDescription.Entry("BEH", 1),
new HistogramColumnDescription.Entry("d", 1)
),
Map.of()
Map.of(
labels.missing(), "0"
)
),
new HistogramColumnDescription(
"concept int",
Expand Down Expand Up @@ -211,7 +215,9 @@ public void execute(String name, TestConquery testConquery) throws Exception {
new HistogramColumnDescription.Entry("Yes", 4),
new HistogramColumnDescription.Entry("No", 1)
),
Map.of()
Map.of(
labels.missing(), "1"
)
)
),
Range.of(LocalDate.of(2021, 1, 1), LocalDate.of(2021, 10, 1))
Expand Down

0 comments on commit 88ba41b

Please sign in to comment.