diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/BalancingHistogram.java b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/BalancingHistogram.java index 8c192cdf85..4e446b10c1 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/BalancingHistogram.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/BalancingHistogram.java @@ -19,33 +19,34 @@ @Data public class BalancingHistogram { private final Node[] nodes; - private final double min; + private final double min, max; private final double width; private final int expectedBins; - private final double stiffness; private int total; - public static BalancingHistogram create(double min, double max, int expectedBins, double stiffness, boolean snap) { - double width = (max - min) / (expectedBins - 1); + public static BalancingHistogram create(double min, double max, int expectedBins) { + final double width = (max - min) / (expectedBins - 1); - if (snap) { - min = Math.floor(min); - max = Math.ceil(max); - width = Math.min(1, Math.round(width)); - - expectedBins = (int) Math.ceil((min - max) / width); - } - - return new BalancingHistogram(new Node[expectedBins], min, width, expectedBins, stiffness); + return new BalancingHistogram(new Node[expectedBins], min, max, width, expectedBins); } public void add(double value) { total++; - final int index = (int) Math.floor((value - min) / width); + final int index; + + if (value >= max) { + index = nodes.length - 1; + } + else if (value <= min) { + index = 0; + } + else { + index = (int) Math.floor((value - min) / width); + } if (nodes[index] == null) { nodes[index] = new Node(new DoubleArrayList()); @@ -54,17 +55,17 @@ public void add(double value) { nodes[index].add(value); } - public List balanced() { + public List balanced(double stiffness) { - final List merged = mergeLeft(nodes); + final List merged = mergeLeft(nodes, stiffness); - final List split = splitRight(merged); + final List split = splitRight(merged, stiffness); return split; } - private List mergeLeft(Node[] nodes) { + private List mergeLeft(Node[] nodes, double stiffness) { final List bins = new ArrayList<>(); Node prior = null; @@ -102,7 +103,7 @@ private List mergeLeft(Node[] nodes) { return bins; } - private List splitRight(List nodes) { + private List splitRight(List nodes, double stiffness) { final int expectedBinSize = total / expectedBins; final List bins = new ArrayList<>(); diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/NumberColumnStatsCollector.java b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/NumberColumnStatsCollector.java index 4ddd00bf79..4057a1d515 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/NumberColumnStatsCollector.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/NumberColumnStatsCollector.java @@ -94,7 +94,11 @@ public ResultColumnStatistics describe() { @NotNull private List createBins(int expectedBins) { - final BalancingHistogram histogram = BalancingHistogram.create(getStatistics().getMin(), getStatistics().getMax(), expectedBins, 0.8d, false); + + final double min = Math.max(getStatistics().getMean() - getStatistics().getStandardDeviation() * 2, getStatistics().getMin()); + final double max = Math.min(getStatistics().getMean() + getStatistics().getStandardDeviation() * 2, getStatistics().getMax()); + + final BalancingHistogram histogram = BalancingHistogram.create(min, max, expectedBins); Arrays.stream(getStatistics().getValues()).forEach(histogram::add); @@ -141,13 +145,9 @@ private Map getExtras() { } - - - private String printValue(Number value) { return formatter.format(value.doubleValue()); } - } diff --git a/backend/src/test/java/com/bakdata/conquery/models/query/statistics/BalancingHistogramTest.java b/backend/src/test/java/com/bakdata/conquery/models/query/statistics/BalancingHistogramTest.java index a10c1754bb..d7bfdef1fc 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/query/statistics/BalancingHistogramTest.java +++ b/backend/src/test/java/com/bakdata/conquery/models/query/statistics/BalancingHistogramTest.java @@ -14,7 +14,7 @@ class BalancingHistogramTest { @Test void balanced() { - BalancingHistogram histogram = BalancingHistogram.create(0, max, 15, 0.8d, false); + BalancingHistogram histogram = BalancingHistogram.create(0, max, 15); final Random random = new Random(SEED); @@ -33,13 +33,13 @@ void balanced() { histogram.add(max); - List balanced = histogram.balanced(); + List balanced = histogram.balanced(0.8d); log.info("{}", balanced); } @Test void snapped() { - BalancingHistogram histogram = BalancingHistogram.create(0, max, 15, 0.8d, false); + BalancingHistogram histogram = BalancingHistogram.create(0, max, 15); final Random random = new Random(SEED); @@ -64,7 +64,7 @@ void snapped() { @Test void plain() { - BalancingHistogram histogram = BalancingHistogram.create(0, max, 15, 0.8d, false); + BalancingHistogram histogram = BalancingHistogram.create(0, max, 15); final Random random = new Random(SEED);