Skip to content

Commit

Permalink
Implement overflow bins to get a better view of the data; focus on the range within 2 standard deviations of the mean.
Browse files Browse the repository at this point in the history
  • Loading branch information
awildturtok committed Jan 9, 2024
1 parent f17fdb3 commit 874a83c
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,33 +19,34 @@
@Data
public class BalancingHistogram {
private final Node[] nodes;
private final double min;
private final double min, max;
private final double width;

private final int expectedBins;

private final double stiffness;

private int total;

// NOTE(review): this is a diff rendering without +/- markers. The lines that mention
// `stiffness` or `snap` appear to be the removed variant; the lines without them appear
// to be the replacement.
//
// Removed variant: factory that also took the stiffness and a snap flag.
public static BalancingHistogram create(double min, double max, int expectedBins, double stiffness, boolean snap) {
double width = (max - min) / (expectedBins - 1);
// Replacement variant: factory taking only the value range and the expected bin count.
public static BalancingHistogram create(double min, double max, int expectedBins) {
// Bin width is the range divided by (expectedBins - 1).
// NOTE(review): for expectedBins == 1 the divisor is zero; as this is floating-point
// division the result is Infinity or NaN rather than an exception.
final double width = (max - min) / (expectedBins - 1);

// Snapping branch (uses the `snap` parameter, which only the removed signature declares):
// floors min, ceils max, and recomputes width and bin count.
// NOTE(review): Math.min(1, ...) caps the width at 1, and (min - max) is non-positive
// whenever max >= min, so the recomputed bin count would not be positive.
if (snap) {
min = Math.floor(min);
max = Math.ceil(max);
width = Math.min(1, Math.round(width));

expectedBins = (int) Math.ceil((min - max) / width);
}

// Removed constructor call: passed min, width, expectedBins and stiffness.
return new BalancingHistogram(new Node[expectedBins], min, width, expectedBins, stiffness);
// Replacement constructor call: passes min and max, width and expectedBins; no stiffness.
// The node array is allocated with expectedBins slots.
return new BalancingHistogram(new Node[expectedBins], min, max, width, expectedBins);
}

public void add(double value) {
total++;

final int index = (int) Math.floor((value - min) / width);
final int index;

if (value >= max) {
index = nodes.length - 1;
}
else if (value <= min) {
index = 0;
}
else {
index = (int) Math.floor((value - min) / width);
}

if (nodes[index] == null) {
nodes[index] = new Node(new DoubleArrayList());
Expand All @@ -54,17 +55,17 @@ public void add(double value) {
nodes[index].add(value);
}

// NOTE(review): diff rendering without +/- markers. Each statement appears twice: first
// the variant without a stiffness argument (apparently removed), then the variant that
// passes stiffness explicitly (apparently the replacement).
//
// Produces the list of bins by running mergeLeft over the node array and then splitRight
// over the merged result; the output of splitRight is returned unchanged.
public List<Node> balanced() {
// Replacement signature: the caller supplies stiffness per invocation.
public List<Node> balanced(double stiffness) {

// Step 1: mergeLeft over the raw node array.
final List<Node> merged = mergeLeft(nodes);
final List<Node> merged = mergeLeft(nodes, stiffness);

// Step 2: splitRight over the merged list.
final List<Node> split = splitRight(merged);
final List<Node> split = splitRight(merged, stiffness);

return split;

}

private List<Node> mergeLeft(Node[] nodes) {
private List<Node> mergeLeft(Node[] nodes, double stiffness) {
final List<Node> bins = new ArrayList<>();

Node prior = null;
Expand Down Expand Up @@ -102,7 +103,7 @@ private List<Node> mergeLeft(Node[] nodes) {
return bins;
}

private List<Node> splitRight(List<Node> nodes) {
private List<Node> splitRight(List<Node> nodes, double stiffness) {
final int expectedBinSize = total / expectedBins;

final List<Node> bins = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,11 @@ public ResultColumnStatistics describe() {

@NotNull
private List<StringColumnStatsCollector.ColumnDescription.Entry> createBins(int expectedBins) {
final BalancingHistogram histogram = BalancingHistogram.create(getStatistics().getMin(), getStatistics().getMax(), expectedBins, 0.8d, false);

final double min = Math.max(getStatistics().getMean() - getStatistics().getStandardDeviation() * 2, getStatistics().getMin());
final double max = Math.min(getStatistics().getMean() + getStatistics().getStandardDeviation() * 2, getStatistics().getMax());

final BalancingHistogram histogram = BalancingHistogram.create(min, max, expectedBins);

Arrays.stream(getStatistics().getValues()).forEach(histogram::add);

Expand Down Expand Up @@ -141,13 +145,9 @@ private Map<String, String> getExtras() {
}





/**
 * Renders a number as text by handing its {@code double} value to {@code formatter}.
 * The {@code formatter} is declared outside this method.
 *
 * @param value the number to render; its {@link Number#doubleValue()} is what gets formatted
 * @return the string produced by {@code formatter.format(...)}
 */
private String printValue(Number value) {
    final double asDouble = value.doubleValue();
    return formatter.format(asDouble);
}



}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class BalancingHistogramTest {

@Test
void balanced() {
BalancingHistogram histogram = BalancingHistogram.create(0, max, 15, 0.8d, false);
BalancingHistogram histogram = BalancingHistogram.create(0, max, 15);

final Random random = new Random(SEED);

Expand All @@ -33,13 +33,13 @@ void balanced() {
histogram.add(max);


List<BalancingHistogram.Node> balanced = histogram.balanced();
List<BalancingHistogram.Node> balanced = histogram.balanced(0.8d);
log.info("{}", balanced);
}

@Test
void snapped() {
BalancingHistogram histogram = BalancingHistogram.create(0, max, 15, 0.8d, false);
BalancingHistogram histogram = BalancingHistogram.create(0, max, 15);

final Random random = new Random(SEED);

Expand All @@ -64,7 +64,7 @@ void snapped() {

@Test
void plain() {
BalancingHistogram histogram = BalancingHistogram.create(0, max, 15, 0.8d, false);
BalancingHistogram histogram = BalancingHistogram.create(0, max, 15);

final Random random = new Random(SEED);

Expand Down

0 comments on commit 874a83c

Please sign in to comment.