From 029c202f4d11ef731829d8b6cf7cb6fd02c8f5e6 Mon Sep 17 00:00:00 2001
From: Kristofer Munsterhjelm <kristofer@munsterhjelm.no>
Date: Sat, 21 Sep 2024 20:37:10 +0200
Subject: [PATCH] Add Bernoulli generator and binary issue space PR measure.

This replicates the old multiwinner functionality (though still without
the VSE rescaling). I'll need to get the VSE stuff going before I can
test it against the old implementation.

multiwinner.cc shows how to generate and test binary issue spaces.
---
 CMakeLists.txt                        |  2 +
 src/generator/spatial/all.h           |  1 +
 src/generator/spatial/bernoulli.cc    | 27 +++++++++++
 src/generator/spatial/bernoulli.h     | 50 +++++++++++++++++++++
 src/main/multiwinner.cc               | 18 +++++---
 src/multiwinner/pr_measures/binary.cc | 64 +++++++++++++++++++++++++++
 src/multiwinner/pr_measures/binary.h  | 28 ++++++++++++
 7 files changed, 185 insertions(+), 5 deletions(-)
 create mode 100644 src/generator/spatial/bernoulli.cc
 create mode 100644 src/generator/spatial/bernoulli.h
 create mode 100644 src/multiwinner/pr_measures/binary.cc
 create mode 100644 src/multiwinner/pr_measures/binary.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f395217..b7076f5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -208,6 +208,7 @@ add_library(qe_multiwinner_methods
 	src/multiwinner/methods/rusty/auxiliary/dsc.cc
 	src/multiwinner/methods/shuntsstv.cc
 	src/multiwinner/methods/stv.cc
+	src/multiwinner/pr_measures/binary.cc
 	src/multiwinner/pr_measures/clustering.cc
 	src/multiwinner/pr_measures/normal_fit.cc
 	src/stats/multiwinner/convex_hull.cc
@@ -220,6 +221,7 @@ add_library(quadelect_lib src/common/ballots.cc
 	src/generator/ballotgen.cc
 	src/generator/iac.cc
 	src/generator/impartial_gen.cc
+	src/generator/spatial/bernoulli.cc
 	src/generator/spatial/gaussian.cc
 	src/generator/spatial/spatial.cc
 	src/generator/spatial/uniform.cc
diff --git a/src/generator/spatial/all.h b/src/generator/spatial/all.h
index d082f0c..2329204 100644
--- a/src/generator/spatial/all.h
+++ b/src/generator/spatial/all.h
@@ -1,6 +1,7 @@
 
 // All spatial utility generators.
 
+#include "bernoulli.h"
 #include "gaussian.h"
 #include "spatial.h"
 #include "uniform.h"
\ No newline at end of file
diff --git a/src/generator/spatial/bernoulli.cc b/src/generator/spatial/bernoulli.cc
new file mode 100644
index 0000000..66ae18c
--- /dev/null
+++ b/src/generator/spatial/bernoulli.cc
@@ -0,0 +1,27 @@
+#include "bernoulli.h"
+#include <vector>
+
+// Draws `size` raw coordinates from coord_source and thresholds each one
+// against center: entry i becomes 1 if the draw is <= center[i], else 0.
+std::vector<double> bernoulli_generator::rnd_vector(size_t size,
+	coordinate_gen & coord_source) const {
+
+	std::vector<double> opinions = coord_source.get_coordinate(size);
+
+	// NOTE(review): this reads center[0..size-1] unchecked and presumably
+	// expects draws in [0, 1] so that P(1) = center[i] -- confirm both
+	// against spatial_generator and coordinate_gen.
+	for (size_t dim = 0; dim < size; ++dim) {
+		const bool holds_true = (opinions[dim] <= center[dim]);
+		opinions[dim] = holds_true ? 1 : 0;
+	}
+
+	return opinions;
+}
+
+// Draws num_dimensions values from coord_source and makes them the center.
+void bernoulli_generator::bias_generator(size_t num_dimensions,
+	coordinate_gen & coord_source) {
+	const std::vector<double> drawn = coord_source.get_coordinate(num_dimensions);
+	set_center(drawn);
+}
\ No newline at end of file
diff --git a/src/generator/spatial/bernoulli.h b/src/generator/spatial/bernoulli.h
new file mode 100644
index 0000000..7bdc986
--- /dev/null
+++ b/src/generator/spatial/bernoulli.h
@@ -0,0 +1,50 @@
+// n-dimensional Bernoulli distribution: coordinate i is true (one) with
+// probability p_i and false (zero) with probability (1 - p_i). The center
+// (location) variable is the vector of these p_i, because the expected
+// value on axis i is p_i.
+
+#pragma once
+#include "spatial.h"
+
+class bernoulli_generator : public spatial_generator {
+	private:
+		// Shared constructor tail: the center is the only parameter
+		// in use, and it is initialised through set_center(0.5).
+		void init_bernoulli_defaults() {
+			uses_center = true;
+			uses_dispersion = false;
+			set_center(0.5);
+		}
+
+	protected:
+		// Produces one vector of 0/1 coordinates of the given size.
+		std::vector<double> rnd_vector(size_t size,
+			coordinate_gen & coord_source) const;
+
+	public:
+		// Every constructor forwards to the matching base constructor.
+		bernoulli_generator() : spatial_generator() {
+			init_bernoulli_defaults();
+		}
+		bernoulli_generator(bool compress_in)
+			: spatial_generator(compress_in) {
+			init_bernoulli_defaults();
+		}
+		bernoulli_generator(bool compress_in, bool do_truncate)
+			: spatial_generator(compress_in, do_truncate) {
+			init_bernoulli_defaults();
+		}
+		bernoulli_generator(bool compress_in, bool do_truncate,
+			double num_dimensions_in, bool warren_util_in)
+			: spatial_generator(compress_in, do_truncate,
+				num_dimensions_in, warren_util_in) {
+			init_bernoulli_defaults();
+		}
+
+		// Redraws the center from coord_source with num_dimensions
+		// entries. Used for binary issue testing.
+		void bias_generator(size_t num_dimensions,
+			coordinate_gen & coord_source);
+
+		std::string name() const { return ("Bernoulli"); }
+};
\ No newline at end of file
diff --git a/src/main/multiwinner.cc b/src/main/multiwinner.cc
index ba54e7e..5544561 100644
--- a/src/main/multiwinner.cc
+++ b/src/main/multiwinner.cc
@@ -8,6 +8,7 @@
 
 #include "multiwinner/helper/errors.h"
 
+#include "multiwinner/pr_measures/binary.h"
 #include "multiwinner/pr_measures/clustering.h"
 #include "multiwinner/pr_measures/normal_fit.h"
 
@@ -20,29 +21,36 @@ int main() {
 	std::cout << gauss.get_num_dimensions() << std::endl;
 	gauss.set_dispersion(1);
 
+	// Or for binary testing...
+	size_t num_issues = 5;
+	bernoulli_generator bingen;
+
 	size_t num_voters = 4096;
 	size_t num_candidates = 50;
 
 	size_t num_clusters = 2; // say
 
-	size_t maxiters = 5000;
+	size_t maxiters = 50000;
 
 	std::cout << gauss.pdf(std::vector<double>(5, 0.1)) << "\n";
 
 	cluster_proportionality test(num_clusters);
 	normal_proportionality ntest(gauss);
+	binary_proportionality btest;
 
 	for (double delta = 0.1; delta <= 1; delta += 0.1) {
 
 		double error = 0;
-		for (int i = 0; i < maxiters; ++i) {
+		for (size_t i = 0; i < maxiters; ++i) {
+
+			bingen.bias_generator(num_issues, rnd);
 
 			std::cerr << i << "/" << maxiters << "    \r" << std::flush;
 
-			positions_election p_e = gauss.generate_election_result(
+			positions_election p_e = bingen.generate_election_result(
 					num_voters, num_candidates, false, rnd);
 
-			ntest.prepare(p_e);
+			btest.prepare(p_e);
 
 			// Elect using, say, QPQ.
 			size_t num_seats = 7;
@@ -50,7 +58,7 @@ int main() {
 			std::list<size_t> qpq_council = QPQ(delta, true).get_council(
 					num_seats, num_candidates, p_e.ballots);
 
-			error += ntest.get_error(qpq_council);
+			error += btest.get_error(qpq_council);
 		}
 
 		std::cerr << "\n";
diff --git a/src/multiwinner/pr_measures/binary.cc b/src/multiwinner/pr_measures/binary.cc
new file mode 100644
index 0000000..133a1bd
--- /dev/null
+++ b/src/multiwinner/pr_measures/binary.cc
@@ -0,0 +1,64 @@
+#include "binary.h"
+
+#include "multiwinner/helper/errors.h"
+
+// Stores candidate opinions and each issue's share of voters at 1.
+// Throws std::invalid_argument on no voters or non-binary voter data.
+void binary_proportionality::prepare(const positions_election & p_e) {
+	// Guard first: indexing voters_pos[0] on an empty vector is UB.
+	if (p_e.voters_pos.empty()) {
+		throw std::invalid_argument("Binary proportionality: no voters!");
+	}
+	const size_t num_voters = p_e.voters_pos.size(),
+		   num_issues = p_e.voters_pos[0].size();
+	const double voter_share = 1.0/num_voters;
+
+	// assign() resizes and zeroes, clearing any previous election.
+	issue_voter_proportions.assign(num_issues, 0.0);
+	issue_winner_proportions.resize(num_issues);
+	candidate_opinions = p_e.candidates_pos;
+
+	for (size_t voter = 0; voter < num_voters; ++voter) {
+		for (size_t issue = 0; issue < num_issues; ++issue) {
+			const double opinion = p_e.voters_pos[voter][issue];
+			// Fail loudly: coercing non-binary input would hide errors.
+			if (opinion != 0 && opinion != 1) {
+				throw std::invalid_argument("Binary proportionality: voter"
+					" issue vectors aren't binary!");
+			}
+			if (opinion == 1) {
+				issue_voter_proportions[issue] += voter_share;
+			}
+		}
+	}
+}
+
+// Returns sli() of the winners' versus the voters' per-issue shares of
+// the true (1) position, using the data stored by prepare(). Throws
+// std::invalid_argument on non-binary data, std::out_of_range on a bad index.
+double binary_proportionality::get_error(
+	const std::list<size_t> & outcome) {
+	const size_t num_seats = outcome.size(),
+		   num_issues = issue_winner_proportions.size();
+
+	std::fill(issue_winner_proportions.begin(),
+		issue_winner_proportions.end(), 0.0);
+
+	for (size_t winner: outcome) {
+		for (size_t issue = 0; issue < num_issues; ++issue) {
+			// at() turns a stale or malformed index into an exception
+			// instead of an out-of-bounds read.
+			const double opinion =
+				candidate_opinions.at(winner).at(issue);
+			if (opinion != 0 && opinion != 1) {
+				throw std::invalid_argument("Binary proportionality: winner"
+					" issue vectors aren't binary!");
+			}
+			if (opinion == 1) {
+				issue_winner_proportions[issue] += 1.0/num_seats;
+			}
+		}
+	}
+
+	return sli(issue_winner_proportions, issue_voter_proportions);
+}
\ No newline at end of file
diff --git a/src/multiwinner/pr_measures/binary.h b/src/multiwinner/pr_measures/binary.h
new file mode 100644
index 0000000..cc9eb0b
--- /dev/null
+++ b/src/multiwinner/pr_measures/binary.h
@@ -0,0 +1,28 @@
+#pragma once
+#include "measure.h"
+#include <list>
+#include <vector>
+
+// This proportionality measure only works on binary spatial models, but
+// is pretty simple. It just checks the proportion of the voters that
+// agree with each issue (take the true or 1 position) and compares this
+// to the proportion of the winners that do.
+// TODO? Maybe a print function???
+class binary_proportionality : public proportionality_measure {
+	private:
+		// These vectors ought to be bool but because all spatial
+		// generators work on double, we use what we're given.
+
+		// Each candidate's opinion (true = 1, false = 0) on every issue.
+		std::vector<std::vector<double> > candidate_opinions;
+		// The fraction of voters and winners who take the true
+		// position on each issue.
+		std::vector<double> issue_voter_proportions;
+		std::vector<double> issue_winner_proportions;
+
+	public:
+		// Stores the election's candidate opinions and voter proportions.
+		void prepare(const positions_election & p_e);
+		// Scores a council against the most recently prepared election.
+		double get_error(const std::list<size_t> & outcome);
+};
\ No newline at end of file