From 607fda287b158ed917763113146631038a796779 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mattia=20Isgr=C3=B2?= <mattiaisgro@users.noreply.github.com>
Date: Fri, 1 Dec 2023 11:44:26 +0100
Subject: [PATCH] Linear regression with error on X and Y

---
 src/core/real_analysis.h    |  7 +--
 src/statistics/regression.h | 87 +++++++++++++++++++++++++++++++++++++
 test/test_core.cpp          |  4 +-
 3 files changed, 92 insertions(+), 6 deletions(-)
diff --git a/src/core/real_analysis.h b/src/core/real_analysis.h
index 901cb75fa..eb04d5796 100644
--- a/src/core/real_analysis.h
+++ b/src/core/real_analysis.h
@@ -950,9 +950,10 @@ namespace theoretica {
 		x -= floor(x / PI) * PI;
 
 		// Domain reduction to [0, PI / 4]
-		if(x > (PI / 4))
-			return (1 + tan(x - (PI / 4.0))) /
-				   (1 - tan(x - (PI / 4.0)));
+		if(x > (PI / 4)) {
+			const real t = tan(x - PI / 4.0);
+			return (1 + t) / (1 - t);
+		}
 
 		const real s = sin(x);
 		const real c = cos(x);
diff --git a/src/statistics/regression.h b/src/statistics/regression.h
index e761217b8..21bdf014f 100644
--- a/src/statistics/regression.h
+++ b/src/statistics/regression.h
@@ -137,6 +137,51 @@ namespace theoretica {
 		}
 
 
+		/// Compute the coefficients of the linear regression
+		/// using Weighted Least Squares
+		template<typename Dataset1, typename Dataset2>
+		inline void wls_linear(
+			const Dataset1& X, const Dataset2& Y,
+			real sigma_X, real sigma_Y,
+			real& intercept, real& slope, real& error,
+			real& sigma_A, real& sigma_B) {
+
+			if(X.size() != Y.size()) {
+				TH_MATH_ERROR("wls_linear", X.size(), INVALID_ARGUMENT);
+				intercept = nan();
+				slope = nan();
+				error = nan();
+				return;
+			}
+
+			// Pre-compute values
+			const real mean_x = mean(X);
+			const real mean_y = mean(Y);
+			const real delta_x = (sum_squares(X) / X.size()) - square(mean_x);
+			const real delta_y = (sum_squares(Y) / Y.size()) - square(mean_y);
+			const real delta_xy = (product_sum(X, Y) / X.size()) - mean_x * mean_y;
+			const real Delta = square(sigma_X) * delta_y - square(sigma_Y) * delta_x;
+
+			slope = (Delta + sqrt(
+					square(Delta) + 4 * square(sigma_X * sigma_Y * delta_xy)
+				)) / (2 * square(sigma_X) * delta_xy);
+
+			intercept = mean_y - slope * mean_x;
+
+			// Compute the error on the model
+			real residual = 0;
+			for (unsigned int i = 0; i < X.size(); ++i)
+				residual += square(Y[i] - intercept - slope * X[i]);
+
+			// Correction by degrees of freedom (N - 2)
+			error = sqrt(residual / (real) (X.size() - 2));
+
+			// Compute the propagated error on the coefficients
+			sigma_A = nan();
+			sigma_B = nan();
+		}
+
+
 		/// @class linear_model Linear regression structure
 		/// for computation and storage of least squares linear
 		/// regression results with model \f$y = A + Bx\f$.
@@ -198,6 +243,15 @@ namespace theoretica {
 			}
 
 
+			/// Construct a linear model from data
+			/// and compute the fit
+			template<typename Dataset1, typename Dataset2>
+			inline linear_model(
+				const Dataset1& X, const Dataset2& Y, real sigma_X, real sigma_Y) {
+				fit(X, Y, sigma_X, sigma_Y);
+			}
+
+
 			/// Compute the linear regression of two sets of data of the same size
 			/// using ordinary least squares linear regression. Without
 			/// the error on the y axis, the chi-squared and the error
@@ -311,6 +365,39 @@ namespace theoretica {
 			}
 
 
+			/// Compute the linear regression of two sets of data of the same size
+			/// using least squares linear regression.
+			///
+			/// @param X The set of values on the x axis
+			/// @param Y The set of values on the y axis
+			/// @param sigma The different errors on the y axis
+			template<typename Dataset1, typename Dataset2>
+			inline void fit(
+				const Dataset1& X, const Dataset2& Y, real sigma_X, real sigma_Y) {
+
+				if(X.size() != Y.size()) {
+					TH_MATH_ERROR("linear_model::fit", X.size(), INVALID_ARGUMENT);
+					A = nan(); B = nan();
+					return;
+				}
+
+				if(Y.size() < 2) {
+					TH_MATH_ERROR("linear_model::fit", Y.size(), INVALID_ARGUMENT);
+					A = nan(); B = nan();
+					return;
+				}
+
+				wls_linear(X, Y, sigma_X, sigma_Y, A, B, err, sigma_A, sigma_B);
+
+				if(is_nan(A))
+					return;
+
+				chi_squared = err / (square(sigma_Y) + square(B * sigma_X));
+				ndf = Y.size() - 2;
+				p_value = pvalue_chi_squared(chi_squared, ndf);
+			}
+
+
 #ifndef THEORETICA_NO_PRINT
 
 		/// Convert the vector to string representation
diff --git a/test/test_core.cpp b/test/test_core.cpp
index 4271233f6..ee6f3c51d 100644
--- a/test/test_core.cpp
+++ b/test/test_core.cpp
@@ -108,7 +108,6 @@ int main(int argc, char const *argv[]) {
 
 	prec::state.outputFolder = "test/";
 	prec::state.defaultIterations = 100000;
-	// prec::state.pickedTests["th::tan(real)"] = true;
 
 	prec::setup("core", argc, argv);
 
@@ -340,8 +339,7 @@ int main(int argc, char const *argv[]) {
 			"th::tan(real)",
 			REAL_LAMBDA(th::tan),
 			REAL_LAMBDA(std::tan),
-			interval(MIN, MAX), 1E-5, false, 100,
-			prec::fail_on_rel_err);
+			interval(MIN, MAX), 1E-6, false, 1000);
 
 		prec::equals("tan(2) = tan(2 + 100 PI)",
 			th::tan(2), th::tan(2 + 100 * PI));