From 607fda287b158ed917763113146631038a796779 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattia=20Isgr=C3=B2?= Date: Fri, 1 Dec 2023 11:44:26 +0100 Subject: [PATCH] Linear regression with error on X and Y --- src/core/real_analysis.h | 7 +-- src/statistics/regression.h | 87 +++++++++++++++++++++++++++++++++++++ test/test_core.cpp | 4 +- 3 files changed, 92 insertions(+), 6 deletions(-) diff --git a/src/core/real_analysis.h b/src/core/real_analysis.h index 901cb75fa..eb04d5796 100644 --- a/src/core/real_analysis.h +++ b/src/core/real_analysis.h @@ -950,9 +950,10 @@ namespace theoretica { x -= floor(x / PI) * PI; // Domain reduction to [0, PI / 4] - if(x > (PI / 4)) - return (1 + tan(x - (PI / 4.0))) / - (1 - tan(x - (PI / 4.0))); + if(x > (PI / 4)) { + const real t = tan(x - PI / 4.0); + return (1 + t) / (1 - t); + } const real s = sin(x); const real c = cos(x); diff --git a/src/statistics/regression.h b/src/statistics/regression.h index e761217b8..21bdf014f 100644 --- a/src/statistics/regression.h +++ b/src/statistics/regression.h @@ -137,6 +137,51 @@ namespace theoretica { } + /// Compute the coefficients of the linear regression + /// using Weighted Least Squares + template + inline void wls_linear( + const Dataset1& X, const Dataset2& Y, + real sigma_X, real sigma_Y, + real& intercept, real& slope, real& error, + real& sigma_A, real& sigma_B) { + + if(X.size() != Y.size()) { + TH_MATH_ERROR("wls_linear", X.size(), INVALID_ARGUMENT); + intercept = nan(); + slope = nan(); + error = nan(); + return; + } + + // Pre-compute values + const real mean_x = mean(X); + const real mean_y = mean(Y); + const real delta_x = (sum_squares(X) / X.size()) - square(mean_x); + const real delta_y = (sum_squares(Y) / Y.size()) - square(mean_y); + const real delta_xy = (product_sum(X, Y) / X.size()) - mean_x * mean_y; + const real Delta = square(sigma_X) * delta_y - square(sigma_Y) * delta_x; + + slope = (Delta + sqrt( + square(Delta) + 4 * square(sigma_X * sigma_Y * delta_xy) + )) / (2 * square(sigma_X) * delta_xy); + + intercept = mean_y - slope * mean_x; + + // Compute the error on the model + real residual = 0; + for (unsigned int i = 0; i < X.size(); ++i) + residual += square(Y[i] - intercept - slope * X[i]); + + // Correction by degrees of freedom (N - 2) + error = sqrt(residual / (real) (X.size() - 2)); + + // Compute the propagated error on the coefficients + sigma_A = nan(); + sigma_B = nan(); + } + + /// @class linear_model Linear regression structure /// for computation and storage of least squares linear /// regression results with model \f$y = A + Bx\f$. @@ -198,6 +243,15 @@ namespace theoretica { } + /// Construct a linear model from data + /// and compute the fit + template + inline linear_model( + const Dataset1& X, const Dataset2& Y, real sigma_X, real sigma_Y) { + fit(X, Y, sigma_X, sigma_Y); + } + + /// Compute the linear regression of two sets of data of the same size /// using ordinary least squares linear regression. Without /// the error on the y axis, the chi-squared and the error @@ -311,6 +365,39 @@ namespace theoretica { } + /// Compute the linear regression of two sets of data of the same size + /// using least squares linear regression. + /// + /// @param X The set of values on the x axis + /// @param Y The set of values on the y axis + /// @param sigma The different errors on the y axis + template + inline void fit( + const Dataset1& X, const Dataset2& Y, real sigma_X, real sigma_Y) { + + if(X.size() != Y.size()) { + TH_MATH_ERROR("linear_model::fit", X.size(), INVALID_ARGUMENT); + A = nan(); B = nan(); + return; + } + + if(Y.size() < 2) { + TH_MATH_ERROR("linear_model::fit", Y.size(), INVALID_ARGUMENT); + A = nan(); B = nan(); + return; + } + + wls_linear(X, Y, sigma_X, sigma_Y, A, B, err, sigma_A, sigma_B); + + if(is_nan(A)) + return; + + chi_squared = err / (square(sigma_Y) + square(B * sigma_X)); + ndf = Y.size() - 2; + p_value = pvalue_chi_squared(chi_squared, ndf); + } + + #ifndef THEORETICA_NO_PRINT /// Convert the vector to string representation diff --git a/test/test_core.cpp b/test/test_core.cpp index 4271233f6..ee6f3c51d 100644 --- a/test/test_core.cpp +++ b/test/test_core.cpp @@ -108,7 +108,6 @@ int main(int argc, char const *argv[]) { prec::state.outputFolder = "test/"; prec::state.defaultIterations = 100000; - // prec::state.pickedTests["th::tan(real)"] = true; prec::setup("core", argc, argv); @@ -340,8 +339,7 @@ int main(int argc, char const *argv[]) { "th::tan(real)", REAL_LAMBDA(th::tan), REAL_LAMBDA(std::tan), - interval(MIN, MAX), 1E-5, false, 100, - prec::fail_on_rel_err); + interval(MIN, MAX), 1E-6, false, 1000); prec::equals("tan(2) = tan(2 + 100 PI)", th::tan(2), th::tan(2 + 100 * PI));