diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 81c888c..005b39e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,5 +17,6 @@ set(FSTATS_SOURCES ${dir}/fstats_bootstrap.f90 ${dir}/fstats_sampling.f90 ${dir}/fstats_smoothing.f90 + ${dir}/fstats_types.f90 ) set(FSTATS_SOURCES ${FSTATS_SOURCES} PARENT_SCOPE) diff --git a/src/fstats.f90 b/src/fstats.f90 index 1c040be..3c1bb24 100644 --- a/src/fstats.f90 +++ b/src/fstats.f90 @@ -78,6 +78,8 @@ module fstats public :: box_muller_sample public :: rejection_sample public :: lowess + public :: pooled_variance + public :: bartletts_test public :: FS_LEVENBERG_MARQUARDT_UPDATE public :: FS_QUADRATIC_UPDATE public :: FS_NIELSEN_UPDATE diff --git a/src/fstats_descriptive_statistics.f90 b/src/fstats_descriptive_statistics.f90 index b6a376e..591904a 100644 --- a/src/fstats_descriptive_statistics.f90 +++ b/src/fstats_descriptive_statistics.f90 @@ -3,6 +3,7 @@ module fstats_descriptive_statistics use linalg, only : sort use ferror use fstats_errors + use fstats_types implicit none private public :: mean @@ -12,7 +13,13 @@ module fstats_descriptive_statistics public :: quantile public :: trimmed_mean public :: covariance + public :: pooled_variance + interface pooled_variance + !! Computes the pooled estimate of variance. + module procedure :: pooled_variance_1 + module procedure :: pooled_variance_2 + end interface contains ! ------------------------------------------------------------------------------ pure function mean(x) result(rst) @@ -237,5 +244,53 @@ pure function covariance(x, y) result(rst) end if end function +! ------------------------------------------------------------------------------ +pure function pooled_variance_1(si, ni) result(rst) + !! Computes the pooled estimate of variance. + real(real64), intent(in), dimension(:) :: si + !! An N-element array containing the estimates for each of the N + !! variances. + integer(int32), intent(in), dimension(size(si)) :: ni + !! An N-element array containing the number of data points in each + !! of the data sets used to compute the variances in si. + real(real64) :: rst + !! The pooled variance. + + ! Local Variables + integer(int32) :: i, k, n + + ! Process + k = size(si) + rst = 0.0d0 + n = 0 + do i = 1, k + n = n + ni(i) + rst = rst + (ni(i) - 1.0d0) * si(i) + end do + rst = rst / real(n - k, real64) +end function + +pure function pooled_variance_2(x) result(rst) + !! Computes the pooled estimate of variance. + type(array_container), intent(in), dimension(:) :: x + !! An array of arrays of data. + real(real64) :: rst + !! The pooled variance. + + ! Local Variables + integer(int32) :: i, k, n, ni + + ! Process + k = size(x) + n = 0 + rst = 0.0d0 + do i = 1, k + ni = size(x(i)%x) + n = n + ni + rst = rst + variance(x(i)%x) * (ni - 1.0) + end do + rst = rst / real(n - k, real64) +end function + ! ------------------------------------------------------------------------------ end module \ No newline at end of file diff --git a/src/fstats_hypothesis.f90 b/src/fstats_hypothesis.f90 index d6eb068..6b6ce3d 100644 --- a/src/fstats_hypothesis.f90 +++ b/src/fstats_hypothesis.f90 @@ -5,12 +5,14 @@ module fstats_hypothesis use fstats_special_functions use fstats_distributions use fstats_descriptive_statistics + use fstats_types private public :: confidence_interval public :: t_test_equal_variance public :: t_test_unequal_variance public :: t_test_paired public :: f_test + public :: bartletts_test interface confidence_interval !! Computes the confidence interval for the specified distribution. @@ -279,13 +281,13 @@ subroutine f_test(x1, x2, stat, p, dof1, dof2) !! A measure of the degrees of freedom. ! Parameters - real(real64), parameter :: half = 0.5d0 real(real64), parameter :: one = 1.0d0 real(real64), parameter :: two = 2.0d0 ! Local Variables integer(int32) :: n1, n2 - real(real64) :: v1, v2, m1, m2, a, b, x + real(real64) :: v1, v2, m1, m2 + type(f_distribution) :: dist ! Compute the F-statistic n1 = size(x1) @@ -304,12 +306,61 @@ subroutine f_test(x1, x2, stat, p, dof1, dof2) dof2 = n1 - one end if - ! Compute the probability - a = half * dof2 - b = half * dof1 - x = dof2 / (dof2 + dof1 * stat) - p = two * regularized_beta(a, b, x) - if (p > one) p = two - p + dist%d1 = dof1 + dist%d2 = dof2 + p = two * (one - dist%cdf(stat)) +end subroutine + +! ------------------------------------------------------------------------------ +subroutine bartletts_test(x, stat, p) + !! Computes Bartlett's test statistic as follows. + !! + !! $$ \chi^{2} = \frac{(N - k) \ln(S_{p}^{2}) \sum_{i = 1}^{k} + !! \left(n_{i} - 1 \right) \ln(S_{i}^{2})}{1 + + !! \frac{1}{3 \left( k - 1 \right)} \left( \sum_{i = 1}^{k} + !! \left( \frac{1}{n_{i} - 1} \right) - \frac{1}{N - k} \right)} $$ + type(array_container), intent(in), dimension(:) :: x + !! The arrays of data to analyze. + real(real64), intent(out) :: stat + !! The Bartlett's test statistic. + real(real64), intent(out) :: p + !! The probability value that the variances of each data set are + !! equivalent. A low p-value, less than some significance level, + !! indicates a non-equivalance of variances. + + ! Local Variables + integer(int32) :: i, n, k, ni + real(real64) :: si, sp, numer, denom + type(chi_squared_distribution) :: dist + + ! Initialization + k = size(x) + n = 0 + do i = 1, k + n = n + size(x(i)%x) + end do + + ! Compute the statistic + n = 0 + sp = 0.0d0 + numer = 0.0d0 + denom = 0.0d0 + do i = 1, k + ni = size(x(i)%x) + n = n + ni + si = variance(x(i)%x) + sp = sp + (ni - 1.0d0) * si + numer = numer + (ni - 1.0d0) * log(variance(x(i)%x)) + denom = denom + 1.0d0 / (ni - 1.0d0) + end do + sp = sp / real(n - k, real64) + stat = ((n - k) * log(sp) - numer) / & + (1.0d0 + (1.0d0 / (3.0d0 * k - 3.0d0)) * & + (denom - 1.0d0 / real(n - k, real64))) + + ! Compute the p-value + dist%dof = k - 1 + p = 1.0d0 - dist%cdf(stat) end subroutine ! ------------------------------------------------------------------------------ diff --git a/src/fstats_types.f90 b/src/fstats_types.f90 new file mode 100644 index 0000000..62cff8f --- /dev/null +++ b/src/fstats_types.f90 @@ -0,0 +1,11 @@ +module fstats_types + use iso_fortran_env + implicit none + + type array_container + !! Provides a container for a real-valued array. A practical use of + !! this construct is in the construction of jagged arrays. + real(real64), allocatable, dimension(:) :: x + !! The array. + end type +end module \ No newline at end of file diff --git a/tests/fstats_statistics_tests.f90 b/tests/fstats_statistics_tests.f90 index 8ec752d..27216ca 100644 --- a/tests/fstats_statistics_tests.f90 +++ b/tests/fstats_statistics_tests.f90 @@ -2,6 +2,7 @@ module fstats_statistics_tests use iso_fortran_env use fstats use fstats_test_helper + use fstats_types implicit none contains ! ------------------------------------------------------------------------------ @@ -903,5 +904,93 @@ function test_correlation_1() result(rst) end if end function +! ------------------------------------------------------------------------------ + function test_pooled_variance_1() result(rst) + ! Arguments + logical :: rst + + ! Variables + integer(int32), parameter :: k = 20 + integer(int32), parameter :: n = 1000 + integer(int32) :: i, m, ni(k) + type(array_container) :: x(k) + real(real64) :: ans, sp, sp2, si(k) + + ! Initialization + rst = .true. + m = n * k + ans = 0.0d0 + do i = 1, k + allocate(x(i)%x(n)) + call random_number(x(i)%x) + si(i) = variance(x(i)%x) + ni(i) = n + ans = ans + si(i) * (n - 1.0d0) + end do + ans = ans / real(m - k, real64) + + ! Test 1 + sp = pooled_variance(x) + if (.not.is_equal(ans, sp)) then + rst = .false. + print '(A)', "TEST FAILED: Pooled variance test 1" + end if + + ! Test 2 + sp2 = pooled_variance(si, ni) + if (.not.is_equal(ans, sp2)) then + rst = .false. + print '(A)', "TEST FAILED: Pooled variance test 2" + end if + end function + +! ------------------------------------------------------------------------------ + function test_bartlett_1() result(rst) + ! Arguments + logical :: rst + + ! Local Variables + integer(int32), parameter :: npts = 25 + real(real64), parameter :: stat_ans = 1.52960933869d0 + real(real64), parameter :: p_ans = 0.21617108550d0 + type(array_container) :: x(2) + real(real64) :: stat, p + + ! Initialization + rst = .true. + allocate(x(1)%x(npts), x(2)%x(npts)) + x(1)%x = [ & + 0.357692624494507d0, 0.614383931107340d0, 0.802887803239860d0, & + 0.138578373117993d0, 0.754710064162687d0, 0.522900047841238d0, & + 0.076443208652965d0, 0.860167572639019d0, 0.183130360392741d0, & + 0.471659086806133d0, 0.071125320345872d0, 0.559104389166637d0, & + 0.500927806104085d0, 0.458795768141623d0,0.846677090629315d0, & + 0.569806543430701d0, 0.342909916953321d0, 0.660491929487175d0, & + 0.153963845813654d0, 0.274295416513455d0, 0.568200275187962d0, & + 0.234641814188948d0, 0.223741046257743d0, 0.960908965603275d0, & + 0.409105079237050d0] + x(2)%x = [ & + 0.146676174916891d0, 0.074990182260762d0, 0.626736796964143d0, & + 0.715565996611289d0, 0.243765200640375d0, 0.158861292064668d0, & + 0.135906579751008d0, 0.105995980721743d0, 0.040415255757239d0, & + 0.483256288452508d0, 0.658804776850479d0, 0.777622410194220d0, & + 0.965626651747080d0, 0.779568110382855d0, 0.215591619210302d0, & + 0.921351076345661d0, 0.199225207581094d0, 0.147835195118642d0, & + 0.908049631668390d0, 0.970759884908368d0, 0.825828458448519d0, & + 0.183678303516097d0, 0.433932667318501d0, 0.915015089978645d0, & + 0.495746710480416d0] + + ! Test 1 - reference results calculated by Excel + call bartletts_test(x, stat, p) + if (.not.is_equal(stat, stat_ans)) then + rst = .false. + print '(A)', "TEST FAILED: Bartlett's test 1" + end if + if (.not.is_equal(p, p_ans)) then + rst = .false. + print '(A)', "TEST FAILED: Bartlett's test 1" + end if + end function + ! ------------------------------------------------------------------------------ end module \ No newline at end of file diff --git a/tests/fstats_tests.f90 b/tests/fstats_tests.f90 index ef5da95..8d5b4ce 100644 --- a/tests/fstats_tests.f90 +++ b/tests/fstats_tests.f90 @@ -118,6 +118,13 @@ program tests local = test_correlation_1() if (.not.local) overall = .false. + ! Additional Tests + local = test_pooled_variance_1() + if (.not.local) overall = .false. + + local = test_bartlett_1() + if (.not.local) overall = .false. + ! End if (.not.overall) then stop 1