-
Notifications
You must be signed in to change notification settings - Fork 7
/
30-CIsTesting-TwoMeans.Rmd
2074 lines (1678 loc) · 84.7 KB
/
30-CIsTesting-TwoMeans.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# CIs and tests: comparing two means {#AnalysisTwoMeans}
\index{Difference between means}
<!-- Introductions; easier to separate by format -->
```{r, child = if (knitr::is_html_output()) {'./introductions/30-CIsTesting-TwoMeans-HTML.Rmd'} else {'./introductions/30-CIsTesting-TwoMeans-LaTeX.Rmd'}}
```
<!-- Define colours as appropriate -->
```{r, child = if (knitr::is_html_output()) {'./children/coloursHTML.Rmd'} else {'./children/coloursLaTeX.Rmd'}}
```
## Introduction: garter snakes {#TwoMeansHT-intro}
Some Mexican garter snakes (*Thamnophis melanogaster*) live in habitats with no crayfish, while some live in habitats with crayfish and hence use crayfish as a food source.
@manjarrez2017morphological were interested in whether the snakes in these two regions were different:
> For female Mexican garter snakes, is the mean snout--vent length (SVL) different for those in regions with crayfish and without crayfish?
Two different groups of snakes are studied, so this is a relational RQ with no intervention (the study uses a between-individuals comparison\index{Comparison!between individuals}), and the data are shown
`r if (knitr::is_latex_output()) {
'in Table\\ \\@ref(tab:SnakesDataTableTest).'
} else {
'below.'
}`
```{r SnakesDataTableTest}
data("Snakes")
# Extract just the female snakes, for example; n \approx 50
SNF <- subset(Snakes,
Sex == "female")
SNF$Crayfish <- ordered(SNF$Crayfish,
levels = c("NoCfish", "Cfish"),
labels = c("No",
"Yes"))
if( knitr::is_latex_output() ) {
tb1 <- c( na.omit( SNF$SVL[ SNF$Crayfish == "No" ] ) )
tb1 <- cbind (tb1[1:5],
tb1[6:10],
tb1[11:15],
tb1[16:20],
tb1[21:25],
tb1[26:30],
tb1[31:35],
tb1[36:34],
c( tb1[41], NA, NA, NA, NA) )
tb2 <- c( na.omit( SNF$SVL[ SNF$Crayfish == "Yes" ] ) )
tb2 <- cbind (tb2[1:5],
tb2[6:10],
tb2[11:15],
tb2[16:20],
tb2[21:25],
tb2[26:30],
tb2[31:35] )
T1 <- knitr::kable( pad(tb1,
surroundMaths = TRUE,
targetLength = rep(2, 9),
decDigits = 0),
format = "latex",
valign = 't',
align = "c",
#table.env = "@empty",
linesep = "",
row.names = FALSE,
escape = FALSE,
booktabs = TRUE) %>%
row_spec(0, bold = TRUE) %>%
add_header_above( c("Non-crayfish region" = 9),
bold = TRUE)
T2 <- knitr::kable( pad(tb2,
surroundMaths = TRUE,
targetLength = rep(2, 9),
decDigits = 0),
format = "latex",
align = "c",
valign = 't',
#table.env = "@empty",
linesep = "",
row.names = FALSE,
escape = FALSE,
booktabs = TRUE) %>%
row_spec(0, bold = TRUE) %>%
add_header_above( c("Crayfish region" = 7),
bold = TRUE)
out <- knitr::kables(list(T1, T2),
format = "latex",
label = "SnakesDataTableTest",
caption = "Snout--vent length (in cm) for female Mexican garter snakes living in crayfish ($n = 35$) and non-crayfish ($n = 41$) regions.") %>%
kable_styling(font_size = 8)
out2 <- prepareSideBySideTable(out)
out2
}
if( knitr::is_html_output() ) {
DT::datatable( SNF,
colnames = c("Crayfish region",
"Not-crayfish region"),
caption = "Snout--vent length (in cm) for female Mexican garter snakes living in crayfish ($n = 35$) and non-crayfish ($n = 41$) regions.",
fillContainer = FALSE, # Make more room, so we don't just have ten values
#filter="top",
#selection="multiple",
#escape=FALSE,
options = list(searching = FALSE) # Remove searching: See: https://stackoverflow.com/questions/35624413/remove-search-option-but-leave-search-columns-option
)
}
```
## Summarising the data and error bar charts {#ErrorBarCharts}
A numerical summary *must* summarise the difference between the means, because the RQ is about this difference.
Both groups should be summarised too.\index{Mean!difference between}
The information can be found using software (Fig.\ \@ref(fig:SnakesSummaryTestjamovi)),\index{Software output!comparing two means} and compiled into a table (Table\ \@ref(tab:SnakesNumericalTest)).
The appropriate summary for graphically summarising the *data* is (for example) a boxplot (Fig.\ \@ref(fig:SnakesErrorbar), left panel).
```{r SnakesSummaryTestjamovi, fig.cap="Software output for the garter-snakes data.", fig.align="center", out.width=c("100%","65%"), fig.show='hold'}
knitr::include_graphics("jamovi/Snakes/Snakes-Test.png")
knitr::include_graphics("jamovi/Snakes/Snakes-Summary.png")
```
```{r SnakesNumericalTest}
SN.summary <- array(NA,
dim = c(3, 4) )
SN.summary[1:2, 1] <- aggregate(SVL ~ Crayfish,
data = SNF,
FUN = "mean")[, 2]
SN.summary[1:2, 2] <- aggregate(SVL ~ Crayfish,
data = SNF,
FUN = "sd")[, 2]
SN.summary[1:2, 3] <- aggregate(SVL ~ Crayfish,
data = SNF,
FUN = "realLength")[, 2]
SN.summary[1:2, 4] <- aggregate(SVL ~ Crayfish,
data = SNF,
FUN = "findStdError")[, 2]
SN.summary[3, 1] <- SN.summary[1, 1] - SN.summary[2, 1]
SN.summary[3, 4] <- sqrt( SN.summary[1, 2]^2/SN.summary[1, 3] +
SN.summary[2, 2]^2/SN.summary[2, 3] )
rownames(SN.summary) <- c("Non-crayfish region",
"Crayfish region",
"Difference")
if( knitr::is_latex_output() ) {
kable(pad(SN.summary,
surroundMaths = TRUE,
targetLength = c(5, 5, 2, 5),
decDigits = c(2, 2, 0, 3)),
format = "latex",
booktabs = TRUE,
longtable = FALSE,
escape = FALSE,
align = "c",
col.names = c("Mean",
"Standard deviation",
"Sample size",
"Standard error"),
caption = "Numerical summaries of SVL (in cm) for female garter snakes in two regions.") %>%
row_spec(0, bold = TRUE) %>%
row_spec(3, italic = TRUE) %>%
row_spec(2, hline_after = TRUE) %>%
kable_styling(font_size = 8)
}
if( knitr::is_html_output() ) {
kable(pad(SN.summary,
surroundMaths = TRUE,
targetLength = c(5, 5, 2, 5),
decDigits = c(2, 2, 0, 3)),
format = "html",
booktabs = TRUE,
longtable = FALSE,
align = "c",
col.names = c("Mean",
"Sample size",
"Standard deviation",
"Standard error"),
caption = "Numerical summaries of SVL (in cm) for female snakes in two regions.")
}
```
```{r, SnakesErrorbar, fig.width=8.75, fig.height=3, fig.cap="Boxplot (left) and error bar chart (right) of SVL for female snakes in two regions.", fig.align="center", out.width='95%'}
ci.lo <- SN.summary[, 1] - 2 * SN.summary[, 4]
ci.hi <- SN.summary[, 1] + 2 * SN.summary[, 4]
par(mfrow = c(1, 2))
boxplot(SVL ~ Crayfish,
data = SNF,
las = 1,
ylim = c(15, 60),
xlab = "Crayfish region?",
main = "Boxplot: Snout--vent length ",
ylab = "SVL (in cm)")
###
plot( range( c( ci.lo, ci.hi) ) ~ c(0.9, 2.1),
data = SNF,
type = "n",
xlim = c(0.5, 2.5),
ylim = c(29, 46),
pch = 19,
axes = FALSE,
main = "Error bar chart:\nSnout--vent length ",
xlab = "Crayfish region?",
ylab = "SVL (in cm)",
sub = "(Error bars are 95% confidence intervals)",
las = 1)
axis(side = 1,
at = 1:2,
labels = c("No",
"Yes"),
las = 1)
axis(side = 2,
las = 1)
box()
mns <- SN.summary[1:2, 1]
points( mns ~ c(1:2),
pch = 19)
arrows(x0 = 1:2,
y0 = ci.lo[1:2],
x1 = 1:2,
y1 = ci.hi[1:2],
length = 0.05,
angle = 90,
code = 3)
```
Since two groups are being compared, subscripts are used to distinguish between the statistics for the two groups; say, Groups\ $1$ and\ $2$ in general (Table\ \@ref(tab:IndSampleNotationHT)).
Using this notation, the *parameter* in the RQ is the difference between population means: $\mu_1 - \mu_2$.
As usual, the population values are unknown, so this is estimated using the statistic $\bar{x}_1 - \bar{x}_2$.
```{r IndSampleNotationHT}
Diff2Notation <- array(dim = c(5, 3))
colnames(Diff2Notation) <- c("Group 1",
"Group 2",
"Comparing groups")
rownames(Diff2Notation) <- c("Sample sizes:",
"Population means:",
"Sample means:",
"Standard deviations:",
"Standard errors:")
if( knitr::is_latex_output() ) {
Diff2Notation[1, ] <- c( "$n_1$",
"$n_2$",
NA)
Diff2Notation[2, ] <- c( "$\\mu_1$",
"$\\mu_2$",
"$\\mu_1 - \\mu_2$")
Diff2Notation[3, ] <- c( "$\\bar{x}_1$",
"$\\bar{x}_2$",
"$\\bar{x}_1 - \\bar{x}_2$")
Diff2Notation[4, ] <- c( "$s_1$",
"$s_2$",
NA)
Diff2Notation[5, ] <- c( "$\\displaystyle\\text{s.e.}(\\bar{x}_1) = \\frac{s_1}{\\sqrt{n_1}}$",
"$\\displaystyle\\text{s.e.}(\\bar{x}_2) = \\frac{s_2}{\\sqrt{n_2}}$",
"$\\displaystyle\\text{s.e.}(\\bar{x}_1 - \\bar{x}_2)$")
kable( Diff2Notation,
format = "latex",
booktabs = TRUE,
align = c("c", "c"),
longtable = FALSE,
escape = FALSE,
linesep = c("\\addlinespace", "", "\\addlinespace", "", ""),
col.names = colnames(Diff2Notation),
caption = "Notation used to distinguish the two independent groups.") %>%
row_spec(0, bold = TRUE) %>%
kable_styling(font_size = 8)
}
if( knitr::is_html_output() ) {
Diff2Notation[1, ] <- c( "$n_1$",
"$n_2$",
NA)
Diff2Notation[2, ] <- c( "$\\mu_1$",
"$\\mu_2$",
"$\\mu_1 - \\mu_2$")
Diff2Notation[3, ] <- c( "$\\bar{x}_1$",
"$\\bar{x}_2$",
"$\\bar{x}_1 - \\bar{x}_2$")
Diff2Notation[4, ] <- c( "$s_1$",
"$s_2$",
NA)
Diff2Notation[5, ] <- c( "$\\displaystyle\\text{s.e.}(\\bar{x}_1) = \\frac{s_1}{\\sqrt{n_1}}$",
"$\\displaystyle\\text{s.e.}(\\bar{x}_2) = \\frac{s_2}{\\sqrt{n_2}}$",
"$\\displaystyle\\text{s.e.}(\\bar{x}_1 - \\bar{x}_2)$")
kable( Diff2Notation,
format = "html",
booktabs = TRUE,
longtable = FALSE,
align = c("c", "c", "c"),
col.names = colnames(Diff2Notation),
caption = "Notation used to distinguish between the two independent groups.") %>%
row_spec(0, bold = TRUE)
}
```
For the garter-snakes data, define the differences as the mean for females snakes living in non-crayfish regions\ ($N$), *minus* the mean for female snakes in crayfish regions\ ($C$): $\mu_N - \mu_C$.
This is the *parameter*.
By this definition, the differences refer to how much larger (on average) the SVL is for snakes living in non-crayfish regions.
::: {.importantBox .important data-latex="{iconmonstr-warning-8-240.png}"}
Here the difference is computed as the mean\ SVL for snakes living in non-crayfish regions, *minus* the mean\ SVL for snakes living in crayfish regions.
Computing the difference as the mean\ SVL for snakes in crayfish regions, *minus* non-crayfish regions is also correct.
You need to be clear about how the difference is computed, and be consistent throughout.
The *meaning* of the conclusions will be the same whichever direction is used.
:::
A useful way to compare the means of two (or more) groups is to display the CIs for the means of the groups being compared in an *error bar chart*.\index{Graphs!error bar charts}
Error bars charts display the expected variation *in the sample means* from sample to sample, while boxplots display the variation *in the individual observations*.
For the garter-snakes data, the error bar chart (Fig.\ \@ref(fig:SnakesErrorbar), right panel) shows the $95$%\ CI for each group; the mean has been added as a dot.
The two CIs for the SVL are (using information from the bottom table in Fig.\ \@ref(fig:SnakesSummaryTestjamovi)):
* \makebox[34mm][l]{Crayfish regions:} $34.171 \pm (2 \times 2.112)$, or from\ $29.94$ to\ $38.40\cms$.
* \makebox[34mm][l]{Non-crayfish regions:} $42.566 \pm (2\times 1.216)$, or from\ $40.13$ to\ $45.00\cms$.
However, the error bar chart, and these CIs, do not give a CI for the *difference* between the two means, as relevant to the RQ.
::: {.example #ErrorBarCharts2 name="Error bar charts"}
@data:ForestBiomass2017 studied the foliage biomass of small-leaved lime trees from three sources: coppices; natural; planted.
Three graphical summaries are shown in Fig.\ \@ref(fig:LimeTreesBoxErrorbar): a boxplot (showing the variation in *individual* trees; left), an error bar chart (showing the variation in the *sample means*; centre) on the same vertical scale as the boxplot, and the same error bar chart using a more appropriate scale for the error bar plot (right).
:::
```{r LimeTreesBoxErrorbar, fig.cap="Boxplot (left) and error bar charts (centre; right) comparing the mean foliage biomass for small-leaved lime trees from three sources (C:\\ Coppice; N:\\ Natural; P:\\ Planted). The centre panel shows an error bar chart using the same vertical scale as the boxplot; the dashed horizontal lines are the limits of the error bar chart on the right. The right error bar chart uses a more appropriate scale on the vertical axis. The solid dots show the mean of the distributions.", fig.align="center", fig.width=7, fig.height=3.0, out.width='100%'}
par( mfrow = c(1, 3))
data(Lime)
lime.mns <- tapply(Lime$Foliage,
Lime$Origin,
"mean")
###
boxplot(Foliage ~ Origin,
data = Lime,
xlab = "Origin",
ylab = "Foliage biomass (kg)",
las = 1,
ylim = c(0, 14),
main = "Boxplot: Foliage biomass\nof individual trees",
names = c("C", "N", "P"),
col = "white")
points(1:3,
lime.mns,
pch = 19)
mns <- with(Lime,
tapply(Foliage,
Origin,
"mean") )
ses <- with(Lime,
tapply(Foliage,
Origin,
function(x){sd(x) / sqrt(length(x))}) )
ci.lo <- mns - ses*2
ci.hi <- mns + ses*2
###
plot( range( c( ci.lo, ci.hi) ) ~ c(1, 3),
type = "n",
ylim = c(0, 14),
xlim = c(0.5, 3.5),
pch = 19,
axes = FALSE,
main = "Error bars chart:\n mean foliage biomass",
xlab = "Origin",
ylab = "Foliage biomass (kg)",
sub = "(Error bars are 95%\ CIs)",
las = 1)
axis(side = 1,
at = 1:3,
labels = c("C", "N", "P"),
las = 1)
axis(side = 2,
las = 1)
box()
points( mns ~ c(1:3),
pch = 19)
arrows(1:3,
mns - 2 * ses,
1:3,
mns + 2 * ses,
length = 0.05,
angle = 90,
code = 3)
# Show the limits of the right-most graph
abline(h = 3.5 + 0.05,
col = "grey",
lty = 2)
abline(h = 1.0 - 0.05,
col = "grey",
lty = 2)
###
plot( range( c( ci.lo, ci.hi) ) ~ c(1, 3),
type = "n",
ylim = c(1, 3.5),
xlim = c(0.5, 3.5),
pch = 19,
axes = FALSE,
main = "Error bars chart:\n mean foliage biomass",
xlab = "Origin",
ylab = "Foliage biomass (kg)",
sub = "(Error bars are 95%\ CIs)",
las = 1)
axis(side = 1,
at = 1:3,
labels = c("C", "N", "P"),
las = 1)
axis(side = 2,
las = 1)
box()
points( mns ~ c(1:3),
pch = 19,
cex = 1.5)
arrows(1:3, mns - 2 * ses,
1:3, mns + 2 * ses,
length = 0.05,
angle = 90,
code = 3)
```
## Confidence intervals for $\mu_1 - \mu_2$ {#CIDiffBetweenMeans}
\index{Sampling distribution!comparing two means}\index{Confidence intervals!comparing two means|(}
Each sample will comprise different snakes, and give different SVLs.
The sample means for each group will differ from sample to sample, and the *difference* between the sample means will be different for each sample.
The *difference* between the sample means varies from sample to sample, and so has a sampling distribution and a standard error.
::: {.definition #DEFSamplingDistributionDiffMeans name="Sampling distribution for the difference between two sample means"}
The *sampling distribution of the difference between two sample means*\ $\bar{x}_1$ and\ $\bar{x}_2$ is (when the appropriate conditions are met; Sect.\ \@ref(ValidityTwoMeans)) described by:
* an approximate normal distribution,
* centred around a sampling mean whose value is\ ${\mu_1} - {\mu_2}$, the difference between the *population* means,
* with a standard deviation, called the standard error of the difference between the means, of $\displaystyle\text{s.e.}(\bar{x}_1 - \bar{x}_2)$.
The standard error for the difference between the means is found using
$$
\text{s.e.}(\bar{x}_1 - \bar{x}_2) = \sqrt{ \text{s.e.}(\bar{x}_1)^2 + \text{s.e.}(\bar{x}_2)^2},
$$
though this value will often be *given* (e.g., on computer output) rather than needing to be computed.
:::
For the garter-snakes data, the differences between the sample means will have:
* an approximate normal distribution,
* centred around the sampling mean whose value is $\mu_N - \mu_C$,
* with a standard deviation, called the *standard error* of the difference, of $\text{s.e.}(\bar{x}_P - \bar{x}_C) = 2.437$.
The standard error of the difference between the means was computed using
$$
\text{s.e.}(\bar{x}_N - \bar{x}_C)
= \sqrt{ \text{s.e.}(\bar{x}_N)^2 + \text{s.e.}(\bar{x}_C)^2}
= \sqrt{ 1.216^2 + 2.1112^2 } = 2.437,
$$
the same value shown in the *second row* of the software output (Fig.\ \@ref(fig:SnakesSummaryTestjamovi)).
The sampling distribution describes how the values of $\bar{x}_N - \bar{x}_C$ vary from sample to sample.
Then, finding a $95$%\ CI for the difference between the mean SVLs is similar to the process used in Chap.\ \@ref(OneMeanConfInterval), since the sampling distribution has an approximate normal distribution:
$$
\text{statistic} \pm \big(\text{multiplier} \times\text{s.e.}(\text{statistic})\big).
$$
When the statistic is $\bar{x}_P - \bar{x}_C$, the approximate $95$%\ CI is
$$
(\bar{x}_N - \bar{x}_C) \pm \big(2 \times \text{s.e.}(\bar{x}_N - \bar{x}_C)\big).
$$
So, in this case, the approximate $95$%\ CI is
$$
8.394 \pm (2 \times 2.437)
$$
or $8.394\pm 4.874$, after rounding appropriately.
We write:
> The difference between mean SVLs is\ $3.52\cms$, shorter for those living in a crayfish region (mean: $34.17\cms$; s.e.: $2.112$; $n = 35$) compared to those *not* living in a crayfish region (mean: $42.57\cms$; s.e.: $1.216$; $n = 41$), with an *approximate* $95$%\ CI for the difference between mean SVLs from $3.52$ to $13.27\cms$.
The plausible values for the difference between the two population means SVLs are between\ $3.52$ to $13.27\cms$ (shorter for those living in crayfish regions).
::: {.importantBox .important data-latex="{iconmonstr-warning-8-240.png}"}
Giving the CI alone is insufficient; the *direction* in which the differences were calculated must be given, so readers know which group had the higher mean.
:::
Output from software often shows *two* CIs for the difference between the two means (Fig.\ \@ref(fig:SnakesSummaryTestjamovi)).
*We will use the results from Welch's test (the second row)*,\index{Welch's test} as this row of output is more general, and makes fewer assumptions.
The information in the second row makes fewer assumptions, and is more widely applicable.
::: {.importantBox .important data-latex="{iconmonstr-warning-8-240.png}"}
Most software gives *two* confidence intervals: one assuming the standard deviations in the two groups are the same (Student's), and another *not* assuming the standard deviations in the two groups are the same (Welch's).
We will use the information that does *not* assume the standard deviations in the two groups are the same.
In the software output in Fig.\ \@ref(fig:SnakesSummaryTestjamovi), this is the second row of the top table (labelled 'Welch's\ $t$').
(The information in both rows are often similar anyway.)
:::
From the output, the $95$%\ CI for the difference is from\ $3.51$ to\ $13.28\cms$.
The *approximate* CI and the *exact* (from software) CIs are only slightly different, as the samples sizes are not too small.
(Recall: the $t$-multiplier of\ $2$ is an approximation, based on the $68$--$95$--$99.7$ rule.)
\index{Confidence intervals!comparing two means|)}
## Hypothesis tests for $\mu_1 - \mu_2$: $t$-test
\index{Hypothesis testing!difference between two means|(}
As always, the null hypothesis is the default 'no difference, no change, no relationship' position; any difference between the parameter and statistic is due to sampling variation (Sect.\ \@ref(AboutHypotheses)).
Hence, the null hypothesis is 'no difference' between the population mean\ SVL of the two groups:
* $H_0$: $\mu_N - \mu_C = 0$ (equivalent to $\mu_N = \mu_C$).
From the RQ, the alternative hypothesis is *two*-tailed:
* $H_1$: $\mu_N - \mu_C\ne 0$ (equivalent to $\mu_N \ne \mu_C$).
The alternative hypothesis proposes that any difference between the *sample* means is because a difference really exists between the *population means*.
The alternative hypothesis is two-tailed, based on the RQ.
The sample mean difference between the SVL in the two groups depends on which one of the many possible samples is randomly obtained, *even if* the difference between the means in the population is zero.
The difference between the sample means is $8.394\cms$, but this difference will vary from sample to sample; that is, *sampling variation* exists.
For the SVL\ data, the sampling distribution of $\bar{x}_N - \bar{x}_C$ can be described as (see Def.\ \@ref(def:DEFSamplingDistributionDiffMeans)):
* an approximate normal distribution,
* centred around the sampling mean whose value is ${\mu_{N}} - {\mu_{C}} = 0$, the difference between the population means (from $H_0$),
* with a standard deviation of $\text{s.e.}(\bar{x}_N - \bar{x}_C) = 2.4368$.
::: {.softwareBox .software data-latex="{iconmonstr-laptop-4-240.png}"}
Most software gives *two* hypothesis test results: one assuming the standard deviations in the two groups are the same, and another *not* assuming the standard deviations in the two groups are the same.
We will use the information that does *not* assume the standard deviations in the two groups are the same.
In the software output in Fig.\ \@ref(fig:SnakesSummaryTestjamovi), this is the second row of the bottom table (labelled 'Welch's\ $t$').
(The information in both rows are often similar anyway.)
:::
The observed difference between sample means, relative to what was expected, is found by computing the test statistic; in this case, a $t$-score.
The software output (Fig.\ \@ref(fig:SnakesSummaryTestjamovi)) gives the $t$-score, but the $t$-score can also be computed using the information in Table\ \@ref(tab:SnakesNumericalTest):\index{Test statistic!t@$t$-score}
\begin{align*}
t
&=
\frac{\text{sample statistic} - \text{mean of sampling distribution (from $H_0$)}}
{\text{standard deviation of sampling distribution}}\\[6pt]
&=
\frac{ (\bar{x}_P - \bar{x}_C) - (\mu_P - \mu_C)}
{\text{s.e.}(\bar{x}_P - \bar{x}_C)}
= \frac{8.39 - 0}{2.4368} = 3.44,
\end{align*}
as in the software output.
A $P$-value determines if the sample statistic is consistent with the assumption (i.e., $H_0$).
Since the $t$-score is large, the $P$-value will be small using the $68$--$95$--$99.7$ rule\index{68@$68$--$95$--$99.7$ rule} (and less than\ $0.003$).
This is confirmed by the software (Fig.\ \@ref(fig:SnakesSummaryTestjamovi)): the two-tailed $P$-value is\ $0.0011$.
A small $P$-value suggests the observations are *inconsistent* with the assumption of no difference (Table\ \@ref(tab:PvaluesInterpretation)), and the difference between the sample means could *not* be reasonably explained by sampling variation, if $\mu_N - \mu_C = 0$.
`r if (knitr::is_latex_output()) '<!--'`
`r if (knitr::is_html_output()){
'Click on the pins in the following image, and describe what the software output tells us.'
}`
<iframe src="https://learningapps.org/watch?v=pd365s6bj22" style="border:0px;width:100%;height:500px" allowfullscreen="true" webkitallowfullscreen="true" mozallowfullscreen="true"></iframe>
`r if (knitr::is_latex_output()) '-->'`
In conclusion, write:
> Strong evidence exists in the sample (two independent samples $t = 3.445$; two-tailed $P = 0.0011$) that the population mean SVL is different for female snakes living in crayfish regions (mean:\ $34.17\ cm$; $n = 35$) and non-crayfish regions (mean:\ $42.57\cms$; $n = 41$;\ $95$%\ CI for the difference: $3.51$ to\ $13.28\cms$ longer for those in non-crayfish regions).
The conclusion contains an *answer to the RQ*, the *evidence* leading to this conclusion ($t = 3.44$; two-tailed $P = 0.0011$), and *sample summary statistics*, including a CI.
\index{Hypothesis testing!difference between two means|)}
## Statistical validity conditions{#ValidityTwoMeans}
\index{Statistical validity (for inference)!compare two means}
As usual, these results apply under certain conditions.
Statistical validity can be assessed using these criteria:
* When *both* samples have $n \ge 25$, the test is statistically valid.
(If the distribution of a sample is highly skewed, the sample size for that sample may need to be larger.)
* When one or both groups have $25$\ or fewer observations, the test is statistically valid only if the *populations* corresponding to both comparison groups have an approximate normal distribution.
The sample size of\ $25$ is a rough figure; some books give other values (such as\ $30$).
This condition ensures that the *distribution of the difference between sample means has an approximate normal distribution* (so that, for example, the $68$--$95$--$99.7$ rule can be used).
The histograms of the *sample data* can be used to determine if normality of the *populations* seems reasonable.
The units of analysis are also assumed to be *independent* (e.g., from a simple random sample).
If the statistical validity conditions are not met, other similar options include using a Mann-Whitney test\index{Mann-Whitney test} [@conover2003practical] or using resampling methods [@efron2021computer].
::: {.example #StatisticalValidityReactionHT name="Statistical validity"}
For the garter-snakes data, both samples sizes exceed\ $25$ ($41$ and\ $35$), so the test is statistically valid.
The data in each group do not need to be normally distributed, since both sample sizes are larger than\ $25$, and the data are not severely skewed (Fig.\ \@ref(fig:SnakesErrorbar), left panel).
:::
## Tests for comparing more than two means: ANOVA {#CompareManyMeans}
Often, more than two means need to be compared.
This requires a different method, called *analysis of variance*\index{Analysis of variance} (or <span style="font-variant:small-caps;">anova</span>).
The details are beyond the scope of this book.
In this section, a very brief overview of using a one-way <span style="font-variant:small-caps;">anova</span> is given, using an example.
Importantly, this example shows that the basic principles of hypothesis testing from Chap.\ \@ref(MoreAboutTests) still apply.
::: {.example #ANOVA name="ANOVA"}
[*Dataset*: `BMI`]
@johnson2021association collected data from hospital outpatients at an Irish hospital (Table\ \@ref(tab:BMIsummary)).
One research question involves comparing the mean number of days per week that patients exercise for more than $30\mins$ (say, $\mu$) according to their smoking status: daily\ ($D$), occasionally\ ($O$) or not at all\ ($N$).
An error bar chart can be used to display the three groups (Fig.\ \@ref(fig:BMIerrorbar)).\index{Graphs!error bar charts}
As per Sect.\ \@ref(AboutHypotheses), the null hypothesis is 'no difference' between the population means:
$$
\text{$H_0$:}\ \mu_D = \mu_O = \mu_N.
$$
The alternative hypothesis is that the three means are not all equal.
This hypothesis encompasses many possibilities: for example, that the three means are *all* different from each other, or that the first is different from the other two (which are the same).
Because the alternative hypothesis encompasses many possibilities, it is difficult to write using symbols, so we write:
$$
\text{$H_1$:}\ \text{Not all means are equal.}
$$
::: {.importantBox .important data-latex="{iconmonstr-warning-8-240.png}"}
For comparing more than two mean, the alternative hypothesis *is always two-tailed*.
:::
Performing an <span style="font-variant:small-caps;">anova</span> using software (Fig.\ \@ref(fig:BMIjamovi)) gives $P = 0.00007$.
(The *test statistic* here is an $F$-score;\index{Test statistic!F@$F$-score} we don't discuss these further.)
The $P$-value in this context means the same as usual (Sect.\ \@ref(AboutPvalues)): there is very strong evidence to support the alternative hypothesis (that the three means are *not* all equal).
While we know the means are not all the same, we do not know *which* group means are different from which other group means.
One option might be to compare all possible combinations of two groups (i.e., the means of groups\ $D$ and\ $O$; the means of groups\ $D$ and\ $N$; the means of groups\ $O$ and\ $N$) using three separate two-sample $t$-tests.
However, this approach increases the probability of declaring a false positive (i.e., of making a Type\ I error; Sect.\ \@ref(TypeErrors)):\index{Type\ I error} *incorrectly* declaring a difference between two sets of means.
The correct approach requires methods beyond this book.
:::
```{r BMIsummary}
data(BMI)
BMItab <- array(dim = c(3, 4))
BMItab[, 1] <- tapply(BMI$exercise,
list(BMI$smoke),
"mean")
BMItab[, 2] <- tapply(BMI$exercise,
list(BMI$smoke),
"sd")
BMItab[, 3] <- tapply(BMI$exercise,
list(BMI$smoke),
"findStdError")
BMItab[, 4] <- tapply(BMI$exercise,
list(BMI$smoke),
"realLength")
rownames(BMItab) <- c("Smokes daily",
"Smokes occasionally",
"Does not smoke")
if( knitr::is_latex_output() ) {
kable( pad(BMItab,
surroundMaths = TRUE,
targetLength = c(4, 4, 5, 2),
decDigits = c(2, 2, 3, 0)),
format = "latex",
booktabs = TRUE,
longtable = FALSE,
escape = FALSE,
col.names = c("Mean",
"Standard deviation",
"Standard error",
"$n$"),
align = "c",
caption = "Number of days per week where patients do more than $30$ mins of exercise.") %>%
column_spec(1, bold = TRUE) %>%
row_spec(0, bold = TRUE) %>%
kable_styling(font_size = 8)
}
if( knitr::is_html_output() ) {
kable(pad(BMItab,
surroundMaths = TRUE,
targetLength = c(4, 4, 5, 2),
decDigits = c(2, 2, 3, 0)),
format = "html",
booktabs = TRUE,
longtable = FALSE,
align = "c",
caption = "Number of days per week where patients do more than $30$ mins of exercise.")
}
```
```{r BMIerrorbar, fig.align="center", fig.cap="The error bar chart for comparing the number of days per week on which people do more than $30\\mins$\\ of exercise, for different smoking groups.", fig.width=5, out.width='60%', fig.height=3.25}
mns <- BMItab[, 1]
ses <- BMItab[, 3]
ci.lo <- mns - ses*2
ci.hi <- mns + ses*2
par( mar = c(5, 5, 4, 2) + 0.1 )
plot( range( c( ci.lo, ci.hi) ) ~ c(1, 3),
type = "n",
ylim = c(0, 4),
xlim = c(0.5, 3.5),
pch = 19,
axes = FALSE,
main = "Error bars chart: average number of\ndays per week of 30 mins exercise",
xlab = "Smoking status",
ylab = "Number of days\nper week",
sub = "(Error bars are 95%\ CIs)",
las = 1)
axis(side = 1,
at = 1:3,
labels = c("Daily", "Occasionally", "Not all"),
las = 1)
axis(side = 2,
las = 1)
box()
points( mns ~ c(1:3),
pch = 19)
arrows(1:3,
mns - 2 * ses,
1:3,
mns + 2 * ses,
length = 0.05,
angle = 90,
code = 3)
```
<span style="font-variant:small-caps;">Anova</span> is a general tool that can be extended beyond just comparing more than two means, and used in many and varied context for the analysis of data.
```{r BMIjamovi, echo=FALSE, fig.cap="Software output for testing hypotheses for the BMI data.", fig.align="center", out.width='55%'}
knitr::include_graphics( "jamovi/BMI/ANOVA.png")
```
## Example: speed signage {#SpeedSignage}
<div style="float:right; width: 222x; border: 1px; padding:10px">
<img src="Illustrations/pexels-pixabay-315939.jpg" width="200px"/>
</div>
To reduce vehicle speeds on freeway exit ramps, @ma2019impacts studied adding additional signage.
At one site (Ningxuan Freeway), speeds were recorded for $38$\ vehicles *before* the extra signage was added, and then for $41$ different vehicles *after* the extra signage was added.
The researchers are hoping that the addition of extra signage will *reduce* the mean speed of the vehicles.
The RQ is:
> At this freeway exit, does the mean vehicle speed *reduce* after extra signage is added?
The data are *not* paired: different vehicles are measured before\ ($B$) and after\ ($A$) the extra signage is added.
Define $\mu$ as the mean speed (in km.h^$-1$^) on the exit ramp, and the parameter as $\mu_B - \mu_A$, the *reduction* in the mean speed.
The data can be summarised (Table\ \@ref(tab:SignageSummary)) using the software output (Fig.\ \@ref(fig:SpeedjamoviCI2)), where
$$
\text{s.e.}(\bar{x}_B - \bar{x}_A)
= \sqrt{ \text{s.e.}(\bar{x}_B)^2 + \text{s.e.}(\bar{x}_A)^2}
= \sqrt{ 2.140^2 + 2.051^2} = 2.965,
$$
as in the output table (Row\ 2).
A boxplot of the data is shown in Fig.\ \@ref(fig:SignageErrorBar) (left panel), and an error bar chart in Fig.\ \@ref(fig:SignageErrorBar) (right panel).
```{r SpeedjamoviCI2, fig.cap="Software output for the speed data.", fig.align="center", out.width="100%"}
knitr::include_graphics("jamovi/Speed/Speed-ALL.png")
```
```{r SignageSummary}
data(Speed)
Speed$When <- factor(Speed$When,
levels = c("Before", "After"),
ordered = TRUE)
Speed2 <- data.frame( Before = Speed$Speed[ Speed$When == "Before" ][1:10],
After = Speed$Speed[ Speed$When == "After" ][1:10])
SignageSummary <- array(dim = c(3, 5))
rownames(SignageSummary) <- c("Before",
"After",
"Speed reduction")
colnames(SignageSummary) <- c("Mean",
"Median",
"Standard deviation",
"Standard error",
"Sample size")
Speed.n <- tapply(Speed$Speed,
list(Speed$When),
"length")
Speed.sd <- tapply(Speed$Speed,
list(Speed$When),
"sd")
SignageSummary[1:2, 1] <- round( tapply(Speed$Speed,
list(Speed$When),
"mean"),
2)
SignageSummary[1:2, 2] <- tapply(Speed$Speed,
list(Speed$When),
"median")
SignageSummary[1:2, 3] <- round( Speed.sd, 3)
SignageSummary[1:2, 4] <- tapply(Speed$Speed,
list(Speed$When),
"findStdError")
SignageSummary[1:2, 5] <- Speed.n
SignageSummary[3, 1] <- SignageSummary[1, 1] - SignageSummary[2, 1]
sdB <- round( Speed.sd[1], 3)
sdA <- round( Speed.sd[2], 3)
SignageSummary[3, 4] <- round( sqrt(SignageSummary[1, 4]^2 + SignageSummary[2, 4]^2 ), 3)
if( knitr::is_latex_output() ) {
kable(pad(SignageSummary,
surroundMaths = TRUE,
targetLength = c(5, 4, 6, 4, 2),
decDigits = c(2, 1, 3, 3, 0)),
format = "latex",
longtable = FALSE,
booktabs = TRUE,
escape = FALSE,
align = "c",
col.names = c("Mean",
"Median",
"Standard deviation",
"Standard error",
"Sample size"),
caption = "The signage data summary (in km.h$^{-1}$)."
) %>%
row_spec(0, bold = TRUE) %>%
row_spec(3, italic = TRUE) %>%
row_spec(2, hline_after = TRUE) %>%
kable_styling(font_size = 8)
}
if( knitr::is_html_output() ) {
kable(pad(SignageSummary,
surroundMaths = TRUE,
targetLength = c(5, 4, 6, 4, 2),
decDigits = c(2, 1, 3, 1, 0)),
format = "html",
longtable = FALSE,
booktabs = TRUE,
align = "c",
col.names = c("Mean",
"Median",
"Standard deviation",
"Standard error",
"Sample size"),
caption = "The signage data summary (in km.h$^{-1}$).") %>%
kable_styling(full_width = FALSE)
}
```
```{r SignageErrorBar, fig.cap="Boxplot (left) and error bar chart (right) showing the mean speed before and after the addition of extra signage, and the $95$\\%\ CIs. The vertical scales on the two graphs are different.", fig.align="center", out.width='90%',fig.width=8.0, fig.height=3.25}
SignageSummary <- array(dim = c(3, 5))
rownames(SignageSummary) <- c("Before",
"After",
"Speed reduction")
colnames(SignageSummary) <- c("Mean",
"Median",
"Standard deviation",
"IQR",
"Sample size")
Speed.n <- tapply(Speed$Speed,
list(Speed$When),
"length")
Speed.sd <- tapply(Speed$Speed,
list(Speed$When),
"sd")
SignageSummary[1:2, 1] <- round( tapply(Speed$Speed,
list(Speed$When),
"mean"),
2)
SignageSummary[1:2, 2] <- tapply(Speed$Speed,
list(Speed$When),
"median")
SignageSummary[1:2, 3] <- round( Speed.sd, 2)
SignageSummary[1:2, 4] <- tapply(Speed$Speed,
list(Speed$When),
"IQR")
SignageSummary[1:2, 5] <- Speed.n
SignageSummary[3, 1] <- SignageSummary[1, 1] -
SignageSummary[2, 1]
sdB <- round( Speed.sd[1], 2)
sdA <- round( Speed.sd[2], 2)
SignageSummary[3, 4] <- NA
SignageSummary[3, 2] <- NA
###
par( mfrow = c(1, 2),
mar = c(5.1, 5.1, 4.1, 2.1) )
boxplot( Speed ~ When,
data = Speed,
# ylab = "Vehicle speed (km/h)",
ylab = expression(Speeds~"("*km*"."*h^{-1}*")"),
xlab = "When measured",
main = "Speed before and after\nextra signage added",
ylim = c(60, 130),
las = 1,
col = plot.colour)
bit <- 0.05
mns <- SignageSummary[1:2, 1]
se <- SignageSummary[1:2, 3] / sqrt(SignageSummary[1:2, 5] )
ci.lo <- mns - 2 * se
ci.hi <- mns + 2 * se
plot( 1:2,
mns,
pch = 19,
ylim = c(85, 105 ),
xlim = c(0.4, 2.6),
axes = FALSE,
xlab = "",
# ylab = "Vehicle speed (km/h)",
ylab = expression(Speeds~"("*km*"."*h^{-1}*")"),
main = "Mean speed before and after\nextra signage added",
sub = "(Error bars are the 95%\ CI)",
type = "p")
axis(side = 1,
at = 1:2,
las = 1,
labels = c("Before",
"After"))
axis(side = 2,
las = 1)
box()