frequency-and-count-data.html

<!DOCTYPE html>
<html lang="en" xml:lang="en">
<head>

  <meta charset="utf-8" />
  <meta http-equiv="X-UA-Compatible" content="IE=edge" />
  <title>Chapter 28 Frequency and count data | Statistical Techniques for Biological and Environmental Sciences</title>
  <meta name="description" content="This is a lab book for the University of Stirling second year undergraduate Biological and Environmental Sciences statistics module." />
  <meta name="generator" content="bookdown 0.27 and GitBook 2.6.7" />

  <meta property="og:title" content="Chapter 28 Frequency and count data | Statistical Techniques for Biological and Environmental Sciences" />
  <meta property="og:type" content="book" />
  
  <meta property="og:description" content="This is a lab book for the University of Stirling second year undergraduate Biological and Environmental Sciences statistics module." />
  <meta name="github-repo" content="bradduthie/statistical_techniques" />

  <meta name="twitter:card" content="summary" />
  <meta name="twitter:title" content="Chapter 28 Frequency and count data | Statistical Techniques for Biological and Environmental Sciences" />
  
  <meta name="twitter:description" content="This is a lab book for the University of Stirling second year undergraduate Biological and Environmental Sciences statistics module." />
  

<meta name="author" content="Brad Duthie" />


<meta name="date" content="2023-03-18" />

  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <meta name="apple-mobile-web-app-capable" content="yes" />
  <meta name="apple-mobile-web-app-status-bar-style" content="black" />
  
  
<link rel="prev" href="Week9.html"/>
<link rel="next" href="correlation.html"/>
<script src="libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/fuse.js@6.4.6/dist/fuse.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />


<link href="libs/anchor-sections-1.1.0/anchor-sections.css" rel="stylesheet" />
<link href="libs/anchor-sections-1.1.0/anchor-sections-hash.css" rel="stylesheet" />
<script src="libs/anchor-sections-1.1.0/anchor-sections.js"></script>


<style type="text/css">
/* Layout of highlighted source-code listings (elements carrying the
   `sourceCode` class). Each direct child span of the code element is
   laid out as an inline block. */
pre > code.sourceCode {
  white-space: pre;
  position: relative;
}
pre > code.sourceCode > span {
  display: inline-block;
  line-height: 1.25;
}
/* Give empty line spans an explicit height. */
pre > code.sourceCode > span:empty {
  height: 1.2em;
}
.sourceCode {
  overflow: visible;
}
code.sourceCode > span {
  color: inherit;
  text-decoration: inherit;
}
pre.sourceCode {
  margin: 0;
}

/* On screen the wrapping div gets overflow: auto. */
@media screen {
  div.sourceCode {
    overflow: auto;
  }
}

/* In print, wrap long lines and hang-indent the continuation. */
@media print {
  pre > code.sourceCode {
    white-space: pre-wrap;
  }
  pre > code.sourceCode > span {
    text-indent: -5em;
    padding-left: 5em;
  }
}

/* Numbered listings: a CSS counter is reset on the code element,
   incremented per line span, and printed before the first link of each
   line. */
pre.numberSource code {
  counter-reset: source-line 0;
}
pre.numberSource code > span {
  position: relative;
  left: -4em;
  counter-increment: source-line;
}
pre.numberSource code > span > a:first-child::before {
  content: counter(source-line);
  position: relative;
  left: -1em;
  text-align: right;
  vertical-align: baseline;
  border: none;
  display: inline-block;
  /* Keep the generated numbers out of text selection. */
  -webkit-touch-callout: none;
  -webkit-user-select: none;
  -khtml-user-select: none;
  -moz-user-select: none;
  -ms-user-select: none;
  user-select: none;
  padding: 0 4px;
  width: 4em;
}
pre.numberSource {
  margin-left: 3em;
  padding-left: 4px;
}

/* Empty rule: declares nothing; the selector is kept in the stylesheet. */
div.sourceCode {
}

@media screen {
  pre > code.sourceCode > span > a:first-child::before {
    text-decoration: underline;
  }
}

/* Token styles, grouped by effect.
   Bold:      al = Alert, cf = ControlFlow, er = Error, kw = Keyword,
              pp = Preprocessor */
code span.al,
code span.cf,
code span.er,
code span.kw,
code span.pp {
  font-weight: bold;
}
/* Italic:    an = Annotation, co = Comment, cv = CommentVar,
              do = Documentation, in = Information, wa = Warning */
code span.an,
code span.co,
code span.cv,
code span.do,
code span.in,
code span.wa {
  font-style: italic;
}
/* Underline: dt = DataType */
code span.dt {
  text-decoration: underline;
}
</style>

<style type="text/css">
/* Bibliography layout. Used with Pandoc 2.11+ new --citeproc when CSL is used. */
div.csl-bib-body {}
/* Each entry clears any preceding float. */
div.csl-entry { clear: both; }
/* Hanging indent: shift the entry right, pull its first line back left. */
.hanging div.csl-entry { margin-left: 2em; text-indent: -2em; }
/* Left-floated margin column with the inline content offset beside it. */
div.csl-left-margin { min-width: 2em; float: left; }
div.csl-right-inline { margin-left: 2em; padding-left: 1em; }
div.csl-indent { margin-left: 2em; }
</style>

<link rel="stylesheet" href="style.css" type="text/css" />
</head>

<body>


  <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">

    <div class="book-summary">
      <nav role="navigation">

<ul class="summary">
<li><a href="./">Statistical Techniques</a></li>

<li class="divider"></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i>Preface</a>
<ul>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#why-this-module-is-important"><i class="fa fa-check"></i>Why this module is important</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#ILOs"><i class="fa fa-check"></i>Intended learning outcomes (ILOs)</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#accessibility"><i class="fa fa-check"></i>Accessibility</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#teaching_overview"><i class="fa fa-check"></i>Teaching overview</a>
<ul>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#book_chapters"><i class="fa fa-check"></i>Book chapters</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#additional_readings"><i class="fa fa-check"></i>Additional readings</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#Canvas"><i class="fa fa-check"></i>Canvas</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#assessment-overview"><i class="fa fa-check"></i>Assessment overview</a>
<ul>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#tests"><i class="fa fa-check"></i>Tests</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#exams"><i class="fa fa-check"></i>Exams</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#feedback"><i class="fa fa-check"></i>Feedback</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#extenuating_circumstances"><i class="fa fa-check"></i>Extenuating circumstances</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#practicals"><i class="fa fa-check"></i>Practicals</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#help"><i class="fa fa-check"></i>Optional help hours</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#jamovi"><i class="fa fa-check"></i>Jamovi statistical software</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#timetable"><i class="fa fa-check"></i>Timetable</a></li>
<li class="chapter" data-level="" data-path="index.html"><a href="index.html#license"><i class="fa fa-check"></i>License</a></li>
</ul></li>
<li class="part"><span><b>I Background mathematics and data organisation</b></span></li>
<li class="chapter" data-level="" data-path="Week1.html"><a href="Week1.html"><i class="fa fa-check"></i>Week 1 Overview</a></li>
<li class="chapter" data-level="1" data-path="Chapter_1.html"><a href="Chapter_1.html"><i class="fa fa-check"></i><b>1</b> Background mathematics</a>
<ul>
<li class="chapter" data-level="1.1" data-path="Chapter_1.html"><a href="Chapter_1.html#numbers-and-operations"><i class="fa fa-check"></i><b>1.1</b> Numbers and operations</a></li>
<li class="chapter" data-level="1.2" data-path="Chapter_1.html"><a href="Chapter_1.html#logarithms"><i class="fa fa-check"></i><b>1.2</b> Logarithms</a></li>
<li class="chapter" data-level="1.3" data-path="Chapter_1.html"><a href="Chapter_1.html#order-of-operations"><i class="fa fa-check"></i><b>1.3</b> Order of operations</a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="Chapter_2.html"><a href="Chapter_2.html"><i class="fa fa-check"></i><b>2</b> Data organisation</a>
<ul>
<li class="chapter" data-level="2.1" data-path="Chapter_2.html"><a href="Chapter_2.html#tidy-data"><i class="fa fa-check"></i><b>2.1</b> Tidy data</a></li>
<li class="chapter" data-level="2.2" data-path="Chapter_2.html"><a href="Chapter_2.html#data-files"><i class="fa fa-check"></i><b>2.2</b> Data files</a></li>
<li class="chapter" data-level="2.3" data-path="Chapter_2.html"><a href="Chapter_2.html#managing-data-files"><i class="fa fa-check"></i><b>2.3</b> Managing data files</a></li>
</ul></li>
<li class="chapter" data-level="3" data-path="Chapter_3.html"><a href="Chapter_3.html"><i class="fa fa-check"></i><b>3</b> Practical: Preparing data</a>
<ul>
<li class="chapter" data-level="3.1" data-path="Chapter_3.html"><a href="Chapter_3.html#exercise-1-transferring-data-to-a-spreadsheet"><i class="fa fa-check"></i><b>3.1</b> Exercise 1: Transferring data to a spreadsheet</a></li>
<li class="chapter" data-level="3.2" data-path="Chapter_3.html"><a href="Chapter_3.html#exercise-2-making-spreadsheet-data-tidy"><i class="fa fa-check"></i><b>3.2</b> Exercise 2: Making spreadsheet data tidy</a></li>
<li class="chapter" data-level="3.3" data-path="Chapter_3.html"><a href="Chapter_3.html#exercise-3-making-data-tidy-again"><i class="fa fa-check"></i><b>3.3</b> Exercise 3: Making data tidy again</a></li>
<li class="chapter" data-level="3.4" data-path="Chapter_3.html"><a href="Chapter_3.html#exercise-4-tidy-data-and-spreadsheet-calculations"><i class="fa fa-check"></i><b>3.4</b> Exercise 4: Tidy data and spreadsheet calculations</a></li>
<li class="chapter" data-level="3.5" data-path="Chapter_3.html"><a href="Chapter_3.html#summary"><i class="fa fa-check"></i><b>3.5</b> Summary</a></li>
</ul></li>
<li class="part"><span><b>II Statistical concepts</b></span></li>
<li class="chapter" data-level="" data-path="Week2.html"><a href="Week2.html"><i class="fa fa-check"></i>Week 2 Overview</a></li>
<li class="chapter" data-level="4" data-path="Chapter_4.html"><a href="Chapter_4.html"><i class="fa fa-check"></i><b>4</b> Populations and samples</a></li>
<li class="chapter" data-level="5" data-path="Chapter_5.html"><a href="Chapter_5.html"><i class="fa fa-check"></i><b>5</b> Types of variables</a></li>
<li class="chapter" data-level="6" data-path="Chapter_6.html"><a href="Chapter_6.html"><i class="fa fa-check"></i><b>6</b> Accuracy, precision, and units</a>
<ul>
<li class="chapter" data-level="6.1" data-path="Chapter_6.html"><a href="Chapter_6.html#accuracy"><i class="fa fa-check"></i><b>6.1</b> Accuracy</a></li>
<li class="chapter" data-level="6.2" data-path="Chapter_6.html"><a href="Chapter_6.html#precision"><i class="fa fa-check"></i><b>6.2</b> Precision</a></li>
<li class="chapter" data-level="6.3" data-path="Chapter_6.html"><a href="Chapter_6.html#systems-of-units"><i class="fa fa-check"></i><b>6.3</b> Systems of units</a></li>
<li class="chapter" data-level="6.4" data-path="Chapter_6.html"><a href="Chapter_6.html#other-examples-of-units"><i class="fa fa-check"></i><b>6.4</b> Other examples of units</a>
<ul>
<li class="chapter" data-level="6.4.1" data-path="Chapter_6.html"><a href="Chapter_6.html#units-of-density"><i class="fa fa-check"></i><b>6.4.1</b> Units of density</a></li>
<li class="chapter" data-level="6.4.2" data-path="Chapter_6.html"><a href="Chapter_6.html#mass-of-metal-discharged-from-a-catchment"><i class="fa fa-check"></i><b>6.4.2</b> Mass of metal discharged from a catchment</a></li>
<li class="chapter" data-level="6.4.3" data-path="Chapter_6.html"><a href="Chapter_6.html#soil-carbon-inventories"><i class="fa fa-check"></i><b>6.4.3</b> Soil carbon inventories</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="7" data-path="Chapter_7.html"><a href="Chapter_7.html"><i class="fa fa-check"></i><b>7</b> Uncertainty propagation</a>
<ul>
<li class="chapter" data-level="7.1" data-path="Chapter_7.html"><a href="Chapter_7.html#adding-or-subtracting-errors"><i class="fa fa-check"></i><b>7.1</b> Adding or subtracting errors</a></li>
<li class="chapter" data-level="7.2" data-path="Chapter_7.html"><a href="Chapter_7.html#multiplying-or-dividing-errors"><i class="fa fa-check"></i><b>7.2</b> Multiplying or dividing errors</a></li>
<li class="chapter" data-level="7.3" data-path="Chapter_7.html"><a href="Chapter_7.html#applying-formulas-for-combining-errors"><i class="fa fa-check"></i><b>7.3</b> Applying formulas for combining errors</a></li>
</ul></li>
<li class="chapter" data-level="8" data-path="Chapter_8.html"><a href="Chapter_8.html"><i class="fa fa-check"></i><b>8</b> Practical. Introduction to Jamovi</a>
<ul>
<li class="chapter" data-level="8.1" data-path="Chapter_8.html"><a href="Chapter_8.html#summary_statistics_02"><i class="fa fa-check"></i><b>8.1</b> Exercise for summary statistics</a></li>
<li class="chapter" data-level="8.2" data-path="Chapter_8.html"><a href="Chapter_8.html#transforming_variables_02"><i class="fa fa-check"></i><b>8.2</b> Exercise on transforming variables</a></li>
<li class="chapter" data-level="8.3" data-path="Chapter_8.html"><a href="Chapter_8.html#computing_variables_02"><i class="fa fa-check"></i><b>8.3</b> Exercise on computing variables</a></li>
<li class="chapter" data-level="8.4" data-path="Chapter_8.html"><a href="Chapter_8.html#summary-1"><i class="fa fa-check"></i><b>8.4</b> Summary</a></li>
</ul></li>
<li class="part"><span><b>III Summary statistics</b></span></li>
<li class="chapter" data-level="" data-path="Week3.html"><a href="Week3.html"><i class="fa fa-check"></i>Week 3 Overview</a></li>
<li class="chapter" data-level="9" data-path="Chapter_9.html"><a href="Chapter_9.html"><i class="fa fa-check"></i><b>9</b> Decimal places, significant figures, and rounding</a>
<ul>
<li class="chapter" data-level="9.1" data-path="Chapter_9.html"><a href="Chapter_9.html#decimal-places-and-significant-figures"><i class="fa fa-check"></i><b>9.1</b> Decimal places and significant figures</a></li>
<li class="chapter" data-level="9.2" data-path="Chapter_9.html"><a href="Chapter_9.html#rounding"><i class="fa fa-check"></i><b>9.2</b> Rounding</a></li>
</ul></li>
<li class="chapter" data-level="10" data-path="Chapter_10.html"><a href="Chapter_10.html"><i class="fa fa-check"></i><b>10</b> Graphs</a>
<ul>
<li class="chapter" data-level="10.1" data-path="Chapter_10.html"><a href="Chapter_10.html#histograms"><i class="fa fa-check"></i><b>10.1</b> Histograms</a></li>
<li class="chapter" data-level="10.2" data-path="Chapter_10.html"><a href="Chapter_10.html#barplots-and-pie-charts"><i class="fa fa-check"></i><b>10.2</b> Barplots and pie charts</a></li>
<li class="chapter" data-level="10.3" data-path="Chapter_10.html"><a href="Chapter_10.html#box-whisker-plots"><i class="fa fa-check"></i><b>10.3</b> Box-whisker plots</a></li>
</ul></li>
<li class="chapter" data-level="11" data-path="Chapter_11.html"><a href="Chapter_11.html"><i class="fa fa-check"></i><b>11</b> Measures of central tendency</a>
<ul>
<li class="chapter" data-level="11.1" data-path="Chapter_11.html"><a href="Chapter_11.html#the-mean"><i class="fa fa-check"></i><b>11.1</b> The mean</a></li>
<li class="chapter" data-level="11.2" data-path="Chapter_11.html"><a href="Chapter_11.html#the-mode"><i class="fa fa-check"></i><b>11.2</b> The mode</a></li>
<li class="chapter" data-level="11.3" data-path="Chapter_11.html"><a href="Chapter_11.html#the-median-and-quantiles"><i class="fa fa-check"></i><b>11.3</b> The median and quantiles</a></li>
</ul></li>
<li class="chapter" data-level="12" data-path="Chapter_12.html"><a href="Chapter_12.html"><i class="fa fa-check"></i><b>12</b> Measures of spread</a>
<ul>
<li class="chapter" data-level="12.1" data-path="Chapter_12.html"><a href="Chapter_12.html#the-range"><i class="fa fa-check"></i><b>12.1</b> The range</a></li>
<li class="chapter" data-level="12.2" data-path="Chapter_12.html"><a href="Chapter_12.html#the-inter-quartile-range"><i class="fa fa-check"></i><b>12.2</b> The inter-quartile range</a></li>
<li class="chapter" data-level="12.3" data-path="Chapter_12.html"><a href="Chapter_12.html#the-variance"><i class="fa fa-check"></i><b>12.3</b> The variance</a></li>
<li class="chapter" data-level="12.4" data-path="Chapter_12.html"><a href="Chapter_12.html#the-standard-deviation"><i class="fa fa-check"></i><b>12.4</b> The standard deviation</a></li>
<li class="chapter" data-level="12.5" data-path="Chapter_12.html"><a href="Chapter_12.html#the-coefficient-of-variation"><i class="fa fa-check"></i><b>12.5</b> The coefficient of variation</a></li>
<li class="chapter" data-level="12.6" data-path="Chapter_12.html"><a href="Chapter_12.html#the-standard-error"><i class="fa fa-check"></i><b>12.6</b> The standard error</a></li>
</ul></li>
<li class="chapter" data-level="13" data-path="Chapter_13.html"><a href="Chapter_13.html"><i class="fa fa-check"></i><b>13</b> <em>Practical</em>. Plotting and statistical summaries in Jamovi</a>
<ul>
<li class="chapter" data-level="13.1" data-path="Chapter_13.html"><a href="Chapter_13.html#reorganise-the-dataset-into-a-tidy-format"><i class="fa fa-check"></i><b>13.1</b> Reorganise the dataset into a tidy format</a></li>
<li class="chapter" data-level="13.2" data-path="Chapter_13.html"><a href="Chapter_13.html#histograms-and-box-whisker-plots"><i class="fa fa-check"></i><b>13.2</b> Histograms and box-whisker plots</a></li>
<li class="chapter" data-level="13.3" data-path="Chapter_13.html"><a href="Chapter_13.html#calculate-summary-statistics"><i class="fa fa-check"></i><b>13.3</b> Calculate summary statistics</a></li>
<li class="chapter" data-level="13.4" data-path="Chapter_13.html"><a href="Chapter_13.html#reporting-decimals-and-significant-figures"><i class="fa fa-check"></i><b>13.4</b> Reporting decimals and significant figures</a></li>
<li class="chapter" data-level="13.5" data-path="Chapter_13.html"><a href="Chapter_13.html#comparing-across-sites"><i class="fa fa-check"></i><b>13.5</b> Comparing across sites</a></li>
</ul></li>
<li class="part"><span><b>IV Probability models and the Central Limit Theorem</b></span></li>
<li class="chapter" data-level="" data-path="Week4.html"><a href="Week4.html"><i class="fa fa-check"></i>Week 4 Overview</a></li>
<li class="chapter" data-level="14" data-path="Chapter_14.html"><a href="Chapter_14.html"><i class="fa fa-check"></i><b>14</b> Introduction to probability models</a>
<ul>
<li class="chapter" data-level="14.1" data-path="Chapter_14.html"><a href="Chapter_14.html#an-instructive-example"><i class="fa fa-check"></i><b>14.1</b> An instructive example</a></li>
<li class="chapter" data-level="14.2" data-path="Chapter_14.html"><a href="Chapter_14.html#biological-applications"><i class="fa fa-check"></i><b>14.2</b> Biological applications</a></li>
<li class="chapter" data-level="14.3" data-path="Chapter_14.html"><a href="Chapter_14.html#sampling-with-and-without-replacement"><i class="fa fa-check"></i><b>14.3</b> Sampling with and without replacement</a></li>
<li class="chapter" data-level="14.4" data-path="Chapter_14.html"><a href="Chapter_14.html#probability-distributions"><i class="fa fa-check"></i><b>14.4</b> Probability distributions</a>
<ul>
<li class="chapter" data-level="14.4.1" data-path="Chapter_14.html"><a href="Chapter_14.html#binomial-distribution"><i class="fa fa-check"></i><b>14.4.1</b> Binomial distribution</a></li>
<li class="chapter" data-level="14.4.2" data-path="Chapter_14.html"><a href="Chapter_14.html#poisson-distribution"><i class="fa fa-check"></i><b>14.4.2</b> Poisson distribution</a></li>
<li class="chapter" data-level="14.4.3" data-path="Chapter_14.html"><a href="Chapter_14.html#uniform-distribution"><i class="fa fa-check"></i><b>14.4.3</b> Uniform distribution</a></li>
<li class="chapter" data-level="14.4.4" data-path="Chapter_14.html"><a href="Chapter_14.html#normal-distribution"><i class="fa fa-check"></i><b>14.4.4</b> Normal distribution</a></li>
</ul></li>
<li class="chapter" data-level="14.5" data-path="Chapter_14.html"><a href="Chapter_14.html#summary-2"><i class="fa fa-check"></i><b>14.5</b> Summary</a></li>
</ul></li>
<li class="chapter" data-level="15" data-path="Chapter_15.html"><a href="Chapter_15.html"><i class="fa fa-check"></i><b>15</b> The Central Limit Theorem (CLT)</a>
<ul>
<li class="chapter" data-level="15.1" data-path="Chapter_15.html"><a href="Chapter_15.html#the-distribution-of-means-is-normal"><i class="fa fa-check"></i><b>15.1</b> The distribution of means is normal</a></li>
<li class="chapter" data-level="15.2" data-path="Chapter_15.html"><a href="Chapter_15.html#probability-and-z-scores"><i class="fa fa-check"></i><b>15.2</b> Probability and z-scores</a></li>
</ul></li>
<li class="chapter" data-level="16" data-path="Chapter_16.html"><a href="Chapter_16.html"><i class="fa fa-check"></i><b>16</b> <em>Practical</em>. Probability and simulation</a>
<ul>
<li class="chapter" data-level="16.1" data-path="Chapter_16.html"><a href="Chapter_16.html#probabilities-from-a-dataset"><i class="fa fa-check"></i><b>16.1</b> Probabilities from a dataset</a></li>
<li class="chapter" data-level="16.2" data-path="Chapter_16.html"><a href="Chapter_16.html#probabilities-from-a-normal-distribution"><i class="fa fa-check"></i><b>16.2</b> Probabilities from a normal distribution</a></li>
<li class="chapter" data-level="16.3" data-path="Chapter_16.html"><a href="Chapter_16.html#central-limit-theorem"><i class="fa fa-check"></i><b>16.3</b> Central limit theorem</a></li>
</ul></li>
<li class="part"><span><b>V Statistical inference</b></span></li>
<li class="chapter" data-level="" data-path="Week5.html"><a href="Week5.html"><i class="fa fa-check"></i>Week 5 Overview</a></li>
<li class="chapter" data-level="17" data-path="Chapter_17.html"><a href="Chapter_17.html"><i class="fa fa-check"></i><b>17</b> Confidence intervals (CIs)</a>
<ul>
<li class="chapter" data-level="17.1" data-path="Chapter_17.html"><a href="Chapter_17.html#normal-distribution-cis"><i class="fa fa-check"></i><b>17.1</b> Normal distribution CIs</a></li>
<li class="chapter" data-level="17.2" data-path="Chapter_17.html"><a href="Chapter_17.html#binomial-distribution-cis"><i class="fa fa-check"></i><b>17.2</b> Binomial distribution CIs</a></li>
</ul></li>
<li class="chapter" data-level="18" data-path="Chapter_18.html"><a href="Chapter_18.html"><i class="fa fa-check"></i><b>18</b> The t-interval</a></li>
<li class="chapter" data-level="19" data-path="Chapter_19.html"><a href="Chapter_19.html"><i class="fa fa-check"></i><b>19</b> <em>Practical</em>. z- and t- intervals</a>
<ul>
<li class="chapter" data-level="19.1" data-path="Chapter_19.html"><a href="Chapter_19.html#confidence-intervals-with-distraction"><i class="fa fa-check"></i><b>19.1</b> Confidence intervals with distrACTION</a></li>
<li class="chapter" data-level="19.2" data-path="Chapter_19.html"><a href="Chapter_19.html#confidence-intervals-from-z--and-t-scores"><i class="fa fa-check"></i><b>19.2</b> Confidence intervals from z- and t-scores</a></li>
<li class="chapter" data-level="19.3" data-path="Chapter_19.html"><a href="Chapter_19.html#confidence-intervals-for-different-sample-sizes-t--and-z-"><i class="fa fa-check"></i><b>19.3</b> Confidence intervals for different sample sizes (t- and z-)</a></li>
<li class="chapter" data-level="19.4" data-path="Chapter_19.html"><a href="Chapter_19.html#proportion-confidence-intervals"><i class="fa fa-check"></i><b>19.4</b> Proportion confidence intervals</a></li>
<li class="chapter" data-level="19.5" data-path="Chapter_19.html"><a href="Chapter_19.html#another-proportion-confidence-interval"><i class="fa fa-check"></i><b>19.5</b> Another proportion confidence interval</a></li>
</ul></li>
<li class="part"><span><b>VI Hypothesis testing</b></span></li>
<li class="chapter" data-level="" data-path="Week6.html"><a href="Week6.html"><i class="fa fa-check"></i>Week 6 Overview</a></li>
<li class="chapter" data-level="20" data-path="Chapter_20.html"><a href="Chapter_20.html"><i class="fa fa-check"></i><b>20</b> What is hypothesis testing?</a>
<ul>
<li class="chapter" data-level="20.1" data-path="Chapter_20.html"><a href="Chapter_20.html#how-ridiculous-is-our-hypothesis"><i class="fa fa-check"></i><b>20.1</b> How ridiculous is our hypothesis?</a></li>
<li class="chapter" data-level="20.2" data-path="Chapter_20.html"><a href="Chapter_20.html#statistical-hypothesis-testing"><i class="fa fa-check"></i><b>20.2</b> Statistical hypothesis testing</a></li>
<li class="chapter" data-level="20.3" data-path="Chapter_20.html"><a href="Chapter_20.html#p-values-false-positives-and-power"><i class="fa fa-check"></i><b>20.3</b> P-values, false positives, and power</a></li>
</ul></li>
<li class="chapter" data-level="21" data-path="Chapter_21.html"><a href="Chapter_21.html"><i class="fa fa-check"></i><b>21</b> The t-test</a>
<ul>
<li class="chapter" data-level="21.1" data-path="Chapter_21.html"><a href="Chapter_21.html#one-sample-t-test"><i class="fa fa-check"></i><b>21.1</b> One sample t-test</a></li>
<li class="chapter" data-level="21.2" data-path="Chapter_21.html"><a href="Chapter_21.html#independent-samples-t-test"><i class="fa fa-check"></i><b>21.2</b> Independent samples t-test</a></li>
<li class="chapter" data-level="21.3" data-path="Chapter_21.html"><a href="Chapter_21.html#paired-sample-t-test"><i class="fa fa-check"></i><b>21.3</b> Paired sample t-test</a></li>
<li class="chapter" data-level="21.4" data-path="Chapter_21.html"><a href="Chapter_21.html#assumptions-of-t-tests"><i class="fa fa-check"></i><b>21.4</b> Assumptions of t-tests</a></li>
<li class="chapter" data-level="21.5" data-path="Chapter_21.html"><a href="Chapter_21.html#non-parametric-alternatives"><i class="fa fa-check"></i><b>21.5</b> Non-parametric alternatives</a>
<ul>
<li class="chapter" data-level="21.5.1" data-path="Chapter_21.html"><a href="Chapter_21.html#wilcoxon-test"><i class="fa fa-check"></i><b>21.5.1</b> Wilcoxon test</a></li>
<li class="chapter" data-level="21.5.2" data-path="Chapter_21.html"><a href="Chapter_21.html#mann-whitney-u-test"><i class="fa fa-check"></i><b>21.5.2</b> Mann-Whitney U test</a></li>
</ul></li>
<li class="chapter" data-level="21.6" data-path="Chapter_21.html"><a href="Chapter_21.html#summary-3"><i class="fa fa-check"></i><b>21.6</b> Summary</a></li>
</ul></li>
<li class="chapter" data-level="22" data-path="Chapter_22.html"><a href="Chapter_22.html"><i class="fa fa-check"></i><b>22</b> <em>Practical</em>. Hypothesis testing and t-tests</a>
<ul>
<li class="chapter" data-level="22.1" data-path="Chapter_22.html"><a href="Chapter_22.html#exercise-on-a-simple-one-sample-t-test"><i class="fa fa-check"></i><b>22.1</b> Exercise on a simple one sample t-test</a></li>
<li class="chapter" data-level="22.2" data-path="Chapter_22.html"><a href="Chapter_22.html#exercise-on-a-paired-t-test"><i class="fa fa-check"></i><b>22.2</b> Exercise on a paired t-test</a></li>
<li class="chapter" data-level="22.3" data-path="Chapter_22.html"><a href="Chapter_22.html#wilcoxon-test-1"><i class="fa fa-check"></i><b>22.3</b> Wilcoxon test</a></li>
<li class="chapter" data-level="22.4" data-path="Chapter_22.html"><a href="Chapter_22.html#independent-samples-t-test-1"><i class="fa fa-check"></i><b>22.4</b> Independent samples t-test</a></li>
<li class="chapter" data-level="22.5" data-path="Chapter_22.html"><a href="Chapter_22.html#mann-whitney-u-test-1"><i class="fa fa-check"></i><b>22.5</b> Mann-Whitney U Test</a></li>
</ul></li>
<li class="part"><span><b>VII Review of parts I-V</b></span></li>
<li class="chapter" data-level="" data-path="Week7.html"><a href="Week7.html"><i class="fa fa-check"></i>Week 7 Overview (Reading week)</a></li>
<li class="part"><span><b>VIII Analysis of Variance (ANOVA)</b></span></li>
<li class="chapter" data-level="" data-path="Week8.html"><a href="Week8.html"><i class="fa fa-check"></i>Week 8 Overview</a></li>
<li class="chapter" data-level="23" data-path="Chapter_23.html"><a href="Chapter_23.html"><i class="fa fa-check"></i><b>23</b> Analysis of variance</a>
<ul>
<li class="chapter" data-level="23.1" data-path="Chapter_23.html"><a href="Chapter_23.html#the-f-distribution"><i class="fa fa-check"></i><b>23.1</b> The F-distribution</a></li>
<li class="chapter" data-level="23.2" data-path="Chapter_23.html"><a href="Chapter_23.html#one-way-anova"><i class="fa fa-check"></i><b>23.2</b> One-way ANOVA</a>
<ul>
<li class="chapter" data-level="23.2.1" data-path="Chapter_23.html"><a href="Chapter_23.html#anova-mean-variance-among-groups"><i class="fa fa-check"></i><b>23.2.1</b> ANOVA mean variance among groups</a></li>
<li class="chapter" data-level="23.2.2" data-path="Chapter_23.html"><a href="Chapter_23.html#anova-mean-variance-within-groups"><i class="fa fa-check"></i><b>23.2.2</b> ANOVA mean variance within groups</a></li>
<li class="chapter" data-level="23.2.3" data-path="Chapter_23.html"><a href="Chapter_23.html#anova-f-statistic-calculation"><i class="fa fa-check"></i><b>23.2.3</b> ANOVA F statistic calculation</a></li>
</ul></li>
<li class="chapter" data-level="23.3" data-path="Chapter_23.html"><a href="Chapter_23.html#assumptions-of-anova"><i class="fa fa-check"></i><b>23.3</b> Assumptions of ANOVA</a></li>
</ul></li>
<li class="chapter" data-level="24" data-path="Chapter_24.html"><a href="Chapter_24.html"><i class="fa fa-check"></i><b>24</b> Multiple comparisons</a></li>
<li class="chapter" data-level="25" data-path="Chapter_25.html"><a href="Chapter_25.html"><i class="fa fa-check"></i><b>25</b> Kruskal-Wallis H test</a></li>
<li class="chapter" data-level="26" data-path="Chapter_26.html"><a href="Chapter_26.html"><i class="fa fa-check"></i><b>26</b> Two-way ANOVA</a></li>
<li class="chapter" data-level="27" data-path="Chapter_27.html"><a href="Chapter_27.html"><i class="fa fa-check"></i><b>27</b> <em>Practical</em>. ANOVA and associated tests</a>
<ul>
<li class="chapter" data-level="27.1" data-path="Chapter_27.html"><a href="Chapter_27.html#one-way-anova-site"><i class="fa fa-check"></i><b>27.1</b> One-way ANOVA (site)</a></li>
<li class="chapter" data-level="27.2" data-path="Chapter_27.html"><a href="Chapter_27.html#one-way-anova-profile"><i class="fa fa-check"></i><b>27.2</b> One-way ANOVA (profile)</a></li>
<li class="chapter" data-level="27.3" data-path="Chapter_27.html"><a href="Chapter_27.html#multiple-comparisons"><i class="fa fa-check"></i><b>27.3</b> Multiple comparisons</a></li>
<li class="chapter" data-level="27.4" data-path="Chapter_27.html"><a href="Chapter_27.html#kruskall-wallis-h-test"><i class="fa fa-check"></i><b>27.4</b> Kruskal-Wallis H test</a></li>
<li class="chapter" data-level="27.5" data-path="Chapter_27.html"><a href="Chapter_27.html#two-way-anova"><i class="fa fa-check"></i><b>27.5</b> Two-way ANOVA</a></li>
</ul></li>
<li class="part"><span><b>IX Counts and Correlation</b></span></li>
<li class="chapter" data-level="" data-path="Week9.html"><a href="Week9.html"><i class="fa fa-check"></i>Week 9 Overview</a></li>
<li class="chapter" data-level="28" data-path="frequency-and-count-data.html"><a href="frequency-and-count-data.html"><i class="fa fa-check"></i><b>28</b> Frequency and count data</a>
<ul>
<li class="chapter" data-level="28.1" data-path="frequency-and-count-data.html"><a href="frequency-and-count-data.html#the-chi-square-distribution"><i class="fa fa-check"></i><b>28.1</b> The Chi-square distribution</a></li>
<li class="chapter" data-level="28.2" data-path="frequency-and-count-data.html"><a href="frequency-and-count-data.html#chi-squared-goodness-of-fit"><i class="fa fa-check"></i><b>28.2</b> Chi-squared goodness of fit</a></li>
<li class="chapter" data-level="28.3" data-path="frequency-and-count-data.html"><a href="frequency-and-count-data.html#chi-squared-test-of-association"><i class="fa fa-check"></i><b>28.3</b> Chi-squared test of association</a></li>
</ul></li>
<li class="chapter" data-level="29" data-path="correlation.html"><a href="correlation.html"><i class="fa fa-check"></i><b>29</b> Correlation</a>
<ul>
<li class="chapter" data-level="29.1" data-path="correlation.html"><a href="correlation.html#key-concepts-of-correlation"><i class="fa fa-check"></i><b>29.1</b> Key concepts of correlation</a></li>
<li class="chapter" data-level="29.2" data-path="correlation.html"><a href="correlation.html#mathematics-of-correlation"><i class="fa fa-check"></i><b>29.2</b> Mathematics of correlation</a></li>
<li class="chapter" data-level="29.3" data-path="correlation.html"><a href="correlation.html#correlation-hypothesis-testing"><i class="fa fa-check"></i><b>29.3</b> Correlation hypothesis testing</a>
<ul>
<li class="chapter" data-level="29.3.1" data-path="correlation.html"><a href="correlation.html#pearson-product-moment-correlation-coefficient"><i class="fa fa-check"></i><b>29.3.1</b> Pearson product moment correlation coefficient</a></li>
<li class="chapter" data-level="29.3.2" data-path="correlation.html"><a href="correlation.html#spearman-rank-correlation-coefficient"><i class="fa fa-check"></i><b>29.3.2</b> Spearman rank correlation coefficient</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="30" data-path="practical.-analysis-of-count-data-correlation-and-regression.html"><a href="practical.-analysis-of-count-data-correlation-and-regression.html"><i class="fa fa-check"></i><b>30</b> <em>Practical</em>. Analysis of count data, correlation, and regression</a>
<ul>
<li class="chapter" data-level="30.1" data-path="practical.-analysis-of-count-data-correlation-and-regression.html"><a href="practical.-analysis-of-count-data-correlation-and-regression.html#chi-square-exercise-1"><i class="fa fa-check"></i><b>30.1</b> Chi-Square Exercise 1</a></li>
<li class="chapter" data-level="30.2" data-path="practical.-analysis-of-count-data-correlation-and-regression.html"><a href="practical.-analysis-of-count-data-correlation-and-regression.html#chi-square-association-exercise-2"><i class="fa fa-check"></i><b>30.2</b> Chi-Square association Exercise 2</a></li>
<li class="chapter" data-level="30.3" data-path="practical.-analysis-of-count-data-correlation-and-regression.html"><a href="practical.-analysis-of-count-data-correlation-and-regression.html#correlation-exercise-3"><i class="fa fa-check"></i><b>30.3</b> Correlation Exercise 3</a></li>
<li class="chapter" data-level="30.4" data-path="practical.-analysis-of-count-data-correlation-and-regression.html"><a href="practical.-analysis-of-count-data-correlation-and-regression.html#correlation-exercise-4"><i class="fa fa-check"></i><b>30.4</b> Correlation Exercise 4</a></li>
</ul></li>
<li class="part"><span><b>X Linear Regression</b></span></li>
<li class="chapter" data-level="" data-path="Week10.html"><a href="Week10.html"><i class="fa fa-check"></i>Week 10 Overview</a></li>
<li class="chapter" data-level="31" data-path="regression-key-concepts.html"><a href="regression-key-concepts.html"><i class="fa fa-check"></i><b>31</b> Regression key concepts</a></li>
<li class="chapter" data-level="32" data-path="regression-validity.html"><a href="regression-validity.html"><i class="fa fa-check"></i><b>32</b> Regression validity</a></li>
<li class="chapter" data-level="33" data-path="introduction-to-multiple-regression.html"><a href="introduction-to-multiple-regression.html"><i class="fa fa-check"></i><b>33</b> Introduction to multiple regression</a></li>
<li class="chapter" data-level="34" data-path="model-selection-maybe-remove-this.html"><a href="model-selection-maybe-remove-this.html"><i class="fa fa-check"></i><b>34</b> Model selection (maybe remove this?)</a></li>
<li class="chapter" data-level="35" data-path="practical.-using-regression.html"><a href="practical.-using-regression.html"><i class="fa fa-check"></i><b>35</b> <em>Practical</em>. Using regression</a>
<ul>
<li class="chapter" data-level="35.1" data-path="practical.-using-regression.html"><a href="practical.-using-regression.html#regression-exercise-1"><i class="fa fa-check"></i><b>35.1</b> Regression Exercise 1</a></li>
<li class="chapter" data-level="35.2" data-path="practical.-using-regression.html"><a href="practical.-using-regression.html#regression-exercise-2"><i class="fa fa-check"></i><b>35.2</b> Regression Exercise 2</a></li>
<li class="chapter" data-level="35.3" data-path="practical.-using-regression.html"><a href="practical.-using-regression.html#regression-exercise-3"><i class="fa fa-check"></i><b>35.3</b> Regression Exercise 3</a></li>
<li class="chapter" data-level="35.4" data-path="practical.-using-regression.html"><a href="practical.-using-regression.html#regression-exercise-4"><i class="fa fa-check"></i><b>35.4</b> Regression Exercise 4</a></li>
</ul></li>
<li class="part"><span><b>XI Randomisation approaches</b></span></li>
<li class="chapter" data-level="" data-path="Week11.html"><a href="Week11.html"><i class="fa fa-check"></i>Week 11 Overview</a></li>
<li class="chapter" data-level="36" data-path="introduction-to-randomisation.html"><a href="introduction-to-randomisation.html"><i class="fa fa-check"></i><b>36</b> Introduction to randomisation</a></li>
<li class="chapter" data-level="37" data-path="assumptions-of-randomisation.html"><a href="assumptions-of-randomisation.html"><i class="fa fa-check"></i><b>37</b> Assumptions of randomisation</a></li>
<li class="chapter" data-level="38" data-path="bootstrapping.html"><a href="bootstrapping.html"><i class="fa fa-check"></i><b>38</b> Bootstrapping</a></li>
<li class="chapter" data-level="39" data-path="monte-carlo.html"><a href="monte-carlo.html"><i class="fa fa-check"></i><b>39</b> Monte Carlo</a></li>
<li class="chapter" data-level="40" data-path="practical.-using-r.html"><a href="practical.-using-r.html"><i class="fa fa-check"></i><b>40</b> <em>Practical</em>. Using R</a>
<ul>
<li class="chapter" data-level="40.1" data-path="practical.-using-r.html"><a href="practical.-using-r.html#r-exercise-1"><i class="fa fa-check"></i><b>40.1</b> R Exercise 1</a></li>
<li class="chapter" data-level="40.2" data-path="practical.-using-r.html"><a href="practical.-using-r.html#r-exercise-2"><i class="fa fa-check"></i><b>40.2</b> R Exercise 2</a></li>
<li class="chapter" data-level="40.3" data-path="practical.-using-r.html"><a href="practical.-using-r.html#r-exercise-3"><i class="fa fa-check"></i><b>40.3</b> R Exercise 3</a></li>
</ul></li>
<li class="part"><span><b>XII Statistical Reporting</b></span></li>
<li class="chapter" data-level="" data-path="Week12.html"><a href="Week12.html"><i class="fa fa-check"></i>Week 12 Overview</a></li>
<li class="chapter" data-level="41" data-path="reporting-statistics.html"><a href="reporting-statistics.html"><i class="fa fa-check"></i><b>41</b> Reporting statistics</a></li>
<li class="chapter" data-level="42" data-path="more-introduction-to-r.html"><a href="more-introduction-to-r.html"><i class="fa fa-check"></i><b>42</b> More introduction to R</a></li>
<li class="chapter" data-level="43" data-path="more-getting-started-with-r.html"><a href="more-getting-started-with-r.html"><i class="fa fa-check"></i><b>43</b> More getting started with R</a></li>
<li class="chapter" data-level="44" data-path="practical.-using-r-1.html"><a href="practical.-using-r-1.html"><i class="fa fa-check"></i><b>44</b> <em>Practical</em>. Using R</a>
<ul>
<li class="chapter" data-level="44.1" data-path="practical.-using-r-1.html"><a href="practical.-using-r-1.html#r-exercise-1-1"><i class="fa fa-check"></i><b>44.1</b> R Exercise 1</a></li>
<li class="chapter" data-level="44.2" data-path="practical.-using-r-1.html"><a href="practical.-using-r-1.html#r-exercise-2-1"><i class="fa fa-check"></i><b>44.2</b> R Exercise 2</a></li>
<li class="chapter" data-level="44.3" data-path="practical.-using-r-1.html"><a href="practical.-using-r-1.html#r-exercise-3-1"><i class="fa fa-check"></i><b>44.3</b> R Exercise 3</a></li>
</ul></li>
<li class="part"><span><b>XIII Review of parts (VII-XII)</b></span></li>
<li class="chapter" data-level="" data-path="Week13.html"><a href="Week13.html"><i class="fa fa-check"></i>Module summary</a></li>
<li class="appendix"><span><b>Appendix</b></span></li>
<li class="chapter" data-level="A" data-path="appendexA_CMS.html"><a href="appendexA_CMS.html"><i class="fa fa-check"></i><b>A</b> Common Marking Scheme</a></li>
<li class="chapter" data-level="B" data-path="uncertainty_derivation.html"><a href="uncertainty_derivation.html"><i class="fa fa-check"></i><b>B</b> Uncertainty derivation</a></li>
<li class="chapter" data-level="C" data-path="appendixC_tables.html"><a href="appendixC_tables.html"><i class="fa fa-check"></i><b>C</b> Statistical tables</a>
<ul>
<li class="chapter" data-level="C.1" data-path="appendixC_tables.html"><a href="appendixC_tables.html#wilcoxon-signed-rank-critical-values"><i class="fa fa-check"></i><b>C.1</b> Wilcoxon signed rank critical values</a></li>
<li class="chapter" data-level="C.2" data-path="appendixC_tables.html"><a href="appendixC_tables.html#mann-whitney-u-critical-values"><i class="fa fa-check"></i><b>C.2</b> Mann-Whitney U critical values</a></li>
</ul></li>
<li class="chapter" data-level="" data-path="references.html"><a href="references.html"><i class="fa fa-check"></i>References</a></li>
<li class="divider"></li>
<li><a href="https://github.com/rstudio/bookdown" target="_blank" rel="noopener">Published with bookdown</a></li>

</ul>

      </nav>
    </div>

    <div class="book-body">
      <div class="body-inner">
        <div class="book-header" role="navigation">
          <h1>
            <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">Statistical Techniques for Biological and Environmental Sciences</a>
          </h1>
        </div>

        <div class="page-wrapper" tabindex="-1" role="main">
          <div class="page-inner">

            <section class="normal" id="section-">
<div id="frequency-and-count-data" class="section level1 hasAnchor" number="28">
<h1><span class="header-section-number">Chapter 28</span> Frequency and count data<a href="frequency-and-count-data.html#frequency-and-count-data" class="anchor-section" aria-label="Anchor link to header"></a></h1>
<p>In this book, we have introduced hypothesis testing as a tool to determine if variables were sampled from a population with a specific mean (one sample t-test in <a href="Chapter_21.html#one-sample-t-test">Chapter 21.1</a>), or if different groups of variables were sampled from a population with the same mean (the independent samples t-test in <a href="Chapter_21.html#independent-samples-t-test">Chapter 21.2</a> and ANOVA in <a href="Chapter_23.html#Chapter_23">Chapter 23</a>).
In these tests, the variables for which we calculated the means were always continuous (e.g., fig wasp wing lengths, nitrogen concentration in parts per million).
That is, the variables of the t-test and ANOVA could always, at least in theory, take any real value (i.e., any decimal).
And the comparison was always between the means of categorical groups (e.g., fig wasp species or study sites).
But not every variable that we measure will be continuous.
For example, in <a href="Chapter_5.html#Chapter_5">Chapter 5</a>, we also introduced discrete variables, which can only take discrete counts (1, 2, 3, 4, and so forth).
Examples of such <strong>count data</strong> might include the number of species of birds in a forest or the number of days in the year for which an extreme temperature is recorded.
<a href="Chapter_14.html#Chapter_14">Chapter 14</a> included some examples of count data when introducing probability distributions (e.g., counts of heads or tails in coin flips, or the number of people testing positive for Covid-19).
Count data are discrete because they can only take integer values.
For example, there cannot be 14.24 bird species in a forest; it needs to be a whole number.</p>
<p>In the biological and environmental sciences, we often want to test whether or not observed counts are significantly different from some expectation.
For example, we might hypothesise that the probability of flowers being red versus blue in a particular field is the same.
In other words, <span class="math inline">\(Pr(flower = red) = 0.5\)</span> and <span class="math inline">\(Pr(flower = blue) = 0.5\)</span>.
By this logic, if we were to collect 100 flowers at random from the field, we would expect 50 to be red and 50 to be blue.
If we actually went out and collected 100 flowers at random, but found 46 to be red and 54 to be blue, would this be sufficiently different from our expectation to reject the null hypothesis that the probability of sampling a red versus blue flower is the same?
We could test this null hypothesis using a Chi-square goodness of fit test (<a href="frequency-and-count-data.html#chi-squared-goodness-of-fit">Chapter 28.2</a>).
Similarly, we might want to test if 2 different count variables (e.g., flower colour and flower species) are associated with one another (e.g., if blue flowers are more common in one species than another species).
We could test this kind of hypothesis using a Chi-squared test of association (<a href="frequency-and-count-data.html#chi-squared-test-of-association">Chapter 28.3</a>).</p>
<p>Before introducing the Chi-square goodness of fit test or the Chi-square test of association, it makes sense to first introduce the Chi-square (<span class="math inline">\(\chi^{2}\)</span>) distribution.
The general motivation for introducing the Chi-square distribution is the same as it was for the t-distribution (<a href="Chapter_18.html#Chapter_18">Chapter 18</a>) or F-distribution (<a href="Chapter_23.html#the-f-distribution">Chapter 23.1</a>).
We need some probability density distribution that is our null distribution, which is what we predict if our null hypothesis is true.
We then compare our test statistic to this null distribution to find the probability of sampling a test statistic that is as or more extreme if the null hypothesis is really true (i.e., a p-value).</p>
<div id="the-chi-square-distribution" class="section level2 hasAnchor" number="28.1">
<h2><span class="header-section-number">28.1</span> The Chi-square distribution<a href="frequency-and-count-data.html#the-chi-square-distribution" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>The Chi-square (<span class="math inline">\(\chi^{2}\)</span>) distribution is a continuous distribution in which values of <span class="math inline">\(\chi^{2}\)</span> can be any real number greater than or equal to 0. We can generate a <span class="math inline">\(\chi^{2}\)</span> distribution by adding up squared values that are sampled from a standard normal distribution <span class="citation">(<a href="#ref-Sokal1995" role="doc-biblioref">Sokal and Rohlf 1995</a>)</span>, hence the ‘square’ in ‘Chi-square’.
There is a lot to unpack in the previous sentence, so we can go through it step by step.
First, we can take another look at the standard normal distribution from <a href="Chapter_14.html#normal-distribution">Chapter 14.4.4</a> (Figure 28.1).</p>
<div class="figure"><span style="display:block;" id="fig:unnamed-chunk-134"></span>
<img src="bookdown-demo_files/figure-html/unnamed-chunk-134-1.png" alt="A plot of a bell curve, shaded in grey and centered at the x-axis on a value of zero is shown. The x axis is labelled 'x'." width="672" />
<p class="caption">
Figure 28.1: Recreation of Figure 14.9, a standard normal probability distribution
</p>
</div>
<p>Suppose that we randomly sampled 4 values from the standard normal distribution shown in Figure 28.1.</p>
<ul>
<li><span class="math inline">\(x_{1} = -1.244\)</span></li>
<li><span class="math inline">\(x_{2} = 0.162\)</span></li>
<li><span class="math inline">\(x_{3} = -2.214\)</span></li>
<li><span class="math inline">\(x_{4} = 2.071\)</span></li>
</ul>
<p>We can square all of these values, then add up the squares,</p>
<p><span class="math display">\[\chi^{2} = (-1.244)^{2} + (0.162)^{2} + (-2.214)^{2} + (2.071)^{2}.\]</span></p>
<p>Note that <span class="math inline">\(\chi^{2}\)</span> cannot be negative because when we square a number that is either positive or negative, we always end up with a positive value (e.g., <span class="math inline">\((-2)^{2} = 4\)</span>, see <a href="Chapter_1.html#numbers-and-operations">Chapter 1.1</a>).
The final value is <span class="math inline">\(\chi^{2} = 10.76462\)</span>.
Of course, this <span class="math inline">\(\chi^{2}\)</span> value would have been different if our <span class="math inline">\(x_{i}\)</span> values (<span class="math inline">\(x_{1}\)</span>, <span class="math inline">\(x_{2}\)</span>, <span class="math inline">\(x_{3}\)</span>, and <span class="math inline">\(x_{4}\)</span>) had been different.
And if we are sampling randomly from the normal distribution, we should not expect to get the same <span class="math inline">\(\chi^{2}\)</span> value from 4 random standard normal deviates.
We can therefore ask, if we were to keep sampling 4 standard normal deviates and calculating new <span class="math inline">\(\chi^{2}\)</span> values, what would be the distribution of these <span class="math inline">\(\chi^{2}\)</span> values?
The answer is shown in Figure 28.2.</p>
<div class="figure"><span style="display:block;" id="fig:unnamed-chunk-135"></span>
<img src="bookdown-demo_files/figure-html/unnamed-chunk-135-1.png" alt="Plot of a curved distribution that rapidly increases to a maximum at around 2 then slowly decreases." width="672" />
<p class="caption">
Figure 28.2: A Chi-square distribution, which is the expected sum of 4 squared standard normal deviates, i.e., the sum of 4 values sampled from a standard normal distribution and squared.
</p>
</div>
<p>Looking at the shape of Figure 28.2, we can see that most of the time, the sum of squared deviations from the mean of <span class="math inline">\(\mu = 0\)</span> will be about 2.
But sometimes we will get a much lower or higher value of <span class="math inline">\(\chi^{2}\)</span> by chance, if we sample particularly low or high values of <span class="math inline">\(x_{i}\)</span>.</p>
<p>If we summed a different number of squared <span class="math inline">\(x_{i}\)</span> values, then we would expect the distribution of <span class="math inline">\(\chi^{2}\)</span> to change.
Had we sampled fewer than 4 <span class="math inline">\(x_{i}\)</span> values, the expected <span class="math inline">\(\chi^{2}\)</span> would be lower just because we are adding up fewer numbers.
Similarly, had we sampled more than 4 <span class="math inline">\(x_{i}\)</span> values, the expected <span class="math inline">\(\chi^{2}\)</span> would be higher just because we are adding up more numbers.
The shape of the <span class="math inline">\(\chi^{2}\)</span> distribution<a href="#fn49" class="footnote-ref" id="fnref49"><sup>49</sup></a> is therefore determined by the number of values sampled (<span class="math inline">\(N\)</span>), or more specifically the degrees of freedom (df, or sometimes <span class="math inline">\(\nu\)</span>), which in a sample is <span class="math inline">\(df = N - 1\)</span>.
This is the same idea as the t-distribution from <a href="Chapter_18.html#Chapter_18">Chapter 18</a>, which also changed shape depending on the degrees of freedom.
Figure 28.3 shows the different <span class="math inline">\(\chi^{2}\)</span> probability density distributions for different degrees of freedom.</p>
<div class="figure"><span style="display:block;" id="fig:unnamed-chunk-136"></span>
<img src="bookdown-demo_files/figure-html/unnamed-chunk-136-1.png" alt="A plot is shown with 3 different curve lines, which show 3 different Chi-square distributions with different degrees of freedom." width="672" />
<p class="caption">
Figure 28.3: Probability density functions for 3 different Chi-square distributions, each of which have different degrees of freedom (df).
</p>
</div>
<p>As with the F distribution from <a href="Chapter_23.html#the-f-distribution">Chapter 23.1</a>, visualising the <span class="math inline">\(\chi^{2}\)</span> distribution is much, much easier using an <a href="https://bradduthie.shinyapps.io/chi-square/">interactive application</a>.</p>
<blockquote>
<p><a href="https://bradduthie.shinyapps.io/chi-square/">Click here</a> for an interactive application demonstrating how the Chi-square distribution changes with different degrees of freedom.</p>
</blockquote>
<p>And as with the F distribution, it is not necessary to memorise how the <span class="math inline">\(\chi^{2}\)</span> distribution changes with different degrees of freedom.
The important point is that the distribution changes with different degrees of freedom, and we can map probabilities to the <span class="math inline">\(\chi^{2}\)</span> value on the x-axis in the same way as any other distribution.</p>
<p>What does any of this have to do with count data? It actually is a bit messy.
The <span class="math inline">\(\chi^{2}\)</span> distribution is not a perfect tool for comparing observed and expected counts <span class="citation">(<a href="#ref-Sokal1995" role="doc-biblioref">Sokal and Rohlf 1995</a>)</span>.
After all, counts are integer values, and the <span class="math inline">\(\chi^{2}\)</span> distribution is clearly continuous (unlike, e.g., the binomial or Poisson distributions from <a href="Chapter_14.html#probability-distributions">Chapter 14.4</a>).
The <span class="math inline">\(\chi^{2}\)</span> distribution is in fact a useful approximation for testing counts, and one that becomes less accurate when sample size <span class="citation">(<a href="#ref-Slakter1968" role="doc-biblioref">Slakter 1968</a>)</span> or expected count size <span class="citation">(<a href="#ref-Tate1973" role="doc-biblioref">Tate and Hyer 1973</a>)</span> is small.
Nevertheless, we can use the <span class="math inline">\(\chi^{2}\)</span> distribution as a tool for testing whether observed counts are significantly different from expected counts.
The first test that we will look at is the goodness of fit test.</p>
</div>
<div id="chi-squared-goodness-of-fit" class="section level2 hasAnchor" number="28.2">
<h2><span class="header-section-number">28.2</span> Chi-squared goodness of fit<a href="frequency-and-count-data.html#chi-squared-goodness-of-fit" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>The first kind of test that we will consider for count data is the goodness of fit test.
In this test, we have some number of counts that we expect to observe (e.g., expected counts of red versus blue flowers), then compare this expectation to the counts that we actually observe.
If the expected and observed counts differ by a lot, then we will get a large test statistic and reject the null hypothesis.
A simple concrete example will make this a bit more clear.</p>
<p>Recall the practical in <a href="Chapter_16.html#Chapter_16">Chapter 16</a>, in which players of the mobile app game <a href="https://play.google.com/store/apps/details?id=com.hyperluminal.stirlinguniversity.sustainabledevelopmentgame">Power Up!</a> chose a small, medium, or large dam at the start of the game.
Suppose that we are interested in the size of dam that policy-makers choose to build when playing the game, so we find 60 such people in Scotland and ask them to play the game.
Perhaps we do not think that the policy-makers will have any preference for a particular dam size (and therefore just pick 1 of the 3 dam sizes at random).
We would therefore expect an equal number of small, medium, and large dams to be selected among the 60 players.
That is, for our expected counts of each dam size (<span class="math inline">\(E_{size}\)</span>), we expect 20 small (<span class="math inline">\(E_{small} = 20\)</span>), 20 medium (<span class="math inline">\(E_{medium} = 20\)</span>), and 20 large (<span class="math inline">\(E_{large} = 20\)</span>) dams in total (because <span class="math inline">\(60/3 = 20\)</span>).</p>
<p>Of course, even if our players have no preference for a particular dam size, the number of small, medium, and large dams will not always be <em>exactly</em> the same.
The expected counts might still be a bit different from the observed counts of each dam size (<span class="math inline">\(O_{size}\)</span>).
Suppose, for example, we find that out of our total 60 policy-makers, 18 chose small (<span class="math inline">\(O_{small} = 18\)</span>), 24 chose medium (<span class="math inline">\(O_{medium} = 24\)</span>), and 18 chose large (<span class="math inline">\(O_{large} = 18\)</span>) dams.
What we want to test is the null hypothesis that there is no significant difference between expected and observed counts.</p>
<ul>
<li><span class="math inline">\(H_{0}\)</span>: There is no significant difference between expected and observed counts.</li>
<li><span class="math inline">\(H_{A}\)</span>: There is a significant difference between expected and observed counts.</li>
</ul>
<p>To get our test statistic<a href="#fn50" class="footnote-ref" id="fnref50"><sup>50</sup></a>, we now just need to take each observed count, subtract the expected count, square this difference, divide by the expected count, then add everything up,</p>
<p><span class="math display">\[\chi^{2} = \frac{(18 - 20)^{2}}{20} + \frac{(24 - 20)^{2}}{20} + \frac{(18 - 20)^{2}}{20}.\]</span></p>
<p>We can calculate the values in the numerator.
Note that all of these numbers must be positive (e.g., <span class="math inline">\(18 - 20 = -2\)</span>, but <span class="math inline">\((-2)^{2} = 4\)</span>),</p>
<p><span class="math display">\[\chi^{2} = \frac{4}{20} + \frac{16}{20} + \frac{4}{20}.\]</span></p>
<p>When we sum the 3 terms, we get a value of <span class="math inline">\(\chi^{2} = 1.2\)</span>.
Note that if all of our observed values had been the same as the expected values (i.e., 20 small, medium, and large dams actually chosen), then we would get a <span class="math inline">\(\chi^{2}\)</span> value of 0.
The more the observed values differ from the expectation of 20, the higher the <span class="math inline">\(\chi^{2}\)</span> will be.
We can now check to see if the test statistic <span class="math inline">\(\chi^{2} = 1.2\)</span> is sufficiently large to reject the null hypothesis that our policy-makers have no preference for small, medium, or large dams.
There are <span class="math inline">\(N = 3\)</span> categories of counts (small, medium, and large), meaning that there are <span class="math inline">\(df = 3 - 1 = 2\)</span> degrees of freedom.
The <a href="https://bradduthie.shinyapps.io/chi-square/">interactive application</a> can be used to compare our test statistic with the null distribution by setting df = 2 and the Chi-square value to 1.2.
As it turns out, if the null hypothesis is true, then the probability of observing a value of <span class="math inline">\(\chi^{2} = 1.2\)</span> or higher (i.e., the p-value) is <span class="math inline">\(P = 0.5488\)</span>.
Figure 28.4 shows the appropriate <span class="math inline">\(\chi^{2}\)</span> distribution plotted, with the area above the test statistic <span class="math inline">\(\chi^{2} = 1.2\)</span> shaded in grey.</p>
<div class="figure"><span style="display:block;" id="fig:unnamed-chunk-137"></span>
<img src="bookdown-demo_files/figure-html/unnamed-chunk-137-1.png" alt="Plot of curved distribution that starts around 0.5 and decreases, asymptoting toward 0." width="672" />
<p class="caption">
Figure 28.4: A Chi-square distribution with 2 degrees of freedom. The area shaded in grey shows the probability of observing a value of <span class="math inline">\(\chi^{2} = 1.2\)</span> or higher if the null hypothesis is true.
</p>
</div>
<p>Because <span class="math inline">\(P &gt; 0.05\)</span>, we do not reject the null hypothesis that there is no significant difference between expected and observed counts of chosen dam sizes.</p>
<p>Note that this was a simple example.
For a goodness of fit test, we can have any number of different count categories (at least, any number of 2 or more).
The expectations also do not need to be integers.
For example, if we only managed to find 59 policy makers instead of 60, then our expected counts would have been <span class="math inline">\(59/3 = 19.67\)</span> instead of <span class="math inline">\(60/3 = 20\)</span>.
The expectations also do not <em>need</em> to be the same.
For example, we could have tested the null hypothesis that twice as many policy-makers would choose large dams as would choose small and medium dams combined (i.e., <span class="math inline">\(E_{large} = 40\)</span>, <span class="math inline">\(E_{medium} = 10\)</span>, and <span class="math inline">\(E_{small} = 10\)</span>).
For <span class="math inline">\(n\)</span> categories, the more general equation for the <span class="math inline">\(\chi^{2}\)</span> statistic is,</p>
<p><span class="math display">\[\chi^{2} = \sum_{i = 1}^{n} \frac{\left(O_{i} - E_{i}\right)^{2}}{E_{i}}.\]</span></p>
<p>We can therefore use this general equation to calculate a <span class="math inline">\(\chi^{2}\)</span> for any number of categories (<span class="math inline">\(n\)</span>).
Next, we will look at testing associations between counts in different types of categories.</p>
</div>
<div id="chi-squared-test-of-association" class="section level2 hasAnchor" number="28.3">
<h2><span class="header-section-number">28.3</span> Chi-squared test of association<a href="frequency-and-count-data.html#chi-squared-test-of-association" class="anchor-section" aria-label="Anchor link to header"></a></h2>
<p>The second kind of test that we will consider for count data is the Chi-square test of association.
While the goodness of fit test focused on a single categorical variable (dam sizes in the example above), the Chi-square test of association focuses on 2 different categorical variables.
What we want to know is whether or not the 2 categorical variables are independent of one another <span class="citation">(<a href="#ref-Box1978" role="doc-biblioref">Box, Hunter, and Hunter 1978</a>)</span>.
In other words, does knowing something about one variable tell us anything about the other variable?
A concrete example will make it easier to explain.
We can again make use of the <a href="Chapter_16.html#Chapter_16">Chapter 16</a> game <a href="https://play.google.com/store/apps/details?id=com.hyperluminal.stirlinguniversity.sustainabledevelopmentgame">Power Up!</a>.
As mentioned in the <a href="frequency-and-count-data.html#chi-squared-goodness-of-fit">previous section</a>, game players choose a small, medium, or large dam at the start of the game.
Players can play the game on either an Android or MacOS mobile device.
We therefore have 2 categorical variables, dam size and OS type.
We might want to know, do Android users choose the same dam sizes as MacOS users?
In other words, are dam size and OS type associated?
We can state this as a null and alternative hypothesis.</p>
<ul>
<li><span class="math inline">\(H_{0}\)</span>: There is no association between OS and dam size choice.</li>
<li><span class="math inline">\(H_{A}\)</span>: There is an association between OS and dam size choice.</li>
</ul>
<p>Consider the data in Table 28.1, which show counts of Android versus MacOS users and their dam choices.</p>
<table>
<caption><span id="tab:unnamed-chunk-138">Table 28.1: </span>Counts (N = 60) from a mobile game called ‘Power Up!’, in which players are confronted with trade-offs between energy output, energy justice, and biodiversity. Players can use 1 of 2 types of Operating System (Android or MacOS) and build one of 3 types of dam in the game (Small, Medium, or Large).</caption>
<thead>
<tr class="header">
<th></th>
<th align="right">Small</th>
<th align="right">Medium</th>
<th align="right">Large</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Android</td>
<td align="right">8</td>
<td align="right">16</td>
<td align="right">6</td>
</tr>
<tr class="even">
<td>MacOS</td>
<td align="right">10</td>
<td align="right">8</td>
<td align="right">12</td>
</tr>
</tbody>
</table>
<p>Just looking at the counts in Table 28.1, it appears that there might be an association between the 2 variables.
For example, Android users appear to be more likely to choose a medium dam than MacOS users.
Medium dams are the most popular choice for Android users, but they are the least popular choice for MacOS users.
Nevertheless, could this just be due to chance?
If it were due to chance, then how unlikely are the counts in Table 28.1?
In other words, if Android and MacOS users in the whole population really do choose dam sizes at the same frequencies, then what is the probability of getting a sample of 60 players in which the choices are at least as unbalanced as this?
This is what we want to answer with our Chi-squared test of association.</p>
<p>The general idea is the same as with the Chi-squared goodness of fit test.
We have our observed values (Table 28.1).
We now need to find the expected values to calculate a <span class="math inline">\(\chi^{2}\)</span> value.
But the expected values are now a bit more complicated.
With the goodness of fit test in <a href="frequency-and-count-data.html#chi-squared-goodness-of-fit">Chapter 28.2</a>, we just assumed that all categories were equally likely (i.e., the probability of choosing each size dam was the same).
There were 60 players and 3 dam sizes, so the expected frequency of each dam choice was 60/3 = 20.
Now it is different.
We are not testing if dam sizes or OS choices are the same.
We want to know if they are <em>associated</em> with one another.
That is, regardless of the popularity of Android vs MacOS, or the frequency with which small, medium and large dams are selected, do Android users choose different dam sizes than MacOS users?
If dam size is not associated with OS, then we would predict that the relative frequency of small, medium, and large dams would be the same for both Android and MacOS.</p>
<p>To find the expected counts of each variable combination (e.g., Android and Small, or MacOS and Large), we need to get the probability that each category is selected independently.
For example, what is the probability of a player selecting a large dam, regardless of the OS that they are using?
Table 28.2 below shows these probabilities as additional rows and columns added onto Table 28.1.</p>
<table>
<caption><span id="tab:unnamed-chunk-139">Table 28.2: </span>Counts (N = 60) from a mobile game called ‘Power Up!’, in which players are confronted with trade-offs between energy output, energy justice, and biodiversity. Players can use 1 of 2 types of Operating System (Android or MacOS) and build one of 3 types of dam in the game (Small, Medium, or Large). Outer rows and columns show the probabilities of categories being selected.</caption>
<thead>
<tr class="header">
<th></th>
<th align="left">Small</th>
<th align="left">Medium</th>
<th align="left">Large</th>
<th align="left"><strong>Probability</strong></th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Android</td>
<td align="left">8</td>
<td align="left">16</td>
<td align="left">6</td>
<td align="left">0.5</td>
</tr>
<tr class="even">
<td>MacOS</td>
<td align="left">10</td>
<td align="left">8</td>
<td align="left">12</td>
<td align="left">0.5</td>
</tr>
<tr class="odd">
<td><strong>Probability</strong></td>
<td align="left">0.3</td>
<td align="left">0.4</td>
<td align="left">0.3</td>
<td align="left">–</td>
</tr>
</tbody>
</table>
<p>Since there are 30 total Android users (<span class="math inline">\(8 + 16 + 6 = 30\)</span>) and 30 total MacOS users (Table 28.2), the probability of a player having an Android OS is <span class="math inline">\(30/60 = 0.5\)</span>, and the probability of a player having a MacOS is also <span class="math inline">\(30 / 60 = 0.5\)</span>.
Similarly, there are 18 small, 24 medium, and 18 large dam choices in total.
Hence, the probability of a player choosing a small dam is <span class="math inline">\(18/60 = 0.3\)</span>, medium is <span class="math inline">\(24/60 = 0.4\)</span>, and large is <span class="math inline">\(18/60 = 0.3\)</span>.
If these probabilities combine independently<a href="#fn51" class="footnote-ref" id="fnref51"><sup>51</sup></a>, then we can multiply them to find the probability of a particular combination of categories.
For example, the probability of a player using Android is 0.5 and choosing a small dam is 0.3, so the probability of a player having both Android <strong>and</strong> a small dam is <span class="math inline">\(0.5 \times 0.3 = 0.15\)</span> (see <a href="Chapter_15.html#Chapter_15">Chapter 15</a> for an introduction to probability models).
The probability of a player using Android <strong>and</strong> choosing a medium dam is <span class="math inline">\(0.5 \times 0.4 = 0.2\)</span>.
We can fill in all of these joint probabilities in a new Table 28.3.</p>
<table>
<caption><span id="tab:unnamed-chunk-140">Table 28.3: </span>Probabilities for each combination of categorical variables from a dataset in which players on either an Android or MacOS choose a dam size in the mobile app game ‘Power Up!’, assuming that variables are independent of one another.</caption>
<thead>
<tr class="header">
<th></th>
<th align="left">Small</th>
<th align="left">Medium</th>
<th align="left">Large</th>
<th align="left"><strong>Probability</strong></th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Android</td>
<td align="left">0.15</td>
<td align="left">0.2</td>
<td align="left">0.15</td>
<td align="left">0.5</td>
</tr>
<tr class="even">
<td>MacOS</td>
<td align="left">0.15</td>
<td align="left">0.2</td>
<td align="left">0.15</td>
<td align="left">0.5</td>
</tr>
<tr class="odd">
<td><strong>Probability</strong></td>
<td align="left">0.3</td>
<td align="left">0.4</td>
<td align="left">0.3</td>
<td align="left">–</td>
</tr>
</tbody>
</table>
<p>From Table 28.3, we now have the probability of each combination of variables.
Note that all of these probabilities sum to 1.</p>
<p><span class="math display">\[0.15 + 0.2 + 0.15 + 0.15 + 0.2 + 0.15 = 1.\]</span></p>
<p>To get the expected count of each combination, we just need to multiply the probability by the sample size, i.e., the total number of players (N = 60).
For example, the expected count of players who use Android and choose a small dam will be <span class="math inline">\(0.15 \times 60 = 9\)</span>.
Table 28.4 fills in all of the expected counts.
Note that the sum of all the counts equals our sample size of 60.</p>
<table>
<caption><span id="tab:unnamed-chunk-141">Table 28.4: </span>Expected counts for each combination of categorical variables from a dataset in which players on either an Android or MacOS choose a dam size in the mobile app game ‘Power Up!’, assuming that variables are independent of one another.</caption>
<thead>
<tr class="header">
<th></th>
<th align="left">Small</th>
<th align="left">Medium</th>
<th align="left">Large</th>
<th align="left"><strong>Sum</strong></th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Android</td>
<td align="left">9</td>
<td align="left">12</td>
<td align="left">9</td>
<td align="left">30</td>
</tr>
<tr class="even">
<td>MacOS</td>
<td align="left">9</td>
<td align="left">12</td>
<td align="left">9</td>
<td align="left">30</td>
</tr>
<tr class="odd">
<td><strong>Sum</strong></td>
<td align="left">18</td>
<td align="left">24</td>
<td align="left">18</td>
<td align="left">–</td>
</tr>
</tbody>
</table>
<p>We now have both the observed (Table 28.2) and expected (Table 28.4) counts (remember that the expected counts do not <em>need</em> to be integers).
To get our <span class="math inline">\(\chi^{2}\)</span> test statistic, we use the same formula as in <a href="frequency-and-count-data.html#chi-squared-goodness-of-fit">Chapter 28.2</a>,</p>
<p><span class="math display">\[\chi^{2} = \sum_{i = 1}^{n} \frac{\left(O_{i} - E_{i}\right)^{2}}{E_{i}}.\]</span></p>
<p>There are 6 total combinations of OS and dam size, so there are <span class="math inline">\(n = 6\)</span> values to sum up,</p>
<p><span class="math display">\[\chi^{2} = \frac{(8-9)^2}{9} + \frac{(16 - 12)^{2}}{12} + ... + \frac{(8 - 12)^{2}}{12} + \frac{(12-9)^2}{9}.\]</span></p>
<p>If we sum all of the 6 terms, we get a value of <span class="math inline">\(\chi^{2} = 4.889\)</span>.
We can compare this to the null <span class="math inline">\(\chi^{2}\)</span> distribution as we did in the <a href="frequency-and-count-data.html#chi-squared-goodness-of-fit">Chapter 28.2</a> goodness of fit test, but we need to know the correct degrees of freedom.
The correct degrees of freedom<a href="#fn52" class="footnote-ref" id="fnref52"><sup>52</sup></a> is the number of categories in variable 1 (<span class="math inline">\(n_{1}\)</span>) minus 1, times the number of categories in variable 2 (<span class="math inline">\(n_{2}\)</span>) minus 1,</p>
<p><span class="math display">\[df = (n_{1} - 1) \times (n_{2} - 1).\]</span></p>
<p>In the case of our example, this is the number of dam types minus 1 (<span class="math inline">\(n_{dam} - 1 = 3 - 1\)</span>) times the number of operating systems minus 1 (<span class="math inline">\(n_{OS} - 1 = 2 - 1\)</span>).
The correct degrees of freedom is therefore <span class="math inline">\(df = 2 \times 1 = 2\)</span>.
We now just need to find the p-value for a Chi-square distribution with 2 degrees of freedom and a test statistic of <span class="math inline">\(\chi^{2} = 4.889\)</span>.
From the <a href="https://bradduthie.shinyapps.io/chi-square/">interactive app</a> (set df to 2 and slide the Chi-square value to 4.9), we get a value of about <span class="math inline">\(P = 0.0868\)</span>.
In other words, if <span class="math inline">\(H_{0}\)</span> is true, then the probability of getting a <span class="math inline">\(\chi^{2}\)</span> of 4.889 or higher is <span class="math inline">\(P = 0.0868\)</span>.
Consequently, because <span class="math inline">\(P &gt; 0.05\)</span>, we would not reject the null hypothesis.
We should therefore conclude that there is no evidence for an association between OS and dam size choice.</p>
<p>Statistical programs such as R and Jamovi will calculate the <span class="math inline">\(\chi^{2}\)</span> value and get the p-value for the appropriate degrees of freedom <span class="citation">(<a href="#ref-Jamovi2022" role="doc-biblioref">The Jamovi Project 2022</a>; <a href="#ref-Rproject" role="doc-biblioref">R Core Team 2022</a>)</span>.
To do this in Jamovi, it is necessary to input the categorical data (e.g., Android, MacOS) in a tidy format, which will be a focus of the practical <a href="#Chapter_30">Chapter 30</a>.</p>
<p>There is one final point regarding expected and observed values of the Chi-square test of association.
There is another way of getting these expected values that is a bit faster (and more widely taught), but does not demonstrate the logic of expected counts as clearly.
If we wanted to, we could sum the rows and columns of our original observations.
Table 28.5 shows the original observations with the sum of each row and column.</p>
<table>
<caption><span id="tab:unnamed-chunk-142">Table 28.5: </span>Observed counts for each combination of categorical variables from a dataset in which players on either an Android or MacOS choose a dam size in the mobile app game ‘Power Up!’. The last row and column shows the sum of observed dam sizes and OS users, respectively.</caption>
<thead>
<tr class="header">
<th></th>
<th align="left">Small</th>
<th align="left">Medium</th>
<th align="left">Large</th>
<th align="left"><strong>Sum</strong></th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>Android</td>
<td align="left">8</td>
<td align="left">16</td>
<td align="left">6</td>
<td align="left">30</td>
</tr>
<tr class="even">
<td>MacOS</td>
<td align="left">10</td>
<td align="left">8</td>
<td align="left">12</td>
<td align="left">30</td>
</tr>
<tr class="odd">
<td><strong>Sum</strong></td>
<td align="left">18</td>
<td align="left">24</td>
<td align="left">18</td>
<td align="left">–</td>
</tr>
</tbody>
</table>
<p>We can get the expected counts from Table 28.5 if we multiply each row sum by each column sum, then divide by the total sample size (<span class="math inline">\(N = 60\)</span>).
For example, to get the expected counts of Android users who choose a small dam, we can multiply <span class="math inline">\((18 \times 30)/60 = 9\)</span>.
To get the expected counts of MacOS users who choose a medium dam, we can multiply <span class="math inline">\((30 \times 24)/60 = 12\)</span>.
This works for all combinations of rows and columns, so we could do it to find all of the expected counts from Table 28.4.</p>
</div>
</div>
<h3>References<a href="references.html#references" class="anchor-section" aria-label="Anchor link to header"></a></h3>
<div id="refs" class="references csl-bib-body hanging-indent">
<div id="ref-Box1978" class="csl-entry">
Box, G E P, W G Hunter, and J S Hunter. 1978. <em><span class="nocase">Statistics for Experimenters: An Introduction to Design, Data Analysis, and Model Building</span></em>. New York: John Wiley &amp; Sons.
</div>
<div id="ref-Miller2004" class="csl-entry">
Miller, Irwin, and Marylees Miller. 2004. <em><span class="nocase">John E. Freund’s mathematical statistics</span></em>. 7th ed. Upper Saddle River, New Jersey: Pearson Prentice Hall.
</div>
<div id="ref-Rproject" class="csl-entry">
R Core Team. 2022. <em>R: A Language and Environment for Statistical Computing</em>. Vienna, Austria: R Foundation for Statistical Computing. <a href="https://www.R-project.org/">https://www.R-project.org/</a>.
</div>
<div id="ref-Slakter1968" class="csl-entry">
Slakter, Malcolm J. 1968. <span>“<span class="nocase">Accuracy of an Approximation to the Power of the Chi-Square Goodness of Fit Test with Small but Equal Expected Frequencies</span>.”</span> <em>Journal of the American Statistical Association</em> 63 (323): 912–18. <a href="https://doi.org/10.1080/01621459.1968.11009319">https://doi.org/10.1080/01621459.1968.11009319</a>.
</div>
<div id="ref-Sokal1995" class="csl-entry">
Sokal, Robert R, and F James Rohlf. 1995. <em><span>Biometry</span></em>. 3rd ed. New York: W. H. Freeman; Company.
</div>
<div id="ref-Tate1973" class="csl-entry">
Tate, Merle W, and Leon A Hyer. 1973. <span>“<span class="nocase">Inaccuracy of the X2 test of goodness of fit when expected frequencies are small</span>.”</span> <em>Journal of the American Statistical Association</em> 68 (344): 836–41. <a href="https://doi.org/10.1080/01621459.1973.10481433">https://doi.org/10.1080/01621459.1973.10481433</a>.
</div>
<div id="ref-Jamovi2022" class="csl-entry">
The Jamovi Project. 2022. <span>“Jamovi.”</span> Sydney, Australia. <a href="https://www.jamovi.org">https://www.jamovi.org</a>.
</div>
</div>
<div class="footnotes">
<hr />
<ol start="49">
<li id="fn49"><p>A random variable <span class="math inline">\(X\)</span> has a <span class="math inline">\(\chi^{2}\)</span> distribution if and only if its probability density function is defined by <span class="citation">(<a href="#ref-Miller2004" role="doc-biblioref">Miller and Miller 2004</a>)</span>, <span class="math display">\[f(x) = \left\{\begin{array}{ll}\frac{1}{2^{\frac{v}{2}}\Gamma\left(\frac{v}{2}\right)}x^{\frac{v-2}{2}}e^{-\frac{x}{2}} &amp; \quad for\:x &gt; 0 \\ 0 &amp; \quad elsewhere \end{array}\right.\]</span> In this equation, <span class="math inline">\(v\)</span> is the degrees of freedom of the distribution.<a href="frequency-and-count-data.html#fnref49" class="footnote-back">↩︎</a></p></li>
<li id="fn50"><p>A lot of statisticians will use <span class="math inline">\(X^{2}\)</span> to represent the test statistic here instead of <span class="math inline">\(\chi^{2}\)</span> <span class="citation">(<a href="#ref-Sokal1995" role="doc-biblioref">Sokal and Rohlf 1995</a>)</span>. The difference is the upper case ‘X’ versus the Greek letter Chi, ‘<span class="math inline">\(\chi\)</span>’. The X is used since the test statistic we calculate here is not <em>technically</em> from the <span class="math inline">\(\chi^{2}\)</span> distribution, just an approximation. We will not worry about the distinction here, and to avoid confusion, we will just go with <span class="math inline">\(\chi^{2}\)</span>.<a href="frequency-and-count-data.html#fnref50" class="footnote-back">↩︎</a></p></li>
<li id="fn51"><p>We can call these the ‘marginal probabilities’.<a href="frequency-and-count-data.html#fnref51" class="footnote-back">↩︎</a></p></li>
<li id="fn52"><p>This formula works due to a bit of a mathematical trick <span class="citation">(<a href="#ref-Sokal1995" role="doc-biblioref">Sokal and Rohlf 1995</a>)</span>. The actual logic of the degrees of freedom is a bit more involved. From our total of <span class="math inline">\(k = 6\)</span> different combinations, we actually need to subtract 1 degree of freedom for the total sample size (<span class="math inline">\(N = 60\)</span>), then a degree of freedom for each variable probability estimated (i.e., subtract <span class="math inline">\(n_{1} - 1\)</span> and <span class="math inline">\(n_{2} - 1\)</span> because we need this many degrees of freedom to get the <span class="math inline">\(n_{1}\)</span> and <span class="math inline">\(n_{2}\)</span> probabilities, respectively; if we have all but 1 probability, then we know the last probability because the probabilities must sum to 1). Since we lose <span class="math inline">\(n_{1} - 1\)</span> and <span class="math inline">\(n_{2} - 1\)</span> degrees of freedom, and 1 for the sample size, this results in <span class="math inline">\(df = k - (n_{1} - 1) - (n_{2} - 1) - 1\)</span>. In the case of the “Power Up!” example, we get <span class="math inline">\(df = 6 - (3 - 1) - (2 - 1) - 1 = 2\)</span>. The <span class="math inline">\(df = (n_{1} - 1) \times (n_{2} - 1)\)</span> formulation is possible because <span class="math inline">\(k = n_{1} \times n_{2}\)</span> <span class="citation">(<a href="#ref-Sokal1995" role="doc-biblioref">Sokal and Rohlf 1995</a>)</span>.<a href="frequency-and-count-data.html#fnref52" class="footnote-back">↩︎</a></p></li>
</ol>
</div>
            </section>

          </div>
        </div>
      </div>
<a href="Week9.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="correlation.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
    </div>
  </div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
<script>
// Request the "gitbook" module and, in its callback, start it with the
// per-page settings assembled below.
gitbook.require(["gitbook"], function (gitbook) {
  var settings = {
    sharing: {
      github: false,
      facebook: true,
      twitter: true,
      linkedin: false,
      weibo: false,
      instapaper: false,
      vk: false,
      whatsapp: false,
      all: ["facebook", "twitter", "linkedin", "weibo", "instapaper"]
    },
    fontsettings: {
      theme: "white",
      family: "sans",
      size: 2
    },
    edit: {
      // NOTE(review): this targets rstudio/bookdown-demo, while the page's
      // github-repo meta tag names bradduthie/statistical_techniques; it looks
      // like an unchanged template default. Confirm the intended source file.
      link: "https://github.com/rstudio/bookdown-demo/edit/master/09-Correlation.Rmd",
      text: "Edit"
    },
    history: {
      link: null,
      text: null
    },
    view: {
      link: null,
      text: null
    },
    // NOTE(review): file names also look like bookdown-demo defaults; verify
    // that these downloads exist for this book.
    download: ["bookdown-demo.pdf", "bookdown-demo.epub"],
    search: {
      engine: "fuse",
      options: null
    },
    toc: {
      collapse: "subsection"
    }
  };

  gitbook.start(settings);
});
</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    // Inject the MathJax <script> at runtime; the page's \( ... \) and
    // \[ ... \] spans rely on it for typesetting.
    //
    // The URL is always requested over HTTPS. The previous version stripped
    // the scheme to produce a protocol-relative URL, which fetched the script
    // over plain HTTP whenever the page itself was served over HTTP.
    var DEFAULT_SRC = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML";

    // "true" / "" are the placeholder values meaning "use the default CDN";
    // any other value is treated as an explicit script URL.
    var src = "true";
    if (src === "" || src === "true") src = DEFAULT_SRC;

    var script = document.createElement("script");
    script.src = src;
    document.head.appendChild(script);
  })();
</script>
</body>

</html>