forked from regan008/8500-Worksheets
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path4-DataVisualization.html
744 lines (718 loc) · 65.8 KB
/
4-DataVisualization.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.2.269">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Candy Boatwright">
<meta name="dcterms.date" content="2024-02-28">
<title>Worksheet 4: Data Visualization</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1.6em;
vertical-align: middle;
}
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script src="4-DataVisualization_files/libs/clipboard/clipboard.min.js"></script>
<script src="4-DataVisualization_files/libs/quarto-html/quarto.js"></script>
<script src="4-DataVisualization_files/libs/quarto-html/popper.min.js"></script>
<script src="4-DataVisualization_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="4-DataVisualization_files/libs/quarto-html/anchor.min.js"></script>
<link href="4-DataVisualization_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="4-DataVisualization_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="4-DataVisualization_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="4-DataVisualization_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="4-DataVisualization_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
</head>
<body class="fullcontent">
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Worksheet 4: Data Visualization</h1>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p>Candy Boatwright </p>
</div>
</div>
<div>
<div class="quarto-title-meta-heading">Published</div>
<div class="quarto-title-meta-contents">
<p class="date">February 28, 2024</p>
</div>
</div>
</div>
</header>
<p><em>This is the fourth in a series of worksheets for History 8510 at Clemson University. The goal of these worksheets is simple: practice, practice, practice. The worksheet introduces concepts and techniques and includes prompts for you to practice in this interactive document. When you are finished, you should change the author name (above), knit your document, and upload it to canvas. Don’t forget to commit your changes as you go and push to github when you finish the worksheet.</em></p>
<section id="charts-and-graphs-with-ggplot2" class="level2">
<h2 class="anchored" data-anchor-id="charts-and-graphs-with-ggplot2">Charts and Graphs with <code>ggplot2()</code></h2>
<p>An essential element of working with data is trying to make sense of it and communicate your findings. Frequently, the way to do that is through some kind of data visualization. This week we are going to think about how to represent information graphically. How can we highlight patterns and trends in data in a way that helps ourselves and our readers/users understand historical data?</p>
<p>R has many visualization packages but the most powerful of those is <code>ggplot()</code> which builds on the concept of a grammar of graphics. To quote Hadley Wickham, “A grammar of graphics is a tool that enables us to concisely describe the components of a graphic. Such a grammar allows us to move beyond named graphics (e.g., the <code>scatterplot</code>) and gain insight into the deep structure that underlies statistical graphics.” In other words, <code>ggplot()</code> provides a set of tools to map data to visual elements on a plot, to specify the type of plot, and to control the fine details of how that plot will be displayed.</p>
<p>What does that mean in practice?</p>
<p>Any type of plot in R has 3 necessary layers.</p>
<p><strong>1) Data:</strong> A data frame with one or more variables, each one with one or more observations.</p>
<p><strong>2) Aesthetic:</strong> A mapping of one or more variables to one or more visual elements on the graph. For example, you could map a variable to the x-axis, another variable to the y-axis, and a categorical variable to color so that different categories get plotted with different colors.</p>
<p><strong>3) Geometry:</strong> The type or shape of the visual elements on the graph. For example, this could be a point in the case of a scatter plot, a bar in the case of a bar plot, or a line in the case of a line plot.</p>
<p>Let’s load all the libraries we’ll use in this worksheet:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(gapminder) <span class="co">#this is a dataset</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(ggplot2) </span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(tidyverse)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(DigitalMethodsData)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>To begin, we’re going to use one of R’s built-in datasets. First let’s take a look at the data:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb2"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(gapminder)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<ol class="example" type="1">
<li>Take a look at the data. What can you say about it?</li>
</ol>
<blockquote class="blockquote">
<p>It’s a dataset arranged in long form. It contains the average life expectancy, the population, and the GDP per capita for a sampling of countries across different continents from 1952-2007 in 5-year increments. The data does not seem to be complete, however, because not every country has data for every 5 years, although some do.</p>
</blockquote>
<p>This data includes variables for life expectancy and per capita GDP. Let’s say we want to plot these two variables against each other for all country-years within the dataset. First we have to tell <code>ggplot()</code> what our data is.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>p <span class="ot"><-</span> <span class="fu">ggplot</span>(<span class="at">data =</span> gapminder)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>What happened here? A new variable, <code>p</code> was created but we didn’t get a plot yet. That is because we’ve told <code>ggplot</code> what data we’re going to use but we haven’t mapped anything to a plot yet. We need to tell <code>ggplot</code> what variables in the data should be represented by which visual elements in the plot. We also haven’t told it what kind of plot we want.</p>
<p>Mappings in <code>ggplot</code> are defined using the <code>aes()</code> function like this:</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb4"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a>p <span class="ot"><-</span> <span class="fu">ggplot</span>(<span class="at">data =</span> gapminder, <span class="at">mapping =</span> <span class="fu">aes</span>(<span class="at">x =</span> gdpPercap, <span class="at">y=</span>lifeExp))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<p>In this code we gave <code>ggplot()</code> two arguments instead of one - data and mapping. This tells ggplot that we want the data about GDP to be mapped to the x-axis and data about life expectancy on the y-axis. The <code>mapping = aes()</code> argument is <strong>linking variables in our data to things you will see on the plot</strong>.</p>
<p>There are numerous types of aesthetic mappings. x and y values are the most common and obvious but we could also define things like color, shape, size, and line type (solid vs dashed etc.)</p>
<p>We can view the plot that we’ve created by calling the variable that we stored our plot in, <code>p</code>. What we get is an empty plot. You’ll notice that ggplot has created both the x and y-axis for us but there are no points on our plot. That is because we need to add a layer to the plot that includes the data. <code>geom_point()</code> helps us do that. It takes the x and y values and plots them in a scatterplot.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>p <span class="sc">+</span> <span class="fu">geom_point</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-5-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>To summarize what we did above, there were 3 steps we went through. First, tell <code>ggplot()</code> what our data is. Second, add <code>mapping = aes()</code> to map our variables to the plot. And, third, choose a geom. In this case our geom was <code>geom_point()</code> which created the points/data layer on our plot. A fourth step could have included adding a scale, legend, title or some other element to the graph. These are typically referred to as labs.</p>
<ol start="2" class="example" type="1">
<li>Modify the above plot so that the variables it draws on are life expectancy and population. Then, use a comment to annotate the code and mark each of the 3 parts described above.</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb6"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a>p2 <span class="ot"><-</span> <span class="fu">ggplot</span>(<span class="at">data =</span> gapminder, <span class="at">mapping =</span> <span class="fu">aes</span>(<span class="at">x =</span> pop, <span class="at">y=</span>lifeExp)) <span class="co">#Part 1: fetches gapminder as data for ggplot; Part 2: adds pop and lifeExp as variable to the plot</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>p <span class="sc">+</span> <span class="fu">geom_point</span>() <span class="co">#Part 3: adds geom_point as geom in order to create point graph</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-6-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>We can make this plot even more complex. Adding another value, color, to <code>aes()</code> we can change the color of each dot to correspond to a year. We also might want to rename the x and y-axis so that the labels have a more useful description of the variable. We can do so using the labs function.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(<span class="at">data =</span> gapminder, <span class="at">mapping =</span> <span class="fu">aes</span>(<span class="at">x =</span> gdpPercap, <span class="at">y=</span>lifeExp, <span class="at">color=</span>year)) <span class="sc">+</span> <span class="fu">geom_point</span>() <span class="sc">+</span> <span class="fu">labs</span>(<span class="at">x=</span><span class="st">"GDP Per Capita"</span>, <span class="at">y=</span><span class="st">"Life Expectency in Years"</span>, <span class="at">title=</span><span class="st">"Life Expectency and Per Capita GDP by Year"</span>) </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-7-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="3" class="example" type="1">
<li>Using the same plot above, can you edit the code to add a trend line? (Hint: You’ll need to add another geom)</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb8"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(<span class="at">data =</span> gapminder, <span class="at">mapping =</span> <span class="fu">aes</span>(<span class="at">x =</span> gdpPercap, <span class="at">y=</span>lifeExp, <span class="at">color=</span>year)) <span class="sc">+</span> <span class="fu">geom_point</span>() <span class="sc">+</span> <span class="fu">labs</span>(<span class="at">x=</span><span class="st">"GDP Per Capita"</span>, <span class="at">y=</span><span class="st">"Life Expectency in Years"</span>, <span class="at">title=</span><span class="st">"Life Expectency and Per Capita GDP by Year"</span>) <span class="sc">+</span> <span class="fu">geom_smooth</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'</code></pre>
</div>
<div class="cell-output cell-output-stderr">
<pre><code>Warning: The following aesthetics were dropped during statistical transformation: colour
ℹ This can happen when ggplot fails to infer the correct grouping structure in
the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
variable into a factor?</code></pre>
</div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-8-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="4" class="example" type="1">
<li>The file below contains data about the population of cities in 1930 and their recreation expenditures. Can you create a scatter plot from this data?</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>rec <span class="ot"><-</span> <span class="fu">read.csv</span>(<span class="at">file =</span> <span class="st">"https://raw.githubusercontent.com/regan008/DigitalMethodsData/main/raw/Recreation-Expenditures.csv"</span>)</span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(rec, <span class="at">mapping =</span> <span class="fu">aes</span>(<span class="at">x =</span> population, <span class="at">y =</span> total_expenditures, <span class="at">color =</span> population)) <span class="sc">+</span> <span class="fu">geom_point</span>() <span class="sc">+</span> <span class="fu">labs</span>(<span class="at">x=</span><span class="st">"Population"</span>, <span class="at">y=</span><span class="st">"Total Expenditures"</span>, <span class="at">title=</span><span class="st">"Recreation Expenditures and Population of American Cities in 1930"</span>) <span class="sc">+</span> <span class="fu">theme</span>( <span class="at">axis.text.x =</span> <span class="fu">element_blank</span>(), <span class="at">axis.text.y =</span> <span class="fu">element_blank</span>()) <span class="sc">+</span> <span class="fu">geom_smooth</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>`geom_smooth()` using method = 'loess' and formula = 'y ~ x'</code></pre>
</div>
<div class="cell-output cell-output-stderr">
<pre><code>Warning: The following aesthetics were dropped during statistical transformation: colour
ℹ This can happen when ggplot fails to infer the correct grouping structure in
the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
variable into a factor?</code></pre>
</div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-9-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>The <code>geom_point()</code> function allows us to create scatterplots but it’s not the only geom within <code>ggplot2()</code>.</p>
<section id="histogram" class="level3">
<h3 class="anchored" data-anchor-id="histogram">Histogram</h3>
<p>A histogram is a graph used to represent the frequency distribution of a few data points of one variable. Histograms often classify data into various “bins” or “range groups” and count how many data points belong to each of those bins.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb14"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a>recreational.data <span class="ot"><-</span> <span class="fu">read.csv</span>(<span class="st">"https://raw.githubusercontent.com/regan008/DigitalMethodsData/main/raw/Recreation-Expenditures.csv"</span>)</span>
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(recreational.data, <span class="fu">aes</span>(<span class="at">x =</span> population)) <span class="sc">+</span></span>
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_histogram</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
</div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-10-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="5" class="example" type="1">
<li>Use the Boston Women’s voters dataset to create a histogram of the age of voters.</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb16"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(<span class="st">"BostonWomenVoters"</span>)</span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(BostonWomenVoters, <span class="fu">aes</span>(<span class="at">x =</span> Age)) <span class="sc">+</span> <span class="fu">geom_histogram</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
</div>
<div class="cell-output cell-output-stderr">
<pre><code>Warning: Removed 75 rows containing non-finite values (`stat_bin()`).</code></pre>
</div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-11-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="6" class="example" type="1">
<li>By default, <code>geom_histogram()</code> is choosing the number of bins. Can you change the number of bins or the bin width? (Hint: How might you look up the options for this function?)</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(<span class="st">"BostonWomenVoters"</span>)</span>
<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(BostonWomenVoters, <span class="fu">aes</span>(<span class="at">x =</span> Age)) <span class="sc">+</span> <span class="fu">geom_histogram</span>( ,<span class="at">binwidth =</span> .<span class="dv">7</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Warning: Removed 75 rows containing non-finite values (`stat_bin()`).</code></pre>
</div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-12-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<div class="cell">
<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(<span class="st">"BostonWomenVoters"</span>)</span>
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(BostonWomenVoters, <span class="fu">aes</span>(<span class="at">x =</span> Age)) <span class="sc">+</span> <span class="fu">geom_histogram</span>( ,<span class="at">bins =</span> <span class="dv">8</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Warning: Removed 75 rows containing non-finite values (`stat_bin()`).</code></pre>
</div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-13-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="7" class="example" type="1">
<li>Explain, what does changing the number of bins do?</li>
</ol>
<blockquote class="blockquote">
<p>The default is set at 30 bins so decreasing the number of bins for this graph will create a simplified graphic - it will appear as if each age group has one count.</p>
</blockquote>
</section>
<section id="barplots" class="level3">
<h3 class="anchored" data-anchor-id="barplots">Barplots</h3>
<p>A barplot (or barchart) is one of the most common types of graphic. Whereas a histogram shows the number of entries in each “bin” - a bar chart shows the relationship between a numeric and a categorical variable. Each entity in the categorical variable is represented as a bar. The size of the bar represents its numeric value.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a>sc.rec <span class="ot"><-</span> recreational.data <span class="sc">%>%</span> <span class="fu">group_by</span>(state) <span class="sc">%>%</span> <span class="fu">summarize</span>(<span class="at">state.expenditures =</span> <span class="fu">sum</span>(total_expenditures))</span>
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(sc.rec, <span class="fu">aes</span>(<span class="at">x =</span> state, <span class="at">y =</span> state.expenditures )) <span class="sc">+</span></span>
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_col</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-14-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="8" class="example" type="1">
<li>Using the <code>gayguides</code> data, create a bar chart that shows the number of locations in 1980 in South Carolina, Georgia, and North Carolina.</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb24"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span> (<span class="st">"gayguides"</span>)</span>
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a>south80 <span class="ot"><-</span> gayguides <span class="sc">%>%</span> <span class="fu">group_by</span>(state) <span class="sc">%>%</span> <span class="fu">filter</span>(Year <span class="sc">==</span> <span class="dv">1980</span>) <span class="sc">%>%</span> <span class="fu">filter</span>(state <span class="sc">%in%</span> <span class="fu">c</span>(<span class="st">"SC"</span>, <span class="st">"GA"</span>, <span class="st">"NC"</span>)) <span class="sc">%>%</span> <span class="fu">summarize</span>(<span class="at">count =</span> <span class="fu">n</span>()) </span>
<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb24-5"><a href="#cb24-5" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(south80, <span class="fu">aes</span>(<span class="at">x =</span> state, <span class="at">y =</span> count)) <span class="sc">+</span> <span class="fu">geom_col</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-15-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="9" class="example" type="1">
<li>Using the <code>gayguides</code> data, plot the number of religious institutions in each state.</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb25"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span> (<span class="st">"gayguides"</span>)</span>
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a>rel.inst <span class="ot"><-</span> gayguides <span class="sc">%>%</span> <span class="fu">group_by</span>(state) <span class="sc">%>%</span> <span class="fu">filter</span>(type <span class="sc">==</span> <span class="st">"Religious Institution"</span>) <span class="sc">%>%</span> <span class="fu">summarize</span>(<span class="at">count =</span> <span class="fu">n</span>()) </span>
<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(rel.inst, <span class="fu">aes</span>(<span class="at">x =</span> state, <span class="at">y =</span> count)) <span class="sc">+</span> <span class="fu">theme</span>(<span class="at">axis.text.x=</span><span class="fu">element_text</span>(<span class="at">angle=</span><span class="dv">90</span>,<span class="at">hjust=</span><span class="dv">1</span>)) <span class="sc">+</span> <span class="fu">geom_point</span>() <span class="sc">+</span> <span class="fu">geom_segment</span>(<span class="fu">aes</span>(<span class="at">x=</span>state, <span class="at">xend=</span>state, <span class="at">y=</span><span class="dv">0</span>, <span class="at">yend=</span>count))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-16-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="10" class="example" type="1">
<li>Formulate a question based on a dataset of your choice. That question should be something that would be appropriately answered by a bar chart. What is the question you want to explore?</li>
</ol>
<blockquote class="blockquote">
<p>What is the number of Boston women voters whose occupation is listed as a housewife by their birth country?</p>
</blockquote>
<ol start="11" class="example" type="1">
<li>Now write the code to answer that question and use comments (<code>#</code>) to explain your thinking as you go.</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb26"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(<span class="st">"BostonWomenVoters"</span>) <span class="co">#calls BostonWomenVoters dataset</span></span>
<span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a>housewife.by.country <span class="ot"><-</span> BostonWomenVoters <span class="sc">%>%</span> <span class="co">#begins function housewife.by.country</span></span>
<span id="cb26-4"><a href="#cb26-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">filter</span>(Occupation <span class="sc">==</span> <span class="st">"Housewife"</span>) <span class="sc">%>%</span> <span class="co">#filters by occupation to show only housewives</span></span>
<span id="cb26-5"><a href="#cb26-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">group_by</span>(Country.of.Birth) <span class="sc">%>%</span> <span class="co">#groups by country of birth</span></span>
<span id="cb26-6"><a href="#cb26-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">summarize</span>(<span class="at">count =</span> <span class="fu">n</span>()) <span class="sc">%>%</span> <span class="co">#counts the number of housewives by country</span></span>
<span id="cb26-7"><a href="#cb26-7" aria-hidden="true" tabindex="-1"></a> print <span class="co">#prints the subset</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 37 × 2
Country.of.Birth count
<chr> <int>
1 Armenia 1
2 Armenia (Turkey) 1
3 At sea 1
4 Austria 1
5 Azores 1
6 Belgium 1
7 British West Indies 9
8 Canada 341
9 Chile 1
10 Danish West Indies 1
# ℹ 27 more rows</code></pre>
</div>
<div class="sourceCode cell-code" id="cb28"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(housewife.by.country, <span class="fu">aes</span>(<span class="at">x =</span> Country.of.Birth, <span class="at">y =</span> count)) <span class="sc">+</span> <span class="fu">theme</span>(<span class="at">axis.text.x=</span><span class="fu">element_text</span>(<span class="at">angle=</span><span class="dv">90</span>)) <span class="sc">+</span> <span class="fu">geom_col</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-17-1.png" class="img-fluid" width="672"></p>
</div>
</div>
</section>
<section id="line-charts" class="level3">
<h3 class="anchored" data-anchor-id="line-charts">Line Charts</h3>
<p>Line charts excel at demonstrating trends over time.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(gayguides)</span>
<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a>gg.byyear <span class="ot"><-</span> gayguides <span class="sc">%>%</span> <span class="fu">group_by</span>(Year) <span class="sc">%>%</span> <span class="fu">summarize</span>(<span class="at">count =</span> <span class="fu">n</span>())</span>
<span id="cb29-3"><a href="#cb29-3" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(gg.byyear, <span class="fu">aes</span>(<span class="at">x =</span> Year, <span class="at">y =</span> count)) <span class="sc">+</span></span>
<span id="cb29-4"><a href="#cb29-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-18-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="12" class="example" type="1">
<li>Create a line chart that shows the number of religious institutions in the gay guides data over time.</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb30"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span> (<span class="st">"gayguides"</span>)</span>
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-3"><a href="#cb30-3" aria-hidden="true" tabindex="-1"></a>rel.inst.by.year <span class="ot"><-</span> gayguides <span class="sc">%>%</span> <span class="fu">group_by</span>(Year) <span class="sc">%>%</span> <span class="fu">filter</span>(type <span class="sc">==</span> <span class="st">"Religious Institution"</span>) <span class="sc">%>%</span> <span class="fu">summarize</span>(<span class="at">count =</span> <span class="fu">n</span>()) </span>
<span id="cb30-4"><a href="#cb30-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb30-5"><a href="#cb30-5" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(rel.inst.by.year, <span class="fu">aes</span>(<span class="at">x =</span> Year, <span class="at">y =</span> count)) <span class="sc">+</span> <span class="fu">geom_line</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-19-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="13" class="example" type="1">
<li>In the gayguides data, danger or risk is indicated in several different ways. Sometimes it is through the use of an amenity categorization of (HOT) or (AYOR), which meant that these locations were frequented by police and should be used at your own risk. Other times, locations were listed as “inquire locally” in either the description or address field. There are many reasons a location would have requested to have been listed this way. Many times, it was due to harassment and the location deemed it too dangerous to publicly list its location. Can you create a subset of the gay guides data about risk and then use it to create a chart about the usage of these indicators over time?</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb31"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span> (<span class="st">"gayguides"</span>)</span>
<span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a>southern.risk <span class="ot"><-</span> gayguides <span class="sc">%>%</span> <span class="fu">group_by</span>(Year) <span class="sc">%>%</span> <span class="fu">filter</span>(state <span class="sc">==</span> <span class="st">"SC"</span> <span class="sc">|</span> state <span class="sc">==</span> <span class="st">"NC"</span> <span class="sc">|</span> state <span class="sc">==</span> <span class="st">"GA"</span>) <span class="sc">%>%</span> <span class="fu">filter</span>(description <span class="sc">%in%</span> <span class="fu">c</span>(<span class="st">"AYOR"</span>, <span class="st">"HOT"</span>, <span class="st">"Fuzz"</span>)) <span class="sc">%>%</span> <span class="fu">summarize</span>(<span class="at">count =</span> <span class="fu">n</span>()) <span class="sc">%>%</span> print</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 0 × 2
# ℹ 2 variables: Year <int>, count <int></code></pre>
</div>
<div class="sourceCode cell-code" id="cb33"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(southern.risk, <span class="fu">aes</span>(<span class="at">x =</span> Year, <span class="at">y =</span> count)) <span class="sc">+</span> <span class="fu">geom_line</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-20-1.png" class="img-fluid" width="672"></p>
</div>
</div>
</section>
<section id="faceting" class="level3">
<h3 class="anchored" data-anchor-id="faceting">Faceting</h3>
<p>Unlike the previous examples, <code>facet_wrap</code> is not a geom. It splits the chart window into several small parts (a grid), and displays a similar chart in each section. Each section usually shows the same graph for a specific group of the dataset. For example, remember the line chart above that graphed the number of locations in the <code>gayguides</code> data across each year? We can facet that by state to see the number of locations in each state in each year.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb34"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a>gg.byyear <span class="ot"><-</span> gayguides <span class="sc">%>%</span> <span class="fu">filter</span>(state <span class="sc">==</span> <span class="st">"SC"</span> <span class="sc">|</span> state <span class="sc">==</span> <span class="st">"NC"</span> <span class="sc">|</span> state <span class="sc">==</span> <span class="st">"GA"</span>) <span class="sc">%>%</span> <span class="fu">group_by</span>(state, Year) <span class="sc">%>%</span> <span class="fu">summarize</span>(<span class="at">count =</span> <span class="fu">n</span>())</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>`summarise()` has grouped output by 'state'. You can override using the
`.groups` argument.</code></pre>
</div>
<div class="sourceCode cell-code" id="cb36"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(gg.byyear, <span class="fu">aes</span>(<span class="at">x =</span> Year, <span class="at">y =</span> count)) <span class="sc">+</span></span>
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>() <span class="sc">+</span> <span class="fu">facet_wrap</span>(<span class="sc">~</span>state)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-21-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="14" class="example" type="1">
<li>Using the state recreation data, can you create a chart that outlines the total expenditures in South Carolina by city?</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb37"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(rec)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Warning in data(rec): data set 'rec' not found</code></pre>
</div>
<div class="sourceCode cell-code" id="cb39"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb39-1"><a href="#cb39-1" aria-hidden="true" tabindex="-1"></a>SC.City.Exp <span class="ot"><-</span> rec <span class="sc">%>%</span> <span class="fu">filter</span>(state <span class="sc">==</span> <span class="st">"SC"</span>) <span class="sc">%>%</span> <span class="fu">group_by</span>(population, total_expenditures, city) <span class="sc">%>%</span> <span class="fu">summarize</span>(<span class="at">count =</span> <span class="fu">n</span>())</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>`summarise()` has grouped output by 'population', 'total_expenditures'. You can
override using the `.groups` argument.</code></pre>
</div>
<div class="sourceCode cell-code" id="cb41"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb41-1"><a href="#cb41-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(SC.City.Exp, <span class="fu">aes</span>(<span class="at">x =</span> population, <span class="at">y =</span> total_expenditures)) <span class="sc">+</span></span>
<span id="cb41-2"><a href="#cb41-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>() <span class="sc">+</span> <span class="fu">facet_wrap</span>(<span class="sc">~</span>city)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?
`geom_line()`: Each group consists of only one observation.
ℹ Do you need to adjust the group aesthetic?</code></pre>
</div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-22-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="15" class="example" type="1">
<li>Using the gayguides data can you select 5 types of locations and chart the number of each type between 1965 and 1980?</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb43"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb43-1"><a href="#cb43-1" aria-hidden="true" tabindex="-1"></a>gg.by.type <span class="ot"><-</span> gayguides <span class="sc">%>%</span> <span class="fu">filter</span>(type <span class="sc">==</span> <span class="st">"Restaurant"</span> <span class="sc">|</span> type <span class="sc">==</span> <span class="st">"Bars/Clubs"</span> <span class="sc">|</span> type <span class="sc">==</span> <span class="st">"Cruising Areas"</span> <span class="sc">|</span> type <span class="sc">==</span> <span class="st">"Religious Institutions"</span> <span class="sc">|</span> type <span class="sc">==</span> <span class="st">"Bats"</span>) <span class="sc">%>%</span> <span class="fu">filter</span>(Year <span class="sc">>=</span> <span class="dv">1965</span> <span class="sc">&</span> Year <span class="sc"><=</span> <span class="dv">1980</span>) <span class="sc">%>%</span> <span class="fu">group_by</span>(type, Year) <span class="sc">%>%</span> <span class="fu">summarize</span>(<span class="at">count =</span> <span class="fu">n</span>())</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>`summarise()` has grouped output by 'type'. You can override using the
`.groups` argument.</code></pre>
</div>
<div class="sourceCode cell-code" id="cb45"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb45-1"><a href="#cb45-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(gg.by.type, <span class="fu">aes</span>(<span class="at">x =</span> Year, <span class="at">y =</span> count)) <span class="sc">+</span></span>
<span id="cb45-2"><a href="#cb45-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_line</span>() <span class="sc">+</span> <span class="fu">facet_wrap</span>(<span class="sc">~</span>type)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-23-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="16" class="example" type="1">
<li>Using this recreation dataset from last week, can you find the average number of volunteer and paid workers over time?</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb46"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb46-1"><a href="#cb46-1" aria-hidden="true" tabindex="-1"></a>rec.workers <span class="ot"><-</span> <span class="fu">read.csv</span>(<span class="st">"https://raw.githubusercontent.com/regan008/DigitalMethodsData/main/raw/RecreationData-Wide.csv"</span>)</span>
<span id="cb46-2"><a href="#cb46-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb46-3"><a href="#cb46-3" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span> (rec.workers)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Warning in data(rec.workers): data set 'rec.workers' not found</code></pre>
</div>
<div class="sourceCode cell-code" id="cb48"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb48-1"><a href="#cb48-1" aria-hidden="true" tabindex="-1"></a>Vol.and.Pd <span class="ot"><-</span> rec.workers <span class="sc">%>%</span> <span class="fu">group_by</span>(type_of_worker) <span class="sc">%>%</span> <span class="fu">summarise</span>(<span class="at">count =</span> <span class="fu">n</span>())</span>
<span id="cb48-2"><a href="#cb48-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb48-3"><a href="#cb48-3" aria-hidden="true" tabindex="-1"></a><span class="co">#ggplot(Vol.and.Pd, aes(x = X1930, y = count)) +</span></span>
<span id="cb48-4"><a href="#cb48-4" aria-hidden="true" tabindex="-1"></a> <span class="co">#geom_line() + facet_wrap(~type_of_worker)</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
</section>
<section id="practice" class="level3">
<h3 class="anchored" data-anchor-id="practice">Practice</h3>
<p>In the slots below use the datasets for class (or a dataset of your choice) to create a plot. The plot you create in each of these three examples should try to make some kind of historical observation. After you finish creating the plot, write a short paragraph that explains what kind of observation you were trying to make and what your plot demonstrates.</p>
<ol start="17" class="example" type="1">
<li>Plot 1</li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb49"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb49-1"><a href="#cb49-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(<span class="st">"BostonWomenVoters"</span>)</span>
<span id="cb49-2"><a href="#cb49-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb49-3"><a href="#cb49-3" aria-hidden="true" tabindex="-1"></a>BWV.Students.By.State <span class="ot"><-</span> BostonWomenVoters <span class="sc">%>%</span> <span class="fu">filter</span>(Occupation <span class="sc">==</span> <span class="st">"Student"</span>) <span class="sc">%>%</span> <span class="fu">filter</span>(Country.of.Birth <span class="sc">==</span> <span class="st">"United States"</span>) <span class="sc">%>%</span> <span class="fu">group_by</span>(State.or.Province.of.Birth) <span class="sc">%>%</span> <span class="fu">summarize</span>(<span class="at">count =</span> (State.or.Province.of.Birth)) <span class="sc">%>%</span> <span class="fu">print</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
always returns an ungrouped data frame and adjust accordingly.</code></pre>
</div>
<div class="cell-output cell-output-stderr">
<pre><code>`summarise()` has grouped output by 'State.or.Province.of.Birth'. You can
override using the `.groups` argument.</code></pre>
</div>
<div class="cell-output cell-output-stdout">
<pre><code># A tibble: 22 × 2
# Groups: State.or.Province.of.Birth [8]
State.or.Province.of.Birth count
<chr> <chr>
1 Iowa Iowa
2 Maine Maine
3 Maine Maine
4 Maine Maine
5 Massachusetts Massachusetts
6 Massachusetts Massachusetts
7 Massachusetts Massachusetts
8 Massachusetts Massachusetts
9 Massachusetts Massachusetts
10 Massachusetts Massachusetts
# ℹ 12 more rows</code></pre>
</div>
<div class="sourceCode cell-code" id="cb53"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb53-1"><a href="#cb53-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(BWV.Students.By.State, <span class="fu">aes</span>(<span class="at">x =</span> State.or.Province.of.Birth)) <span class="sc">+</span> <span class="fu">theme</span>(<span class="at">axis.text.x=</span><span class="fu">element_text</span>(<span class="at">angle=</span><span class="dv">90</span>)) <span class="sc">+</span> <span class="fu">geom_bar</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-25-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="18" class="example" type="1">
<li><p>Plot 1 explanation: &gt;I want to know the dispersion of home states of the women who were registered to vote in Boston in 1920 whose occupation was listed as a student. I need to filter by occupation, then by birth country to see only American-born voters. Then I need to group by state of birth. The plot demonstrates the scarcity of women students in Boston in 1920 and that the vast majority of students are born in Boston.</p></li>
<li><p>Plot 2</p></li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb54"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb54-1"><a href="#cb54-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(<span class="st">"gapminder"</span>)</span>
<span id="cb54-2"><a href="#cb54-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb54-3"><a href="#cb54-3" aria-hidden="true" tabindex="-1"></a>gm.Americas <span class="ot"><-</span> gapminder <span class="sc">%>%</span> <span class="fu">filter</span>(continent <span class="sc">==</span> <span class="st">"Americas"</span>)</span>
<span id="cb54-4"><a href="#cb54-4" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb54-5"><a href="#cb54-5" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(gm.Americas, <span class="at">mapping =</span> <span class="fu">aes</span>(<span class="at">x =</span> gdpPercap, <span class="at">y=</span>lifeExp, <span class="at">color=</span>country)) <span class="sc">+</span> <span class="fu">geom_point</span>() <span class="sc">+</span> <span class="fu">labs</span>(<span class="at">x=</span><span class="st">"GDP Per Capita"</span>, <span class="at">y=</span><span class="st">"Life Expectency in Years"</span>, <span class="at">title=</span><span class="st">"Life Expectency and Per Capita GDP by Year"</span>) <span class="sc">+</span> <span class="fu">geom_smooth</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>`geom_smooth()` using method = 'loess' and formula = 'y ~ x'</code></pre>
</div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-26-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<ol start="20" class="example" type="1">
<li><p>Plot 2 explanation: &gt;I want to plot the life expectancy and GDP by year of the countries in the Americas in the gapminder dataset. I filtered the dataset by continent and then plotted the GDP(x) and the life expectancy(y). By changing the color to country it somewhat shows the individual countries’ growth over time, although it’s probably too cluttered to be very useful.</p></li>
<li><p>Plot 3. For this one, try a different kind of plot. <a href="https://github.com/regan008/DigitalMethodsData/blob/main/raw/RecreationData-Wide.csv">Check out a useful gallery here.</a> (For this week, avoid choosing a map. We’ll talk more about maps and geographic data over the next two weeks.)</p></li>
</ol>
<div class="cell">
<div class="sourceCode cell-code" id="cb56"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb56-1"><a href="#cb56-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(<span class="st">"gapminder"</span>)</span>
<span id="cb56-2"><a href="#cb56-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb56-3"><a href="#cb56-3" aria-hidden="true" tabindex="-1"></a>gm.Americas <span class="ot"><-</span> gapminder <span class="sc">%>%</span> <span class="fu">filter</span>(continent <span class="sc">==</span> <span class="st">"Americas"</span>) <span class="sc">%>%</span> <span class="fu">filter</span>(year <span class="sc">==</span> <span class="st">"1952"</span> <span class="sc">|</span> year <span class="sc">==</span> <span class="st">"1972"</span> <span class="sc">|</span> year <span class="sc">==</span> <span class="st">"1992"</span> <span class="sc">|</span> year <span class="sc">==</span> <span class="st">"2002"</span>)</span>
<span id="cb56-4"><a href="#cb56-4" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb56-5"><a href="#cb56-5" aria-hidden="true" tabindex="-1"></a><span class="fu">ggplot</span>(gm.Americas, <span class="at">mapping =</span> <span class="fu">aes</span>(<span class="at">x =</span> gdpPercap, <span class="at">y=</span>lifeExp, <span class="at">label =</span> country)) <span class="sc">+</span> <span class="fu">geom_label</span>() </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="4-DataVisualization_files/figure-html/unnamed-chunk-27-1.png" class="img-fluid" width="672"></p>
</div>
</div>
<p>Plot 3 explanation: &gt;Plots the life expectancy and GDP of countries in the Americas for the years 1952, 1972, 1992, and 2002. Putting the names of the countries allows for a more striking visualization of the correlation between the two variables.</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb57"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb57-1"><a href="#cb57-1" aria-hidden="true" tabindex="-1"></a>Union.Incidents <span class="ot"><-</span> <span class="fu">read.csv</span>(<span class="st">"https://docs.google.com/spreadsheets/d/e/2PACX-1vSRFwjcJ0Xnnic2NrdWXC5ld2LkDwJAfRCU7lq0ohhYIaPjhd-s-MfAdBM0sgarXzH-qoCCkwr4yRns/pub?output=csv"</span>)</span>
<span id="cb57-2"><a href="#cb57-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb57-3"><a href="#cb57-3" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span> (Union.Incidents)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stderr">
<pre><code>Warning in data(Union.Incidents): data set 'Union.Incidents' not found</code></pre>
</div>
<div class="sourceCode cell-code" id="cb59"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb59-1"><a href="#cb59-1" aria-hidden="true" tabindex="-1"></a>Union.Words <span class="ot"><-</span> Union.Incidents <span class="sc">%>%</span> <span class="fu">select</span> (ArticleTitle, DiscriptionOfOccurence, ConsequenceOfOccurence) </span>
<span id="cb59-2"><a href="#cb59-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb59-3"><a href="#cb59-3" aria-hidden="true" tabindex="-1"></a><span class="co">#ggwordcloud(Union.Incidents, aes(label = Union.Words))</span></span>
<span id="cb59-4"><a href="#cb59-4" aria-hidden="true" tabindex="-1"></a><span class="co">#ggplot(Union.Incidents, aes(label = "ArticleTitle", "DiscriptionOfOccurence", "ConsequenceOfOccurence")) +</span></span>
<span id="cb59-5"><a href="#cb59-5" aria-hidden="true" tabindex="-1"></a> <span class="co">#geom_text_wordcloud() +</span></span>
<span id="cb59-6"><a href="#cb59-6" aria-hidden="true" tabindex="-1"></a> <span class="co">#scale_size_area(max_size = 40) +</span></span>
<span id="cb59-7"><a href="#cb59-7" aria-hidden="true" tabindex="-1"></a> <span class="co">#theme_minimal()</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<ol start="22" class="example" type="1">
<li>Plot 4 explanation: >Creates a wordcloud using the article title, description and consequence columns for the dataset I created about anti-Confederate incidents reported in the Greenville Enterprise.</li>
</ol>
</section>
</section>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
// Tag <body> with the color-mode class that matches a stylesheet element.
//   bsSheetEl - element whose "data-mode" attribute is read; the value "dark"
//               selects "quarto-dark", anything else (including a missing
//               attribute) selects "quarto-light".
// The opposite class is removed so only one of the two is ever present.
const toggleBodyColorMode = (bsSheetEl) => {
  const isDark = bsSheetEl.getAttribute("data-mode") === "dark";
  const [classToAdd, classToDrop] = isDark
    ? ["quarto-dark", "quarto-light"]
    : ["quarto-light", "quarto-dark"];
  const { classList } = window.document.querySelector("body");
  classList.add(classToAdd);
  classList.remove(classToDrop);
};
// Apply the body color-mode class based on the stylesheet matched by
// "link#quarto-bootstrap". Does nothing when no such element exists.
const toggleBodyColorPrimary = () => {
  const primarySheet = window.document.querySelector("link#quarto-bootstrap");
  if (!primarySheet) {
    return;
  }
  toggleBodyColorMode(primarySheet);
};
// Set the initial light/dark class on <body> as soon as the DOM is ready.
toggleBodyColorPrimary();

// Configure AnchorJS and register every element matching '.anchored'.
// NOTE(review): the icon literal is an empty string here — confirm that no
// glyph character was lost from it.
const icon = "";
const anchorJS = new window.AnchorJS();
anchorJS.options = { placement: 'right', icon };
anchorJS.add('.anchored');
// Wire ClipboardJS to every '.code-copy-button'. The copy target for a button
// is the element immediately before it among its siblings.
const clipboard = new window.ClipboardJS('.code-copy-button', {
  target: (trigger) => trigger.previousElementSibling,
});
// After a successful copy, flash "Copied!" feedback on the triggering
// code-copy button for one second, then restore its original state.
clipboard.on('success', function(e) {
  // button target
  const button = e.trigger;
  // don't keep focus
  button.blur();
  // A flash is already running for this button (repeat click within the
  // one-second window). Starting another would record "Copied!" as the title
  // to restore and leave the button mislabelled once the last timer fires,
  // so just clear the selection and let the running flash finish.
  if (button.classList.contains('code-copy-button-checked')) {
    e.clearSelection();
    return;
  }
  // flash "checked"
  button.classList.add('code-copy-button-checked');
  // remember the real title so it can be put back when the flash ends
  const currentTitle = button.getAttribute("title");
  button.setAttribute("title", "Copied!");
  let tooltip;
  // only show a tooltip when window.bootstrap is available
  if (window.bootstrap) {
    button.setAttribute("data-bs-toggle", "tooltip");
    button.setAttribute("data-bs-placement", "left");
    button.setAttribute("data-bs-title", "Copied!");
    tooltip = new bootstrap.Tooltip(button, {
      // shown and hidden explicitly below rather than on hover/focus
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8],
    });
    tooltip.show();
  }
  setTimeout(function() {
    if (tooltip) {
      tooltip.hide();
      // remove the temporary tooltip attributes added above
      button.removeAttribute("data-bs-title");
      button.removeAttribute("data-bs-toggle");
      button.removeAttribute("data-bs-placement");
    }
    button.setAttribute("title", currentTitle);
    button.classList.remove('code-copy-button-checked');
  }, 1000);
  // clear code selection
  e.clearSelection();
});
// Attach a hover popup to an element via window.tippy.
//   el        - element handed to tippy as the popup trigger
//   contentFn - passed through unchanged as the popup's `content` option
// Returns nothing; the tippy instance is not exposed to the caller.
function tippyHover(el, contentFn) {
  window.tippy(el, {
    allowHTML: true,
    content: contentFn,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // mount the popup inside the parent of the element tippy passes in
    appendTo: (reference) => reference.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  });
}
// Give every footnote reference link a hover popup showing the HTML of the
// footnote it points to. The popup content is resolved lazily, each time the
// callback passed to tippyHover is invoked.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (const ref of noterefs) {
  tippyHover(ref, function() {
    // use id or data attribute instead here
    let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // a link with neither attribute has nothing to look up
    if (!href) {
      return "";
    }
    // Absolute URLs are reduced to their fragment. Values that are not
    // absolute URLs (e.g. "#fn1") make the URL constructor throw; that is
    // expected and the raw value is used as-is.
    try { href = new URL(href).hash; } catch {}
    const id = href.replace(/^#\/?/, "");
    const note = window.document.getElementById(id);
    // a dangling reference yields an empty popup instead of a TypeError
    return note ? note.innerHTML : "";
  });
}
// Walk up from `el` looking for the nearest ancestor that carries a non-empty
// `data-cites` attribute.
// Returns { el, cites } where `el` is the child of that ancestor on the path
// from the starting element (the starting element itself when its direct
// parent matches) and `cites` is the attribute value split on single spaces.
// Returns undefined when no ancestor has the attribute.
const findCites = (el) => {
  let current = el;
  let parent = current.parentElement;
  while (parent) {
    const cites = parent.dataset.cites;
    if (cites) {
      return { el: current, cites: cites.split(' ') };
    }
    current = parent;
    parent = current.parentElement;
  }
  return undefined;
};
// Give every bibliography citation link a hover popup that lists the
// bibliography entries it cites. Links with no `data-cites` ancestor (per
// findCites) are skipped.
const bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const biblioref of bibliorefs) {
  const citeInfo = findCites(biblioref);
  if (!citeInfo) {
    continue;
  }
  tippyHover(citeInfo.el, () => {
    // build the popup in a detached container and hand back its markup
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      const entry = window.document.createElement('div');
      entry.classList.add('hanging-indent');
      entry.classList.add('csl-entry');
      // copy the entry's markup from the element with id "ref-<cite>";
      // an unknown key still contributes an empty entry
      const source = window.document.getElementById(`ref-${cite}`);
      if (source) {
        entry.innerHTML = source.innerHTML;
      }
      popup.appendChild(entry);
    }
    return popup.innerHTML;
  });
}
});
</script>
</div> <!-- /content -->
</body></html>