-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathData harvesting project.Rmd
2061 lines (1556 loc) · 73.4 KB
/
Data harvesting project.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: "Spotify Api"
subtitle: "A Data Harvesting Project"
categories: "2024"
author: "Fernanda Martín & Carlos San Juan"
date: '`r Sys.Date()`'
documentclass: krantz
monofont: "Source Code Pro"
monofontoptions: "Scale=0.7"
biblio-style: apalike
graphics: yes
output:
  html_document:
    toc: true
    toc_float: true
    toc_depth: 3
    number_sections: true
    fig_caption: true
    theme: "paper"
    highlight: "tango"
editor_options:
  chunk_output_type: "inline"
---
```{r setup, include=FALSE, message=FALSE, warning=FALSE}
# Global chunk defaults: figures are centred and use the full output width.
knitr::opts_chunk$set(fig.align = "center", out.width = "100%")
```
```{r, include=FALSE, message=FALSE, warning=FALSE}
library(tidyverse)
library(httr2)
library(httr)
library(ggplot2)
library(jsonlite)
library(xml2)
library(stringr)
library(stringdist)
library(stargazer)
library(plotly)
#install.packages("dotenv") Make sure you have downloaded all the libraries, especially this one.
library(dotenv)
library(fmsb)
library(data.table)
library(igraph)
library(dplyr)
library(tidyr)
library(viridis)
```
# Introduction to Spotify Api
In this data harvesting project, we're going to use the Spotify API. This API allows programmers to access and use Spotify data to add features like searching for music, getting details about artists, albums, and playlists, and playing songs in their own applications. Moreover, it offers many tools for developing unique applications that utilize Spotify's extensive music collection.
## What is an API?
An API (Application Programming Interface) is a set of rules and protocols that allow different applications to communicate with each other in a structured and secure way. It serves as an intermediary that facilitates interaction between software, allowing access to resources or services of other applications without the need to understand their internal implementation. APIs have a variety of applications, from system integration and mobile application development to process automation, accelerating software development and fostering collaboration between companies in the creation of interoperable digital ecosystems.
## What are we going to do?
We will use the Spotify API to delve into sentiment analysis of various musical collections, including albums and playlists. Through this process, we'll explore how emotions and tones vary across different works and genres, identifying patterns and trends in music. This analysis will allow us to better understand the connection between musicality and emotional expression, opening new insights into the impact of music on listeners.
# Get the credential to use the Api
To start using the Spotify Api, the following steps are required:
## Log in to Spotify and create an account for Developers
The first step, which is mandatory, is to sign in with a Spotify account or create a Spotify account in order to access the API. You can register [by clicking here](https://developer.spotify.com/documentation/web-api). This link will take you to the Spotify Api home page where you will see the option to log in on the top right hand side, by clicking on `Log in` (This may have changed depending on when you read this paper). You can log in with a Google account or with your personal email address.
Once you are logged in, you will be returned to the Spotify API homepage. On this page you can find all the documentation for the API and its different uses. All APIs require detailed documentation to make them easy to use and interact with, providing details about endpoints, parameters and responses. This documentation is essential for carrying out this work, and we encourage anyone who wants to better understand how the API works, or who plans to work with it in the future, to read it.
Once logged in, the next step is to create a developer account. To do this, click on the top right-hand side of the tab where your name will appear, specifically on the `Dashboard` tab.
Once we are in the `Dashboard` we must click on the `create app` button. Once we have clicked on the button we must add a name of our choice and a description. You also need to fill in the `Redirect URI` box: enter one or more addresses that you want to allowlist with Spotify. This URI enables the Spotify authentication service to automatically invoke your app every time the user logs in (e.g. <http://localhost:8080>). This can be changed at any time.
It is also necessary to tick the Web API option and accept the Developer Terms of Service: to do this, put a tick in the Developer Terms of Service checkbox and finally click on `CREATE`. Once these steps have been followed, your application is registered, and you'll be redirected to the app overview page.
Congratulations, you can now use the Spotify API! There's just one last step left...
Once you are in the Dashboard created, click on the `Settings` option. On this page you will see your `Client ID` and if you click on `View client secret` you will see your `Client Secret`. The codes that appear are personal to each individual, so in order for the following code to work and for you to see the results of our work you need to do the following. You need to save them for the next step.
This step is very important, so pay attention. You have to create a text file called `.env`. You can do this from R, by clicking on the top left to generate a new file and more specifically by clicking on `Text files`. Inside that file you should put the following:
- "client_id=YourPasswordHere". With `YourPasswordHere` being your actual `Client ID`.
- "client_secret=YourPasswordHere". With `YourPasswordHere` being your actual `Client Secret`.
You only have to write in the file what contains the inverted commas with `YourPasswordHere` being your real passwords.
It is recommended that you save the file `.env` in the same folder that you have saved this work in order to have it located and easy to access, because we will use it later.
# Authorization
Authorization refers to the process of granting a user or application access permissions to Spotify data and features (e.g your application needs permission from a user to access their playlists).
Spotify implements the OAuth 2.0 authorization framework:
![Spotify OAuth 2.0 authorization flow.](auth_intro.png)
## Preliminary Step
Once we have done the previous steps and pasted the client codes in the text files we must read them in R
```{r}
# Load the variables defined in the local `.env` file into the session
# environment, then copy the two Spotify credentials into R variables.
dotenv::load_dot_env()
client_secret <- Sys.getenv("client_secret")
client_ID <- Sys.getenv("client_id")
```
If you have followed the steps correctly, this chunk should have run perfectly. Otherwise, make sure you have followed our instructions precisely. If you have not saved the `.env` file in the same folder as the repository, replace the code above with the following: `dotenv::load_dot_env("your_path_here/.env")`.
## Request an Access Token
The next step is to get the access token, which is a string containing the credentials and permissions that can be used to access a certain resource (e.g. artists, albums or tracks) or user data (e.g. your profile or playlists). For this, we will need the client_ID and the client_secret that we created earlier, so... Let's get the token.
First, we set the URL to which we are going to make the request.
```{r}
# Endpoint of the Spotify Accounts service that the token request is sent to.
URL <- "https://accounts.spotify.com/api/token"
```
```{r}
# Request an access token with the client-credentials grant. The client ID
# and secret are sent through `authenticate()` and the body is form-encoded.
token_req <- httr::POST(
  url = URL,
  httr::accept_json(),
  httr::authenticate(client_ID, client_secret),
  encode = "form",
  body = list(grant_type = "client_credentials")
)
```
Once the token has been requested, the next and final step is to verify the status code of the response and display the token.
```{r}
# HTTP status of the token request (200 means it succeeded).
httr::status_code(token_req)
# Parsed body of the reply. NOTE(review): this prints the access token into
# the rendered document.
httr::content(token_req)
```
As you can see, the first command shows `200`, which means that the request was successful. If you get another code (such as `404` or `401`), you have to repeat the previous steps because something has been executed incorrectly. A `500` code means that there is an error on the server.
In the second output, the first item is your personal token, the second is the token type, which should be `Bearer`, and the last is how long the token can be used, in this case 1 hour.
The following code is intended to save the token:
```{r}
# Keep only the access token string from the parsed token response.
token <- httr::content(token_req)[["access_token"]]
```
## Last Steps
We are going to establish a baseline that we will use for the following requests. First, we'll set up base urls that we'll use in future queries.
```{r}
# Base URL that every Spotify Web API request below is built from.
spotify_req <- "https://api.spotify.com/v1"
```
To efficiently manage future requests and prevent server overload, we will implement some basic rules in the code. Specifically, we will limit the number of attempts to a maximum of 5 for each request. Additionally, to control the frequency of requests, we will establish a sending rate of up to 20 requests per minute, that is, one request every three seconds on average. Each request will have a maximum timeout of 20 seconds to complete before being considered a failure. Lastly, we will configure the request headers to accept any type of content, thus ensuring broad compatibility with different data formats.
```{r}
# Base request shared by every Spotify Web API call in this document:
# bearer authentication, retries, throttling, a timeout and an Accept header.
req <- request(spotify_req) |>
  req_auth_bearer_token(token) |>
  # Give up after 5 attempts.
  req_retry(max_tries = 5) |>
  # `rate` is expressed in requests per second, so 20 per minute is 20 / 60.
  req_throttle(rate = 20 / 60) |>
  # `req_timeout()` takes seconds, not milliseconds: wait at most 20 seconds.
  req_timeout(20) |>
  # Accept any response content type.
  req_headers(Accept = "*/*")
```
We have finished the introduction! Now it's time to become ninjas to perform Data Harvesting.
# Playlist "Bucle"
This playlist is original to the website, which is updated daily based on the songs we listen to the most in the previous days. Updating every two days, this playlist not only stays fresh and relevant but also offers a unique window into the moods and musical preferences of its audience at any given time.
By analysing the lyrics, melodies, and perhaps even the comments or popularity of the songs included, a comprehensive view of how the community is feeling can be gained. This type of analysis could reveal emotional trends, such as an increased preference for more upbeat or melancholic songs, depending on external factors such as the season, world events, or even social changes within the community.
In the following, we will use our own Playlist as a reference to make a sentiment analysis, to see how we are feeling. As it is updated daily, we will not be able to talk about the results, because it is constantly changing. So using it as a reference, we could see how we are evolving and if we are in a moment of greater happiness or sadness.
First of all, we are going to extract the dataset of the music we listen to the most.
```{r}
# Spotify ID of the playlist to analyse (the last path segment of its URL).
En_Bucle <- "37i9dQZF1Epm557tGqQWq5"
```
Before we proceed, a note: if, instead of observing how we are, you wish to know how you are—yes, we are speaking to you, reader—you should follow these steps.
This is the original link to my playlist "En Bucle": <https://open.spotify.com/playlist/37i9dQZF1Epm557tGqQWq5>. As you can see, to make it work we have to select what comes after the last slash ("37i9dQZF1Epm557tGqQWq5"). If you want to run the sentiment analysis on yourself, go to the Spotify website, search for this playlist, copy its link, and assign the part of the link mentioned above to `En_Bucle`.
Now it is time to continue our analysis of sentiment:
```{r}
# Download the playlist object and parse the JSON body into nested R
# lists / data frames.
Bucle <- req |>
  req_url_path_append(paste("playlists", En_Bucle, sep = "/")) |>
  req_perform() |>
  resp_body_json(simplifyVector = TRUE)

# Keep one row per playlist entry with only the track-level fields.
# `tracks` is written in full: `$` partial matching on lists would also
# resolve `track`, but silently breaks if another element with that prefix
# ever appears in the response.
Bucle <- Bucle$tracks$items |>
  select(track) |>
  unnest(track) |>
  select(c(artists, album, name, id, explicit, duration_ms, popularity, href))
Bucle

# Track IDs, used later to query the audio features of each song.
Buclesongs <- Bucle$id
```
As can be observed, we have information on the top 30 most listened to songs, which is why we have 30 rows, one for each song. Our aim is to gather information strictly related to the track itself, excluding details about the thumbnail or the uploader. Therefore, we will choose the track field and proceed to expand it, revealing the contained details.
We have also saved the track IDs in `Buclesongs`, which we will use later on.
However, we still retain some dataframes like those of the artist or album. Let's work with that...
```{r}
# Artist label per track. Unnesting `artists` can yield several rows per
# track (presumably one per credited artist), so only the first is kept.
# Rows are keyed by track `id` rather than `name`, so two different tracks
# that share a title no longer receive each other's labels.
Buclefiltered_1 <- Bucle |>
  unnest(artists, names_sep = "_") |>
  select(id, name, artist_name = artists_name) |>
  distinct(id, .keep_all = TRUE)
Buclefiltered_1

# Attach the artist label to the full track table.
Bucle_1 <- left_join(Buclefiltered_1, Bucle, by = c("id", "name"))

# Album label per track, built the same way from the nested `album` column.
Buclefiltered_2 <- Bucle |>
  unnest(album, names_sep = "_") |>
  select(id, name, album_name) |>
  distinct(id, .keep_all = TRUE)

Bucle_2 <- left_join(Buclefiltered_2, Bucle_1, by = c("id", "name"))

# Drop the nested columns and the fields that are not needed downstream
# (`duration_ms` comes back later with the audio features).
Bucle_final <- Bucle_2 |>
  select(!c(album, artists, href, explicit, duration_ms))
```
The next step is to extract more information from the Spotify API to carry out the sentiment analysis.
The great thing about working with the Spotify API is the ability to dive into deep analysis of music through unique data and metrics offered by the platform. Besides providing classic indicators such as tempo, key, and duration of songs, Spotify goes further by offering unique and insightful indicators that reflect the inherent qualities of music. These include "instrumentalness," which measures the likelihood that a song does not contain vocals; "danceability," which assesses how suitable a song is for dancing based on a combination of musical elements, including tempo, rhythm stability, beat strength, and overall regularity; and "energy," which is a measure representing the perceived intensity and activity of a song, determined by dynamics, loudness, timbre, onset, and the song's overall vibe.
The ability to explore not only the basic aspects of songs, such as their duration or key but also to delve into how the songs feel, how danceable they are, or their energy, opens up a world of possibilities for creative and detailed research.
That's why we're going to use the following code, to observe the main emotions generated by the songs we have in our "En Bucle" Playlist.
```{r}
# Query the audio-features endpoint once per distinct track. Iterating over
# the IDs themselves (instead of `1:length(...)`) is also safe when the
# playlist is empty.
Bucle_feature_rows <- lapply(unique(Buclesongs), function(track_id) {
  # Pause between calls to stay well below the API rate limit.
  Sys.sleep(2)
  req |>
    req_url_path_append(paste("audio-features", track_id, sep = "/")) |>
    req_perform() |>
    resp_body_json(simplifyVector = TRUE) |>
    as_tibble()
})

# Stack the one-row results in a single step instead of growing a data frame
# inside the loop. `rev()` keeps the last-fetched track first, which is the
# row order that prepending each result used to produce.
En_Bucle_feelings <- bind_rows(rev(Bucle_feature_rows))
En_Bucle_feelings
```
The problem with this code is that we lose the names of the songs. Although there are other solutions, to make everything much clearer we are going to use a `full_join` to join the two data frames.
```{r}
# Combine the track labels with their audio features (matched on track id),
# put the labels first, express the duration in seconds instead of
# milliseconds and number the rows in their current order.
Bucle_full <- Bucle_final |>
  full_join(En_Bucle_feelings, by = "id") |>
  ungroup() |>
  relocate(name, album_name) |>
  mutate(
    duration = duration_ms / 1000,
    duration_ms = NULL,
    Position = row_number()
  )
Bucle_full
```
We did it! We have all the songs, with their respective sentiments! Now it's time to analyse the results.
## Sentiment Analysis
First, we will begin by giving a brief overview of the main characteristics of the songs offered by the Spotify API.
Before that, I will provide a series of definitions of the variables we will use in the analysis, so when the results come out upon executing the code, you can understand my mood if you use my playlist, or yours, in case you have modified the original link for yours. In any case, we cannot directly discuss the obtained results since these can change over time. Let's go through them:
- **Danceability**: Danceability describes how suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity. A value of 0.0 is least danceable and 1.0 is most danceable.
- **Energy**: Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale. Perceptual features contributing to this attribute include dynamic range, perceived loudness, timbre, onset rate, and general entropy.
- **Instrumentalness**: Predicts whether a track contains no vocals. "Ooh" and "aah" sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly "vocal". The closer the instrumentalness value is to 1.0, the greater likelihood the track contains no vocal content. Values above 0.5 are intended to represent instrumental tracks, but confidence is higher as the value approaches 1.0.
- **Liveness**: Detects the presence of an audience in the recording. Higher liveness values represent an increased probability that the track was performed live. A value above 0.8 provides strong likelihood that the track is live.
- **Speechiness**: Speechiness detects the presence of spoken words in a track. The more exclusively speech-like the recording (e.g., talk show, audiobook, poetry), the closer to 1.0 the attribute value. Values above 0.66 describe tracks that are probably made entirely of spoken words. Values between 0.33 and 0.66 describe tracks that may contain both music and speech, either in sections or layered, including such cases as rap music. Values below 0.33 most likely represent music and other non-speech-like tracks.
- **Valence**: A measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track. Tracks with high valence sound more positive (e.g., happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g., sad, depressed, angry).
```{r}
# Statistical summary of each audio feature across the playlist
summary(Bucle_full[["danceability"]])
summary(Bucle_full[["energy"]])
summary(Bucle_full[["instrumentalness"]])
summary(Bucle_full[["liveness"]])
summary(Bucle_full[["speechiness"]])
summary(Bucle_full[["valence"]])
```
To illustrate these concepts more clearly, next, we will provide a chart that displays these results in a more understandable manner.
```{r}
# Reshape to long format: one row per (song, feature) pair.
Bucle_long <- Bucle_full |>
  pivot_longer(
    cols = c(
      danceability, energy, instrumentalness,
      liveness, speechiness, valence
    ),
    names_to = "Feature",
    values_to = "Value"
  )

# One box per audio feature, each with its own fill colour.
p <- ggplot(Bucle_long, aes(x = Feature, y = Value, fill = Feature)) +
  geom_boxplot() +
  scale_fill_manual(
    values = c(
      "danceability" = "#ba01ff",
      "energy" = "#02fcf3",
      "speechiness" = "#ff6f00",
      "liveness" = "#b6ff00",
      "instrumentalness" = "blue",
      "valence" = "#ff0092"
    )
  ) +
  labs(
    title = "Distribution of Song Characteristics",
    subtitle = "Comparation between Danceability, Energy, Instrumentalness, Liveness, Speechiness, and Valence",
    x = "",
    y = "Value"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 20),
    plot.subtitle = element_text(face = "italic", size = 14),
    plot.caption = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Render the static plot as an interactive widget.
ggplotly(p)
```
In this box plot, you will be able to see how your most listened to songs are distributed, which can likely say a lot about your current mood.
Nonetheless, we believe that plotting each variable may be much more useful to graphically see if there are patterns or not. To do this, we will focus exclusively on three characteristics: Danceability, Valence, and Energy.
## Danceability
```{r}
# Danceability of each song against its position in the playlist. Colour
# encodes the same value; the `text` aesthetic is only used for the hover
# label of the interactive version.
dance <- ggplot(
  Bucle_full,
  aes(
    x = Position,
    y = danceability,
    color = danceability,
    text = paste("Song: ", name, "\nAuthor: ", artist_name)
  )
) +
  geom_point(shape = 18, size = 2, alpha = 0.7) +
  expand_limits(y = c(0.25, 1)) +
  scale_color_gradientn(
    name = "",
    colors = c(
      "#ff2a6d", "#fd8c25", "#faf834",
      "#48f23a", "#31faff", "#4250f2"
    ),
    values = scales::rescale(c(0, 1)),
    limits = c(0, 1),
    breaks = c(0, 0.25, 0.5, 0.75, 1),
    labels = c("Static", "0.25", "0.5", "0.75", "Highly Danceable")
  ) +
  guides(color = guide_colourbar(title.position = "top", title.hjust = 0.5)) +
  labs(
    title = "Let's Dance!",
    subtitle = "Dive into the rhythm of the Top songs and their danceability",
    x = "Position in Your Rank",
    y = "Danceability"
  ) +
  theme_light(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica"),
    plot.title = element_text(size = 22, face = "bold"),
    plot.subtitle = element_text(size = 18),
    legend.position = "right",
    legend.title = element_text(family = "Helvetica", face = "bold"),
    legend.text = element_text(family = "Helvetica")
  )

# Interactive version: the hover label shows the song/artist text and the
# colour value; a caption is placed above the plotting area.
dance_plotly <- ggplotly(dance, tooltip = c("text", "color"))
dance_plotly <- layout(
  dance_plotly,
  margin = list(t = 100),
  annotations = list(
    text = "Feel the beat! The color intensity mirrors each song's danceability.",
    x = 0.5,
    y = 1.1,
    xref = "paper",
    yref = "paper",
    showarrow = FALSE,
    align = "center",
    font = list(size = 14)
  )
)
dance_plotly
```
In this chart, we can see the distribution of songs, with those closest to the x-axis being the least danceable and those further away being more danceable. If we hover over the diamond shape, we can see the song and its artist.
## Valence
```{r}
# Valence of each song against its position in the playlist. Colour encodes
# the same value; the `text` aesthetic is only used for the hover label of
# the interactive version.
valence <- Bucle_full |>
  ggplot(aes(
    x = Position,
    y = valence,
    color = valence,
    text = paste("Song: ", name, "\nAuthor: ", artist_name)
  )) +
  geom_point(size = 2, alpha = 0.7, shape = 18) +
  scale_color_gradientn(
    colors = c(
      "#ff2a6d", "#fd8c25", "#faf834",
      "#48f23a", "#31faff", "#4250f2"
    ),
    values = scales::rescale(c(0, 1)),
    limits = c(0, 1),
    breaks = c(0, 0.25, 0.5, 0.75, 1),
    labels = c("Melancholy", "0.25", "0.5", "0.75", "Joy"),
    name = ""
  ) +
  expand_limits(y = c(0.25, 1)) +
  labs(
    title = "The Spectrum of Emotions",
    x = "Position in Your Rank",
    y = "Valence",
    subtitle = "Explore the emotional breadth of the Top songs through their valence"
  ) +
  theme_light(base_size = 14) +
  theme(
    plot.title = element_text(size = 22, face = "bold"),
    plot.subtitle = element_text(size = 18),
    legend.position = "right",
    text = element_text(family = "Helvetica"),
    legend.title = element_text(family = "Helvetica", face = "bold"),
    legend.text = element_text(family = "Helvetica")
  ) +
  guides(color = guide_colourbar(title.position = "top", title.hjust = 0.5))

# Interactive version with a caption above the plotting area.
valence_plotly <- ggplotly(valence, tooltip = c("text", "color")) |>
  layout(
    annotations = list(
      text = "Embrace the mood! Color intensity reflects each song's valence, from melancholy to joy.",
      x = 0.5,
      y = 1.1,
      xref = "paper",
      yref = "paper",
      showarrow = FALSE,
      align = "center",
      font = list(size = 14)
    ),
    margin = list(t = 100)
  )

# Show the valence widget built in this chunk (the danceability widget was
# being displayed here instead).
valence_plotly

# NOTE(review): this export still writes the danceability widget; left
# unchanged in case "tu_grafico.html" is consumed elsewhere. Confirm whether
# it should export `valence_plotly` or be removed.
htmlwidgets::saveWidget(dance_plotly, "tu_grafico.html")
```
In this graph you can see the state of your emotions.
## Energy
```{r}
# Energy of each song against its position in the playlist. Colour encodes
# the same value; the `text` aesthetic is only used for the hover label of
# the interactive version.
energy <- ggplot(
  Bucle_full,
  aes(
    x = Position,
    y = energy,
    color = energy,
    text = paste("Song: ", name, "\nAuthor: ", artist_name)
  )
) +
  geom_point(shape = 18, size = 2, alpha = 0.7) +
  expand_limits(y = c(0.25, 1)) +
  scale_color_gradientn(
    name = "", # legend title (left empty)
    colors = c(
      "#ff2a6d", "#fd8c25", "#faf834",
      "#48f23a", "#31faff", "#4250f2"
    ),
    values = scales::rescale(c(0, 1)),
    limits = c(0, 1),
    breaks = c(0, 0.25, 0.5, 0.75, 1),
    labels = c("Low Energy", "0.25", "0.5", "0.75", "High Energy")
  ) +
  guides(color = guide_colourbar(title.position = "top", title.hjust = 0.5)) +
  labs(
    title = "The Energy Spectrum",
    subtitle = "Explore the intensity of the Top songs through their energy",
    x = "Position in Your Rank",
    y = "Energy"
  ) +
  theme_light(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica"),
    plot.title = element_text(size = 22, face = "bold"),
    plot.subtitle = element_text(size = 18),
    legend.position = "right",
    legend.title = element_text(family = "Helvetica", face = "bold"),
    legend.text = element_text(family = "Helvetica")
  )

# Interactive version with a caption above the plotting area.
energy_plotly <- ggplotly(energy, tooltip = c("text", "color"))
energy_plotly <- layout(
  energy_plotly,
  margin = list(t = 100),
  annotations = list(
    text = "Feel the energy! Color intensity reflects each song's energy level, from low to high.",
    x = 0.5,
    y = 1.1,
    xref = "paper",
    yref = "paper",
    showarrow = FALSE,
    align = "center",
    font = list(size = 14)
  )
)
energy_plotly
```
In this graph we can see how energising the music you listen to is.
## Comparison with the TOP 50.
I hope your sentiment analysis accurately reflects your current emotions, but most importantly, that you are happy. However, we want to go a bit further. To observe more clearly how we are feeling, we must compare ourselves with others. For this purpose, we will use the Top 50 Playlist. These playlists, which I would say are among the most famous on Spotify, allow us to see which songs are the most listened to at the moment. Therefore, if we perform a sentiment analysis of this playlist, it will show us how the majority of the population is feeling, so by comparing it with this list of music, we can observe the differences that our analysis presents compared to that of the majority of the population.
We'll use the Top 50 songs in Spain as a reference. We'll repeat the same steps we've followed previously, so we will omit the explanations on how to extract the information.
```{r}
# Spotify ID of the Top 50 playlist used as the reference for comparison.
top50 <- "37i9dQZEVXbNFJfN1Vw8d9"

# Download the playlist object and parse the JSON body.
top50españa <- req |>
  req_url_path_append(paste("playlists", top50, sep = "/")) |>
  req_perform() |>
  resp_body_json(simplifyVector = TRUE)

# Expand the nested `track` column and keep only the track-level fields.
# The column is named explicitly because calling `unnest()` without `cols`
# is deprecated and emits a warning.
top50españa <- top50españa$tracks$items |>
  select(track) |>
  unnest(track) |>
  select(c(artists, explicit, duration_ms, name, id, popularity))

# Track IDs, used next to query the audio features of each song.
espsongs <- top50españa$id
```
```{r}
# Query the audio-features endpoint once per distinct track. Iterating over
# the de-duplicated IDs themselves avoids the mismatch between a loop bound
# taken from `unique(espsongs)` and an index into the raw `espsongs`, and is
# safe when the playlist is empty.
top50_feature_rows <- lapply(unique(espsongs), function(track_id) {
  req |>
    req_url_path_append(paste("audio-features", track_id, sep = "/")) |>
    req_perform() |>
    resp_body_json(simplifyVector = TRUE) |>
    as_tibble()
})

# Stack the one-row results in a single step. `rev()` keeps the last-fetched
# track first, which is the row order that prepending each result used to
# produce.
top50espsongs <- bind_rows(rev(top50_feature_rows))
top50espsongs
```
We already have the sentiments of the top 50 songs in Spain at the moment. However, the table does not show the names of the songs. Let's show who each song belongs to.
```{r}
# Only the song title and its id are needed to label the audio features.
top50españa <- select(top50españa, name, id)

# Attach the features to the titles, number the rows in playlist order, tag
# the source playlist and keep the columns used for the comparison.
top50espdfull <- full_join(top50españa, top50espsongs, by = "id") |>
  mutate(
    Position = row_number(),
    playlist = "TOP 50"
  ) |>
  select(
    name, Position, danceability, energy, key, loudness, speechiness,
    acousticness, instrumentalness, liveness, valence, tempo, playlist
  )
top50espdfull
```
We make both dataframes the same format and join them together.
```{r}
# Reduce the personal playlist to the comparison columns and tag its origin.
Bucle_full <- Bucle_full |>
  select(
    name, Position, danceability, energy, key, loudness, speechiness,
    acousticness, instrumentalness, liveness, valence, tempo
  ) |>
  mutate(playlist = "En Bucle")

# Keep only the first 30 positions of the Top 50 list.
top50espdfull <- filter(top50espdfull, Position <= 30)

# Stack both playlists into a single table.
final_df_songs <- rbind(Bucle_full, top50espdfull)
final_df_songs
```
## Let's plot the difference
In this section, we will compare the results obtained from the previous charts. To do this, we will display density graphs of the different sentiments:
```{r}
# Density of danceability, one filled curve per playlist.
dance <- ggplot(
  final_df_songs,
  aes(
    x = danceability,
    fill = playlist,
    text = paste("Playlist:", playlist)
  )
) +
  geom_density(color = NA, alpha = 0.7) +
  xlim(c(0.25, 1)) +
  scale_fill_manual(values = c("#40E0D0", "#FF00FF")) +
  labs(
    title = "Who Dances More?",
    subtitle = "Exploring Danceability Across Different Playlists",
    x = "Danceability",
    y = "Density",
    fill = "Playlist"
  ) +
  theme_minimal(base_family = "Helvetica") +
  theme(
    text = element_text(size = 12, color = "#333333"),
    plot.title = element_text(size = 20, face = "bold", color = "#333333"),
    plot.subtitle = element_text(size = 16, color = "#555555"),
    legend.position = "right",
    legend.title = element_text(size = 12, color = "#333333"),
    legend.text = element_text(size = 12, color = "#333333")
  )

library(plotly)

# Interactive version with its own title and a caption annotation.
dance_plotly <- ggplotly(dance, tooltip = c("text"))
dance_plotly <- layout(
  dance_plotly,
  title = "In-depth Danceability Analysis Across Playlists",
  margin = list(t = 100),
  annotations = list(
    text = "A comparative visualization of song danceability in various playlists.",
    x = 0.5,
    y = 1.85,
    xref = "paper",
    yref = "paper",
    showarrow = FALSE,
    align = "center",
    font = list(size = 14, color = "#444444")
  )
)
dance_plotly
```
```{r}
# Density of valence, one filled curve per playlist.
valence <- ggplot(
  final_df_songs,
  aes(
    x = valence,
    fill = playlist,
    text = paste("Playlist:", playlist)
  )
) +
  geom_density(alpha = 0.7, color = NA) +
  scale_fill_manual(values = c("#40E0D0", "#FF00FF")) +
  labs(
    # The title used to be the danceability one, copied over unchanged.
    title = "Who Feels Happier?",
    subtitle = "Exploring Valence Across Different Playlists",
    x = "Valence",
    y = "Density",
    fill = "Playlist"
  ) +
  xlim(c(0, 1)) +
  theme_minimal(base_family = "Helvetica") +
  theme(
    legend.title = element_text(size = 12, color = "#333333"),
    plot.title = element_text(size = 20, face = "bold", color = "#333333"),
    plot.subtitle = element_text(size = 16, color = "#555555"),
    legend.position = "right",
    legend.text = element_text(size = 12, color = "#333333"),
    text = element_text(size = 12, color = "#333333")
  )

# Interactive version; the caption now names the feature actually plotted.
valence_plotly <- ggplotly(valence, tooltip = c("text")) |>
  layout(
    title = "In-depth Valence Analysis Across Playlists",
    annotations = list(
      text = "A comparative visualization of song valence in various playlists.",
      x = 0.5,
      y = 1.85,
      xref = "paper",
      yref = "paper",
      showarrow = FALSE,
      align = "center",
      font = list(size = 14, color = "#444444")
    ),
    margin = list(t = 100)
  )
valence_plotly
```
```{r}
# Interactive density comparison of energy between the two playlists.
# Requires plotly (ggplotly(), layout()) to be attached already.

# Static ggplot: one filled density curve per playlist.
energy <- ggplot(
  final_df_songs,
  aes(
    x = energy,
    fill = playlist,
    text = paste("Playlist:", playlist) # string surfaced in the tooltip
  )
) +
  geom_density(alpha = 0.7, color = NA) +
  scale_fill_manual(values = c("#40E0D0", "#FF00FF")) +
  labs(
    title = "Who Moves More?",
    subtitle = "Exploring Energy Across Different Playlists",
    # The axis label used to read "Valence" although the x aesthetic is energy.
    x = "Energy",
    y = "Density",
    fill = "Playlist"
  ) +
  xlim(c(0, 1)) +
  theme_minimal(base_family = "Helvetica") +
  theme(
    legend.title = element_text(size = 12, color = "#333333"),
    plot.title = element_text(size = 20, face = "bold", color = "#333333"),
    plot.subtitle = element_text(size = 16, color = "#555555"),
    legend.position = "right",
    legend.text = element_text(size = 12, color = "#333333"),
    text = element_text(size = 12, color = "#333333")
  )

# Convert to plotly (tooltip restricted to the `text` aesthetic), then set the
# interactive title, a caption-style annotation and extra top margin.
energy_plotly <- ggplotly(energy, tooltip = c("text")) |>
  layout(
    title = "In-depth Energy Analysis Across Playlists",
    annotations = list(
      # The annotation previously said "danceability" on the energy plot.
      text = "A comparative visualization of song energy in various playlists.",
      x = 0.5,
      y = 1.85,
      xref = "paper",
      yref = "paper",
      showarrow = FALSE,
      align = "center",
      font = list(size = 14, color = "#444444")
    ),
    margin = list(t = 100)
  )

energy_plotly
```
These charts offer a detailed comparison between your personal sentiments, as reflected by your music choices, and those prevalent within the broader society. By analyzing the density of specific emotional qualities in your favorite songs—such as danceability, energy, and valence—and comparing them with the general trends observed in popular music, we can gain insights into how your emotional state aligns with or diverges from the societal norm. This analysis not only sheds light on your individual preferences and mood but also provides a context for understanding how these preferences fit into the wider emotional landscape of the current society.
## Significant differences
To deepen our understanding beyond visual comparisons, we will undertake a statistical analysis to ascertain whether the emotional indicators derived from our BUCLE playlist significantly diverge from those of the TOP 50 playlist, representing the broader listening preferences. This involves comparing metrics such as danceability, energy, and valence between the two sets, employing statistical tests to identify meaningful differences. Through this analysis, we aim to uncover not just the surface-level preferences, but also to gain insights into the nuanced ways our personal music choices might mirror or contrast with the prevailing trends in society's musical mood and preferences. This step is crucial for providing a more objective and quantifiable perspective on how individual sentiment aligns with or deviates from collective trends.
```{r}
# One-way ANOVA per metric, with `playlist` as the only factor.
# Fit the three models first ...
anova_danceability <- aov(formula = danceability ~ playlist, data = final_df_songs)
anova_energy <- aov(formula = energy ~ playlist, data = final_df_songs)
anova_valence <- aov(formula = valence ~ playlist, data = final_df_songs)

# ... then print the ANOVA tables in the same order: danceability, energy,
# valence.
summary(anova_danceability)
summary(anova_energy)
summary(anova_valence)
```
Next, we will plot the confidence intervals of the estimated means to see more easily whether the two playlists differ significantly.
```{r}
# Install emmeans only when it is missing. requireNamespace() checks for the
# package without attaching it; the explicit library() call below does that.
if (!requireNamespace("emmeans", quietly = TRUE)) {
  install.packages("emmeans")
}
library(emmeans)
library(ggplot2)
library(plotly)

# Estimated marginal means per playlist for each fitted ANOVA model, converted
# to plain data frames so they can be handed to ggplot.
emmeans_danceability <- emmeans(anova_danceability, ~ playlist)
df_danceability <- summary(emmeans_danceability) |> as.data.frame()

emmeans_energy <- emmeans(anova_energy, ~ playlist)
df_energy <- summary(emmeans_energy) |> as.data.frame()

emmeans_valence <- emmeans(anova_valence, ~ playlist)
df_valence <- summary(emmeans_valence) |> as.data.frame()

#' Plot estimated means with their confidence intervals, one point per playlist
#'
#' @param df Data frame with the columns `playlist`, `emmean`, `lower.CL` and
#'   `upper.CL` (the columns referenced by the aesthetics below).
#' @param title Plot title.
#' @return A ggplot object.
plot_emmeans <- function(df, title) {
  ggplot(df, aes(x = playlist, y = emmean, group = playlist)) +
    geom_errorbar(
      aes(ymin = lower.CL, ymax = upper.CL),
      width = 0.1,
      color = "#7bcb13"
    ) +
    geom_point(size = 4, color = "#ff6f00") +
    labs(title = title, x = "Playlist", y = "Estimated Mean") +
    theme_minimal()
}

# Build each plot and render it interactively.
p_danceability <- plot_emmeans(df_danceability, "Danceability by Playlist")
ggplotly(p_danceability)

p_energy <- plot_emmeans(df_energy, "Energy by Playlist")
ggplotly(p_energy)

p_valence <- plot_emmeans(df_valence, "Valence by Playlist")
ggplotly(p_valence)
```
To judge from the confidence intervals (CIs) whether the groups differ significantly, check whether the intervals overlap. If the CIs of two groups do not overlap, the difference between the groups is statistically significant, suggesting that a true difference likely exists in the measure being analyzed. The converse does not hold, however: overlapping intervals do not by themselves prove that there is no significant difference, so in that case rely on the ANOVA results above.
# Playlist Wrapper
To go one step further with this sentiment analysis, we are going to see how our moods have evolved over the last few years. To do this, we will make use of the Spotify Wrapped playlists.
Spotify Wrapped is a marketing campaign that Spotify has been running since 2016: at the end of each year, the platform provides you with a compilation of data about your activity. Basically, it gives you a summary of your musical tastes: your most listened-to songs, artists, and genres throughout the year. In addition, it gives you a playlist compiling your most listened-to songs of that year.
To make the evolutionary analysis of your feelings we will make use of those playlists generated by Spotify and personalised for you, called "Your top songs".
Here are all my "Your top songs" playlists from 2016 to 2024, which are the playlists we will work with to see how my feelings have evolved over these years:
mytop_2016 <https://open.spotify.com/playlist/2DwzHuKl80qtRmO5A8Kc5W>
mytop_2017 <https://open.spotify.com/playlist/5dQS4LIObrlUQEcW4MbW7C>
mytop_2018 <https://open.spotify.com/playlist/2W8VlGXKkonQJTGzTc7GCW>
mytop_2019 <https://open.spotify.com/playlist/53zrJXPjANeI1yEhC6YCIu>
mytop_2020 <https://open.spotify.com/playlist/5ligD03AYrWyPtEmkq7ajk>
mytop_2021 <https://open.spotify.com/playlist/3KOuUyW64dmoExlfpCO5zS>
mytop_2022 <https://open.spotify.com/playlist/4bPc0DpsW7qGltDIp1gPZ1>
mytop_2023 <https://open.spotify.com/playlist/5j93ZBC2GpG4xxMXPouONo>
mytop_2024 <https://open.spotify.com/playlist/0mXUI84PW5dNoBFVwy3itd>
## Evolutionary sentiment analysis
The procedure for taking sentiment measurements from each yearly playlist will be the same as the one we followed previously for the "En bucle" playlist. So, first of all, we will store the Spotify ID of each of our yearly playlists, which we will later use to build the API endpoints.
```{r}
# Spotify playlist IDs of the yearly "Your top songs" playlists, one per year.
# Each ID is the last path segment of the corresponding playlist URL.
mytop_2016 <- "2DwzHuKl80qtRmO5A8Kc5W"
mytop_2017 <- "5dQS4LIObrlUQEcW4MbW7C"
mytop_2018 <- "2W8VlGXKkonQJTGzTc7GCW"
mytop_2019 <- "53zrJXPjANeI1yEhC6YCIu"
mytop_2020 <- "5ligD03AYrWyPtEmkq7ajk"
mytop_2021 <- "3KOuUyW64dmoExlfpCO5zS"
mytop_2022 <- "4bPc0DpsW7qGltDIp1gPZ1"
mytop_2023 <- "5j93ZBC2GpG4xxMXPouONo"
mytop_2024 <- "0mXUI84PW5dNoBFVwy3itd"
```
The next step will be to go year by year, extracting the information from each playlist as a data frame and then extracting the sentiment metrics of each of the songs it contains. Although this is a longer and more tedious procedure, it is simpler: going step by step lets us understand what we are doing with each playlist and see in detail what each of our years on Spotify has been like. It also avoids overloading both our computer and the Spotify API.
Once we have all the sentiment metrics for each of the songs of our year, we will average each of the metrics for that year to get a summary of the yearly sentiment.
### Your top songs from 2016 to 2024
**2016**
```{r}
# ---- 2016: playlist metadata ----
# NOTE: `mytop_2016` holds the playlist ID on entry and is overwritten below
# with the track table, so the ID chunk has to be re-run before this chunk can
# be executed a second time.
mytop_2016 <- req |>
  req_url_path_append(paste("playlists", mytop_2016, sep = "/")) |>
  req_perform() |>
  resp_body_json(simplifyVector = TRUE)

# `tracks` is spelled out in full: `$track` only resolved through partial
# matching of list names.
mytop_2016 <- mytop_2016$tracks$items |>
  select(track) |>
  unnest(track) |>
  select(c(artists, album, name, id, explicit, duration_ms, popularity, href)) |>
  mutate(my_year = "2016")

# Track IDs, used further down to request the audio features.
songs2016 <- mytop_2016$id

# First artist name per song title (distinct() keeps the first row per name).
mytop2016_filtered <- mytop_2016 |>
  unnest(artists, names_sep = "_") |>
  select(artist_name = artists_name, name) |>
  distinct(name, .keep_all = TRUE) |>
  group_by(name, artist_name)

mytop2016_1 <- left_join(mytop2016_filtered, mytop_2016, by = "name")

# Album name per song title, built the same way.
mytop2016_filtered_2 <- mytop_2016 |>
  unnest(album, names_sep = "_") |>
  select(album_name = album_name, name) |>
  distinct(name, .keep_all = TRUE) |>
  group_by(name, album_name)

mytop2016_2 <- left_join(mytop2016_filtered_2, mytop2016_1, by = "name")

# Drop the nested columns and the fields that are not needed any more.
mytop2016_final <- mytop2016_2 |>
  select(!c(album, artists, href, explicit, duration_ms))

# ---- 2016: audio features ----
# One request per distinct track ID. Results are collected in a list and bound
# once, instead of growing a data frame with rbind() on every iteration.
ids2016 <- unique(songs2016)
features2016 <- lapply(ids2016, function(song_id) {
  req |>
    req_url_path_append(paste("audio-features", song_id, sep = "/")) |>
    req_perform() |>
    resp_body_json(simplifyVector = TRUE) |>
    as_tibble() # replaces the deprecated as.tibble()
})
# rev() reproduces the row order of the former loop, which prepended each
# new result.
mytop2016_feelings <- bind_rows(rev(features2016))

# ---- 2016: merge and summarise ----
# `duration_ms` arrives with the audio features; convert it to seconds, then
# number the rows and tag the year.
my2016_full <- mytop2016_final |>
  full_join(mytop2016_feelings, by = "id") |>
  select(name, album_name, everything()) |>
  mutate(duration = duration_ms / 1000) |>
  select(-duration_ms) |>
  ungroup() |>
  mutate(Position = row_number(), your_year = "2016")

# Yearly summary: mean of every numeric column.
my2016_mean <- my2016_full |>
  group_by(your_year) |>
  summarize(across(where(is.numeric), mean))

my2016_mean
```
**2017**
```{r}
mytop_2017 <- req |>
req_url_path_append(paste("playlists", mytop_2017, sep = "/")) |>
req_perform() |>
resp_body_json(simplifyVector = TRUE)
mytop_2017 <- mytop_2017$track$items |>
select(track) |>
unnest(track) |>
select(c(artists,album, name, id, explicit, duration_ms, popularity, href))
mytop_2017 <- mytop_2017 |>
mutate(my_year = "2017")
songs2017 <- mytop_2017$id
mytop2017_filtered <- mytop_2017 |>
unnest(artists, names_sep = "_") |>
select(artist_name = artists_name, name) |>
distinct(name, .keep_all = TRUE) |>
group_by(name, artist_name)
mytop2017_1 <- left_join(mytop2017_filtered, mytop_2017, by = "name")
mytop2017_filtered_2 <- mytop_2017 |>
unnest(album, names_sep = "_") |>
select(album_name = album_name, name) |>
distinct(name, .keep_all = TRUE) |>
group_by(name, album_name)
mytop2017_2 <- left_join(mytop2017_filtered_2, mytop2017_1, by = "name")
mytop2017_final <- mytop2017_2 |>
select(!c(album, artists, href, explicit, duration_ms))
mytop2017_feelings <- data.frame()
for(i in 1:length(unique(songs2017))) {
mysongs2017 <- req |>
req_url_path_append(paste("audio-features", unique(songs2017)[i], sep = "/")) |>
req_perform() |>
resp_body_json(simplifyVector = TRUE) |>
as.tibble()
mytop2017_feelings <- rbind(mysongs2017, mytop2017_feelings)
}
my2017_full <- mytop2017_final |>
full_join(mytop2017_feelings, by = "id") |>
select(name, album_name, everything()) |>
mutate(duration = duration_ms / 1000) |>
select(-duration_ms)
my2017_full <- my2017_full |>
ungroup() |>
mutate(Position = row_number())