-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysing_script.R
148 lines (114 loc) · 3.53 KB
/
analysing_script.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
library(tidyverse)
library(ggdist)
library(patchwork)
# Combining ---------------------------------------------------------------
# One CSV per yearly playlist; each path is built with here::here().
playlist_2016 <- here::here("data", "clean_2016_playlist.csv") |> read_csv()
playlist_2017 <- here::here("data", "clean_2017_playlist.csv") |> read_csv()
playlist_2018 <- here::here("data", "clean_2018_playlist.csv") |> read_csv()
playlist_2019 <- here::here("data", "clean_2019_playlist.csv") |> read_csv()
playlist_2020 <- here::here("data", "clean_2020_playlist.csv") |> read_csv()

# Rows whose `feature` is one of these are dropped before `score` is
# converted to numeric.
bad_features <- c("type", "id", "uri", "track_href", "analysis_url")

# Stack the playlists (2020 first, 2016 last), remove the unwanted feature
# rows, then make `score` numeric and `playlist` a factor.
all_playlists <-
  bind_rows(
    playlist_2020,
    playlist_2019,
    playlist_2018,
    playlist_2017,
    playlist_2016
  ) |>
  filter(!feature %in% bad_features) |>
  mutate(
    score = as.numeric(score),
    playlist = as.factor(playlist)
  )
# Reshape to wide form: each distinct value of `feature` becomes a column
# whose cells are taken from `score`.
wider_all_playlists <- pivot_wider(
  all_playlists,
  names_from = feature,
  values_from = score
)
# Analysis ----------------------------------------------------------------
# Overlaid, semi-transparent valence density curves, one per playlist
ggplot(wider_all_playlists, aes(x = valence, fill = playlist)) +
  geom_density(alpha = 0.5, position = "identity")
# Separating things
# One wide table per playlist year, selected by the `playlist` label.
wide_2020 <- filter(wider_all_playlists, playlist == "2020")
wide_2019 <- filter(wider_all_playlists, playlist == "2019")
wide_2018 <- filter(wider_all_playlists, playlist == "2018")
wide_2017 <- filter(wider_all_playlists, playlist == "2017")
wide_2016 <- filter(wider_all_playlists, playlist == "2016")

# T Test
# Two-sample comparison of valence between the 2020 and 2018 playlists,
# using t.test()'s default settings.
t.test(wide_2020$valence, wide_2018$valence)
# Per-playlist mean of every column in the positional range
# `danceability:duration_ms`, with missing values ignored.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
means <-
  wider_all_playlists |>
  group_by(playlist) |>
  summarize(
    across(
      danceability:duration_ms,
      \(feature_scores) mean(feature_scores, na.rm = TRUE)
    )
  )
# Plot the distribution of one audio feature for each playlist.
#
# Arguments:
#   data          - data frame handed to ggplot(); the aesthetics refer to a
#                   `playlist` column and to the column named by
#                   `audio_feature`.
#   audio_feature - name of the feature column to plot, given as a string.
#
# Returns the assembled ggplot object: the feature on the x axis, `playlist`
# on the y axis (also mapped to colour and fill), a stat_slab() layer, a
# stat_halfeye() layer with a transparent fill, a minimal theme, an empty
# y-axis title and no legend.
audio_feature_graph <- function(data, audio_feature) {
  # Convert the column name to a symbol so it can be unquoted inside aes()
  feature_col <- sym(audio_feature)

  ggplot(
    data,
    aes(x = !!feature_col, y = playlist, color = playlist, fill = playlist)
  ) +
    stat_slab(size = .5, alpha = .2) +
    stat_halfeye(fill = "transparent") +
    theme_minimal() +
    labs(y = "") +
    theme(
      legend.position = "none",
      plot.title = element_text(hjust = 0.5)
    )
}
# colnames(wider_all_playlists) |>
# datapasta::vector_paste()
# Feature columns that get a panel in the summary figure (and that are used
# by the correlation analysis further down).
all_audio_features <- c(
  "danceability", "energy", "loudness", "speechiness",
  "acousticness", "liveness",
  "valence", "tempo", "duration_ms"
)

# Build one panel per feature, combine the panels with patchwork and attach
# the figure-level title, subtitle and caption.
all_plots <-
  wrap_plots(
    map(
      all_audio_features,
      \(feature_name) audio_feature_graph(wider_all_playlists, feature_name)
    )
  ) +
  plot_annotation(
    title = "Yearly Playlist Summary by Audio Feature",
    subtitle = "Distribution of Audio Feature score by yearly 'Your top 100 songs 20XX'",
    caption = "Data: My account via Spotify's API"
  )

# Display the combined figure
all_plots
# Correlation analysis ----------------------------------------------------
library(correlation)
library(ggraph)

# Correlations among the audio-feature columns listed in all_audio_features
results <- correlation(wider_all_playlists, select = all_audio_features)

# Graph plot of the correlation results with black node labels drawn on top.
# NOTE(review): the labels are matched to nodes by position in
#   all_audio_features - confirm the graph's node order is the same.
# NOTE(review): correlation() is called without `partial = TRUE` - confirm
#   that the "Gaussian Graphical Model" title is what is intended.
ggm_graph <- plot(results) +
  geom_node_text(label = all_audio_features, color = "black") +
  labs(
    title = "Gaussian Graphical Model of Audio Features",
    caption = "Data: My account via Spotify's API"
  )

# Plot of the summarised correlation results, keeping redundant entries
corr_matrix <- plot(summary(results, redundant = TRUE)) +
  labs(
    title = "Correlation Matrix of Audio Features",
    caption = "Data: My account via Spotify's API"
  )
# Save the three figures as PNG files; the file names are relative paths.
ggsave(filename = "yearly_playlist_summary.png", plot = all_plots)
ggsave(filename = "GGM_audio_features.png", plot = ggm_graph)
ggsave(
  filename = "correlation_matrix.png",
  plot = corr_matrix,
  width = 12,
  height = 7
)