-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcp.Rmd
196 lines (119 loc) · 4.23 KB
/
cp.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
---
title: "Changepoint analysis"
output:
html_document:
toc: yes
html_notebook: default
bibliography: changepoint.bib
---
```{r, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, cache = TRUE, warning = FALSE)
```
## What is changepoint analysis
Changepoint analysis is...
More details here...<https://en.wikipedia.org/wiki/Change_detection>, <http://www.variation.com/cpa/tech/changepoint.html>
In R with `changepoint` package[@Killick2013]
```{r, message=FALSE, warning=FALSE}
if(!require("changepoint"))install.packages("changepoint")
library(changepoint)
## using the example given
set.seed(1) ## random start point
x <- c(rnorm(100, 0, 1), rnorm(100, 0, 10)) ## some random data
test <- cpt.var(x) ##look for changes in variance
plot(test) ## plot
cp1 <- test@cpts ## locate changepoints
cp1
```
## Real data 1. Under 18 conceptions
```{r, message=FALSE, warning=FALSE}
library(fingertipsR)
library(dplyr)
if(!require("ggfortify"))install.packages("ggfortify")
library(ggfortify)
inds <- indicators(ProfileID = 19) ## get details of PHOF indicators
teen <- inds %>% filter(stringr::str_detect(IndicatorName, "conceptions")) ## identify the ID for conceptions data
data <- fingertips_data(IndicatorID = 20401 ) ## download data
data %>%
filter(AreaName == "England", Category == "") %>%
select(Timeperiod, Value) %>%
mutate_all(funs(as.numeric)) -> test1 ## prepare data for cp analysis
rownames(test1) <- test1$Timeperiod
cp <- cpt.mean(test1[,2], method = "PELT") ## run cp on mean values/ choose PELT method
cps <- cp@cpts ## dates of change points
test1[cps,] ## filter dataset to identify cp locations
autoplot(cp) + labs(y = "rate", x = "date") ## plot
```
## Real data 2. Monthly prescribing
```{r}
## Use Indicator 92511 - Monthly prescribing data
ind_ab <- fingertips_data(IndicatorID = 92511, AreaTypeID = 19 ) %>%
filter(AreaName == "England") %>%
select(Timeperiod, Value) %>%
mutate_if(is.factor, as.character) %>%
mutate(date = seq_along(Timeperiod))
rownames(ind_ab) <- ind_ab$Timeperiod
## Create time series starting with Jan 2011 by month
ind_ab_ts <- ts(ind_ab$Value, start = c(2011, 1), frequency = 12, names = ind_ab$Timeperiod)
qplot(data=ind_ab, x = date, Value, geom = c("line", "point"), group = 1) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
cp_pres <- cpt.mean(ind_ab_ts)
plot(cp_pres)
cp_1 <- cp_pres@cpts ## dates of change points
ind_ab[cp_1,] ## filter dataset to identify cp locations
autoplot(cp_pres) + labs(y = "rate", x = "date") + geom_line(aes(y = ind_ab$Value)) ## plot
class(ind_ab_ts)
```
```{r}
## try Bayesian version
if(!require("bcp"))install.packages("bcp")
library(bcp)
bcp1 <- bcp(ind_ab$Value, return.mcmc = TRUE)
residuals(bcp1)
plot(bcp1)
```
## Daily data: Fingertips hits
```{r}
library(RGA)
ga_token <- authorize(client.id = "xxxxxxxxxxxxxxxxxxxxxxx.apps.googleusercontent.com", client.secret = "xxxxxxxxxxxxxxxx", cache = TRUE)
## extract profile ids, website urls and site names
lookup <- list_profiles() %>%
select(name, id, websiteUrl)
## select Fingertips id
lookup %>%
filter(stringr::str_detect(name, "Finger")) %>%
select(id)
## download page view data
df <- data.frame()
for(i in 1:length(id)) {
id<-"78054916"
first <- firstdate(id)
ga <- get_ga(id, start.date = first, end.date = "today",
metrics = "ga:users,
ga:sessions,
ga:avgTimeonPage,
ga:pageviews",
dimension = "ga:date" )
ga <- cbind(id, ga)
df <- rbind(df, ga)
}
## Lets just plot
qplot(data = df, date, pageviews, geom = c("line", "smooth")) +
labs(title = "Fingertips daily page views")
```
#### Changepoint analysis
```{r}
df_ts <- ts(df$users, name = df$date, start = c(2013, 330), frequency = 365.25)
cp_df_mean <- cpt.mean(df_ts, method = "BinSeg", Q = 10)
cp_df_var <- cpt.var(df_ts, method = "PELT", Q = 5)
autoplot(cp_df_mean)
cpdf <- cp_df_mean@cpts
df[cpdf, ] %>%
ggplot(aes(date, pageviews)) +
geom_line() +
geom_point() +
labs(title = "Changepoints for Fingertips")
```
The analysis shows that:
* Page views significantly increased every year from
+ an average 450 a day from Oct 2014 to March 2016
+ 2600 on average since May 2016
## References