-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathreplication_rates.rmd
94 lines (77 loc) · 3.02 KB
/
replication_rates.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
---
title: "Replication rates"
output: html_notebook
---
# Replication rates
```{r}
# Load the replication rate estimates and annotate every row with sample
# metadata (manifest) and contig taxonomy.
library(mbtools)

rates <- fread("data/rates.csv")
manifest <- fread("data/manifest.csv")

# data.table join X[Y]: every manifest row is kept and matched against
# `rates$id` via `manifest$vendor_observation_id`. `allow.cartesian` permits
# a result that is larger than both inputs (several rate rows per sample).
rates <- rates[
  manifest,
  on = c(id = "vendor_observation_id"),
  allow.cartesian = TRUE
]

# Taxonomy table; "not classified" and empty cells are read as NA, and
# `fill = TRUE` pads rows that have fewer fields than the header.
taxonomy <- fread(
  "data/contigs.classification.names.txt",
  sep = "\t",
  na.strings = c("NA", "not classified", ""),
  fill = TRUE
)

# Strip everything from the first ":" onwards in each rank column.
# NOTE(review): presumably this removes a per-rank score suffix appended by
# the classifier -- confirm against the input file.
tax_ranks <- c(
  "superkingdom", "phylum", "class", "order", "family", "genus", "species"
)
for (col in tax_ranks) {
  # set() replaces the column by reference instead of copying the table.
  set(taxonomy, j = col, value = gsub(":.+$", "", taxonomy[[col]]))
}

# The first column holds the contig identifier used as the join key below.
setnames(taxonomy, 1L, "contig")

# Add the taxonomy columns to every rate row (all rows of `rates` are kept).
rates <- taxonomy[rates, on = "contig"]
head(rates)
```
```{r, fig.width=3.5, fig.height=4}
# Overview figure: replication rate per subset on a log10 axis, one jittered
# point per row of `rates`, colored by phylum.
theme_set(theme_minimal())

ggplot(
  rates,
  aes(x = subset, y = rate, color = phylum, group = subset)
) +
  geom_jitter(width = 0.3) +
  scale_y_log10() +
  labs(x = "", y = "replication rate [a.u.]") +
  # Rotate the subset labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
```
```{r, fig.width=8, fig.height=1.5}
# Replication rates per subset, one facet per phylum, with the per-subset
# mean overlaid as a black diamond. The figure is shown in the notebook and
# written to figures/replication.png.
pj <- position_jitter(width = 0.3)

replication_plot <- ggplot(
  rates,
  aes(x = subset, y = rate, color = phylum, group = subset)
) +
  scale_y_log10() +
  geom_point(position = pj, size = 1) +
  facet_wrap(~ phylum, nrow = 1) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(
    fun = "mean", geom = "point",
    shape = 23, stroke = 1, size = 2, color = "black"
  ) +
  coord_flip() +
  labs(x = "", y = "replication rate [a.u.]") +
  # Facets already identify the phylum; `"none"` replaces the deprecated
  # `guides(color = FALSE)`.
  guides(color = "none")

# Render in the notebook.
replication_plot

# Pass the plot explicitly instead of relying on last_plot().
ggsave(
  "figures/replication.png",
  plot = replication_plot,
  width = 8, height = 1.5, dpi = 300
)
```
```{r}
# Linear models (glm with its default gaussian family) of log replication
# rate on age, BMI and subset: first per phylum, then for contigs without a
# phylum assignment, then for all rows.
for (p in rates[!is.na(phylum), unique(phylum)]) {
  # Only model phyla that occur in exactly two subsets.
  if (rates[phylum == p, uniqueN(subset) != 2]) {
    next
  }
  print(p)
  # Inside a loop results are not auto-printed, hence the explicit print().
  print(summary(
    glm(log(rate) ~ age + bmi + subset, data = rates[phylum == p])
  ))
}

# Contigs with no phylum assignment.
print("NA")
summary(
  glm(log(rate) ~ age + bmi + subset, data = rates[is.na(phylum)])
)

# All contigs pooled.
print("all")
summary(
  glm(log(rate) ~ age + bmi + subset, data = rates)
)
```
```{r}
# Attach the bowel movement answer from the digestive health assessment to
# every row of `rates`.
library(arivale.data.interface)

gastro <- get_snapshot("assessments_digestive_health", clean = TRUE)

# Cast the id to integer in place.
# NOTE(review): presumably needed so the join key type matches
# `rates$public_client_id` -- confirm.
gastro[, public_client_id := as.integer(public_client_id)]

# data.table join X[Y]: all rows of `rates` are kept; only the id and the
# bowel movement column are taken from the assessment table.
merged <- gastro[
  , .(public_client_id, assessment_digestion_bowel_movements_enum)
][rates, on = "public_client_id"]

# Shorter alias used by the plots and models that follow.
merged[, bowel_movement := assessment_digestion_bowel_movements_enum]
```
```{r}
# Replication rate by bowel movement category: jittered points colored by
# phylum with a black boxplot drawn on top (outliers hidden because the raw
# points are already shown).
ggplot(
  merged,
  aes(x = bowel_movement, y = rate, color = phylum, group = bowel_movement)
) +
  geom_jitter(width = 0.2) +
  geom_boxplot(color = "black", width = 0.2, outlier.color = NA) +
  scale_y_log10()

# Gaussian model of log rate on bowel movement, sex and age, fitted on the
# distinct combinations of the four model columns.
summary(
  glm(
    log(rate) ~ bowel_movement + sex + age,
    data = unique(merged[, .(rate, bowel_movement, sex, age)]),
    family = "gaussian"
  )
)
```
```{r}
# Logistic regression of subset membership on bowel movement, sex and age.
# unique() collapses the table to distinct rows of the selected columns, so
# repeated rows per client (one per contig) are not counted multiple times.
summary(
  glm(
    factor(subset) ~ bowel_movement + sex + age,
    data = unique(merged[, .(public_client_id, bowel_movement, sex, age, subset)]),
    family = "binomial"
  )
)
```
# Cohort composition
```{r}
# Build a phyloseq object from the genus-level read counts and plot the
# genus composition of the cohort.
library(mbtools)

genera <- fread("data/G_counts.csv")

# Long -> wide: one row per sample, one column per genus, cells are reads.
# NOTE(review): sample/genus pairs missing from the input become NA here;
# consider `fill = 0` if absent pairs mean zero reads -- confirm with the
# data.
mat <- dcast(genera, `sample` ~ genus, value.var = "reads")
ids <- mat[, `sample`]

# Drop the id column before converting to a matrix. The previous code removed
# a non-existent "genus" column, which left `sample` inside the abundance
# matrix.
mat <- as.matrix(mat[, "sample" := NULL])
rownames(mat) <- ids

# Single-rank taxonomy table: each genus is its own taxon.
taxa <- matrix(colnames(mat), ncol = 1)
colnames(taxa) <- "genus"
rownames(taxa) <- taxa[, 1]

# Samples are rows, taxa are columns.
ps <- phyloseq(otu_table(mat, taxa_are_rows = FALSE), tax_table(taxa))
plot_taxa(ps, "genus")
```