-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy path10x_Amyg-n5_step02_clust-annot_MNT.R
executable file
·648 lines (521 loc) · 30.7 KB
/
10x_Amyg-n5_step02_clust-annot_MNT.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
### MNT 10x snRNA-seq workflow: step 02
### **Region-specific analyses**
### - (2x) amygdala samples from: Br5161 & Br5212
### Initiated MNT 29Jan2020
### MNT 21Apr2021: add expansion samples (n=3, incl'g 2 female)
#####################################################################
library(SingleCellExperiment)
library(EnsDb.Hsapiens.v86)
library(scater)
library(scran)
library(batchelor)
library(DropletUtils)
library(jaffelab)
library(dendextend)
library(dynamicTreeCut)
source("plotExpressionCustom.R")
### Palette taken from `scater`
tableau10medium = c("#729ECE", "#FF9E4A", "#67BF5C", "#ED665D",
"#AD8BC9", "#A8786E", "#ED97CA", "#A2A2A2",
"#CDCC5D", "#6DCCDA")
tableau20 = c("#1F77B4", "#AEC7E8", "#FF7F0E", "#FFBB78", "#2CA02C",
"#98DF8A", "#D62728", "#FF9896", "#9467BD", "#C5B0D5",
"#8C564B", "#C49C94", "#E377C2", "#F7B6D2", "#7F7F7F",
"#C7C7C7", "#BCBD22", "#DBDB8D", "#17BECF", "#9EDAE5")
# ===
# Load 'pilot' samples
load("/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/all-FACS-n14_preprint_SCEs_processing-QC_MNTMar2021.rda",
verbose=T)
# pilot.data, pilot.data.unfiltered, e.out, ref.sampleInfo
rm(pilot.data.unfiltered, e.out)
# Load 2021 expansion set
load("/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/all-FACS-n10_2021rev_SCEs_processing-QC_MNTMar2021.rda", verbose=T)
# pilot.data.2, pilot.data.2.unfiltered, e.out.2, ref.sampleInfo.rev
rm(pilot.data.2.unfiltered, e.out.2)
### MNT comment: At this point, each sample (which is a SCE object in the list, 'pilot.data') has been
# QC'd for cell/nucleus calling ('emptyDrops()' test) and mito rate thresholding
# Additionally, there have been a computed 'doubletScore', which will QC with, after
# clustering (e.g. that there are no doublet-driven clusters, etc.)
### Merging shared-region samples ============================================
# Newest iterations for normalization: multiBatchNorm-alize
# Order as will do for `fastMNN()` (this shouldn't matter here)
sce.amy <- cbind(pilot.data[["br5161.amy"]], pilot.data[["br5212.amy"]],
pilot.data.2[["br5701.amy"]],
pilot.data.2[["br5276.amy.neun"]], pilot.data.2[["br5400.amy.neun"]]
)
sce.amy
#class: SingleCellExperiment
# dim: 33538 15177
# metadata(5): Samples Samples Samples Samples Samples
# assays(1): counts
# rownames(33538): MIR1302-2HG FAM138A ... AC213203.1 FAM231C
# rowData names(6): gene_id gene_version ... gene_biotype Symbol.uniq
# colnames(15177): AAACCCAAGCACGATG-1 AAACCCACAGCGGTCT-1 ...
# TTTGGTTGTTGTTTGG-1 TTTGGTTTCAGACCGC-1
# colData names(16): Sample Barcode ... protocol sequencer
# reducedDimNames(0):
# altExpNames(0):
# Use `multiBatchNorm()` to compute log-normalized counts, matching the scaling across samples
sce.amy <- multiBatchNorm(sce.amy, batch=sce.amy$sampleID)
# Use the simple `modelGeneVar` - this makes more sense over `combineVar`, since the
# cell composition is already known to be quite different (with NeuN selection)
geneVar.amy <- modelGeneVar(sce.amy)
chosen.hvgs.amy <- geneVar.amy$bio > 0
sum(chosen.hvgs.amy)
# [1] 10109
### Dimensionality reduction ================================================================
# Run `fastMNN` (internally uses `multiBatchPCA`), taking top 100 (instead of default 50 PCs)
set.seed(109)
mnn.hold <- fastMNN(sce.amy, batch=sce.amy$sampleID,
merge.order=c("br5161.amy","br5212.amy","br5701.amy",
"br5276.amy.neun","br5400.amy.neun"),
subset.row=chosen.hvgs.amy, d=100,
correct.all=TRUE, get.variance=TRUE,
BSPARAM=BiocSingular::IrlbaParam())
# This temp file just used for getting batch-corrected components (drops a variety of entries)
table(colnames(mnn.hold) == colnames(sce.amy)) # all TRUE
table(mnn.hold$batch == sce.amy$sampleID) # all TRUE
# Add them to the SCE, as well as the metadata (though the latter might not be so usefl)
reducedDim(sce.amy, "PCA_corrected") <- reducedDim(mnn.hold, "corrected") # 100 components
metadata(sce.amy) <- metadata(mnn.hold)
# Save into a new region-specific SCE object/flie
save(sce.amy, chosen.hvgs.amy, ref.sampleInfo, ref.sampleInfo.rev,
file="rdas/revision/regionSpecific_Amyg-n5_cleaned-combined_SCE_MNT2021.rda")
## 'getClusteredPCs()' evaluated in qsub mode (with 'R-batchJob_Amyg-n2_optimalPCselxn_MNTFeb2020.R')
# --> saved into same .rda
### Picking up with optimally-defined PC space ===
load("/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/regionSpecific_Amyg-n5_cleaned-combined_SCE_MNT2021.rda",
verbose=TRUE)
# sce.amy, chosen.hvgs.amy, pc.choice.amy, ref.sampleInfo, ref.sampleInfo.rev
# How many PCs is optimal?:
metadata(pc.choice.amy)$chosen
# [1] 87
## Assign this chosen (87 PCs) to 'PCA_opt'
reducedDim(sce.amy, "PCA_opt") <- reducedDim(sce.amy, "PCA_corrected")[ ,1:(metadata(pc.choice.amy)$chosen)]
## t-SNE
set.seed(109)
sce.amy <- runTSNE(sce.amy, dimred="PCA_opt")
## UMAP
set.seed(109)
sce.amy <- runUMAP(sce.amy, dimred="PCA_opt")
# How do these look?
plotReducedDim(sce.amy, dimred="TSNE", colour_by="sampleID")
plotReducedDim(sce.amy, dimred="UMAP", colour_by="sampleID")
### Clustering: Two-step ======================================================
### Step 1: Perform graph-based clustering in this optimal PC space
# - take k=20 NN to build graph
snn.gr <- buildSNNGraph(sce.amy, k=20, use.dimred="PCA_opt")
clusters.k20 <- igraph::cluster_walktrap(snn.gr)$membership
table(clusters.k20)
## 59 prelim clusters
# Assign as 'prelimCluster'
sce.amy$prelimCluster <- factor(clusters.k20)
plotReducedDim(sce.amy, dimred="TSNE", colour_by="prelimCluster")
# Is sample driving this 'high-res' clustering at this level?
table(sce.amy$prelimCluster, sce.amy$sampleID) # (a little bit, but is typical)
# rbind the ref.sampleInfo[.rev]
ref.sampleInfo <- rbind(ref.sampleInfo, ref.sampleInfo.rev)
# Save for now
save(sce.amy, chosen.hvgs.amy, pc.choice.amy, ref.sampleInfo,
file="/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/regionSpecific_Amyg-n5_cleaned-combined_SCE_MNT2021.rda")
### Step 2: Hierarchical clustering of pseudo-bulked ("PB'd") counts
# ** That is, to pseudo-bulk (aka 'cluster-bulk') on raw counts, on all [non-zero] genes,
# normalize with `librarySizeFactors()`, log2-transform, then perform HC'ing
# Preliminary cluster index for pseudo-bulking
clusIndexes = splitit(sce.amy$prelimCluster)
prelimCluster.PBcounts <- sapply(clusIndexes, function(ii){
rowSums(assays(sce.amy)$counts[ ,ii])
}
)
# Compute LSFs at this level
sizeFactors.PB.all <- librarySizeFactors(prelimCluster.PBcounts)
# Normalize with these LSFs
geneExprs.temp <- t(apply(prelimCluster.PBcounts, 1, function(x) {log2(x/sizeFactors.PB.all + 1)}))
## Perform hierarchical clustering
dist.clusCollapsed <- dist(t(geneExprs.temp))
tree.clusCollapsed <- hclust(dist.clusCollapsed, "ward.D2")
dend <- as.dendrogram(tree.clusCollapsed, hang=0.2)
# Just for observation
par(cex=.6)
myplclust(tree.clusCollapsed, main="5x Amyg prelim-kNN-cluster relationships", cex.main=2, cex.lab=1.5, cex=1.8)
clust.treeCut <- cutreeDynamic(tree.clusCollapsed, distM=as.matrix(dist.clusCollapsed),
minClusterSize=2, deepSplit=1, cutHeight=225)
table(clust.treeCut)
unname(clust.treeCut[order.dendrogram(dend)])
## Cutting at 225 looks good for the main neuronal branch, but a lot of glial
# prelim clusters are dropped off (0's)
# Cut at 400 for broad glia branch (will manually merge remaining dropped off)
glia.treeCut <- cutreeDynamic(tree.clusCollapsed, distM=as.matrix(dist.clusCollapsed),
minClusterSize=2, deepSplit=1, cutHeight=400)
unname(glia.treeCut[order.dendrogram(dend)])
# Take those and re-assign to the first assignments
clust.treeCut[order.dendrogram(dend)][c(38:59)] <- ifelse(glia.treeCut[order.dendrogram(dend)][c(38:59)] == 0,
0, glia.treeCut[order.dendrogram(dend)][c(38:59)] + 10)
unname(clust.treeCut[order.dendrogram(dend)])
# Add new labels to those prelimClusters cut off
clust.treeCut[order.dendrogram(dend)][which(clust.treeCut[order.dendrogram(dend)]==0)] <- max(clust.treeCut)+c(1:10)
# 'Re-write', since there are missing numbers
clust.treeCut[order.dendrogram(dend)] <- as.numeric(as.factor(clust.treeCut[order.dendrogram(dend)]))
## Define color pallet
cluster_colors <- unique(c(tableau20, tableau10medium)[clust.treeCut[order.dendrogram(dend)]])
names(cluster_colors) <- unique(clust.treeCut[order.dendrogram(dend)])
labels_colors(dend) <- cluster_colors[as.character(clust.treeCut[order.dendrogram(dend)])]
# Print for future reference
pdf("pdfs/revision/regionSpecific_Amyg-n5_HC-prelimCluster-relationships_MNT2021.pdf")
par(cex=0.8, font=2)
plot(dend, main="5x Amyg prelim-kNN-cluster relationships with collapsed assignments")
dev.off()
# Make reference for new cluster assignment
clusterRefTab.amy <- data.frame(origClust=order.dendrogram(dend),
merged=clust.treeCut[order.dendrogram(dend)])
# Assign as 'collapsedCluster'
sce.amy$collapsedCluster <- factor(clusterRefTab.amy$merged[match(sce.amy$prelimCluster, clusterRefTab.amy$origClust)])
# Print some visualizations:
pdf("pdfs/revision/regionSpecific_Amyg-n5_reducedDims-with-collapsedClusters_MNT2021.pdf")
plotReducedDim(sce.amy, dimred="PCA_corrected", ncomponents=5, colour_by="collapsedCluster", point_alpha=0.5)
plotTSNE(sce.amy, colour_by="sampleID", point_alpha=0.5)
plotTSNE(sce.amy, colour_by="protocol", point_alpha=0.5)
plotTSNE(sce.amy, colour_by="collapsedCluster", point_alpha=0.5)
plotTSNE(sce.amy, colour_by="sum", point_alpha=0.5)
plotTSNE(sce.amy, colour_by="doubletScore", point_alpha=0.5)
# And some more informative UMAPs
plotUMAP(sce.amy, colour_by="sampleID", point_alpha=0.5)
plotUMAP(sce.amy, colour_by="collapsedCluster", point_alpha=0.5)
dev.off()
## Print marker genes for annotation
markers.mathys.custom = list(
'neurons' = c('SYT1', 'SNAP25', 'GRIN1'),
'excitatory_neuron' = c('CAMK2A', 'NRGN','SLC17A7', 'SLC17A6', 'SLC17A8'),
'inhibitory_neuron' = c('GAD1', 'GAD2', 'SLC32A1'),
'oligodendrocyte' = c('MBP', 'MOBP', 'PLP1'),
'oligodendrocyte_precursor' = c('PDGFRA', 'VCAN', 'CSPG4'),
'microglia' = c('CD74', 'CSF1R', 'C3'),
'astrocytes' = c('GFAP', 'TNC', 'AQP4', 'SLC1A2'),
'endothelial' = c('CLDN5', 'FLT1', 'VTN', 'PECAM1'),
# Added post-hoc, looking at markers (step3) - MNT 25May2021
'Tcell' = c('SKAP1', 'ITK', 'CD247')
)
pdf("pdfs/revision/regionSpecific_Amyg-n5_marker-logExprs_collapsedClusters_MNT2021.pdf", height=6, width=8)
for(i in 1:length(markers.mathys.custom)){
print(
plotExpression(sce.amy, exprs_values = "logcounts", features=c(markers.mathys.custom[[i]]), ncol=2,
x="collapsedCluster", colour_by="collapsedCluster", point_alpha=0.4, point_size=.7,
add_legend=F) + stat_summary(fun = median, fun.min = median, fun.max = median,
geom = "crossbar", width = 0.3,
colour=rep(tableau20[1:17], length(markers.mathys.custom[[i]]))) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
ggtitle(label=paste0(names(markers.mathys.custom)[i], " markers"))
)
}
dev.off()
## QC - How do the total UMI distribution look?
newClusIndex <- splitit(sce.amy$collapsedCluster)
sapply(newClusIndex, function(x) {quantile(sce.amy[,x]$sum)})
# 1 2 3 4 5 6 7 8
# 0% 2876.0 831 1685 2219.0 7358.00 566.00 1238.00 1182.0
# 25% 29547.5 22550 9983 12195.5 37322.50 7653.50 20472.75 7471.5
# 50% 51398.0 34143 18481 36275.0 55357.00 12241.00 45897.50 11275.0
# 75% 67826.0 43717 39510 49672.5 73867.75 18813.75 67085.00 16469.5
# 100% 156945.0 115493 121273 94599.0 165583.00 82358.00 171608.00 35728.0
# 9 10 11 12 13 14 15 16 17
# 0% 641.0 1209.0 435 4036.00 121.0 756.00 7739.0 3346.00 1711.0
# 25% 4634.5 9787.5 3952 8556.00 245.0 4527.75 16961.0 11318.50 3453.0
# 50% 6510.5 13243.0 5381 10896.00 313.0 7092.00 20019.0 14411.50 4584.0
# 75% 8964.0 16967.5 7055 13478.25 468.5 9291.50 30862.5 26971.75 5476.5
# 100% 30028.0 39830.0 20500 27167.00 2770.0 95032.00 58639.0 92477.00 15214.0
# 18 19 20 21
# 0% 1927.0 101.0 438.0 188.0
# 25% 4826.5 373.5 2382.0 850.5
# 50% 5850.0 743.0 3657.0 1156.0
# 75% 7433.5 1018.0 5029.5 1842.5
# 100% 11453.0 1556.0 6237.0 7739.0
## Obser.: looks like collapsedCluster's 13 & 19 should be dropped - their marker
## expression is noisy too
table(sce.amy$collapsedCluster)
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# 728 541 525 555 344 414 216 1555 6080 1459 1201 44 1067 86 55 52
# 17 18 19 20 21
# 31 39 71 31 83
# * 13 is quite overrepresented in Br5400, but this sample also had a median
# of <1000 total UMIs that were still kept after QC - thus probably a low trxts-driven cluster
# * 19 is pretty evenly-ish distributed
## doublet score?
sapply(newClusIndex, function(x) {round(quantile(sce.amy$doubletScore[x]),2)})
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14
# 0% 0.22 0.15 0.16 0.11 0.01 0.13 0.20 0.00 0.00 0.00 0.00 0.66 0.00 0.16
# 25% 0.55 0.37 0.36 0.31 0.10 0.28 0.97 0.07 0.16 0.07 0.03 1.02 0.02 0.28
# 50% 0.84 0.84 0.69 0.76 0.16 0.40 1.40 0.16 0.39 0.19 0.08 1.19 0.06 0.36
# 75% 1.02 1.09 1.12 1.00 0.33 0.64 1.54 0.29 0.86 0.35 0.19 1.31 0.33 0.68
# 100% 4.94 6.94 17.29 3.33 5.61 3.40 7.24 12.15 7.72 13.11 10.28 9.84 2.45 1.19
# 1 5 16 17 18 19 20 21
# 0% 0.01 0.28 0.17 0.20 0.00 0.01 0.01
# 25% 0.04 0.40 0.26 0.33 0.01 0.05 0.04
# 50% 0.05 0.52 0.33 0.52 0.02 0.07 0.16
# 75% 0.08 0.63 0.49 0.64 0.11 0.12 0.21
# 100% 1.18 2.11 1.01 0.83 1.00 0.85 0.77
# At 'prelimCluster' level?
clusIndex <- splitit(sce.amy$prelimCluster)
sapply(clusIndex, function(x) {round(quantile(sce.amy$doubletScore[x]),2)})
# 47 for sure (maybe 25 too) - 55 relatively high
# - none of these are single-donor-specific though (where doublets would arise)
## Add annotations, looking at marker gene expression
# (canonical, above, and from markers looked at in 'step3')
annotationTab.amy <- data.frame(collapsedCluster=c(1:21))
annotationTab.amy$cellType <- NA
annotationTab.amy$cellType[c(1:4,6:7,14,16)] <- paste0("Inhib_", c("A","B","C","D","E","F","G","H"))
annotationTab.amy$cellType[c(5,12,15)] <- paste0("Excit_", c("A","B","C"))
annotationTab.amy$cellType[c(8:11)] <- c("Astro_A", "Oligo", "OPC", "Micro")
annotationTab.amy$cellType[c(13,19)] <- paste0("drop.lowNTx_", c("A","B"))
annotationTab.amy$cellType[c(17:18,20:21)] <- c("Endo","Mural", "Tcell", "Astro_B")
sce.amy$cellType <- annotationTab.amy$cellType[match(sce.amy$collapsedCluster,
annotationTab.amy$collapsedCluster)]
sce.amy$cellType <- factor(sce.amy$cellType)
cell_colors.amy <- cluster_colors[order(as.integer(names(cluster_colors)))]
names(cell_colors.amy) <- annotationTab.amy$cellType
cell_colors.amy
# Inhib_A Inhib_B Inhib_C Inhib_D Excit_A Inhib_E
# "#1F77B4" "#AEC7E8" "#FF7F0E" "#FFBB78" "#2CA02C" "#98DF8A"
# Inhib_F Astro_A Oligo OPC Micro Excit_B
# "#D62728" "#FF9896" "#9467BD" "#C5B0D5" "#8C564B" "#C49C94"
# drop.lowNTx_A Inhib_G Excit_C Inhib_H Endo Mural
# "#E377C2" "#F7B6D2" "#7F7F7F" "#C7C7C7" "#BCBD22" "#DBDB8D"
# drop.lowNTx_B Tcell Astro_B
# "#17BECF" "#9EDAE5" "#729ECE"
# Save
save(sce.amy, chosen.hvgs.amy, pc.choice.amy, clusterRefTab.amy, ref.sampleInfo, annotationTab.amy, cell_colors.amy,
file="/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/regionSpecific_Amyg-n5_cleaned-combined_SCE_MNT2021.rda")
## Re-print marker expression plots with annotated cluster names ===
pdf("pdfs/revision/regionSpecific_Amyg-n5_marker-logExprs_collapsedClusters_MNT2021.pdf", height=5, width=9)
for(i in 1:length(markers.mathys.custom)){
print(
plotExpressionCustom(sce = sce.amy,
features = markers.mathys.custom[[i]],
features_name = names(markers.mathys.custom)[[i]],
anno_name = "cellType") +
scale_color_manual(values = cell_colors.amy)
)
}
dev.off()
## Optionally, for a cleaner version, drop those 'drop.lowNTx_'s and re-print
sce.amy <- sce.amy[ ,-grep("drop.", sce.amy$cellType)]
sce.amy$cellType <- droplevels(sce.amy$cellType)
## Re-print reducedDims with these annotations (keep the 'drop.' clusters here) ===
pdf("pdfs/revision/regionSpecific_Amyg-n5_reducedDims-with-collapsedClusters_MNT2021.pdf",width=8)
plotReducedDim(sce.amy, dimred="PCA_corrected", ncomponents=5, colour_by="cellType", point_alpha=0.5) +
scale_color_manual(values = cell_colors.amy) + labs(colour="Cell type")
plotTSNE(sce.amy, colour_by="sampleID", point_alpha=0.5, point_size=2)
plotTSNE(sce.amy, colour_by="protocol", point_alpha=0.5, point_size=2)
plotTSNE(sce.amy, colour_by="prelimCluster", text_by="prelimCluster",
text_size=3, point_alpha=0.5, point_size=2)
plotTSNE(sce.amy, colour_by="cellType", text_by="cellType",
text_size=3, point_alpha=0.5, point_size=2) +
scale_color_manual(values = cell_colors.amy,
labels=paste0(levels(sce.amy$cellType)," (",table(sce.amy$cellType),")")) +
labs(colour="Cell type")
plotTSNE(sce.amy, colour_by="sum", point_alpha=0.5, point_size=2)
plotTSNE(sce.amy, colour_by="doubletScore", point_alpha=0.5, point_size=2)
# And some more informative UMAPs
plotUMAP(sce.amy, colour_by="prelimCluster", text_by="prelimCluster",
text_size=3, point_alpha=0.5, point_size=2)
plotUMAP(sce.amy, colour_by="cellType", text_by="cellType",
text_size=3, point_alpha=0.5, point_size=2) +
scale_color_manual(values = cell_colors.amy,
labels=paste0(levels(sce.amy$cellType)," (",table(sce.amy$cellType),")")) +
labs(colour="Cell type")
dev.off()
## -> proceed to 'step03_markerDetxn-analyses[...].R'
## Some diggings-into for paper =======
# BLA or central amygdala-specific (Cartpt in all subregions... in mouse at least)
plotExpression(sce.amy, exprs_values = "logcounts", features=toupper(c("Cyp26b1", "Bmp3", "Cartpt")),
x="cellType.split", colour_by="cellType.split", point_alpha=0.5, point_size=.7,
add_legend=F) + stat_summary(fun = median, fun.min = median, fun.max = median,
geom = "crossbar", width = 0.3,
colour=rep(tableau20[1:12], 3)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Split by sample
sce.amy$sample.cellType.split <- paste0(ss(sce.amy$sample,"\\.",2), "_", sce.amy$cellType.split)
plotExpression(sce.amy, exprs_values = "logcounts", features=toupper(c("Cyp26b1", "Bmp3")),
x="sample.cellType.split", colour_by="sample.cellType.split", point_alpha=0.5, point_size=.7,
add_legend=F, show_median=T) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Looks like some BLA
# Actually faceting is nicer and maintains colors but have to print genes, separately
# From bulk-RNA-seq (AnJa shared): Look at PART1 (BLA >) & GABRQ/SYTL5 (MeA >)
pdf("pdfs/revision/exploration/zGenesByDonor_PART1-GABRQ-SYTL5_AMY-subclusters_MNT.pdf", height=3, width=7)
# BLA-enriched
plotExpression(sce.amy, exprs_values = "logcounts", features="PART1",
x="cellType.split", colour_by="cellType.split", point_alpha=0.5, point_size=.7,
add_legend=F) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size=7)) + facet_grid(~ sce.amy$donor) +
stat_summary(fun = median, fun.min = median, fun.max = median,
geom = "crossbar", width = 0.3,
colour=rep(tableau20[1:12][c(1:2,5:12, 1:7,9:12)]))
# MeA-enriched
plotExpression(sce.amy, exprs_values = "logcounts", features="GABRQ",
x="cellType.split", colour_by="cellType.split", point_alpha=0.5, point_size=.7,
add_legend=F) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size=7)) + facet_grid(~ sce.amy$donor) +
stat_summary(fun = median, fun.min = median, fun.max = median,
geom = "crossbar", width = 0.3,
colour=rep(tableau20[1:12][c(1:2,5:12, 1:7,9:12)]))
plotExpression(sce.amy, exprs_values = "logcounts", features="SYTL5",
x="cellType.split", colour_by="cellType.split", point_alpha=0.5, point_size=.7,
add_legend=F) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size=7)) + facet_grid(~ sce.amy$donor) +
stat_summary(fun = median, fun.min = median, fun.max = median,
geom = "crossbar", width = 0.3,
colour=rep(tableau20[1:12][c(1:2,5:12, 1:7,9:12)]))
dev.off()
## Reference chunk: If wanting to manually edit 'text_by=' coordinates =======
# Adapted from scater::plotReducedDim():
# Hidden function needed for this chunk ====
.coerce_to_factor <- function(x, level.limit, msg) {
if (!is.null(x)) {
x <- as.factor(x)
if (nlevels(x) > level.limit) {
stop(sprintf("more than %i levels for '%s'", level.limit, msg))
}
}
x
}
# ====
text_by <- "cellType"
text_out <- retrieveCellInfo(sce.amy, text_by, search="colData")
text_out$val <- .coerce_to_factor(text_out$val, level.limit=Inf)
## actually not necessary if the colData chosen (usually cellType[.etc] is factorized)
df_to_plot <- data.frame(reducedDim(sce.amy, "TSNE"))
by_text_x <- vapply(split(df_to_plot$X1, text_out$val), median, FUN.VALUE=0)
by_text_y <- vapply(split(df_to_plot$X2, text_out$val), median, FUN.VALUE=0)
# plot_out <- plot_out + annotate("text", x=by_text_x, y=by_text_y,
# label=names(by_text_x), size=text_size, colour=text_colour)
plotTSNE(sce.amy, colour_by="cellType", point_size=3.5, point_alpha=0.5,
text_size=8, theme_size=16) +
annotate("text", x=by_text_x, y=by_text_y,
label=names(by_text_x), size=6) +
ggtitle("t-SNE on optimal fastMNN-corrected PCs (87)") +
scale_color_manual(values = cell_colors.amy)
# OR
sce.amy$labels <- ifelse(!duplicated(sce.amy$cellType), as.character(sce.amy$cellType), NA)
Labs.df <- data.frame(by_text_x, by_text_y, labs=names(by_text_x))
colDF <- data.frame(colData(sce.amy))
DFforLabs <- cbind(reducedDim(sce.amy,"TSNE"), data.frame(colDF$labels))
colnames(DFforLabs) <- c("X","Y","labels")
# plotTSNE(sce.amy.tsne.optb, colour_by="cellType.split", point_size=4.5, point_alpha=0.5,
# text_size=8, theme_size=18) +
# geom_text_repel(data=DFforLabs,
# aes(label=labels)) +
# ggtitle("t-SNE on top 15 PCs (>= 0.25% var)")
## OK THIS FINALLY WORKS
# -> can replace those X,Y with the median positions for those labels?
DFforLabs.edit <- DFforLabs
DFforLabs.edit$X[!is.na(DFforLabs$labels)] <- by_text_x[match(as.character(DFforLabs$labels[!is.na(DFforLabs$labels)]),
names(by_text_x))]
DFforLabs.edit$Y[!is.na(DFforLabs$labels)] <- by_text_y[match(as.character(DFforLabs$labels[!is.na(DFforLabs$labels)]),
names(by_text_y))]
## Finally print
library(ggrepel)
#dir.create("pdfs/revision/pubFigures")
pdf("pdfs/revision/pubFigures/regionSpecific_Amyg-n5_main-fig-TSNE_MNT2021.pdf", width=9)
set.seed(70321)
plotTSNE(sce.amy, colour_by="cellType", point_size=4.5, point_alpha=0.5,
theme_size=16) + labs(colour="Cell type") +
geom_text_repel(data=DFforLabs.edit, size=5.0,
aes(label=labels)) +
scale_color_manual(values = cell_colors.amy,
labels=paste0(levels(sce.amy$cellType)," (",table(sce.amy$cellType),")")) +
ggtitle("t-SNE on optimal fastMNN-corrected PCs (87)")
dev.off()
### end reference chunk ========
apply(table(sce.amy$cellType, sce.amy$donor),2,function(x){round(prop.table(x),3)})
# br5161 br5212 br5276 br5400 br5701
# Astro_A 0.148 0.107 0.102 0.064 0.108
# Astro_B 0.002 0.003 0.022 0.007 0.001
# Endo 0.000 0.000 0.009 0.004 0.001
# Excit_A 0.032 0.062 0.002 0.008 0.005
# Excit_B 0.000 0.012 0.000 0.000 0.001
# Excit_C 0.002 0.013 0.000 0.004 0.000
# Inhib_A 0.000 0.000 0.162 0.208 0.000
# Inhib_B 0.011 0.035 0.031 0.141 0.021
# Inhib_C 0.039 0.005 0.126 0.049 0.003
# Inhib_D 0.011 0.023 0.055 0.156 0.014
# Inhib_E 0.000 0.000 0.179 0.004 0.001
# Inhib_F 0.007 0.021 0.016 0.046 0.002
# Inhib_G 0.000 0.000 0.034 0.005 0.000
# Inhib_H 0.000 0.000 0.022 0.001 0.000
# Micro 0.126 0.093 0.006 0.067 0.101
# Mural 0.001 0.000 0.011 0.004 0.002
# Oligo 0.516 0.533 0.134 0.177 0.582
# OPC 0.104 0.089 0.088 0.053 0.153
# Tcell 0.001 0.002 0.001 0.002 0.004
round(apply(apply(table(sce.amy$cellType, sce.amy$donor),2,prop.table),1,mean),3)
#Astro_A Astro_B Endo Excit_A Excit_B Excit_C Inhib_A Inhib_B Inhib_C Inhib_D
# 0.106 0.007 0.003 0.022 0.003 0.004 0.074 0.048 0.044 0.052
#Inhib_E Inhib_F Inhib_G Inhib_H Micro Mural Oligo OPC Tcell
# 0.037 0.019 0.008 0.005 0.079 0.003 0.389 0.097 0.002
### Session info for 03Jul2021 ==============================
sessionInfo()
# R version 4.0.4 RC (2021-02-08 r79975)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: CentOS Linux 7 (Core)
#
# Matrix products: default
# BLAS: /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-4.0.x/R/4.0.x/lib64/R/lib/libRblas.so
# LAPACK: /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-4.0.x/R/4.0.x/lib64/R/lib/libRlapack.so
#
# locale:
# [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
# [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
# [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
# [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#
# attached base packages:
# [1] parallel stats4 stats graphics grDevices datasets utils methods
# [9] base
#
# other attached packages:
# [1] ggrepel_0.9.1 dynamicTreeCut_1.63-1 dendextend_1.14.0
# [4] jaffelab_0.99.30 rafalib_1.0.0 DropletUtils_1.10.3
# [7] batchelor_1.6.2 scran_1.18.5 scater_1.18.6
# [10] ggplot2_3.3.3 EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.14.1
# [13] AnnotationFilter_1.14.0 GenomicFeatures_1.42.3 AnnotationDbi_1.52.0
# [16] SingleCellExperiment_1.12.0 SummarizedExperiment_1.20.0 Biobase_2.50.0
# [19] GenomicRanges_1.42.0 GenomeInfoDb_1.26.7 IRanges_2.24.1
# [22] S4Vectors_0.28.1 BiocGenerics_0.36.1 MatrixGenerics_1.2.1
# [25] matrixStats_0.58.0
#
# loaded via a namespace (and not attached):
# [1] googledrive_1.0.1 ggbeeswarm_0.6.0 colorspace_2.0-0
# [4] ellipsis_0.3.2 scuttle_1.0.4 bluster_1.0.0
# [7] XVector_0.30.0 BiocNeighbors_1.8.2 rstudioapi_0.13
# [10] farver_2.1.0 bit64_4.0.5 fansi_0.4.2
# [13] xml2_1.3.2 splines_4.0.4 R.methodsS3_1.8.1
# [16] sparseMatrixStats_1.2.1 cachem_1.0.4 Rsamtools_2.6.0
# [19] ResidualMatrix_1.0.0 dbplyr_2.1.1 R.oo_1.24.0
# [22] HDF5Array_1.18.1 compiler_4.0.4 httr_1.4.2
# [25] dqrng_0.2.1 assertthat_0.2.1 Matrix_1.3-2
# [28] fastmap_1.1.0 lazyeval_0.2.2 limma_3.46.0
# [31] BiocSingular_1.6.0 prettyunits_1.1.1 tools_4.0.4
# [34] rsvd_1.0.3 igraph_1.2.6 gtable_0.3.0
# [37] glue_1.4.2 GenomeInfoDbData_1.2.4 dplyr_1.0.5
# [40] rappdirs_0.3.3 Rcpp_1.0.6 vctrs_0.3.6
# [43] Biostrings_2.58.0 rhdf5filters_1.2.0 rtracklayer_1.50.0
# [46] DelayedMatrixStats_1.12.3 stringr_1.4.0 beachmat_2.6.4
# [49] lifecycle_1.0.0 irlba_2.3.3 statmod_1.4.35
# [52] XML_3.99-0.6 edgeR_3.32.1 zlibbioc_1.36.0
# [55] scales_1.1.1 hms_1.0.0 ProtGenerics_1.22.0
# [58] rhdf5_2.34.0 RColorBrewer_1.1-2 curl_4.3
# [61] memoise_2.0.0 gridExtra_2.3 segmented_1.3-3
# [64] biomaRt_2.46.3 stringi_1.5.3 RSQLite_2.2.7
# [67] BiocParallel_1.24.1 rlang_0.4.10 pkgconfig_2.0.3
# [70] bitops_1.0-7 lattice_0.20-41 purrr_0.3.4
# [73] Rhdf5lib_1.12.1 labeling_0.4.2 GenomicAlignments_1.26.0
# [76] cowplot_1.1.1 bit_4.0.4 tidyselect_1.1.1
# [79] magrittr_2.0.1 R6_2.5.0 generics_0.1.0
# [82] DelayedArray_0.16.3 DBI_1.1.1 pillar_1.6.0
# [85] withr_2.4.2 RCurl_1.98-1.3 tibble_3.1.1
# [88] crayon_1.4.1 utf8_1.2.1 BiocFileCache_1.14.0
# [91] viridis_0.6.0 progress_1.2.2 locfit_1.5-9.4
# [94] grid_4.0.4 blob_1.2.1 digest_0.6.27
# [97] R.utils_2.10.1 openssl_1.4.3 munsell_0.5.0
# [100] beeswarm_0.3.1 viridisLite_0.4.0 vipor_0.4.5
# [103] askpass_1.1