-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy path10x_NAc-n8_step02_clust-annot_MNT.R
executable file
·650 lines (535 loc) · 32.4 KB
/
10x_NAc-n8_step02_clust-annot_MNT.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
### MNT 10x snRNA-seq workflow: step 02
### **Region-specific analyses: nucleus accumbens (NAc)**
### - Preprint: (3x) un-selected samples + (2x) NeuN-sorted samples
### - Revision: (3x) samples (2 female, 1 NeuN-sorted)
### Initiated MNT 04Mar2020
#####################################################################
library(SingleCellExperiment)
library(EnsDb.Hsapiens.v86)
library(scater)
library(scran)
library(batchelor)
library(DropletUtils)
library(jaffelab)
library(dendextend)
library(dynamicTreeCut)
source('plotExpressionCustom.R')
### Palette taken from `scater`
tableau10medium = c("#729ECE", "#FF9E4A", "#67BF5C", "#ED665D",
"#AD8BC9", "#A8786E", "#ED97CA", "#A2A2A2",
"#CDCC5D", "#6DCCDA")
tableau20 = c("#1F77B4", "#AEC7E8", "#FF7F0E", "#FFBB78", "#2CA02C",
"#98DF8A", "#D62728", "#FF9896", "#9467BD", "#C5B0D5",
"#8C564B", "#C49C94", "#E377C2", "#F7B6D2", "#7F7F7F",
"#C7C7C7", "#BCBD22", "#DBDB8D", "#17BECF", "#9EDAE5")
# ===
# Load 'pilot' samples
load("/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/all-FACS-n14_preprint_SCEs_processing-QC_MNTMar2021.rda",
verbose=T)
# pilot.data, pilot.data.unfiltered, e.out, ref.sampleInfo, pilot.data.alt, e.out.alt
rm(pilot.data.unfiltered, e.out, e.out.alt)
# Load 2021 expansion set
load("/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/all-FACS-n10_2021rev_SCEs_processing-QC_MNTMar2021.rda", verbose=T)
# pilot.data.2, pilot.data.2.unfiltered, e.out.2, ref.sampleInfo.rev
rm(pilot.data.2.unfiltered, e.out.2)
### MNT comment: At this point, each sample (which is a SCE object in the list, 'pilot.data') has been
# QC'd for cell/nucleus calling ('emptyDrops()' test) and mito rate thresholding
# Additionally, there have been a computed 'doubletScore', which will QC with, after
# clustering (e.g. that there are no doublet-driven clusters, etc.)
### Merging shared-region samples ============================================
# Newest iterations for normalization: multiBatchNorm-alize
# Order as will do for `fastMNN()` (this shouldn't matter here)
sce.nac <- cbind(pilot.data[["br5161.nac"]], pilot.data[["br5212.nac"]], pilot.data[["br5287.nac"]],
# Revision un-selected samples:
pilot.data.2[["br5400.nac"]], pilot.data.2[["br5276.nac"]],
# NeuN-enriched:
pilot.data[["br5207.nac.neun"]],
# Re-sequenced Br5182-NAc sample:
pilot.data.alt[["br5182.nac.neun"]],
pilot.data.2[["br5701.nac.neun"]] # (revision, NeuN-enriched)
)
sce.nac
# class: SingleCellExperiment
# dim: 33538 20571
# metadata(8): Samples Samples ... Samples Samples
# assays(1): counts
# rownames(33538): MIR1302-2HG FAM138A ... AC213203.1 FAM231C
# rowData names(6): gene_id gene_version ... gene_biotype Symbol.uniq
# colnames(20571): AAACCCACATCGAACT-1 AAACCCATCCAACCAA-1 ...
# TTTGGTTAGCAGCACA-1 TTTGGTTAGGTCACAG-1
# colData names(16): Sample Barcode ... protocol sequencer
# reducedDimNames(0):
# altExpNames(0):
# Use `multiBatchNorm()` to compute log-normalized counts, matching the scaling across samples
sce.nac <- multiBatchNorm(sce.nac, batch=sce.nac$sampleID)
# Use the simple `modelGeneVar` - this makes more sense over `combineVar`, since the
# cell composition is already known to be quite different (with NeuN selection)
geneVar.nac <- modelGeneVar(sce.nac)
chosen.hvgs.nac <- geneVar.nac$bio > 0
sum(chosen.hvgs.nac)
# [1] 11372
### Dimensionality reduction ================================================================
# Run `fastMNN` (internally uses `multiBatchPCA`), taking top 100 (instead of default 50 PCs)
set.seed(109)
mnn.hold <- fastMNN(sce.nac, batch=sce.nac$sampleID,
merge.order=c("br5161.nac","br5212.nac","br5287.nac",
"br5400.nac","br5276.nac",
"br5207.nac.neun","br5182.nac.neun",
"br5701.nac.neun"),
subset.row=chosen.hvgs.nac, d=100,
correct.all=TRUE, get.variance=TRUE,
BSPARAM=BiocSingular::IrlbaParam())
# This temp file just used for getting batch-corrected components (drops a variety of entries)
table(colnames(mnn.hold) == colnames(sce.nac)) # all TRUE
table(mnn.hold$batch == sce.nac$sampleID) # all TRUE
# Add them to the SCE, as well as the metadata (though the latter might not be so usefl)
reducedDim(sce.nac, "PCA_corrected") <- reducedDim(mnn.hold, "corrected") # 100 components
metadata(sce.nac) <- metadata(mnn.hold)
# rbind the ref.sampleInfo[.rev]
ref.sampleInfo <- rbind(ref.sampleInfo, ref.sampleInfo.rev)
# Save into a new data file, which will dedicate for pan-brain-analyses
save(sce.nac, chosen.hvgs.nac, ref.sampleInfo,
file="rdas/revision/regionSpecific_NAc-n8_cleaned-combined_MNT2021.rda")
## 'getClusteredPCs()' evaluated in qsub mode (with 'R-batchJob_NAc-n8_optimalPCselxn_MNT2021.R')
# --> saved into same .rda
### Resume work in optimal PC space
load("rdas/revision/regionSpecific_NAc-n8_cleaned-combined_MNT2021.rda", verbose=T)
# sce.nac, chosen.hvgs.nac, ref.sampleInfo, pc.choice.nac
## How many PCs is optimal?:
metadata(pc.choice.nac)$chosen
# 94
# Assign this chosen (94 PCs) to 'PCA_opt'
reducedDim(sce.nac, "PCA_opt") <- reducedDim(sce.nac, "PCA_corrected")[ ,1:(metadata(pc.choice.nac)$chosen)]
## t-SNE
set.seed(109)
sce.nac <- runTSNE(sce.nac, dimred="PCA_opt")
## UMAP
set.seed(109)
sce.nac <- runUMAP(sce.nac, dimred="PCA_opt")
# How do these look?
plotReducedDim(sce.nac, dimred="TSNE", colour_by="sampleID")
plotReducedDim(sce.nac, dimred="UMAP", colour_by="sampleID")
### Clustering: Two-step ======================================================
### Step 1: Perform graph-based clustering in this optimal PC space
# - take k=20 NN to build graph
snn.gr <- buildSNNGraph(sce.nac, k=20, use.dimred="PCA_opt")
clusters.k20 <- igraph::cluster_walktrap(snn.gr)$membership
table(clusters.k20)
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# 3927 4262 239 251 988 283 285 718 40 5146 314 638 529 63 98 99
# 17 18 19 20 21 22 23 24 25 26 27 28 29
# 1000 651 22 58 86 429 36 52 240 41 37 21 18
# Assign as 'prelimCluster'
sce.nac$prelimCluster <- factor(clusters.k20)
table(sce.nac$prelimCluster, sce.nac$donor)
# Save for now
save(sce.nac, chosen.hvgs.nac, ref.sampleInfo, pc.choice.nac,
file="/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/regionSpecific_NAc-n8_cleaned-combined_MNT2021.rda")
### Step 2: Hierarchical clustering of pseudo-bulked ("PB'd") counts
# ** That is, to pseudo-bulk (aka 'cluster-bulk') on raw counts, on all [non-zero] genes,
# normalize with `librarySizeFactors()`, log2-transform, then perform HC'ing
# (load the above)
sce.nac <- sce.nac[ ,-grep("drop.", sce.nac$cellType)]
sce.nac$cellType <- droplevels(sce.nac$cellType)
## Preliminary cluster index for pseudo-bulking
#clusIndexes = splitit(sce.nac$prelimCluster)
clusIndexes = splitit(sce.nac$cellType)
cluster.PBcounts <- sapply(clusIndexes, function(ii){
rowSums(assays(sce.nac)$counts[ ,ii])
}
)
# Btw: prelimCluster 24 & 26, especially, may be doublet-driven, but also 23, 28 & 29:
round(sapply(clusIndexes, function(x){quantile(sce.nac$doubletScore[x])}), 3)
# 1 2 3 4 5 6 7 8 9 10 11 12 13
# 0% 0.000 0.000 0.017 0.000 0.000 0.000 0.027 0.000 0.366 0.000 0.000 0.000 0.000
# 25% 0.230 0.248 0.094 0.027 0.194 0.060 0.213 0.205 0.530 0.125 0.018 0.060 0.095
# 50% 0.435 0.443 0.119 0.043 0.436 0.121 0.305 0.332 0.890 0.358 0.043 0.102 0.302
# 75% 0.843 0.823 0.182 1.330 0.781 0.204 0.613 0.587 1.413 0.830 0.075 0.204 0.594
# 100% 20.479 11.620 2.213 3.736 4.856 2.315 11.178 8.403 3.119 11.355 5.496 14.674 9.045
# 14 15 16 17 18 19 20 21 22 23 24 25 26
# 0% 0.000 0.017 0.008 0.000 0.000 0.008 0.034 0.009 0.000 2.269 2.028 0.000 3.550
# 25% 0.021 0.053 0.058 0.079 0.047 0.025 0.131 0.036 0.025 4.380 8.185 0.026 13.219
# 50% 0.028 0.080 0.095 0.145 0.089 0.059 0.234 0.060 0.053 7.026 12.780 0.044 16.139
# 75% 0.049 1.136 0.127 0.294 0.131 0.102 0.406 0.478 0.105 8.306 15.285 0.112 18.726
# 100% 0.089 2.919 1.871 6.723 4.215 1.328 1.856 17.153 1.517 12.332 25.141 4.006 24.444
# 27 28 29
# 0% 0.362 0.825 1.832
# 25% 0.936 2.701 3.478
# 50% 1.166 6.252 4.889 - tested findMarkers interactively on these and there are indeed
# 75% 1.956 6.918 5.006 no real PW markers for prelimClusters 23, 24, 26, 28; can drop
# 100% 6.708 10.558 5.225 - 29 actually has real markers, including SOX4, BCAN, GPR17, TNS3 (OPC_COP)
# And total N transcripts / cluster:
sapply(clusIndexes, function(x){quantile(sce.nac$sum[x])})
# 1 2 3 4 5 6 7 8 9 10
# 0% 1774.0 2680.00 2362.0 1951.0 1828.00 2531 3807 4290.0 5246.00 565.00
# 25% 21349.0 20138.00 16942.5 12086.0 7261.25 13269 21212 23099.0 12017.75 4231.75
# 50% 26944.0 25311.00 20716.0 16269.0 9707.50 16479 27878 29959.5 16953.50 5894.50
# 75% 35404.5 33227.75 25313.0 21893.5 12737.75 20451 34620 40725.5 24388.50 7531.75
# 100% 114562.0 171929.00 72727.0 70890.0 29548.00 77527 109993 124105.0 65162.00 31312.00
# 11 12 13 14 15 16 17 18 19 20
# 0% 1197.00 2500.00 101 105.0 7257.00 798.0 635.00 1379.0 1164.0 4534.00
# 25% 20204.75 17329.75 289 469.5 21798.25 4049.5 7457.75 8630.0 2566.5 11219.00
# 50% 25540.50 22217.00 461 866.0 28172.50 6524.0 10811.50 11284.0 4229.5 13969.50
# 75% 33380.75 27756.00 881 1327.5 38033.25 9175.5 15545.00 14231.5 6374.0 17356.75
# 100% 92007.00 113864.00 71212 5239.0 118689.00 17601.0 37265.00 29825.0 8855.0 21624.00
# 21 22 23 24 25 26 27 28 29
# 0% 751.00 1504 7539.00 18719.00 2579.00 3430 4273 3388 9916.00
# 25% 12951.25 4268 13242.25 40880.25 14446.50 28996 16622 9667 14691.75
# 50% 15630.50 5545 17571.00 49361.00 18832.00 37389 24143 12934 18738.50
# 75% 22642.50 6789 25108.25 63095.75 25553.25 45372 35972 15563 24131.25
# 100% 73538.00 12537 36220.00 95998.00 80360.00 71127 69839 26000 42806.00
### MNT comment 25Jun: skip 'collapsing' of prelimClusters, since several dropped from above QC:
# Compute LSFs at this level
sizeFactors.PB <- librarySizeFactors(cluster.PBcounts)
# Normalize with these LSFs
geneExprs.temp <- t(apply(cluster.PBcounts, 1, function(x) {log2(x/sizeFactors.PB + 1)}))
## Perform hierarchical clustering
dist.clusCollapsed <- dist(t(geneExprs.temp))
tree.clusCollapsed <- hclust(dist.clusCollapsed, "ward.D2")
dend <- as.dendrogram(tree.clusCollapsed, hang=0.2)
# Just for observation
myplclust(tree.clusCollapsed, main="all NAc sample clusters (n=8) prelim-kNN-cluster relationships",
cex.main=1, cex.lab=0.8, cex=0.6)
## Define color pallet
# cluster_colors <- unique(c(tableau20, tableau10medium)[clust.treeCut[order.dendrogram(dend)]])
# names(cluster_colors) <- unique(clust.treeCut[order.dendrogram(dend)])
# labels_colors(dend) <- cluster_colors[as.character(clust.treeCut[order.dendrogram(dend)])]
# MNT 28Jul2021: from below, where the cellType colors are defined:
# As with plotting top PCs:
# Trick some cell class colors - use blue & red
cell_colors.nac.hold <- cell_colors.nac
cell_colors.nac <- cell_colors.nac[names(cell_colors.nac) %in% levels(sce.nac$cellType)]
cell_colors.nac["MSN.excit"] <- cell_colors.nac.hold["MSN.D1_A"]
cell_colors.nac["MSN.inhib"] <- cell_colors.nac.hold["drop.doublet_B"]
cell_colors.nac["MSN.broad"] <- cell_colors.nac.hold["drop.doublet_C"]
labels_colors(dend) <- cell_colors.nac[match(labels(dend), names(cell_colors.nac))]
dend.rename <- dend
labels(dend.rename)[labels(dend.rename) %in% c("MSN.D1_A","MSN.D1_D", "MSN.D2_A","MSN.D2_B")] <- "MSN.excit"
labels(dend.rename)[labels(dend.rename) %in% c("MSN.D1_B","MSN.D1_C","MSN.D1_E",
"MSN.D1_F", "MSN.D2_C","MSN.D2_D")] <- "MSN.inhib"
labels_colors(dend.rename) <- cell_colors.nac[match(labels(dend.rename), names(cell_colors.nac))]
# Print for future reference
pdf("pdfs/revision/regionSpecific_NAc-n8_finalClasses-HC-relationships_flaggedClustersDropped_MNT2021.pdf",height=5)
par(cex=1.1, font=2, mai=c(1.5,.5,.5,.5))
plot(dend, main="NAc cell class HC relationships",
ylab="Height", cex.lab=1.5, cex.main=1.5)
colored_bars(colors=labels_colors(dend.rename),
y_shift=-450, rowLabels="")
dev.off()
## Print marker genes for annotation
markers.mathys.custom = list(
'neurons' = c('SYT1', 'SNAP25', 'GRIN1'),
'excitatory_neuron' = c('CAMK2A', 'NRGN','SLC17A7', 'SLC17A6', 'SLC17A8'),
'inhibitory_neuron' = c('GAD1', 'GAD2', 'SLC32A1'),
'oligodendrocyte' = c('MBP', 'MOBP', 'PLP1'),
'oligodendrocyte_precursor' = c('PDGFRA', 'VCAN', 'CSPG4'),
# Post-hoc from interactive marker analysis
'differn_committed_OPC' = c("SOX4", "BCAN", "GPR17", "TNS3"),
'microglia' = c('CD74', 'CSF1R', 'C3'),
'astrocytes' = c('GFAP', 'TNC', 'AQP4', 'SLC1A2'),
'endothelial' = c('CLDN5', 'FLT1', 'VTN'),
'Tcell' = c('SKAP1', 'ITK', 'CD247'),
# Post-hoc: Some intersecting markers from mural cells in HPC, AMY
'Mural' = c('COL1A2', 'TBX18', 'RBPMS'),
# Kristen's MSN markers - not printed for the broader collapsed clusters
'MSNs.pan' = c("PPP1R1B","BCL11B"),# "CTIP2")
'MSNs.D1' = c("DRD1", "PDYN", "TAC1"),
'MSNs.D2' = c("DRD2", "PENK")
)
## Print broad cell type markers
pdf("pdfs/revision/regionSpecific_NAc-n8_marker-logExprs_collapsedClusters_MNT2021.pdf", height=5, width=9)
for(i in 1:length(markers.mathys.custom)){
print(
plotExpressionCustom(sce = sce.nac,
features = markers.mathys.custom[[i]],
features_name = names(markers.mathys.custom)[[i]],
anno_name = "collapsedCluster")
)
}
dev.off()
# Do this for the prelimClusters too, since there's just 29
# First remove the will-be 'drop.lowNTx_' prelimCluster 13
# Post-hoc: also drop 24 & 26
sce.temp <- sce.nac
sce.temp <- sce.temp[ ,!(sce.temp$prelimCluster == 13)]
sce.temp <- sce.temp[ ,!(sce.temp$prelimCluster %in% c(24,26))]
sce.temp$prelimCluster <- droplevels(sce.temp$prelimCluster)
pdf("pdfs/revision/regionSpecific_NAc-n8_marker-logExprs_prelimClusters_MNT2021.pdf", height=5, width=12)
for(i in 1:length(markers.mathys.custom)){
print(
plotExpressionCustom(sce = sce.temp,
features = markers.mathys.custom[[i]],
features_name = names(markers.mathys.custom)[[i]],
anno_name = "prelimCluster") +
scale_color_manual(values = c(tableau10medium, tableau20))
)
}
dev.off()
## Save
save(sce.nac, chosen.hvgs.nac, pc.choice.nac, clusterRefTab.nac, ref.sampleInfo,
file="/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/regionSpecific_NAc-n8_cleaned-combined_MNT2021.rda")
### MNT Note 25Jun2021 ===
# Since there were only 29 'prelimCluster' from kNN graph-based clustering, and 5 of these
# are being dropped due to doublet ID/low Tx-driven clustering, we are not not utilizing any
# hierarchical clustering like with the other regions for a 'collapsedCluster' column ===
## Annotate 'prelimCluster's, based on broad cell type markers ===
# (canonical, above, and from markers looked at in 'step3')
annotationTab.nac <- data.frame(cluster=c(1:29))
annotationTab.nac$cellType <- NA
annotationTab.nac$cellType[c(4,9,15,25,27)] <- paste0("Inhib_", c("A","B","C","D","E"))
annotationTab.nac$cellType[c(1,3,6,8,12,21)] <- paste0("MSN.D1_", c("A","B","C","D","E","F"))
annotationTab.nac$cellType[c(2,7,11,20)] <- paste0("MSN.D2_", c("A","B","C","D"))
annotationTab.nac$cellType[c(5,10)] <- c("Oligo_A","Oligo_B")
annotationTab.nac$cellType[c(16,17,22)] <- c("Astro_A","Astro_B","Micro")
annotationTab.nac$cellType[c(18,29)] <- c("OPC","OPC_COP")
annotationTab.nac$cellType[c(14,19)] <- c("Micro_resting","Macrophage")
annotationTab.nac$cellType[c(13, 23,24,26,28)] <- c("drop.lowNTx",
paste0("drop.doublet_", c("A","B","C","D")))
sce.nac$cellType <- annotationTab.nac$cellType[match(sce.nac$prelimCluster,
annotationTab.nac$cluster)]
sce.nac$cellType <- factor(sce.nac$cellType)
cell_colors.nac <- c(tableau20, tableau10medium)[1:29]
names(cell_colors.nac) <- annotationTab.nac$cellType
cell_colors.nac
# MSN.D1_A MSN.D2_A MSN.D1_B Inhib_A Oligo_A MSN.D1_C
# "#1F77B4" "#AEC7E8" "#FF7F0E" "#FFBB78" "#2CA02C" "#98DF8A"
# MSN.D2_B MSN.D1_D Inhib_B Oligo_B MSN.D2_C MSN.D1_E
# "#D62728" "#FF9896" "#9467BD" "#C5B0D5" "#8C564B" "#C49C94"
# drop.lowNTx Micro_resting Inhib_C Astro_A Astro_B OPC
# "#E377C2" "#F7B6D2" "#7F7F7F" "#C7C7C7" "#BCBD22" "#DBDB8D"
# Macrophage MSN.D2_D MSN.D1_F Micro drop.doublet_A drop.doublet_B
# "#17BECF" "#9EDAE5" "#729ECE" "#FF9E4A" "#67BF5C" "#ED665D"
# Inhib_D drop.doublet_C Inhib_E drop.doublet_D OPC_COP
# "#AD8BC9" "#A8786E" "#ED97CA" "#A2A2A2" "#CDCC5D"
# Save
save(sce.nac, chosen.hvgs.nac, pc.choice.nac, ref.sampleInfo, annotationTab.nac, cell_colors.nac,
file="/dcs04/lieber/marmaypag/Tran_LIBD001/Matt/MNT_thesis/snRNAseq/10x_pilot_FINAL/rdas/revision/regionSpecific_NAc-n8_cleaned-combined_MNT2021.rda")
## Re-print marker expression plots with annotated cluster names ===
pdf("pdfs/revision/regionSpecific_NAc-n8_marker-logExprs_annotatedClusters_MNT2021.pdf", height=5, width=9)
for(i in 1:length(markers.mathys.custom)){
print(
plotExpressionCustom(sce = sce.nac,
features = markers.mathys.custom[[i]],
features_name = names(markers.mathys.custom)[[i]],
anno_name = "cellType") +
scale_color_manual(values = cell_colors.nac)
)
}
dev.off()
## Optionally, for a cleaner version, drop those 'drop.[lowNTx/doublet]_'s and re-print
sce.nac <- sce.nac[ ,-grep("drop.", sce.nac$cellType)]
sce.nac$cellType <- droplevels(sce.nac$cellType)
## Re-print reducedDims with these annotations (keep the 'drop.' clusters here) ===
pdf("pdfs/revision/regionSpecific_NAc-n8_reducedDims-with-annotatedClusters_MNT2021.pdf",width=8)
plotReducedDim(sce.nac, dimred="PCA_corrected", ncomponents=5, colour_by="cellType", point_alpha=0.5) +
scale_color_manual(values = cell_colors.nac) + labs(colour="Cell type")
plotTSNE(sce.nac, colour_by="sampleID", point_alpha=0.5, point_size=2)
plotTSNE(sce.nac, colour_by="protocol", point_alpha=0.5, point_size=2)
plotTSNE(sce.nac, colour_by="prelimCluster", text_by="prelimCluster",
text_size=3, point_alpha=0.5, point_size=2)
plotTSNE(sce.nac, colour_by="cellType", text_by="cellType",
text_size=3, point_alpha=0.5, point_size=2) +
scale_color_manual(values = cell_colors.nac,
labels=paste0(levels(sce.nac$cellType)," (",table(sce.nac$cellType),")")) +
labs(colour="Cell type")
plotTSNE(sce.nac, colour_by="sum", point_alpha=0.5, point_size=2)
plotTSNE(sce.nac, colour_by="doubletScore", point_alpha=0.5, point_size=2)
# And some more informative UMAPs
plotUMAP(sce.nac, colour_by="prelimCluster", text_by="prelimCluster",
text_size=3, point_alpha=0.5, point_size=2)
plotUMAP(sce.nac, colour_by="cellType", text_by="cellType",
text_size=3, point_alpha=0.5, point_size=2) +
scale_color_manual(values = cell_colors.nac,
labels=paste0(levels(sce.nac$cellType)," (",table(sce.nac$cellType),")")) +
labs(colour="Cell type")
dev.off()
## -> proceed to 'step03_markerDetxn-analyses[...].R'
# Adapted from scater::plotReducedDim():
# Hidden function needed for this chunk ====
.coerce_to_factor <- function(x, level.limit, msg) {
if (!is.null(x)) {
x <- as.factor(x)
if (nlevels(x) > level.limit) {
stop(sprintf("more than %i levels for '%s'", level.limit, msg))
}
}
x
}
# ====
### For main Fig (revision): TSNE with [repelled] labels (ggrepel::geom_text_repel()) ===
load("rdas/revision/regionSpecific_NAc-n8_cleaned-combined_MNT2021.rda", verbose=T)
# sce.nac, chosen.hvgs.nac, pc.choice.nac, ref.sampleInfo, annotationTab.nac, cell_colors.nac
# Drop those 'drop.[lowNTx/doublet]_'s and re-print
sce.nac <- sce.nac[ ,-grep("drop.", sce.nac$cellType)]
sce.nac$cellType <- droplevels(sce.nac$cellType)
text_by <- "cellType"
text_out <- retrieveCellInfo(sce.nac, text_by, search="colData")
text_out$val <- .coerce_to_factor(text_out$val, level.limit=Inf)
## actually not necessary if the colData chosen (usually cellType[.etc] is factorized)
df_to_plot <- data.frame(reducedDim(sce.nac, "TSNE"))
by_text_x <- vapply(split(df_to_plot$X1, text_out$val), median, FUN.VALUE=0)
by_text_y <- vapply(split(df_to_plot$X2, text_out$val), median, FUN.VALUE=0)
# plot_out <- plot_out + annotate("text", x=by_text_x, y=by_text_y,
# label=names(by_text_x), size=text_size, colour=text_colour)
plotTSNE(sce.nac, colour_by="cellType", point_size=4.5, point_alpha=0.5,
text_size=8, theme_size=18) +
annotate("text", x=by_text_x, y=by_text_y,
label=names(by_text_x), size=6) +
scale_color_manual(values = cell_colors.nac) +
ggtitle("t-SNE on optimal fastMNN-corrected PCs (94)")
# OR
sce.nac$labels <- ifelse(!duplicated(sce.nac$cellType), as.character(sce.nac$cellType), NA)
Labs.df <- data.frame(by_text_x, by_text_y, labs=names(by_text_x))
colDF <- data.frame(colData(sce.nac))
DFforLabs <- cbind(reducedDim(sce.nac,"TSNE"), data.frame(colDF$labels))
colnames(DFforLabs) <- c("X","Y","labels")
# -> can replace those X,Y with the median positions for those labels
DFforLabs.edit <- DFforLabs
DFforLabs.edit$X[!is.na(DFforLabs$labels)] <- by_text_x[match(as.character(DFforLabs$labels[!is.na(DFforLabs$labels)]),
names(by_text_x))]
DFforLabs.edit$Y[!is.na(DFforLabs$labels)] <- by_text_y[match(as.character(DFforLabs$labels[!is.na(DFforLabs$labels)]),
names(by_text_y))]
## Finally print
library(ggrepel)
pdf("pdfs/revision/pubFigures/regionSpecific_NAc-n8_main-fig-TSNE_MNT2021.pdf", height=6.5, width=10.5)
set.seed(109)
plotTSNE(sce.nac, colour_by="cellType", point_size=4.5, point_alpha=0.5,
theme_size=16) + labs(colour="Cell type") +
geom_text_repel(data=DFforLabs.edit, size=4.5,
aes(label=labels)) +
scale_color_manual(values = cell_colors.nac,
labels=paste0(levels(sce.nac$cellType)," (",table(sce.nac$cellType),")")) +
ggtitle("t-SNE on optimal fastMNN-corrected PCs (94)")
dev.off()
# # Grant version - different seeds still overlay D1.1 & D1.3 w/ bigger font, so just adjust those positions
# # - crank up the axes label sizes too - will just need to trim off the title & legend
# # bc these are linked
# DFforLabs.edit[!is.na(DFforLabs$labels), ]
# DFforLabs.edit$Y[which(DFforLabs$labels=="MSN.D1.3")] <- DFforLabs.edit$Y[which(DFforLabs$labels=="MSN.D1.3")]-2
#
# pdf("pdfs/pubFigures/FINAL_forGrant_NAc-ALL-n5_tSNE_15PCs_MNTMay2020.pdf", width=9)
# set.seed(109)
# plotTSNE(sce.nac, colour_by="cellType", point_size=6.5, point_alpha=0.5,
# theme_size=25) +
# geom_text_repel(data=DFforLabs.edit, size=7,
# aes(label=labels)) +
# ggtitle("t-SNE on top 15 PCs (>= 0.25% var)")
# dev.off()
## Heatmap of broad marker genes (leaving out defined markers/subcluster for now) ===
library(pheatmap)
cell.idx <- splitit(sce.nac$cellType)
dat <- as.matrix(assay(sce.nac, "logcounts"))
pdf('pdfs/revision/pubFigures/heatmap-geneExprs_NAc-n8_mean-broadMarkers_MNT2021.pdf', useDingbats=TRUE, height=6, width=7)
#pdf('pdfs/revision/pubFigures/heatmap-geneExprs_NAc-n8_median-broadMarkers_MNT2021.pdf', useDingbats=TRUE, height=6, width=7)
genes <- c('SNAP25','CAMK2A','SLC17A7','GAD1','DRD1','TAC1','DRD2','PENK','PPP1R1B',
'MBP','PDGFRA','AQP4','CD74','CD163')
current_dat <- do.call(cbind, lapply(cell.idx, function(ii) rowMeans(dat[genes, ii])))
# # or medians:
# current_dat <- do.call(cbind, lapply(cell.idx, function(ii) rowMedians(dat[genes, ii])))
# # for some reason `rowMedians()` doesn't keep row names...
# rownames(current_dat) <- genes
current_dat <- current_dat[ ,c(11:20, 3:7, 21:22,24,23,1:2,9:10,8)]
pheatmap(current_dat, cluster_rows = FALSE, cluster_cols = FALSE, breaks = seq(0.02, 4, length.out = 101),
color = colorRampPalette(RColorBrewer::brewer.pal(n = 7, name = "OrRd"))(100), fontsize = 16,
legend=T)
grid::grid.text(label="Expression \n (log2)",x=0.95,y=0.61, gp=grid::gpar(fontsize=10))
dev.off()
## For Day Lab ( & Keri's requests) - MNT 03Apr2020 ========================
# First drop "Ambig.lowNtrxts" (93 nuclei)
sce.nac <- sce.nac[ ,sce.nac$cellType.final != "ambig.lowNtrxts"]
sce.nac$cellType.final <- droplevels(sce.nac$cellType.final)
## AFTER re-assigning MSN.broad (and added all DRD genes for kicks)
pdf("pdfs/exploration/zref_logExprs_gene-requests_Martinowich.pdf", width=7, height=9)
plotExpression(sce.nac, exprs_values = "logcounts", features=c("PVALB", "KIT", "CHAT", "RELN", "TH",
paste0("DRD", 1:5)),
x="cellType.final", colour_by="cellType.final", point_alpha=0.5, point_size=.7,
add_legend=F, ncol=3) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar",
width = 0.3, colour=rep(tableau20[1:14], 10)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
ggtitle(label="Miscellaneous genes of interest")
pdf.options(height=4.5)
dev.off()
# Added 27Apr2020
pdf("pdfs/exploration/zref_logExprs_gene-requests_Martinowich.2.pdf", width=7, height=5)
plotExpression(sce.nac, exprs_values = "logcounts", features=c("NGFR","NTRK1","NTRK2","NTRK3","BDNF"),
x="cellType.final", colour_by="cellType.final", point_alpha=0.5, point_size=.7,
add_legend=F, ncol=3) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar",
width = 0.3, colour=rep(tableau20[1:14], 5)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
dev.off()
c("NGFR", "NTRK1", "NTRK2", "NTRK3", "BDNF") %in% rownames(sce.nac)
# Other checks by request - not to print
plotExpression(sce.nac, exprs_values = "logcounts", features="HTR7",
x="cellType.final", colour_by="cellType.final", point_alpha=0.5, point_size=.7,
add_legend=F, ncol=3) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar",
width = 0.3, colour=rep(tableau20[1:14], 1)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
### Session info for 25Jun2021 ============
sessionInfo()
# R version 4.0.4 RC (2021-02-08 r79975)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: CentOS Linux 7 (Core)
#
# Matrix products: default
# BLAS: /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-4.0.x/R/4.0.x/lib64/R/lib/libRblas.so
# LAPACK: /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-4.0.x/R/4.0.x/lib64/R/lib/libRlapack.so
#
# locale:
# [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
# [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
# [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
# [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#
# attached base packages:
# [1] parallel stats4 stats graphics grDevices datasets utils methods
# [9] base
#
# other attached packages:
# [1] dynamicTreeCut_1.63-1 dendextend_1.14.0 jaffelab_0.99.30
# [4] rafalib_1.0.0 DropletUtils_1.10.3 batchelor_1.6.2
# [7] scran_1.18.5 scater_1.18.6 ggplot2_3.3.3
# [10] EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.14.1 AnnotationFilter_1.14.0
# [13] GenomicFeatures_1.42.3 AnnotationDbi_1.52.0 SingleCellExperiment_1.12.0
# [16] SummarizedExperiment_1.20.0 Biobase_2.50.0 GenomicRanges_1.42.0
# [19] GenomeInfoDb_1.26.7 IRanges_2.24.1 S4Vectors_0.28.1
# [22] BiocGenerics_0.36.1 MatrixGenerics_1.2.1 matrixStats_0.58.0
#
# loaded via a namespace (and not attached):
# [1] googledrive_1.0.1 ggbeeswarm_0.6.0 colorspace_2.0-0
# [4] ellipsis_0.3.2 scuttle_1.0.4 bluster_1.0.0
# [7] XVector_0.30.0 BiocNeighbors_1.8.2 rstudioapi_0.13
# [10] farver_2.1.0 bit64_4.0.5 fansi_0.4.2
# [13] xml2_1.3.2 splines_4.0.4 R.methodsS3_1.8.1
# [16] sparseMatrixStats_1.2.1 cachem_1.0.4 Rsamtools_2.6.0
# [19] ResidualMatrix_1.0.0 dbplyr_2.1.1 R.oo_1.24.0
# [22] HDF5Array_1.18.1 compiler_4.0.4 httr_1.4.2
# [25] dqrng_0.2.1 assertthat_0.2.1 Matrix_1.3-2
# [28] fastmap_1.1.0 lazyeval_0.2.2 limma_3.46.0
# [31] BiocSingular_1.6.0 prettyunits_1.1.1 tools_4.0.4
# [34] rsvd_1.0.3 igraph_1.2.6 gtable_0.3.0
# [37] glue_1.4.2 GenomeInfoDbData_1.2.4 dplyr_1.0.5
# [40] rappdirs_0.3.3 Rcpp_1.0.6 vctrs_0.3.6
# [43] Biostrings_2.58.0 rhdf5filters_1.2.0 rtracklayer_1.50.0
# [46] DelayedMatrixStats_1.12.3 stringr_1.4.0 beachmat_2.6.4
# [49] lifecycle_1.0.0 irlba_2.3.3 statmod_1.4.35
# [52] XML_3.99-0.6 edgeR_3.32.1 zlibbioc_1.36.0
# [55] scales_1.1.1 hms_1.0.0 ProtGenerics_1.22.0
# [58] rhdf5_2.34.0 RColorBrewer_1.1-2 curl_4.3
# [61] memoise_2.0.0 gridExtra_2.3 segmented_1.3-3
# [64] biomaRt_2.46.3 stringi_1.5.3 RSQLite_2.2.7
# [67] BiocParallel_1.24.1 rlang_0.4.10 pkgconfig_2.0.3
# [70] bitops_1.0-7 lattice_0.20-41 purrr_0.3.4
# [73] Rhdf5lib_1.12.1 labeling_0.4.2 GenomicAlignments_1.26.0
# [76] cowplot_1.1.1 bit_4.0.4 tidyselect_1.1.1
# [79] magrittr_2.0.1 R6_2.5.0 generics_0.1.0
# [82] DelayedArray_0.16.3 DBI_1.1.1 pillar_1.6.0
# [85] withr_2.4.2 RCurl_1.98-1.3 tibble_3.1.1
# [88] crayon_1.4.1 utf8_1.2.1 BiocFileCache_1.14.0
# [91] viridis_0.6.0 progress_1.2.2 locfit_1.5-9.4
# [94] grid_4.0.4 blob_1.2.1 digest_0.6.27
# [97] R.utils_2.10.1 openssl_1.4.3 munsell_0.5.0
# [100] beeswarm_0.3.1 viridisLite_0.4.0 vipor_0.4.5
# [103] askpass_1.1