.Rhistory

query_cluster <- split(query@meta.data, query@meta.data[, x1])
query_means <- data.frame()
for (i in 1:length(query_cluster)) {
cluster_name <- names(query_cluster)[i]
cluster_cell <- data.frame(cells=rownames(query_cluster[[i]]))
cluster_embedding <- dplyr::left_join(cluster_cell, embedding,by="cells")
rownames(cluster_embedding) <- cluster_embedding[, 1]
cluster_embedding <- cluster_embedding[, -1]
cluster_embedding <- as.data.frame(lapply(cluster_embedding, as.numeric))
cluster_vec <- colMeans(cluster_embedding)
cluster_vec <- data.frame(cluster_vec)
colnames(cluster_vec)[1] <- cluster_name
if (i==1) {
query_means <- cluster_vec
}
else
query_means <- cbind(query_means, cluster_vec)
}
query_embedding_list[[x1-3]] <- query_means
}
#correlation of CCA cluster representation
CCA_PCC_list <- list()
for (x1 in 1:length(reference_embedding_list)) {
PCC_list <- list()
for (y1 in 1:length(query_embedding_list)) {
CCA_PCC <- cor(query_embedding_list[[y1]], reference_embedding_list[[x1]])
PCC_list[[y1]] <- CCA_PCC
}
CCA_PCC_list[[x1]] <- PCC_list
}
#Epsilon
reference_all_means <- embedding[1:ncol(reference_matrix), ]
reference_all_means <- reference_all_means[, -1]
reference_all_means <- as.data.frame(lapply(reference_all_means, as.numeric))
reference_all_means_vec <- colMeans(reference_all_means)
query_all_means <- embedding[(ncol(reference_matrix)+1):nrow(embedding), ]
query_all_means <- query_all_means[, -1]
query_all_means <- as.data.frame(lapply(query_all_means, as.numeric))
query_all_means_vec <- colMeans(query_all_means)
Epsilon <- cor(reference_all_means_vec, query_all_means_vec)
#match
strongest_match_avg <- data.frame()
for (x1 in 1:length(CCA_PCC_list)) {
for (y1 in 1:length(CCA_PCC_list[[x1]])) {
match_matrix <- Match(data = CCA_PCC_list[[x1]][[y1]], top = 1, threshold = Epsilon)
strongest_match_PCC <- CCA_PCC_list[[x1]][[y1]][match_matrix == 2]
strongest_match_avg[x1,y1] <- mean(strongest_match_PCC)
}
}
#output
name_row <- data.frame(res=seq(from = start.resolution, to = (ncol(reference@meta.data)-3)*step, by = step))
name_row$name <- "D1_"
name_row$res <- stringr::str_c(name_row$name, name_row$res)
rownames(strongest_match_avg) <- name_row$res
name_col <- data.frame(res=seq(from = start.resolution, to = (ncol(query@meta.data)-3)*step, by = step))
name_col$name <- "D2_"
name_col$res <- stringr::str_c(name_col$name, name_col$res)
colnames(strongest_match_avg) <- name_col$res
a <- as.matrix(strongest_match_avg)
b <- which(a==max(a),arr.ind=T)
b <- as.data.frame(b)
c <- nrow(b)
D1_res <- start.resolution + step*(b[c, 1]-1)
D2_res <- start.resolution + step*(b[c, 2]-1)
ClusterMatch_resolution <- list(Resolution_cor = strongest_match_avg, D1_ref_res = D1_res, D2_que_res =D2_res)
return(ClusterMatch_resolution)
}
#' Matching clusters of two datasets
#' @param data the correlation between clusters of two datasets
#' @param top the maximum number of matches for each cluster
#' @param threshold the minimum threshold for matching
#' @return matching matrix
Match<-function(data,top=3,threshold=1.96){
match_matrix<-data
for (i in 1:nrow(data)) {
for (j in 1:ncol(data)) {
top_row<-data[i,order(data[i,],decreasing = TRUE)[1:top]]
top_col<-data[order(data[,j],decreasing = TRUE)[1:top],j]
if (data[i,j] %in% top_row & data[i,j] %in% top_col & data[i,j]>threshold) {
match_matrix[i,j]<-1
}
else
match_matrix[i,j]<-0
}
}
for (i in 1:nrow(data)) {
for (j in 1:ncol(data)) {
top_row<-data[i,order(data[i,],decreasing = TRUE)[1]]
top_col<-data[order(data[,j],decreasing = TRUE)[1],j]
if (data[i,j] %in% top_row & data[i,j] %in% top_col & data[i,j]>threshold) {
match_matrix[i,j]<-2
}
}
}
return(match_matrix)
}
#' Calculating the matching matrix between two datasets at the optimal resolutions
#' @param reference_matrix the gene expression matrix of dataset 1
#' @param query_matrix the gene expression matrix of dataset 2
#' @param ref.res the optimal clustering resolution of dataset 1
#' @param que.res the optimal clustering resolution of dataset 2
#' @param ref.norm whether dataset 1 is normalized
#' @param que.norm whether dataset 2 is normalized
#' @param dim.pca number of PCs to calculate (50 by default)
#' @param dim.cca number of canonical vectors to calculate (30 by default)
#' @param min.cluster.cells the minimum number of cells in a cluster
#' @param random_PCC the minimum value of random noise
#' @param min.pct Seurat FindAllMarkers min.pct
#' @param logfc.threshold Seurat FindAllMarkers logfc.threshold
#' @return a list containing the following elements:
#' - D1_reference: SeuratObject of dataset 1
#' - D2_query: SeuratObject of dataset 2
#' - Matching_matrix: matching matrix, 2 is mutually most correlated clusters, 1 is mutually correlated clusters
#' - strongest_matching_marker_top10: top 10 common marker genes of mutually most correlated clusters
#' - com_marker_list: common marker genes of mutually most correlated clusters
#' - Epsilon: random noise
#' - CCA_cor: PCC between clusters based on CCA representation
#' - Marker_cor：Jaccard coefficient between clusters based on CCA representation
#' - ClusterMatch_cor = ClusterMatch correlation between clusters
#' @import Seurat CreateSeuratObject NormalizeData ScaleData RunPCA FindNeighbors FindClusters RunCCA L2Dim SplitObject FindAllMarkers Idents
#' @import dplyr select left_join
#' @import stringr str_c
#' @export
ClusterMatch_matching <- function(reference_matrix, query_matrix, ref.res, que.res, ref.norm = TRUE, que.norm = TRUE,
dim.pca = 50, dim.cca = 30, min.cluster.cells = 20, random_PCC = 0, min.pct = 0.25, logfc.threshold = 0.25){
#object and clustering
reference <- Seurat::CreateSeuratObject(counts = reference_matrix)
if (ref.norm == TRUE) {
reference <- Seurat::NormalizeData(reference)
}
reference <- Seurat::FindVariableFeatures(reference, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
reference <- Seurat::ScaleData(reference, verbose = FALSE)
reference <- Seurat::RunPCA(reference, features = VariableFeatures(object = reference), verbose = FALSE)
reference <- Seurat::FindNeighbors(reference, dims = 1:dim.pca,verbose = FALSE)
reference <- Seurat::FindClusters(reference, resolution = ref.res)
reference@meta.data$name <- "D1_"
reference$seurat_clusters <- stringr::str_c(reference$name, reference$seurat_clusters)
reference@meta.data <- dplyr::select(reference@meta.data, -c("name"))
query <- Seurat::CreateSeuratObject(counts = query_matrix)
if (que.norm == TRUE) {
query <- Seurat::NormalizeData(query)
}
query <- Seurat::FindVariableFeatures(query, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
query <- Seurat::ScaleData(query, verbose = FALSE)
query <- Seurat::RunPCA(query, features = VariableFeatures(object = query), verbose = FALSE)
query <- Seurat::FindNeighbors(query, dims = 1:dim.pca, verbose = FALSE)
query <- Seurat::FindClusters(query, resolution = que.res)
query@meta.data$name <- "D2_"
query$seurat_clusters <- stringr::str_c(query$name,query$seurat_clusters)
query@meta.data <- dplyr::select(query@meta.data, -c("name"))
#CCA embedding
CCA <- Seurat::RunCCA(object1 = reference, object2 = query, num.cc = dim.cca)
L2CCA <- Seurat::L2Dim(CCA, reduction = "cca")
embedding <- L2CCA@reductions[["cca.l2"]]@cell.embeddings
embedding <- data.frame(cbind(cells=rownames(embedding), embedding))
#CCA cluster representation
#reference
reference_cluster <- Seurat::SplitObject(reference, split.by = "seurat_clusters")
reference_means <- data.frame()
reference_cell_cluster_embedding <- list()
for (i in 1:length(reference_cluster)) {
cluster_name <- names(reference_cluster)[i]
cluster_cell <- data.frame(cells = rownames(reference_cluster[[i]]@meta.data))
cluster_embedding <- dplyr::left_join(cluster_cell, embedding, by="cells")
rownames(cluster_embedding) <- cluster_embedding[, 1]
cluster_embedding <- cluster_embedding[, -1]
reference_cell_cluster_embedding[[i]] <- as.data.frame(lapply(cluster_embedding, as.numeric))
rownames(reference_cell_cluster_embedding[[i]]) <- cluster_cell$cells
reference_cell_cluster_embedding[[i]] <- t(reference_cell_cluster_embedding[[i]])
names(reference_cell_cluster_embedding)[i] <- cluster_name
cluster_embedding <- as.data.frame(lapply(cluster_embedding, as.numeric))
cluster_vec <- colMeans(cluster_embedding)
cluster_vec <- data.frame(cluster_vec)
colnames(cluster_vec)[1] <- cluster_name
if (i==1) {
reference_means <- cluster_vec
}
else
reference_means <- cbind(reference_means, cluster_vec)
}
#query
query_cluster <- Seurat::SplitObject(query, split.by = "seurat_clusters")
query_means <- data.frame()
query_cell_cluster_embedding <- list()
for (i in 1:length(query_cluster)) {
cluster_name <- names(query_cluster)[i]
cluster_cell <- data.frame(cells = rownames(query_cluster[[i]]@meta.data))
cluster_embedding <- dplyr::left_join(cluster_cell, embedding, by="cells")
rownames(cluster_embedding) <- cluster_embedding[, 1]
cluster_embedding <- cluster_embedding[, -1]
query_cell_cluster_embedding[[i]] <- as.data.frame(lapply(cluster_embedding, as.numeric))
rownames(query_cell_cluster_embedding[[i]]) <- cluster_cell$cells
query_cell_cluster_embedding[[i]] <- t(query_cell_cluster_embedding[[i]])
names(query_cell_cluster_embedding)[i] <- cluster_name
cluster_embedding <- as.data.frame(lapply(cluster_embedding, as.numeric))
cluster_vec <- colMeans(cluster_embedding)
cluster_vec <- data.frame(cluster_vec)
colnames(cluster_vec)[1] <- cluster_name
if (i==1) {
query_means<-cluster_vec
}
else
query_means<-cbind(query_means, cluster_vec)
}
#marker cluster representation
#reference
Seurat::Idents(object = reference) <- reference@meta.data$seurat_clusters
reference_marker <- Seurat::FindAllMarkers(reference, only.pos = TRUE, min.pct = min.pct, logfc.threshold = logfc.threshold)
reference_marker_cluster <- split(reference_marker, reference_marker$cluster)
#query
Seurat::Idents(object = query) <- query@meta.data$seurat_clusters
query_marker <- Seurat::FindAllMarkers(query, only.pos = TRUE, min.pct = min.pct, logfc.threshold = logfc.threshold)
query_marker_cluster <- split(query_marker,query_marker$cluster)
#correlation of clusters
#CCA
CCA_PCC <- cor(query_means,reference_means)
CCA_PCC[CCA_PCC<0] <- 0
#marker
jaccard <- function(a, b) {
intersection = length(intersect(a, b))
union = length(a) + length(b) - intersection
return (intersection/union)
}
marker_jaccard<-data.frame()
for (i in 1:length(query_marker_cluster)) {
for (j in 1:length(reference_marker_cluster)) {
marker_jaccard[i,j] <- jaccard(query_marker_cluster[[i]]$gene,reference_marker_cluster[[j]]$gene)
}
}
rownames(marker_jaccard)<-names(query_marker_cluster)
colnames(marker_jaccard)<-names(reference_marker_cluster)
marker_jaccard[marker_jaccard=='NaN']<-0
#Epsilon and beta
reference_all_means <- embedding[1:ncol(reference_matrix), ]
reference_all_means <- reference_all_means[, -1]
reference_all_means <- as.data.frame(lapply(reference_all_means, as.numeric))
reference_all_means_vec <- colMeans(reference_all_means)
query_all_means <- embedding[(ncol(reference_matrix)+1):nrow(embedding), ]
query_all_means <- query_all_means[, -1]
query_all_means <- as.data.frame(lapply(query_all_means, as.numeric))
query_all_means_vec <- colMeans(query_all_means)
Epsilon <- cor(reference_all_means_vec, query_all_means_vec)
beta <- max(CCA_PCC)/max(marker_jaccard)
Epsilon <- max(Epsilon, random_PCC)
CCA_marker_similarity <- CCA_PCC + beta*marker_jaccard
CCA_marker_similarity[CCA_marker_similarity < Epsilon] <- 0
match_matrix <- Match(data = CCA_marker_similarity, top = 3, threshold = Epsilon)
CCA_PCC_match_matrix<-Match(data = CCA_PCC,top = 1,threshold = Epsilon)
#strongest matching cluster marker
overlap<-data.frame()
flag=1
for (i in 1:nrow(match_matrix)) {
for (j in 1:ncol(match_matrix)) {
if (match_matrix[i,j]==2) {
overlap[flag,1] <- colnames(match_matrix)[j]
overlap[flag,2] <- rownames(match_matrix)[i]
flag<-flag+1
}
}
}
overlap$name <- c('/')
overlap$name <- stringr::str_c(overlap$V1, overlap$name)
overlap$name <- stringr::str_c(overlap$name, overlap$V2)
reference_marker_strongest_matching_list <- subset(reference_marker_cluster, names(reference_marker_cluster) %in% overlap$V1)
reference_marker_strongest_matching_list <- reference_marker_strongest_matching_list[overlap$V1]
query_marker_strongest_matching_list <- subset(query_marker_cluster,names(query_marker_cluster) %in% overlap$V2)
query_marker_strongest_matching_list <- query_marker_strongest_matching_list[overlap$V2]
com_marker <- data.frame()
com_marker_list <- list()
com_marker_top_10 <- data.frame()
for (i in 1:nrow(overlap)) {
com_marker_list[[i]] <- merge(reference_marker_strongest_matching_list[[i]], query_marker_strongest_matching_list[[i]], by = "gene" )
com_marker_list[[i]]$avg_log2FC <- (com_marker_list[[i]]$avg_log2FC.x+com_marker_list[[i]]$avg_log2FC.y)/2
com_marker_list[[i]] <- com_marker_list[[i]][order(com_marker_list[[i]]$avg_log2FC, decreasing = TRUE), ]
if (i==1) {
com_marker_top_10 <- data.frame(com_marker_list[[i]]$gene[1:10])
}
else
com_marker_top_10 <- cbind(com_marker_top_10, data.frame(com_marker_list[[i]]$gene[1:10]))
}
names(com_marker_list) <- overlap$name
colnames(com_marker_top_10) <- overlap$name
#output
ClusterMatch_matching <- list(D1_reference = reference, D2_query = query, Matching_matrix = match_matrix,
strongest_matching_marker_top10 = com_marker_top_10, com_marker_list = com_marker_list, Epsilon = Epsilon, CCA_cor = CCA_PCC, Marker_cor = marker_jaccard, ClusterMatch_cor = CCA_marker_similarity )
}
#' Matching clusters and cell types
#' @param data the correlation between clusters and cell types
#' @param threshold the minimum threshold for matching
#' @return the matching matrix of clusters and cell types
#' @import matrixStats rowMaxs
Match_annotation<-function(data,threshold=0){
data<-as.matrix(data)
row_max<-matrixStats::rowMaxs(data)
match_matrix<-data
for (i in 1:nrow(data)) {
for (j in 1:ncol(data)) {
if (data[i,j] == row_max[i] & data[i,j]>threshold) {
match_matrix[i,j]<-1
}
else
match_matrix[i,j]<-0
}
}
return(match_matrix)
}
#' Annotate the query data based on the reference data labels
#' @param reference_matrix the gene expression matrix of reference
#' @param query_matrix the gene expression matrix of query
#' @param reference_label reference data labels
#' @param que.res the query clustering resolution, default 2
#' @param ref.norm whether reference is normalized
#' @param que.norm whether query is normalized
#' @param dim.pca number of PCs to calculate (50 by default)
#' @param dim.cca number of canonical vectors to calculate (30 by default)
#' @param min.cluster.cells the minimum number of cells in a cluster
#' @param random_PCC the minimum value of random noise
#' @return a list containing the following elements:
#' - D1_reference: SeuratObject of reference
#' - D2_query: SeuratObject of query
#' - Matching_matrix: Matching matrix of cell types and clusters
#' @import Seurat CreateSeuratObject NormalizeData ScaleData RunPCA FindNeighbors FindClusters RunCCA L2Dim SplitObject FindAllMarkers Idents
#' @import dplyr select left_join
#' @import stringr str_c
#' @import reshape2 melt
#' @export
ClusterMatch_annotation <- function(reference_matrix, query_matrix, reference_label, que.res=2, ref.norm = TRUE, que.norm = TRUE,
dim.pca = 50, dim.cca = 30, min.cluster.cells = 20, random_PCC = 0){
#object and clustering
reference <- Seurat::CreateSeuratObject(counts = reference_matrix, meta.data = reference_label)
if (ref.norm == TRUE) {
reference <- Seurat::NormalizeData(reference)
}
reference <- Seurat::FindVariableFeatures(reference, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
reference <- Seurat::ScaleData(reference, verbose = FALSE)
reference <- Seurat::RunPCA(reference, features = VariableFeatures(object = reference), verbose = FALSE)
query <- Seurat::CreateSeuratObject(counts = query_matrix)
if (que.norm == TRUE) {
query <- Seurat::NormalizeData(query)
}
query <- Seurat::FindVariableFeatures(query, selection.method = "vst", nfeatures = 2000, verbose = FALSE)
query <- Seurat::ScaleData(query, verbose = FALSE)
query <- Seurat::RunPCA(query, features = VariableFeatures(object = query), verbose = FALSE)
query <- Seurat::FindNeighbors(query, dims = 1:dim.pca, verbose = FALSE)
query <- Seurat::FindClusters(query, resolution = que.res)
query@meta.data$name <- "D2_"
query$seurat_clusters <- stringr::str_c(query$name,query$seurat_clusters)
query@meta.data <- dplyr::select(query@meta.data, -c("name"))
query_cells <- rownames(query@meta.data)
#CCA embedding
CCA <- Seurat::RunCCA(object1 = reference, object2 = query, num.cc = dim.cca)
L2CCA <- Seurat::L2Dim(CCA, reduction = "cca")
embedding <- L2CCA@reductions[["cca.l2"]]@cell.embeddings
embedding <- data.frame(cbind(cells=rownames(embedding), embedding))
#CCA cluster representation
#reference
reference_cluster <- Seurat::SplitObject(reference, split.by = "celltype")
reference_means <- data.frame()
reference_cell_cluster_embedding <- list()
for (i in 1:length(reference_cluster)) {
cluster_name <- names(reference_cluster)[i]
cluster_cell <- data.frame(cells = rownames(reference_cluster[[i]]@meta.data))
cluster_embedding <- dplyr::left_join(cluster_cell, embedding, by="cells")
rownames(cluster_embedding) <- cluster_embedding[, 1]
cluster_embedding <- cluster_embedding[, -1]
reference_cell_cluster_embedding[[i]] <- as.data.frame(lapply(cluster_embedding, as.numeric))
rownames(reference_cell_cluster_embedding[[i]]) <- cluster_cell$cells
reference_cell_cluster_embedding[[i]] <- t(reference_cell_cluster_embedding[[i]])
names(reference_cell_cluster_embedding)[i] <- cluster_name
cluster_embedding <- as.data.frame(lapply(cluster_embedding, as.numeric))
cluster_vec <- colMeans(cluster_embedding)
cluster_vec <- data.frame(cluster_vec)
colnames(cluster_vec)[1] <- cluster_name
if (i==1) {
reference_means <- cluster_vec
}
else
reference_means <- cbind(reference_means, cluster_vec)
}
#query
query_cluster <- Seurat::SplitObject(query, split.by = "seurat_clusters")
query_means <- data.frame()
query_cell_cluster_embedding <- list()
for (i in 1:length(query_cluster)) {
cluster_name <- names(query_cluster)[i]
cluster_cell <- data.frame(cells = rownames(query_cluster[[i]]@meta.data))
cluster_embedding <- dplyr::left_join(cluster_cell, embedding, by="cells")
rownames(cluster_embedding) <- cluster_embedding[, 1]
cluster_embedding <- cluster_embedding[, -1]
query_cell_cluster_embedding[[i]] <- as.data.frame(lapply(cluster_embedding, as.numeric))
rownames(query_cell_cluster_embedding[[i]]) <- cluster_cell$cells
query_cell_cluster_embedding[[i]] <- t(query_cell_cluster_embedding[[i]])
names(query_cell_cluster_embedding)[i] <- cluster_name
cluster_embedding <- as.data.frame(lapply(cluster_embedding, as.numeric))
cluster_vec <- colMeans(cluster_embedding)
cluster_vec <- data.frame(cluster_vec)
colnames(cluster_vec)[1] <- cluster_name
if (i==1) {
query_means<-cluster_vec
}
else
query_means<-cbind(query_means, cluster_vec)
}
#marker cluster representation
#reference
Seurat::Idents(object = reference) <- reference@meta.data$celltype
reference_marker <- Seurat::FindAllMarkers(reference, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
reference_marker_cluster <- split(reference_marker, reference_marker$cluster)
#query
Seurat::Idents(object = query) <- query@meta.data$seurat_clusters
query_marker <- Seurat::FindAllMarkers(query, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
query_marker_cluster <- split(query_marker,query_marker$cluster)
#correlation of clusters
#CCA
CCA_PCC <- cor(query_means,reference_means)
CCA_PCC[CCA_PCC<0] <- 0
#marker
jaccard <- function(a, b) {
intersection = length(intersect(a, b))
union = length(a) + length(b) - intersection
return (intersection/union)
}
marker_jaccard<-data.frame()
for (i in 1:length(query_marker_cluster)) {
for (j in 1:length(reference_marker_cluster)) {
marker_jaccard[i,j] <- jaccard(query_marker_cluster[[i]]$gene,reference_marker_cluster[[j]]$gene)
}
}
rownames(marker_jaccard)<-names(query_marker_cluster)
colnames(marker_jaccard)<-names(reference_marker_cluster)
marker_jaccard[marker_jaccard=='NaN']<-0
#Epsilon and beta
reference_all_means <- embedding[1:ncol(reference_matrix), ]
reference_all_means <- reference_all_means[, -1]
reference_all_means <- as.data.frame(lapply(reference_all_means, as.numeric))
reference_all_means_vec <- colMeans(reference_all_means)
query_all_means <- embedding[(ncol(reference_matrix)+1):nrow(embedding), ]
query_all_means <- query_all_means[, -1]
query_all_means <- as.data.frame(lapply(query_all_means, as.numeric))
query_all_means_vec <- colMeans(query_all_means)
Epsilon <- cor(reference_all_means_vec, query_all_means_vec)
beta <- max(CCA_PCC)/max(marker_jaccard)
Epsilon <- max(Epsilon, random_PCC)
CCA_marker_similarity <- CCA_PCC + beta*marker_jaccard
CCA_marker_similarity[CCA_marker_similarity < Epsilon] <- 0
match_predict_matrix<-Match_annotation(data = CCA_marker_similarity)
match_predict_matrix_melt<-reshape2::melt(match_predict_matrix)
match_predict_matrix_melt<-subset(match_predict_matrix_melt,match_predict_matrix_melt$value>0)
match_predict_matrix_melt<-match_predict_matrix_melt[,1:2]
colnames(match_predict_matrix_melt)<-c("seurat_clusters","predicted_celltypes")
query@meta.data<-dplyr::left_join(query@meta.data,match_predict_matrix_melt)
query@meta.data$predicted_celltypes <- as.character(query@meta.data$predicted_celltypes)
query@meta.data[is.na(query@meta.data)]<-'unknown'
rownames(query@meta.data)<-query_cells
#output
ClusterMatch_transfer <- list(D1_reference = reference, D2_query = query, Matching_matrix = match_predict_matrix)
}
#2.1	Load data
dendritic_batch1 <- read.csv("/Users/bingbao/Desktop/paper_R_package /ClusterMatch/data/dendritic/batch1.csv", row.names = 1)
dendritic_batch2 <- read.csv("/Users/bingbao/Desktop/paper_R_package /ClusterMatch/data/dendritic/batch2.csv", row.names = 1)
dendritic_celltype <- read.csv("/Users/bingbao/Desktop/paper_R_package /ClusterMatch/data/dendritic/celltype.csv")
human_dLGN <- read.csv("/Users/bingbao/Desktop/paper_R_package /ClusterMatch/data/dLGN/human_dLGN.csv", row.names = 1)
macaque_dLGN <- read.csv("/Users/bingbao/Desktop/paper_R_package /ClusterMatch/data/dLGN/macaque_dLGN.csv", row.names = 1)
human_celltype <- read.csv("/Users/bingbao/Desktop/paper_R_package /ClusterMatch/data/dLGN/human_celltype.csv")
rownames(human_celltype) <- human_celltype$cells
macaque_celltype <- read.csv("/Users/bingbao/Desktop/paper_R_package /ClusterMatch/data/dLGN/macaque_celltype.csv")
rownames(macaque_celltype) <- macaque_celltype$cells
#2.2	Find optimal resolutions
dendritic_res <- ClusterMatch_resolution(dendritic_batch1, dendritic_batch2, ref.norm = FALSE, que.norm = FALSE)
#2.3	Match scRNA-seq data at the cluster level
dendritic_matching <- ClusterMatch_matching(dendritic_batch1, dendritic_batch2, ref.res = dendritic_res$D1_ref_res, que.res = dendritic_res$D2_que_res, ref.norm = FALSE, que.norm = FALSE, random_PCC = 1.3)
#2.4	Integrate scRNA-seq datasets
dendritic_integration <- ClusterMatch_integration(dendritic_batch1, dendritic_batch2, ref.res = dendritic_res$D1_ref_res,
que.res = dendritic_res$D2_que_res, ref.norm = FALSE, que.norm = FALSE, random_PCC = 1.3, distance_diff = 3, distance_same = 1)
umap_df <- ClusterMatch_UMAP(embedding = dendritic_integration$cell_embedding, cell = dendritic_celltype)
batch_colour=c("#E64540","#3F81BB")
celltype_colour=c("#E64136","#5F78A3","#EDA6C3","#96C561")
library(ggplot2)
ggplot(umap_df,aes(X1,X2,color=batch)) +
scale_color_manual(values = batch_colour)+
geom_point() + theme_bw() +
theme(panel.grid=element_blank(),plot.title = element_text(hjust = 0.5),text = element_text(size = 20)) +
labs(x="UMAP_1",y="UMAP_2",
title = "ClusterMatch")
ggplot(umap_df,aes(X1,X2,color=label)) +
scale_color_manual(values = celltype_colour)+
geom_point() + theme_bw() +
theme(panel.grid=element_blank(),plot.title = element_text(hjust = 0.5),text = element_text(size = 20)) +
labs(x="UMAP_1",y="UMAP_2",
title = "ClusterMatch")
#2.5	Annotate the query data based on the reference data labels
dLGN_annotation <- ClusterMatch_annotation(human_dLGN, macaque_dLGN, human_celltype, que.res=2, ref.norm = FALSE, que.norm = FALSE)
table(dLGN_annotation$D2_query$predicted_celltypes==macaque_celltype$celltype)
#FALSE  TRUE
#31  1693
devtools::document()
library(ClusterMatch)
library(usethis)
use_package(package = "Seurat")
use_package(package = "Seurat", ">= 3.0.0")
use_package(package = "Seurat")
use_package(package = "dplyr")
use_package(package = "stringr")
use_package(package = "umap")
use_package(package = "reshape2")
use_package(package = "matrixStats")
?dplyr
?matrixStats
?reshape2
?reshape2
?stringr
?umap