Skip to content

Commit

Permalink
Merge pull request #14 from cellmapslab/plot_options
Browse files Browse the repository at this point in the history
Add which_plots to plot.lcc so that one can select the plots that should be generated
  • Loading branch information
jonas-hag authored Sep 27, 2022
2 parents d1c3633 + 32aa404 commit 44ef039
Show file tree
Hide file tree
Showing 14 changed files with 359 additions and 260 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: longmixr
Title: Longitudinal Consensus Clustering with 'flexmix'
Version: 1.0.0
Version: 1.1.0
Authors@R:
c(person(given = "Jonas",
family = "Hagenberg",
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# longmixr 1.1.0

* include the argument `which_plots` in the `plot.lcc` function so that one can
specify which plots should be plotted

# longmixr 1.0.0

This is the first version of the package (new release).
39 changes: 25 additions & 14 deletions R/helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,18 +120,23 @@ triangle <- function(input_matrix,
#'
#' @param matrix_list list of all consensus matrices
#' @param breaks number of breaks
#' @param which_plots which plots should be plotted, can include \code{"CDF"} or
#' \code{"delta"}
#'
#' @importFrom graphics hist lines legend
#' @importFrom grDevices rainbow
#'
#' @return a CDF plot
#' @return a CDF plot and/or delta CDF plot
#' @noRd
CDF <- function(matrix_list,
breaks = 100) {
# set up the plot
plot(0, xlim = c(0, 1), ylim = c(0, 1), col = "white",
bg = "white", xlab = "consensus index", ylab = "CDF",
main = "consensus CDF", las = 2)
breaks = 100,
which_plots = c("CDF", "delta")) {
if ("CDF" %in% which_plots) {
# set up the plot
plot(0, xlim = c(0, 1), ylim = c(0, 1), col = "white",
bg = "white", xlab = "consensus index", ylab = "CDF",
main = "consensus CDF", las = 2)
}

k <- length(matrix_list)
this_colors <- rainbow(k - 1)
Expand All @@ -150,22 +155,28 @@ CDF <- function(matrix_list,
this_area <- this_area + h$counts[bi] * (h$breaks[bi + 1] - h$breaks[bi])
}
area_k <- c(area_k, this_area)
# add the CDF to the plot
lines(h$mids, h$counts, col = this_colors[i - 1], lwd = 2,
type = "l")
if ("CDF" %in% which_plots) {
# add the CDF to the plot
lines(h$mids, h$counts, col = this_colors[i - 1], lwd = 2,
type = "l")
}
}
if ("CDF" %in% which_plots) {
legend(0.8, 0.5, legend = paste(rep("", k - 1), seq(2, k, by = 1), sep = ""),
fill = this_colors)
}
legend(0.8, 0.5, legend = paste(rep("", k - 1), seq(2, k, by = 1), sep = ""),
fill = this_colors)

# plot the area under the CDF change
delta_k <- area_k[1]
for (i in 2:(length(area_k))) {
# proportional increase relative to the previous k
delta_k <- c(delta_k, (area_k[i] - area_k[i - 1]) / area_k[i - 1])
}
plot(1 + (1:length(delta_k)), y = delta_k, xlab = "k",
ylab = "relative change in area under CDF curve",
main = "Delta area", type = "b")
if ("delta" %in% which_plots) {
plot(1 + (1:length(delta_k)), y = delta_k, xlab = "k",
ylab = "relative change in area under CDF curve",
main = "Delta area", type = "b")
}
}

#' Assign colours to cluster assignments
Expand Down
233 changes: 0 additions & 233 deletions R/longitudinal_consensus_cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -295,239 +295,6 @@ lcc_run <- function(data,
found_number_clusters = found_number_clusters)
}

#' Plot a longitudinal consensus clustering
#'
#' @param x \code{lcc} object (output from \code{\link{longitudinal_consensus_cluster}})
#' @param color_palette optional character vector of colors for consensus matrix
#' @param ... additional parameters for plotting; currently not used
#'
#' @return Plots the following plots:\tabular{ll}{
#' \code{consensus matrix legend} \tab the legend for the following consensus matrix plots \cr
#' \tab \cr
#' \code{consensus matrix plot} \tab for every specified number of clusters, a heatmap of the consensus matrix and the result of the final clustering is shown \cr
#' \tab \cr
#' \code{consensus CDF} \tab a line plot of the CDFs for all different specified numbers of clusters \cr
#' \tab \cr
#' \code{Delta area} \tab elbow plot of the difference in the CDFs between the different numbers of clusters \cr
#' \tab \cr
#' \code{tracking plot} \tab cluster assignment of the subjects throughout the different cluster solutions \cr
#' \tab \cr
#' \code{item-consensus} \tab for every item (subject), calculate the average consensus value with all items that are assigned to one consensus cluster. This is repeated for every cluster and for all different numbers of clusters \cr
#' \tab \cr
#' \code{cluster-consensus} \tab every bar represents the average pair-wise item-consensus within one consensus cluster
#' }
#'
#' @importFrom stats as.dendrogram heatmap median
#' @importFrom graphics barplot par
#'
#' @export
plot.lcc <- function(x, color_palette = NULL, ...) {

checkmate::assert_class(x, "lcc")
checkmate::assert_character(color_palette, null.ok = TRUE)

# set up the colour palette
color_list <- list()
color_matrix <- NULL
this_pal <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C",
"#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6",
"#6A3D9A", "#FFFF99", "#B15928", "#bd18ea", "#2ef4ca",
"#f4cced", "#f4cc03", "#05188a", "#e5a25a", "#06f106",
"#85848f", "#000000", "#076f25", "#93cd7f", "#4d0776",
"#ffffff")

# set up the plot scale
col_breaks <- NA
if (is.null(color_palette)) {
col_breaks <- 10
color_palette <- my_pal(col_breaks)
}
else {
col_breaks <- length(color_palette)
}

##############################################################################
# plot the consensus matrices
##############################################################################

# plot the legend
sc <- cbind(seq(0, 1, by = 1 / col_breaks))
rownames(sc) <- sc[, 1]
sc <- cbind(sc, sc)
heatmap(sc,
Colv = NA,
Rowv = NA,
symm = FALSE,
scale = "none",
col = color_palette,
na.rm = TRUE,
labRow = rownames(sc),
labCol = FALSE,
main = "consensus matrix legend")

# plot the consensus matrices for every number of clusters
# for every cluster, calculate the correct colours for every observation
for (tk in seq(from = 2, to = length(x), by = 1)) {

c_matrix <- x[[tk]][["consensus_matrix"]]
c_tree <- x[[tk]][["consensus_tree"]]
c_class <- x[[tk]][["consensus_class"]]

found_flexmix_clusters <- x[[tk]][["found_flexmix_clusters"]]
median_found_flexmix_clusters <- median(found_flexmix_clusters)

# for every cluster solution except the first define the previous consensus
# class so that the colours are assigned correctly across plots
if (tk == 2) {
previous_c_class <- NULL
} else {
previous_c_class <- x[[tk - 1]][["consensus_class"]]
}
color_list <- set_cluster_colors(previous_c_class,
c_class,
this_pal,
color_list)

# row ordered matrix for plotting with additional row of 0s (as in the
# original ConsensusClusterPlus code)
plot_c_matrix <- rbind(c_matrix[c_tree$order, ], 0)

heatmap(plot_c_matrix,
Colv = as.dendrogram(c_tree),
Rowv = NA,
symm = FALSE,
scale = "none",
col = color_palette,
na.rm = TRUE,
labRow = FALSE,
labCol = FALSE,
margins = c(5, 5),
main = paste("consensus matrix k=", tk, "; median flexmix clusters: ",
median_found_flexmix_clusters, sep = ""),
ColSideColors = color_list[[1]])
legend("topright", legend = unique(c_class), fill = unique(color_list[[1]]),
horiz = FALSE)

color_matrix <- rbind(color_matrix, color_list[[1]])
}

##############################################################################
# plot the CDF, delta CDF and observation tracking plots
##############################################################################

CDF(x[["general_information"]][["consensus_matrices"]])
n_last_element <- length(x)
colour_tracking_matrix <- color_matrix[, x[[n_last_element]]$consensus_tree$order]
# if only 2 clusters were specified, colour_tracking_matrix is not a matrix
# but a vector -> then the plot doesn't work -> transform
if (!is.matrix(colour_tracking_matrix) && n_last_element == 2) {
colour_tracking_matrix <- matrix(colour_tracking_matrix, nrow = 1)
}
cluster_tracking_plot(colour_tracking_matrix)

##############################################################################
# plot the item-consensus
##############################################################################

cluster_consensus <- rbind()
cci <- rbind()
sumx <- list()
colors_arr <- c()
old_par <- par(mfrow = c(3, 1), mar = c(4, 3, 2, 0))
on.exit(par(old_par))
# tk is the number of predefined clusters
for (tk in seq(from = 2, to = length(x), by = 1)) {
ei_cols <- c()
c_matrix <- x[[tk]][["consensus_matrix"]]
c_class <- x[[tk]][["consensus_class"]]

# for every subject (item), calculate the average consensus value with all
# subjects who are grouped into one cluster
# do this for every cluster
c_matrix <- triangle(c_matrix, mode = 2)
# for each cluster in tk/the predefined number of clusters
# e.g. for tk = 2, there should be 2 clusters and for both clusters the
# mean item consensus is calculated
for (cluster_i in sort(unique(c_class))) {
items <- which(c_class == cluster_i)
n_k <- length(items)
mk <- sum(c_matrix[items, items], na.rm = TRUE) / ((n_k * (n_k - 1)) / 2)
# cluster consensus
cluster_consensus <- rbind(cluster_consensus, c(tk, cluster_i, mk))
for (item_i in rev(x[[2]]$consensus_tree$order)) {
denom <- if (item_i %in% items) {
n_k - 1
}
else {
n_k
}
# mean item consensus to a cluster
mean_item_consensus <- sum(c(c_matrix[item_i, items],
c_matrix[items, item_i]),
na.rm = TRUE) / denom
# add a new row with cluster, cluster index, item index, item consensus
cci <- rbind(cci, c(tk, cluster_i, item_i, mean_item_consensus))
}
ei_cols <- c(ei_cols, rep(cluster_i, length(c_class)))
}
# only plot the new tk data
cck <- cci[which(cci[, 1] == tk), ]
# group by item, order by cluster i
w <- lapply(split(cck, cck[, 3]), function(x) {
y <- matrix(unlist(x), ncol = 4)
y[order(y[, 2]), 4]
})

# set up the matrix for plotting
q <- matrix(as.numeric(unlist(w)), ncol = length(w), byrow = FALSE)
# order by leave order of tk = 2
q <- q[, x[[2]]$consensus_tree$order]
# this results in q: a matrix of tk rows and sample columns, values are
# item consensus of sample to the cluster
# so for a defined possible number of clusters (tk), the values in the rows
# are the item consensus for the possible clusters

# it needs to be colorM[tk - 1, ] because the first element in
# colorM refers to tk (so for 2 clusters, the information is stored in the
# first entry and not in the second)
this_colors <- unique(cbind(x[[tk]]$consensus_class, color_matrix[tk - 1, ]))
this_colors <- this_colors[order(as.numeric(this_colors[, 1])), 2]
colors_arr <- c(colors_arr, this_colors)
ranked_bar_plot(item_consensus_matrix = q,
cluster_colors = this_colors,
item_order = c_class[x[[2]]$consensus_tree$order],
title = paste("k=", tk, sep = ""))
}

##############################################################################
# plot the cluster-consensus
##############################################################################

cluster_consensus_y <- cluster_color <- number_clusters_lab <- NULL
# bring the cluster consensus data into the correct format
previous_number_cluster <- cluster_consensus[1, 1]
for (i in seq_len(length(colors_arr))) {
# if the current number of predefined clusters (in the previous loops called
# tk) is not the same as the previous, then insert 0s as space between the
# different numbers of clusters on the x axis
if (previous_number_cluster != cluster_consensus[i, 1]) {
cluster_consensus_y <- c(cluster_consensus_y, 0, 0)
cluster_color <- c(cluster_color, NA, NA)
previous_number_cluster <- cluster_consensus[i, 1]
number_clusters_lab <- c(number_clusters_lab, NA, NA)
}
cluster_consensus_y <- c(cluster_consensus_y, cluster_consensus[i, 3])
cluster_color <- c(cluster_color, colors_arr[i])
number_clusters_lab <- c(number_clusters_lab, cluster_consensus[i, 1])
}
names(cluster_consensus_y) <- number_clusters_lab
# no need to store the parameters here, as the original mfrow and mar
# parameters are stored and restored on exit already earlier in this function
par(mfrow = c(3, 1), mar = c(4, 3, 2, 0))
barplot(cluster_consensus_y, col = cluster_color, border = cluster_color,
main = "cluster-consensus", ylim = c(0, 1), las = 1)
}

#' Try out different linkage methods
#'
#' In the final step, the consensus clustering performs a hierarchical clustering
Expand Down
Loading

0 comments on commit 44ef039

Please sign in to comment.