diff --git a/DESCRIPTION b/DESCRIPTION index 8c3c0b8..0cf963f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: longmixr Title: Longitudinal Consensus Clustering with 'flexmix' -Version: 1.0.0 +Version: 1.1.0 Authors@R: c(person(given = "Jonas", family = "Hagenberg", diff --git a/NEWS.md b/NEWS.md index 8c1b92c..3f3121a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# longmixr 1.1.0 + +* include the argument `which_plots` in the `plot.lcc` function so that one can + specify which plots should be plotted + # longmixr 1.0.0 This is the first version of the package (new release). diff --git a/R/helper_functions.R b/R/helper_functions.R index 185119f..157bcca 100644 --- a/R/helper_functions.R +++ b/R/helper_functions.R @@ -120,18 +120,23 @@ triangle <- function(input_matrix, #' #' @param matrix_list list of all consensus matrices #' @param breaks number of breaks +#' @param which_plots which plots should be plotted, can include \code{"CDF"} or +#' \code{"delta"} #' #' @importFrom graphics hist lines legend #' @importFrom grDevices rainbow #' -#' @return a CDF plot +#' @return a CDF plot and/or delta CDF plot #' @noRd CDF <- function(matrix_list, - breaks = 100) { - # set up the plot - plot(0, xlim = c(0, 1), ylim = c(0, 1), col = "white", - bg = "white", xlab = "consensus index", ylab = "CDF", - main = "consensus CDF", las = 2) + breaks = 100, + which_plots = c("CDF", "delta")) { + if ("CDF" %in% which_plots) { + # set up the plot + plot(0, xlim = c(0, 1), ylim = c(0, 1), col = "white", + bg = "white", xlab = "consensus index", ylab = "CDF", + main = "consensus CDF", las = 2) + } k <- length(matrix_list) this_colors <- rainbow(k - 1) @@ -150,12 +155,16 @@ CDF <- function(matrix_list, this_area <- this_area + h$counts[bi] * (h$breaks[bi + 1] - h$breaks[bi]) } area_k <- c(area_k, this_area) - # add the CDF to the plot - lines(h$mids, h$counts, col = this_colors[i - 1], lwd = 2, - type = "l") + if ("CDF" %in% which_plots) { + # add the CDF to the plot + lines(h$mids, h$counts, col = this_colors[i - 1], lwd = 2, + type = "l") + } + } + if ("CDF" %in% which_plots) { + legend(0.8, 0.5, legend = paste(rep("", k - 1), seq(2, k, by = 1), sep = ""), + fill = this_colors) } - legend(0.8, 0.5, legend = paste(rep("", k - 1), seq(2, k, by = 1), sep = ""), - fill = this_colors) # plot the area under the CDF change delta_k <- area_k[1] @@ -163,9 +172,11 @@ CDF <- function(matrix_list, # proportional increase relative to the previous k delta_k <- c(delta_k, (area_k[i] - area_k[i - 1]) / area_k[i - 1]) } - plot(1 + (1:length(delta_k)), y = delta_k, xlab = "k", - ylab = "relative change in area under CDF curve", - main = "Delta area", type = "b") + if ("delta" %in% which_plots) { + plot(1 + (1:length(delta_k)), y = delta_k, xlab = "k", + ylab = "relative change in area under CDF curve", + main = "Delta area", type = "b") + } } #' Assign colours to cluster assignments diff --git a/R/longitudinal_consensus_cluster.R b/R/longitudinal_consensus_cluster.R index de7bab8..df4773f 100644 --- a/R/longitudinal_consensus_cluster.R +++ b/R/longitudinal_consensus_cluster.R @@ -295,239 +295,6 @@ lcc_run <- function(data, found_number_clusters = found_number_clusters) } -#' Plot a longitudinal consensus clustering -#' -#' @param x \code{lcc} object (output from \code{\link{longitudinal_consensus_cluster}}) -#' @param color_palette optional character vector of colors for consensus matrix -#' @param ... additional parameters for plotting; currently not used -#' -#' @return Plots the following plots:\tabular{ll}{ -#' \code{consensus matrix legend} \tab the legend for the following consensus matrix plots \cr -#' \tab \cr -#' \code{consensus matrix plot} \tab for every specified number of clusters, a heatmap of the consensus matrix and the result of the final clustering is shown \cr -#' \tab \cr -#' \code{consensus CDF} \tab a line plot of the CDFs for all different specified numbers of clusters \cr -#' \tab \cr -#' \code{Delta area} \tab elbow plot of the difference in the CDFs between the different numbers of clusters \cr -#' \tab \cr -#' \code{tracking plot} \tab cluster assignment of the subjects throughout the different cluster solutions \cr -#' \tab \cr -#' \code{item-consensus} \tab for every item (subject), calculate the average consensus value with all items that are assigned to one consensus cluster. This is repeated for every cluster and for all different numbers of clusters \cr -#' \tab \cr -#' \code{cluster-consensus} \tab every bar represents the average pair-wise item-consensus within one consensus cluster -#' } -#' -#' @importFrom stats as.dendrogram heatmap median -#' @importFrom graphics barplot par -#' -#' @export -plot.lcc <- function(x, color_palette = NULL, ...) { - - checkmate::assert_class(x, "lcc") - checkmate::assert_character(color_palette, null.ok = TRUE) - - # set up the colour palette - color_list <- list() - color_matrix <- NULL - this_pal <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", - "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", - "#6A3D9A", "#FFFF99", "#B15928", "#bd18ea", "#2ef4ca", - "#f4cced", "#f4cc03", "#05188a", "#e5a25a", "#06f106", - "#85848f", "#000000", "#076f25", "#93cd7f", "#4d0776", - "#ffffff") - - # set up the plot scale - col_breaks <- NA - if (is.null(color_palette)) { - col_breaks <- 10 - color_palette <- my_pal(col_breaks) - } - else { - col_breaks <- length(color_palette) - } - - ############################################################################## - # plot the consensus matrices - ############################################################################## - - # plot the legend - sc <- cbind(seq(0, 1, by = 1 / col_breaks)) - rownames(sc) <- sc[, 1] - sc <- cbind(sc, sc) - heatmap(sc, - Colv = NA, - Rowv = NA, - symm = FALSE, - scale = "none", - col = color_palette, - na.rm = TRUE, - labRow = rownames(sc), - labCol = FALSE, - main = "consensus matrix legend") - - # plot the consensus matrices for every number of clusters - # for every cluster, calculate the correct colours for every observation - for (tk in seq(from = 2, to = length(x), by = 1)) { - - c_matrix <- x[[tk]][["consensus_matrix"]] - c_tree <- x[[tk]][["consensus_tree"]] - c_class <- x[[tk]][["consensus_class"]] - - found_flexmix_clusters <- x[[tk]][["found_flexmix_clusters"]] - median_found_flexmix_clusters <- median(found_flexmix_clusters) - - # for every cluster solution except the first define the previous consensus - # class so that the colours are assigned correctly across plots - if (tk == 2) { - previous_c_class <- NULL - } else { - previous_c_class <- x[[tk - 1]][["consensus_class"]] - } - color_list <- set_cluster_colors(previous_c_class, - c_class, - this_pal, - color_list) - - # row ordered matrix for plotting with additional row of 0s (as in the - # original ConsensusClusterPlus code) - plot_c_matrix <- rbind(c_matrix[c_tree$order, ], 0) - - heatmap(plot_c_matrix, - Colv = as.dendrogram(c_tree), - Rowv = NA, - symm = FALSE, - scale = "none", - col = color_palette, - na.rm = TRUE, - labRow = FALSE, - labCol = FALSE, - margins = c(5, 5), - main = paste("consensus matrix k=", tk, "; median flexmix clusters: ", - median_found_flexmix_clusters, sep = ""), - ColSideColors = color_list[[1]]) - legend("topright", legend = unique(c_class), fill = unique(color_list[[1]]), - horiz = FALSE) - - color_matrix <- rbind(color_matrix, color_list[[1]]) - } - - ############################################################################## - # plot the CDF, delta CDF and observation tracking plots - ############################################################################## - - CDF(x[["general_information"]][["consensus_matrices"]]) - n_last_element <- length(x) - colour_tracking_matrix <- color_matrix[, x[[n_last_element]]$consensus_tree$order] - # if only 2 clusters were specified, colour_tracking_matrix is not a matrix - # but a vector -> then the plot doesn't work -> transform - if (!is.matrix(colour_tracking_matrix) && n_last_element == 2) { - colour_tracking_matrix <- matrix(colour_tracking_matrix, nrow = 1) - } - cluster_tracking_plot(colour_tracking_matrix) - - ############################################################################## - # plot the item-consensus - ############################################################################## - - cluster_consensus <- rbind() - cci <- rbind() - sumx <- list() - colors_arr <- c() - old_par <- par(mfrow = c(3, 1), mar = c(4, 3, 2, 0)) - on.exit(par(old_par)) - # tk is the number of predefined clusters - for (tk in seq(from = 2, to = length(x), by = 1)) { - ei_cols <- c() - c_matrix <- x[[tk]][["consensus_matrix"]] - c_class <- x[[tk]][["consensus_class"]] - - # for every subject (item), calculate the average consensus value with all - # subjects who are grouped into one cluster - # do this for every cluster - c_matrix <- triangle(c_matrix, mode = 2) - # for each cluster in tk/the predefined number of clusters - # e.g. for tk = 2, there should be 2 clusters and for both clusters the - # mean item consensus is calculated - for (cluster_i in sort(unique(c_class))) { - items <- which(c_class == cluster_i) - n_k <- length(items) - mk <- sum(c_matrix[items, items], na.rm = TRUE) / ((n_k * (n_k - 1)) / 2) - # cluster consensus - cluster_consensus <- rbind(cluster_consensus, c(tk, cluster_i, mk)) - for (item_i in rev(x[[2]]$consensus_tree$order)) { - denom <- if (item_i %in% items) { - n_k - 1 - } - else { - n_k - } - # mean item consensus to a cluster - mean_item_consensus <- sum(c(c_matrix[item_i, items], - c_matrix[items, item_i]), - na.rm = TRUE) / denom - # add a new row with cluster, cluster index, item index, item consensus - cci <- rbind(cci, c(tk, cluster_i, item_i, mean_item_consensus)) - } - ei_cols <- c(ei_cols, rep(cluster_i, length(c_class))) - } - # only plot the new tk data - cck <- cci[which(cci[, 1] == tk), ] - # group by item, order by cluster i - w <- lapply(split(cck, cck[, 3]), function(x) { - y <- matrix(unlist(x), ncol = 4) - y[order(y[, 2]), 4] - }) - - # set up the matrix for plotting - q <- matrix(as.numeric(unlist(w)), ncol = length(w), byrow = FALSE) - # order by leave order of tk = 2 - q <- q[, x[[2]]$consensus_tree$order] - # this results in q: a matrix of tk rows and sample columns, values are - # item consensus of sample to the cluster - # so for a defined possible number of clusters (tk), the values in the rows - # are the item consensus for the possible clusters - - # it needs to be colorM[tk - 1, ] because the first element in - # colorM refers to tk (so for 2 clusters, the information is stored in the - # first entry and not in the second) - this_colors <- unique(cbind(x[[tk]]$consensus_class, color_matrix[tk - 1, ])) - this_colors <- this_colors[order(as.numeric(this_colors[, 1])), 2] - colors_arr <- c(colors_arr, this_colors) - ranked_bar_plot(item_consensus_matrix = q, - cluster_colors = this_colors, - item_order = c_class[x[[2]]$consensus_tree$order], - title = paste("k=", tk, sep = "")) - } - - ############################################################################## - # plot the cluster-consensus - ############################################################################## - - cluster_consensus_y <- cluster_color <- number_clusters_lab <- NULL - # bring the cluster consensus data into the correct format - previous_number_cluster <- cluster_consensus[1, 1] - for (i in seq_len(length(colors_arr))) { - # if the current number of predefined clusters (in the previous loops called - # tk) is not the same as the previous, then insert 0s as space between the - # different numbers of clusters on the x axis - if (previous_number_cluster != cluster_consensus[i, 1]) { - cluster_consensus_y <- c(cluster_consensus_y, 0, 0) - cluster_color <- c(cluster_color, NA, NA) - previous_number_cluster <- cluster_consensus[i, 1] - number_clusters_lab <- c(number_clusters_lab, NA, NA) - } - cluster_consensus_y <- c(cluster_consensus_y, cluster_consensus[i, 3]) - cluster_color <- c(cluster_color, colors_arr[i]) - number_clusters_lab <- c(number_clusters_lab, cluster_consensus[i, 1]) - } - names(cluster_consensus_y) <- number_clusters_lab - # no need to store the parameters here, as the original mfrow and mar - # parameters are stored and restored on exit already earlier in this function - par(mfrow = c(3, 1), mar = c(4, 3, 2, 0)) - barplot(cluster_consensus_y, col = cluster_color, border = cluster_color, - main = "cluster-consensus", ylim = c(0, 1), las = 1) -} - #' Try out different linkage methods #' #' In the final step, the consensus clustering performs a hierarchical clustering diff --git a/R/plot.lcc.R b/R/plot.lcc.R new file mode 100644 index 0000000..d66f752 --- /dev/null +++ b/R/plot.lcc.R @@ -0,0 +1,278 @@ +#' Plot a longitudinal consensus clustering +#' +#' @param x \code{lcc} object (output from \code{\link{longitudinal_consensus_cluster}}) +#' @param color_palette optional character vector of colors for consensus matrix +#' @param which_plots determine which plots should be plotted; the default is \code{"all"}. +#' Alternatively, a combination of the following values can be specified to plot +#' only some of the below mentioned plots: \code{"consensusmatrix_legend"}, +#' \code{"consensusmatrix_x"} where \code{x} is replaced by the corresponding number +#' of clusters, \code{"CDF"}, \code{"delta"}, \code{"cluster_tracking"}, +#' \code{"item_consensus"} or \code{"cluster_consensus"}. When you want to plot +#' all consensus matrices and the legend, you can just use \code{"consensusmatrix"}. +#' @param ... additional parameters for plotting; currently not used +#' +#' @return Plots the following plots (when selected):\tabular{ll}{ +#' \code{consensus matrix legend} \tab the legend for the following consensus matrix plots (select with \code{"consensusmatrix_legend"}) \cr +#' \tab \cr +#' \code{consensus matrix plot} \tab for every specified number of clusters, a heatmap of the consensus matrix and the result of the final clustering is shown (select with \code{"consensusmatrix_x"} where \code{x} is replaced by the corresponding number +#' of clusters) \cr +#' \tab \cr +#' \code{consensus CDF} \tab a line plot of the CDFs for all different specified numbers of clusters (select with \code{"CDF"})\cr +#' \tab \cr +#' \code{Delta area} \tab elbow plot of the difference in the CDFs between the different numbers of clusters (select with \code{"delta"}) \cr +#' \tab \cr +#' \code{tracking plot} \tab cluster assignment of the subjects throughout the different cluster solutions (select with \code{"cluster_tracking"}) \cr +#' \tab \cr +#' \code{item-consensus} \tab for every item (subject), calculate the average consensus value with all items that are assigned to one consensus cluster. This is repeated for every cluster and for all different numbers of clusters (select with \code{"item_consensus"}) \cr +#' \tab \cr +#' \code{cluster-consensus} \tab every bar represents the average pair-wise item-consensus within one consensus cluster (select with \code{"cluster_consensus"}) +#' } +#' +#' @importFrom stats as.dendrogram heatmap median +#' @importFrom graphics barplot par +#' +#' @export +plot.lcc <- function(x, + color_palette = NULL, + which_plots = "all", + ...) { + + checkmate::assert_class(x, "lcc") + checkmate::assert_character(color_palette, null.ok = TRUE) + + # determine the possible consensus matrices + possible_consensusmatrix <- paste0("consensusmatrix_", + seq(from = 2, to = length(x), by = 1)) + possible_plots <- c("all", "consensusmatrix_legend", "consensusmatrix", + possible_consensusmatrix, "CDF", "delta", + "cluster_tracking", "item_consensus", "cluster_consensus") + + checkmate::assert_character(which_plots) + if (!all(which_plots %in% possible_plots)) { + stop(paste0("which_plot must be one of ", + paste0(possible_plots, collapse = ", "), ".")) + } + + # if "all" was selected for the plots, set which_plot to all possible plots, + # so that I don't need to include "all" in every if condition when checking + # which plots to plot + if ("all" %in% which_plots) { + which_plots <- possible_plots + } + + # set up the colour palette + color_list <- list() + color_matrix <- NULL + this_pal <- c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", + "#FB9A99", "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", + "#6A3D9A", "#FFFF99", "#B15928", "#bd18ea", "#2ef4ca", + "#f4cced", "#f4cc03", "#05188a", "#e5a25a", "#06f106", + "#85848f", "#000000", "#076f25", "#93cd7f", "#4d0776", + "#ffffff") + + # set up the plot scale + col_breaks <- NA + if (is.null(color_palette)) { + col_breaks <- 10 + color_palette <- my_pal(col_breaks) + } + else { + col_breaks <- length(color_palette) + } + + ############################################################################## + # plot the consensus matrices + ############################################################################## + + # plot the legend + sc <- cbind(seq(0, 1, by = 1 / col_breaks)) + rownames(sc) <- sc[, 1] + sc <- cbind(sc, sc) + if ("consensusmatrix_legend" %in% which_plots || + "consensusmatrix" %in% which_plots) { + heatmap(sc, + Colv = NA, + Rowv = NA, + symm = FALSE, + scale = "none", + col = color_palette, + na.rm = TRUE, + labRow = rownames(sc), + labCol = FALSE, + main = "consensus matrix legend") + } + + + # plot the consensus matrices for every number of clusters + # for every cluster, calculate the correct colours for every observation + for (tk in seq(from = 2, to = length(x), by = 1)) { + + c_matrix <- x[[tk]][["consensus_matrix"]] + c_tree <- x[[tk]][["consensus_tree"]] + c_class <- x[[tk]][["consensus_class"]] + + found_flexmix_clusters <- x[[tk]][["found_flexmix_clusters"]] + median_found_flexmix_clusters <- median(found_flexmix_clusters) + + # for every cluster solution except the first define the previous consensus + # class so that the colours are assigned correctly across plots + if (tk == 2) { + previous_c_class <- NULL + } else { + previous_c_class <- x[[tk - 1]][["consensus_class"]] + } + color_list <- set_cluster_colors(previous_c_class, + c_class, + this_pal, + color_list) + + # row ordered matrix for plotting with additional row of 0s (as in the + # original ConsensusClusterPlus code) + plot_c_matrix <- rbind(c_matrix[c_tree$order, ], 0) + + if (paste0("consensusmatrix_", tk) %in% which_plots || + "consensusmatrix" %in% which_plots) { + heatmap(plot_c_matrix, + Colv = as.dendrogram(c_tree), + Rowv = NA, + symm = FALSE, + scale = "none", + col = color_palette, + na.rm = TRUE, + labRow = FALSE, + labCol = FALSE, + margins = c(5, 5), + main = paste("consensus matrix k=", tk, "; median flexmix clusters: ", + median_found_flexmix_clusters, sep = ""), + ColSideColors = color_list[[1]]) + legend("topright", legend = unique(c_class), fill = unique(color_list[[1]]), + horiz = FALSE) + } + color_matrix <- rbind(color_matrix, color_list[[1]]) + } + + ############################################################################## + # plot the CDF, delta CDF and observation tracking plots + ############################################################################## + if ("CDF" %in% which_plots || "delta" %in% which_plots) { + CDF(x[["general_information"]][["consensus_matrices"]], + which_plots = which_plots) + } + + n_last_element <- length(x) + colour_tracking_matrix <- color_matrix[, x[[n_last_element]]$consensus_tree$order] + # if only 2 clusters were specified, colour_tracking_matrix is not a matrix + # but a vector -> then the plot doesn't work -> transform + if (!is.matrix(colour_tracking_matrix) && n_last_element == 2) { + colour_tracking_matrix <- matrix(colour_tracking_matrix, nrow = 1) + } + if ("cluster_tracking" %in% which_plots) { + cluster_tracking_plot(colour_tracking_matrix) + } + + ############################################################################## + # plot the item-consensus + ############################################################################## + + cluster_consensus <- rbind() + cci <- rbind() + sumx <- list() + colors_arr <- c() + old_par <- par(mfrow = c(3, 1), mar = c(4, 3, 2, 0)) + on.exit(par(old_par)) + # tk is the number of predefined clusters + for (tk in seq(from = 2, to = length(x), by = 1)) { + ei_cols <- c() + c_matrix <- x[[tk]][["consensus_matrix"]] + c_class <- x[[tk]][["consensus_class"]] + + # for every subject (item), calculate the average consensus value with all + # subjects who are grouped into one cluster + # do this for every cluster + c_matrix <- triangle(c_matrix, mode = 2) + # for each cluster in tk/the predefined number of clusters + # e.g. for tk = 2, there should be 2 clusters and for both clusters the + # mean item consensus is calculated + for (cluster_i in sort(unique(c_class))) { + items <- which(c_class == cluster_i) + n_k <- length(items) + mk <- sum(c_matrix[items, items], na.rm = TRUE) / ((n_k * (n_k - 1)) / 2) + # cluster consensus + cluster_consensus <- rbind(cluster_consensus, c(tk, cluster_i, mk)) + for (item_i in rev(x[[2]]$consensus_tree$order)) { + denom <- if (item_i %in% items) { + n_k - 1 + } + else { + n_k + } + # mean item consensus to a cluster + mean_item_consensus <- sum(c(c_matrix[item_i, items], + c_matrix[items, item_i]), + na.rm = TRUE) / denom + # add a new row with cluster, cluster index, item index, item consensus + cci <- rbind(cci, c(tk, cluster_i, item_i, mean_item_consensus)) + } + ei_cols <- c(ei_cols, rep(cluster_i, length(c_class))) + } + # only plot the new tk data + cck <- cci[which(cci[, 1] == tk), ] + # group by item, order by cluster i + w <- lapply(split(cck, cck[, 3]), function(x) { + y <- matrix(unlist(x), ncol = 4) + y[order(y[, 2]), 4] + }) + + # set up the matrix for plotting + q <- matrix(as.numeric(unlist(w)), ncol = length(w), byrow = FALSE) + # order by leave order of tk = 2 + q <- q[, x[[2]]$consensus_tree$order] + # this results in q: a matrix of tk rows and sample columns, values are + # item consensus of sample to the cluster + # so for a defined possible number of clusters (tk), the values in the rows + # are the item consensus for the possible clusters + + # it needs to be colorM[tk - 1, ] because the first element in + # colorM refers to tk (so for 2 clusters, the information is stored in the + # first entry and not in the second) + this_colors <- unique(cbind(x[[tk]]$consensus_class, color_matrix[tk - 1, ])) + this_colors <- this_colors[order(as.numeric(this_colors[, 1])), 2] + colors_arr <- c(colors_arr, this_colors) + if ("item_consensus" %in% which_plots) { + ranked_bar_plot(item_consensus_matrix = q, + cluster_colors = this_colors, + item_order = c_class[x[[2]]$consensus_tree$order], + title = paste("k=", tk, sep = "")) + } + } + + ############################################################################## + # plot the cluster-consensus + ############################################################################## + + cluster_consensus_y <- cluster_color <- number_clusters_lab <- NULL + # bring the cluster consensus data into the correct format + previous_number_cluster <- cluster_consensus[1, 1] + for (i in seq_len(length(colors_arr))) { + # if the current number of predefined clusters (in the previous loops called + # tk) is not the same as the previous, then insert 0s as space between the + # different numbers of clusters on the x axis + if (previous_number_cluster != cluster_consensus[i, 1]) { + cluster_consensus_y <- c(cluster_consensus_y, 0, 0) + cluster_color <- c(cluster_color, NA, NA) + previous_number_cluster <- cluster_consensus[i, 1] + number_clusters_lab <- c(number_clusters_lab, NA, NA) + } + cluster_consensus_y <- c(cluster_consensus_y, cluster_consensus[i, 3]) + cluster_color <- c(cluster_color, colors_arr[i]) + number_clusters_lab <- c(number_clusters_lab, cluster_consensus[i, 1]) + } + names(cluster_consensus_y) <- number_clusters_lab + # no need to store the parameters here, as the original mfrow and mar + # parameters are stored and restored on exit already earlier in this function + par(mfrow = c(3, 1), mar = c(4, 3, 2, 0)) + if ("cluster_consensus" %in% which_plots) { + barplot(cluster_consensus_y, col = cluster_color, border = cluster_color, + main = "cluster-consensus", ylim = c(0, 1), las = 1) + } +} diff --git a/README.md b/README.md index 2501da4..aa16a15 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,14 @@ The above mentioned plots are generated when calling the `plot` function: plot(clustering) ``` +You can also select which plots you want to generate: + +``` r +plot(clustering, which_plots = "consensusmatrix_2") +``` + +![consensus matrix plot showing two clusters cleanly separated, one bigger than the other](man/figures/example_consensusmatrix_2.png) + ### Detailed explanation For a detailed explanation how you can use `longmixr` to analyze your longitudinal data, check out the diff --git a/man/figures/example_consensusmatrix_2.png b/man/figures/example_consensusmatrix_2.png new file mode 100644 index 0000000..c6ff9fc Binary files /dev/null and b/man/figures/example_consensusmatrix_2.png differ diff --git a/man/plot.lcc.Rd b/man/plot.lcc.Rd index cb1e104..473f37f 100644 --- a/man/plot.lcc.Rd +++ b/man/plot.lcc.Rd @@ -1,33 +1,42 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/longitudinal_consensus_cluster.R +% Please edit documentation in R/plot.lcc.R \name{plot.lcc} \alias{plot.lcc} \title{Plot a longitudinal consensus clustering} \usage{ -\method{plot}{lcc}(x, color_palette = NULL, ...) +\method{plot}{lcc}(x, color_palette = NULL, which_plots = "all", ...) } \arguments{ \item{x}{\code{lcc} object (output from \code{\link{longitudinal_consensus_cluster}})} \item{color_palette}{optional character vector of colors for consensus matrix} +\item{which_plots}{determine which plots should be plotted; the default is \code{"all"}. +Alternatively, a combination of the following values can be specified to plot +only certain plots of the below mentioned plots: \code{"consensusmatrix_legend"}, +\code{"consensusmatrix_x"} where \code{x} is replaced by the corresponding number +of clusters, \code{"CDF"}, \code{"delta"}, \code{"cluster_tracking"}, +\code{"item_consensus"} or \code{"cluster_consensus"}. When you want to plot +all consensus matrices and the legend, you can just use \code{"consensusmatrix"}.} + \item{...}{additional parameters for plotting; currently not used} } \value{ -Plots the following plots:\tabular{ll}{ -\code{consensus matrix legend} \tab the legend for the following consensus matrix plots \cr +Plots the following plots (when selected):\tabular{ll}{ +\code{consensus matrix legend} \tab the legend for the following consensus matrix plots (select with \code{"consensusmatrix_legend"}) \cr \tab \cr -\code{consensus matrix plot} \tab for every specified number of clusters, a heatmap of the consensus matrix and the result of the final clustering is shown \cr +\code{consensus matrix plot} \tab for every specified number of clusters, a heatmap of the consensus matrix and the result of the final clustering is shown (select with \code{"consensusmatrix_x"} where \code{x} is replaced by the corresponding number +of clusters) \cr \tab \cr -\code{consensus CDF} \tab a line plot of the CDFs for all different specified numbers of clusters \cr +\code{consensus CDF} \tab a line plot of the CDFs for all different specified numbers of clusters (select with \code{"CDF"})\cr \tab \cr -\code{Delta area} \tab elbow plot of the difference in the CDFs between the different numbers of clusters \cr +\code{Delta area} \tab elbow plot of the difference in the CDFs between the different numbers of clusters (select with \code{"delta"}) \cr \tab \cr -\code{tracking plot} \tab cluster assignment of the subjects throughout the different cluster solutions \cr +\code{tracking plot} \tab cluster assignment of the subjects throughout the different cluster solutions (select with \code{"cluster_tracking"}) \cr \tab \cr -\code{item-consensus} \tab for every item (subject), calculate the average consensus value with all items that are assigned to one consensus cluster. This is repeated for every cluster and for all different numbers of clusters \cr +\code{item-consensus} \tab for every item (subject), calculate the average consensus value with all items that are assigned to one consensus cluster. This is repeated for every cluster and for all different numbers of clusters (select with \code{"item_consensus"}) \cr \tab \cr -\code{cluster-consensus} \tab every bar represents the average pair-wise item-consensus within one consensus cluster +\code{cluster-consensus} \tab every bar represents the average pair-wise item-consensus within one consensus cluster (select with \code{"cluster_consensus"}) } } \description{ diff --git a/tests/testthat/_snaps/lcc_plot/plot_lcc_output.png b/tests/testthat/_snaps/lcc_plot/plot_lcc_output.png index 83b7356..6cd7ad2 100644 Binary files a/tests/testthat/_snaps/lcc_plot/plot_lcc_output.png and b/tests/testthat/_snaps/lcc_plot/plot_lcc_output.png differ diff --git a/tests/testthat/_snaps/lcc_plot/plot_lcc_output_cdf_cluster_tracking.png b/tests/testthat/_snaps/lcc_plot/plot_lcc_output_cdf_cluster_tracking.png new file mode 100644 index 0000000..ba2df22 Binary files /dev/null and b/tests/testthat/_snaps/lcc_plot/plot_lcc_output_cdf_cluster_tracking.png differ diff --git a/tests/testthat/_snaps/lcc_plot/plot_lcc_output_consensusmatrix.png b/tests/testthat/_snaps/lcc_plot/plot_lcc_output_consensusmatrix.png new file mode 100644 index 0000000..dd721e8 Binary files /dev/null and b/tests/testthat/_snaps/lcc_plot/plot_lcc_output_consensusmatrix.png differ diff --git a/tests/testthat/_snaps/lcc_plot/plot_lcc_output_delta.png b/tests/testthat/_snaps/lcc_plot/plot_lcc_output_delta.png new file mode 100644 index 0000000..fd48917 Binary files /dev/null and b/tests/testthat/_snaps/lcc_plot/plot_lcc_output_delta.png differ diff --git a/tests/testthat/test_lcc_plot.R b/tests/testthat/test_lcc_plot.R index 128c269..3b964b3 100644 --- a/tests/testthat/test_lcc_plot.R +++ b/tests/testthat/test_lcc_plot.R @@ -20,10 +20,10 @@ clustering <- longitudinal_consensus_cluster( # helper function to save the plots # because plot.lcc outputs several plots at once, I store them all in one big # png and compare this png -save_png <- function(code, width = 400, height = 3200) { +save_png <- function(code, width = 800, height = 800) { path <- tempfile(fileext = ".png") png(path, width = width, height = height) - op <- par(mfrow = c(8, 1)) + op <- par(mfrow = c(4, 2)) on.exit(dev.off()) on.exit(par(op), add = TRUE) code @@ -41,6 +41,8 @@ test_that("a plot is generated in the CI", { test_that("the plots stay the same", { skip_on_ci() + # because plot.lcc changes the graphics parameters, currently only the last + # plot is recorded. Therefore, I can only check the last plot. expect_snapshot_file(save_png(plot(clustering)), "plot_lcc_output.png") }) @@ -55,3 +57,19 @@ test_that("the color_palette argument is correct", { expect_silent(plot(clustering, color_palette = c("#9999FF", "#7F7FFF", "#6666FF"))) }) + +test_that("the which_plots argument is correct", { + expect_error(plot(clustering, which_plots = 3)) + expect_error(plot(clustering, which_plots = "consensusmatrix_4"), + regexp = "which_plot must be one of all, consensusmatrix_legend, consensusmatrix, consensusmatrix_2, consensusmatrix_3, CDF, delta, cluster_tracking, item_consensus, cluster_consensus.") + skip_on_ci() + # because plot.lcc changes the graphics parameters, currently only the last + # plot is recorded. Therefore, I can only check the last plot for the + # consensus plots, even though I specified several plots. + expect_snapshot_file(save_png(plot(clustering, which_plots = "consensusmatrix")), + "plot_lcc_output_consensusmatrix.png") + expect_snapshot_file(save_png(plot(clustering, which_plots = "delta")), + "plot_lcc_output_delta.png") + expect_snapshot_file(save_png(plot(clustering, which_plots = c("CDF", "cluster_tracking"))), + "plot_lcc_output_cdf_cluster_tracking.png") +}) diff --git a/vignettes/analysis_workflow.Rmd b/vignettes/analysis_workflow.Rmd index 7aa51f6..0602c42 100644 --- a/vignettes/analysis_workflow.Rmd +++ b/vignettes/analysis_workflow.Rmd @@ -432,6 +432,9 @@ the mean item-consensus is shown for cluster 2 and cluster 3. In general, for a given subject you want to have a high item-consensus for one cluster and low item-consensus for all other clusters. +If you only want to plot certain plots and not all, you can select them with the +`which_plots` argument. For more information, see `help(plot.lcc)`. + In the example, the item-consensus plots suggest a two cluster solution fits the data best.