diff --git a/R/circular_CN_plot.R b/R/circular_CN_plot.R index 58a4b31..f694d6c 100644 --- a/R/circular_CN_plot.R +++ b/R/circular_CN_plot.R @@ -3,9 +3,17 @@ #' #' @param pretty_CN_heatmap_output #' @param ideogram -#' @param labelTheseGenes +#' @param track_height Change this to increase/decrease the height of the tracks. (0.1) +#' @param labelTheseGenes Specify a vector of gene names to label in the plot +#' @param del_col Optionally specify a different colour to use for the CNV deletion track +#' @param gain_col Optionally specify a different colour to use for the CNV gain track +#' @param calculate_correlations Experimental! Calculate the correlation between CNVs between different chromosomes and link highly correlated regions +#' @param min_correlation Minimum correlation to consider when plotting links +#' @param max_neg_correlation Maximum negative value for correlations <1 to consider when plotting links +#' @param link_transparency Specify a different alpha to increase or decrease the transparency of links +#' #' -#' @return +#' @return Nothing #' @export #' #' @examples @@ -22,7 +30,6 @@ circular_CN_plot = function(pretty_CN_heatmap_output, del_col="#0000FF80", gain_col="#FF000080", calculate_correlations = FALSE, - return_correlations = FALSE, link_transparency=0.8, labelTheseGenes = c("CD58","TLR2", "MCL1","CDKN2A", @@ -134,9 +141,6 @@ circular_CN_plot = function(pretty_CN_heatmap_output, - #return(list(bed1,bed2,correlations)) - #Heatmap(correlations,row_split=bin_chroms, - # column_split = bin_chroms,cluster_rows=F,cluster_columns=F,show_row_names = F,show_column_names = F) chroms_u = unique(bin_chroms) xlims = matrix(ncol=2,nrow=length(chroms_u)) rownames(xlims)=chroms_u @@ -294,12 +298,23 @@ circular_CN_plot = function(pretty_CN_heatmap_output, #' Categorize arm-level and chromosomal CNV events #' -#' @param pretty_CN_heatmap_output +#' @param pretty_CN_heatmap_output The output from running the pretty_CN_heatmap function #' -#' @return +#' @return 
List of data frames #' @export #' #' @examples +#' +#' cn_out = pretty_CN_heatmap(cn_state_matrix=all_states_binned, +#' scale_by_sample = T, +#' these_samples_metadata = all_genome_meta, +#' metadataColumns = c("pathology","seq_type"), +#' return_data = T) +#' +#' aneuploidies = categorize_CN_events(cn_out) +#' +#' +#' categorize_CN_events = function(pretty_CN_heatmap_output){ CN_mat = pretty_CN_heatmap_output$data labels = pretty_CN_heatmap_output$labels diff --git a/R/pretty_CN_heatmap.R b/R/pretty_CN_heatmap.R index 6046f8a..544909c 100644 --- a/R/pretty_CN_heatmap.R +++ b/R/pretty_CN_heatmap.R @@ -17,7 +17,7 @@ #' @param cluster_columns Set to TRUE to enable clustering of genomic regions (columns) based on their CN value across all patients in the heatmap #' @param cluster_rows Set to TRUE to enable clustering of genomic regions (columns) based on their CN value across all regions in the heatmap #' @param sortByBins Optional: A vector containing one or more names of genomic bins that will be used to order the heatmap rows. 
-#' @param sortByPGA +#' @param sortByPGA Optional: Sort the rows based on percent genome altered (PGA) instead of the other options #' @param splitByBinState Optional: A single genomic bin that will be used to split the heatmap based on the CN state of that bin #' @param sortByMetadataColumns A vector containing one or more names of columns from your metadata that will be used to order the rows overall or within slices (if combined with splitByBinState or geneBoxPlot) #' @param labelTheseGenes A vector of Hugo gene symbols whose location will be indicated on the top of the heatmap diff --git a/R/pretty_circular_mutation_frequency_heatmap.R b/R/pretty_circular_mutation_frequency_heatmap.R index 2905338..6db6db0 100644 --- a/R/pretty_circular_mutation_frequency_heatmap.R +++ b/R/pretty_circular_mutation_frequency_heatmap.R @@ -1,11 +1,13 @@ #' pretty_circular_mutation_frequency_heatmap #' -#' @param prettyOncoplot_output -#' @param genes -#' @param keep_these_pathologies -#' @param min_sample_num +#' @param prettyOncoplot_output The output of the prettyOncoplot function +#' @param cn_status_matrix The output of get_cnv_and_ssm_status +#' @param collated_results A list of data frames with sample_id as rownames and features as column names +#' @param genes A vector of genes to label +#' @param keep_these_pathologies A vector of pathology values to show in the plot. All the remaining rows will be ignored. +#' @param min_sample_num Minimum number of samples in a pathology to be considered for the plot. Pathologies with fewer samples than this will be excluded. 
(20) #' -#' @return +#' @return Nothing or a list of data frames (when return_data = TRUE) #' @export #' #' @examples diff --git a/man/categorize_CN_events.Rd b/man/categorize_CN_events.Rd index 91c6d97..bc83b65 100644 --- a/man/categorize_CN_events.Rd +++ b/man/categorize_CN_events.Rd @@ -7,8 +7,24 @@ categorize_CN_events(pretty_CN_heatmap_output) } \arguments{ -\item{pretty_CN_heatmap_output}{} +\item{pretty_CN_heatmap_output}{The output from running the pretty_CN_heatmap function} +} +\value{ +List of data frames } \description{ Categorize arm-level and chromosomal CNV events +} +\examples{ + +cn_out = pretty_CN_heatmap(cn_state_matrix=all_states_binned, + scale_by_sample = T, + these_samples_metadata = all_genome_meta, + metadataColumns = c("pathology","seq_type"), + return_data = T) + +aneuploidies = categorize_CN_events(cn_out) + + + } diff --git a/man/circular_CN_plot.Rd b/man/circular_CN_plot.Rd index c14d59e..260a05b 100644 --- a/man/circular_CN_plot.Rd +++ b/man/circular_CN_plot.Rd @@ -13,7 +13,6 @@ circular_CN_plot( del_col = "#0000FF80", gain_col = "#FF000080", calculate_correlations = FALSE, - return_correlations = FALSE, link_transparency = 0.8, labelTheseGenes = c("CD58", "TLR2", "MCL1", "CDKN2A", "TMEM30A", "RHOA", "B2M", "PTEN", "FAS", "ETV6", "GRB2", "FCGR2B", "CCND3", "CUX1", "MIR17HG", "TFPT", "CD274", "JAK2", @@ -26,7 +25,24 @@ circular_CN_plot( \item{ideogram}{} -\item{labelTheseGenes}{} +\item{track_height}{Change this to increase/decrease the height of the tracks. (0.1)} + +\item{min_correlation}{Minimum correlation to consider when plotting links} + +\item{max_neg_correlation}{Maximum negative value for correlations <1 to consider when plotting links} + +\item{del_col}{Optionally specify a different colour to use for the CNV deletion track} + +\item{gain_col}{Optionally specify a different colour to use for the CNV gain track} + +\item{calculate_correlations}{Experimental! 
Calculate the correlation between CNVs between different chromosomes and link highly correlated regions} + +\item{link_transparency}{Specify a different alpha to increase or decrease the transparency of links} + +\item{labelTheseGenes}{Specify a vector of gene names to label in the plot} +} +\value{ +Nothing } \description{ Circular overview of copy number data across a cohort diff --git a/man/pretty_CN_heatmap.Rd b/man/pretty_CN_heatmap.Rd index bb45fb7..87f0de1 100644 --- a/man/pretty_CN_heatmap.Rd +++ b/man/pretty_CN_heatmap.Rd @@ -74,6 +74,8 @@ pretty_CN_heatmap( \item{splitByBinState}{Optional: A single genomic bin that will be used to split the heatmap based on the CN state of that bin} +\item{sortByPGA}{Optional: Sort the rows based on percent genome altered (PGA) instead of the other options} + \item{sortByMetadataColumns}{A vector containing one or more names of columns from your metadata that will be used to order the rows overall or within slices (if combined with splitByBinState or geneBoxPlot)} \item{labelTheseGenes}{A vector of Hugo gene symbols whose location will be indicated on the top of the heatmap} @@ -120,10 +122,10 @@ Pretty Copy Number Heatmap #get some metadata for subsetting the data to just one pathology (DLBCL) -dlbcl_genome_meta = get_gambl_metadata() \%>\% +dlbcl_genome_meta = get_gambl_metadata() \%>\% filter(pathology=="DLBCL", seq_type=="genome") - + # Create the copy number matrix using the helper functions all_segments = get_cn_segments() all_states_binned = get_cn_states(n_bins_split=2500, @@ -137,7 +139,7 @@ pretty_CN_heatmap(cn_state_matrix=all_states_binned, these_samples_metadata = dlbcl_genome_meta, hide_annotations = "chromosome") -# Disable row (sample) clustering and restrict to a few chromosomes +# Disable row (sample) clustering and restrict to a few chromosomes # and highlight some genes of interest pretty_CN_heatmap(cn_state_matrix=all_states_binned, these_samples_metadata = dlbcl_genome_meta, @@ -150,11 +152,11 @@ 
pretty_CN_heatmap(cn_state_matrix=all_states_binned, # get gene expression data gene_exp_all = get_gene_expression(all_genes=T,lazy_join=T,arbitrarily_pick = T,HGNC=T,format="wide") -genome_meta_exp = left_join(get_gambl_metadata() \%>\% - filter(seq_type=="genome") \%>\% +genome_meta_exp = left_join(get_gambl_metadata() \%>\% + filter(seq_type=="genome") \%>\% select(sample_id,pathology,lymphgen), select(gene_exp_all,-sample_id), - by=c("sample_id"="genome_sample_id")) \%>\% + by=c("sample_id"="genome_sample_id")) \%>\% filter(!is.na(MYC)) # Include gene expression data and embed a box plot showing the expression of one gene across different CN states diff --git a/man/pretty_circular_mutation_frequency_heatmap.Rd b/man/pretty_circular_mutation_frequency_heatmap.Rd index b59d793..25031ab 100644 --- a/man/pretty_circular_mutation_frequency_heatmap.Rd +++ b/man/pretty_circular_mutation_frequency_heatmap.Rd @@ -32,29 +32,36 @@ pretty_circular_mutation_frequency_heatmap( ) } \arguments{ -\item{prettyOncoplot_output}{} +\item{prettyOncoplot_output}{The output of the prettyOncoplot function} -\item{genes}{} +\item{cn_status_matrix}{The output of get_cnv_and_ssm_status} -\item{keep_these_pathologies}{} +\item{collated_results}{A list of data frames with sample_id as rownames and features as column names} -\item{min_sample_num}{} +\item{genes}{A vector of genes to label} + +\item{keep_these_pathologies}{A vector of pathology values to show in the plot. All the remaining rows will be ignored.} + +\item{min_sample_num}{Minimum number of samples in a pathology to be considered for the plot. Pathologies with fewer samples than this will be excluded. 
(20)} +} +\value{ +Nothing or a list of data frames (when return_data = TRUE) } \description{ pretty_circular_mutation_frequency_heatmap } \examples{ -all_gambl_meta = get_gambl_metadata() \%>\% +all_gambl_meta = get_gambl_metadata() \%>\% filter(!seq_type == "mrna") \%>\% filter(pathology \%in\% names(get_gambl_colours("pathology"))) - + all_coding = get_all_coding_ssm(these_samples_metadata = all_gambl_meta) -genes = filter(GAMBLR.data::lymphoma_genes_dlbcl_v_latest,curated==TRUE) \%>\% +genes = filter(GAMBLR.data::lymphoma_genes_dlbcl_v_latest,curated==TRUE) \%>\% pull(Gene) genes = unique(c(genes,filter(GAMBLR.data::lymphoma_genes_mcl_v_latest,,curated==TRUE) \%>\% pull(Gene))) -genes = unique(c(genes,filter(GAMBLR.data::lymphoma_genes_bl_v_latest,,curated==TRUE) \%>\% pull(Gene))) +genes = unique(c(genes,filter(GAMBLR.data::lymphoma_genes_bl_v_latest,,curated==TRUE) \%>\% pull(Gene))) oncoplot_output = prettyOncoplot(all_coding, genes=genes, minMutationPercent = 2, @@ -68,7 +75,7 @@ pretty_circular_mutation_frequency_heatmap(prettyOncoplot_output = oncoplot_outp "BL", "HGBL")) -genes_and_cn_threshs = +genes_and_cn_threshs = data.frame(gene_id=c("MYC", "MIR17HG", "TNFAIP3","TCF4", "TNFRSF14","REL","CD274", @@ -85,22 +92,22 @@ cn_status = get_cnv_and_ssm_status(only_cnv="all", these_samples_metadata = all_gambl_meta, genes_and_cn_threshs = genes_and_cn_threshs, adjust_for_ploidy=T) - - + + pretty_circular_mutation_frequency_heatmap(cn_status_matrix = cn_status, prettyOncoplot_output = oncoplot_output) -sv_collated = GAMBLR.results:::collate_sv_results() \%>\% +sv_collated = GAMBLR.results:::collate_sv_results() \%>\% select(sample_id,ends_with("sv")) -NFKBIZ_genome = GAMBLR.results:::collate_nfkbiz_results() \%>\% +NFKBIZ_genome = GAMBLR.results:::collate_nfkbiz_results() \%>\% select(sample_id,NFKBIZ_UTR) -NFKBIZ_capture = GAMBLR.results:::collate_nfkbiz_results(seq_type_filter="capture") \%>\% +NFKBIZ_capture = 
GAMBLR.results:::collate_nfkbiz_results(seq_type_filter="capture") \%>\% select(sample_id,NFKBIZ_UTR) -HNRNPH1_genome = GAMBLR.results:::collate_hnrnph1_mutations() \%>\% +HNRNPH1_genome = GAMBLR.results:::collate_hnrnph1_mutations() \%>\% select(sample_id,HNRNPH1_splice) -HNRNPH1_capture = GAMBLR.results:::collate_hnrnph1_mutations(seq_type_filter="capture") \%>\% +HNRNPH1_capture = GAMBLR.results:::collate_hnrnph1_mutations(seq_type_filter="capture") \%>\% select(sample_id,HNRNPH1_splice) NFKBIZ = bind_rows(NFKBIZ_genome,NFKBIZ_capture) @@ -112,17 +119,17 @@ pretty_circular_mutation_frequency_heatmap(cn_status_matrix = cn_status, HNRNPH1), prettyOncoplot_output = oncoplot_output, these_samples_metadata = all_gambl_meta) - - + + all_states_binned = get_cn_states(n_bins_split=2500, missing_data_as_diploid = T, - seg_data = seg_data) - + seg_data = seg_data) + CN_out = pretty_CN_heatmap(cn_state_matrix=all_states_binned, these_samples_metadata = all_gambl_meta, hide_annotations = "chromosome", scale_by_sample=T, - return_data = T) + return_data = T) arm_level_events = categorize_CN_events(CN_out) @@ -136,15 +143,15 @@ pretty_circular_mutation_frequency_heatmap(cn_status_matrix = cn_status, arm_level_annotated), prettyOncoplot_output = oncoplot_output, these_samples_metadata = all_gambl_meta) - - + + ashm_freq = get_ashm_count_matrix( regions_bed = dplyr::mutate(GAMBLR.data::grch37_ashm_regions, name = paste(gene, region, sep = "_")), this_seq_type = "genome" ) -ashm_freq_collated = mutate(ashm_freq,across(,~ifelse(.x>0,1,0))) - +ashm_freq_collated = mutate(ashm_freq,across(,~ifelse(.x>0,1,0))) + ashm_freq_collated = ashm_freq_collated[,colSums(ashm_freq_collated) >130] ashm_freq_collated = rownames_to_column(ashm_freq_collated,"sample_id")