diff --git a/recipe/bin/plot_circos b/recipe/bin/plot_circos index 216f774..d4f9f6a 100644 --- a/recipe/bin/plot_circos +++ b/recipe/bin/plot_circos @@ -1,402 +1,95 @@ #!/bin/bash - # Simple script to create a circos plot between two FASTA files. +###################### +### Get script DIR ### +###################### +export SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + ############ ### Help ### ############ -Help() -{ -cat << EOF - -Simple script to create a circos plot between two FASTA files. -Copyright, Felipe Almeida , 2021 - - Syntax: plot_circos.sh [-h] [--fofn --outdir --minlen --minid - --linklen --show_intrachr --gc_window --gc_step - --labels ] - - Options: - - # Help - -h/--help Print this help - - # Output - --outdir Path to output directory [Default: ./results] - - # Input file of file names - # CSV: fasta path,prefix,color - --fofn File of file names contatining list of fastas to - draw circos plot. - - # Input min. length - --minlen Min size of contigs to consider for plot [Default: 10000] - - # Links (blastn) min. percentage id - --minid Min. percentage id to filter the results of blastn to draw links [Default: 85] - --linklen Min. link (blastn hit) length to display in plot [Default: 5000] - --show_intrachr Tells the program to create a conf file showing intra chr links [Default: false] - Mandatory if using only one FASTA, otherwise, links will not be shown. - - # GC skew config - --gc_window GC skew window size [Default: 5000] - --gc_step GC skew step size [Default: 5000] - - # Labels config - --labels TSV file containing the label definitions for plotting. The file must contain - 3 or 4 columns as shown at http://circos.ca/documentation/tutorials/2d_tracks/text_1/lesson, - "DATA FORMAT" section. The first column must be the name (ID) of the contig. - - -EOF -} +source ${SCRIPT_DIR}/../src/help.sh ################ ### Defaults ### ################ -RESULTS="./results" # tmp directory -FOFN="" -LABELS="" -LABEL_CONF="" -FASTA="" -FASTA_PREFIX="" -FASTA_COLOR="" -MINLEN=10000 -MINID=85 -MINLINKLEN=5000 -INTRACHR_FILE="links_concatenated_colored_no_intrachr.txt" -INTRACHR_SHOW="no" -GCWINDOW=5000 -GCSTEP=5000 +export THREADS=1 +export RESULTS="./results" +export FOFN="" +export LABELS="" +export LABELS_CONF="" +export TILES="" +export TILES_CONF="" +export FASTA="" +export FASTA_PREFIX="" +export FASTA_COLOR="" +export MINLEN=10000 +export MINID=85 +export MINLINKLEN=5000 +export INTRACHR_FILE="links_concatenated_colored_no_intrachr.txt" +export INTRACHR_SHOW="no" +export GCWINDOW=5000 +export GCSTEP=5000 ###################################### ### Function to filter FASTA files ### ###################################### -filter() -{ -# create results dir -mkdir -p $RESULTS - -# create dir for files -mkdir -p ${RESULTS}/filtered - -# filter genomes -IFS=',' -while read -r FASTA FASTA_PREFIX FASTA_COLOR ; do - name="$(basename $FASTA)" ; - $CONDA_PREFIX/bin/perl $CONDA_PREFIX/bin/removesmalls.pl $MINLEN $FASTA >> ${RESULTS}/filtered/"$name" ; - continue -done<"$FOFN" - -} +source ${SCRIPT_DIR}/../src/filter.sh ########################################## ### Function to create karyotype files ### ########################################## -karyotype() -{ -# create dir -mkdir -p ${RESULTS}/conf - -# write karyotypes -IFS=',' -while read -r FASTA FASTA_PREFIX FASTA_COLOR ; do - name="$(basename $FASTA)" ; - FILTERED_FASTA=${RESULTS}/filtered/"$name" ; - bioawk -c fastx -v p=$FASTA_PREFIX -v color=$FASTA_COLOR \ - '{ printf "chr - " substr($name,1) " " p":" substr($name,1) " " "0" " " length($seq) " " color"\n" }' \ - $FILTERED_FASTA >> ${RESULTS}/conf/circos.sequences.txt ; -done<"$FOFN" -} +source ${SCRIPT_DIR}/../src/karyotypes.sh ############################################# ### Function to find links between fastas ### ############################################# -find_links() -{ -# create dir -mkdir -p ${RESULTS}/all_vs_all_blast - -# concatenate genomes -cat ${RESULTS}/filtered/* >> ${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta ; -export CONCAT_FASTA=${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta -export BLAST_DB=${RESULTS}/all_vs_all_blast/blast_db - -# Run blast -makeblastdb -in $CONCAT_FASTA -dbtype nucl -out $BLAST_DB &> /dev/null ; -blastn -task blastn -perc_identity $MINID -query $CONCAT_FASTA -db $BLAST_DB \ - -outfmt "6 qseqid qstart qend sseqid sstart send pident length mismatch gapopen evalue bitscore stitle" \ - -out ${RESULTS}/all_vs_all_blast/tmp.blast - -# Filter blast -awk -F '\t' -v minid=$MINID '{ if ($7 >= minid) { print } }' ${RESULTS}/all_vs_all_blast/tmp.blast > ${RESULTS}/all_vs_all_blast/all_vs_all.blast - -# Remove tmp -rm ${RESULTS}/all_vs_all_blast/tmp.blast -} +source ${SCRIPT_DIR}/../src/find_links.sh ######################################################################### ### Function to parse blastn (links) and create conf file with colors ### ######################################################################### -parse_links() -{ -# create dir -mkdir -p ${RESULTS}/conf - -# Filter blocks with more then N bp hits -awk -v minlen=$MINLINKLEN '{ if ($8 >= minlen) { print } }' \ - ${RESULTS}/all_vs_all_blast/all_vs_all.blast | cut -f 1,2,3,4,5,6 >> ${RESULTS}/conf/links_concatenated.txt ; - -# get links comming from contigs and give it colors -IFS=',' -while read -r FASTA FASTA_PREFIX FASTA_COLOR ; do - bioawk -c fastx '{ printf $name"\n" }' $FASTA > tmp_names.fasta ; - awk -v color1=$FASTA_COLOR -F'\t' 'NR==FNR{c[$1]++;next};c[$1] > 0 {print $0 "\t" "color="color1}' \ - tmp_names.fasta ${RESULTS}/conf/links_concatenated.txt >> ${RESULTS}/conf/links_concatenated_colored.txt - rm tmp_names.fasta ; -done<"$FOFN" - -# create additional file whithout intrachr links -awk -F'\t' '{ if ($1 != $4) { print } }' ${RESULTS}/conf/links_concatenated_colored.txt > ${RESULTS}/conf/links_concatenated_colored_no_intrachr.txt ; -} +source ${SCRIPT_DIR}/../src/parse_links.sh ############################################################################# ### Function to sort and remove duplicates from links and karyotype files ### ############################################################################# -dedup() -{ -for file in $(ls ${RESULTS}/conf/*); do - sort -u $file > tmp.txt ; - cat tmp.txt > $file ; - rm tmp.txt -done -} +source ${SCRIPT_DIR}/../src/dedup.sh ############################################### ### Function to check which chrs have links ### ############################################### -check_links() -{ -# get chrs with links -## chr source -cut -f 1 ${RESULTS}/conf/links_concatenated_colored.txt >> tmp.chrs -## chr target -cut -f 4 ${RESULTS}/conf/links_concatenated_colored.txt >> tmp.chrs - -# export LINE -CHRS=$(cat tmp.chrs | sort -u | tr '\n' ';') ; -rm tmp.chrs ; -export CUSTOM_CHR_LINE="chromosomes = "${CHRS} ; -} +source ${SCRIPT_DIR}/../src/check_links.sh ######################################################### ### Function to create GC skew file proper for Circos ### ######################################################### -gc_skew() -{ -# exec GCcalc.py -$CONDA_PREFIX/bin/python3 $CONDA_PREFIX/bin/GCcalc.py -f ${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta -w $GCWINDOW -s $GCSTEP | \ - cut -f 1,2,3,5 | awk '{ if ($4 > 0) print $0 "\t" "color=dblue"; else print $0 "\t" "color=red"}' > ${RESULTS}/conf/GC_skew.txt -} - -################################################## -### Function to create feature labels from GFF ### -################################################## -gff2labels() -{ -# load variable -read -r -d '' LABEL_CONF << EOM -# Labels - -label_snuggle = yes -type = text -color = black -file = feature_labels.txt - -r0 = 1r -r1 = 1r+200p +source ${SCRIPT_DIR}/../src/gc_skew.sh -show_links = yes -link_dims = 4p,4p,8p,4p,4p -link_thickness = 5p -link_color = black +################################################ +### Function to create feature labels config ### +################################################ +source ${SCRIPT_DIR}/../src/labels.sh -label_size = 30p - -padding = 0p -rpadding = 0p - - -EOM - -# read gff and create TSV file -} +############################################### +### Function to create feature TILES config ### +############################################### +source ${SCRIPT_DIR}/../src/tiles.sh ########################################### ### Function to create circos.conf file ### ########################################### -write_circos() -{ -cat << EOF -# MINIMUM CIRCOS CONFIGURATION - -# Defines unit length for ideogram and tick spacing, referenced -# using "u" prefix, e.g. 10u -chromosomes_units = 500000 - -# Show all chromosomes in karyotype file. By default, this is -# true. If you want to explicitly specify which chromosomes -# to draw, set this to 'no' and use the 'chromosomes' parameter. -chromosomes_display_default = no -${CUSTOM_CHR_LINE} - -# Chromosome name, size and color definition -karyotype = circos.sequences.txt - -<> - - - - -<> -# overwrite auto_alpha_steps from default value included in etc/image.conf -auto_alpha_steps* = 10 - - - - - -# spacing between ideograms -default = 0.005r - - -# ideogram position, thickness and fill -radius = 0.9r -thickness = 30p -fill = yes -show_label = no -label_font = default -label_size = 40 -label_radius = 1r + 75p -label_parallel = yes - - - - - - show = yes - ribbon = no - file = ${INTRACHR_FILE} - radius = 0.8r - tickness = 15 - - - - - # Do not show intra-chromossome links - condition = var(intrachr) - show = ${INTRACHR_SHOW} - - - - - - - -# Add plots - - -# GC Skew - -type = histogram -file = GC_skew.txt -r1 = 1.0r -r0 = 0.8r -thickness = 2 -max = 0.49999999999999173 -min = -0.47826086956521324 -extend_bin = yes -orientation = out - - -${LABEL_CONF} - - - -# debugging, I/O an dother system parameters -<> # included from Circos distribution - -show_ticks = yes -show_tick_labels = yes - - - -skip_first_label = no -skip_last_label = no -radius = dims(ideogram,radius_outer) -tick_separation = 2p -label_separation = 5p -multiplier = 1e-6 -color = black -thickness = 4p -size = 20p - -# with you desired more ticks, add new tick inclusions as the one shown. See: http://circos.ca/documentation/tutorials/ticks_and_labels/labels/configuration - -spacing = 1u -show_label = yes -label_size = 30p -label_offset = 10p -format = %d -grid = yes -grid_color = black -grid_thickness = 1p -grid_start = 0.5r -grid_end = 0.999r - - - -skip_first_label = yes -spacing = 0.5u -show_label = yes -label_size = 30p -label_offset = 10p -format = %.2fMb -grid = yes -grid_color = black -grid_thickness = 1p -grid_start = 0.5r -grid_end = 0.999r - - - - -EOF -} +source ${SCRIPT_DIR}/../src/write_circos.sh ############################### ### Function to plot circos ### ############################### -plot_circos() -{ -# get current dir -CURRENT_DIR=$PWD - -# got to conf dir -cd ${RESULTS}/conf/ - -# draw -circos - -# go back -cd $CURRENT_DIR -} - +source ${SCRIPT_DIR}/../src/plot_circos.sh ################################ ### Get positional arguments ### ################################ - # No arguments given if [ $# -eq 0 ] ; then Help @@ -413,8 +106,17 @@ case $ARGS in Help exit ;; + --threads) + if [ "$2" ]; then + export THREADS=$2 + shift 2 + else + echo -e '\nERROR: "--threads" requires a numeric argument\n' + exit + fi + ;; --outdir) - RESULTS=$2 + export RESULTS=$2 shift 2 ;; --fofn) @@ -425,7 +127,7 @@ case $ARGS in else echo "" >> $2 ; fi - FOFN=$2 + export FOFN=$2 shift 2 else echo -e '\nERROR: "--fofn" requires an argument. Check your input.\n' @@ -434,16 +136,25 @@ case $ARGS in ;; --labels) if [[ -f "$2" ]]; then - LABELS=$2 + export LABELS=$2 shift 2 else echo -e '\nERROR: "--labels" requires an argument. Check your input.\n' exit fi ;; + --tiles) + if [[ -f "$2" ]]; then + export TILES=$2 + shift 2 + else + echo -e '\nERROR: "--tiles" requires an argument. Check your input.\n' + exit + fi + ;; --minlen) if [ "$2" ]; then - MINLEN=$2 + export MINLEN=$2 shift 2 else echo -e '\nERROR: "--minlen" requires a numeric argument\n' @@ -452,7 +163,7 @@ case $ARGS in ;; --minid) if [ "$2" ]; then - MINID=$2 + export MINID=$2 shift 2 else echo -e '\nERROR: "--minid" requires a numeric argument\n' @@ -461,7 +172,7 @@ case $ARGS in ;; --linklen) if [ "$2" ]; then - MINLINKLEN=$2 + export MINLINKLEN=$2 shift 2 else echo -e '\nERROR: "--linklen" requires a numeric argument\n' @@ -470,7 +181,7 @@ case $ARGS in ;; --gc_window) if [ "$2" ]; then - GCWINDOW=$2 + export GCWINDOW=$2 shift 2 else echo -e '\nERROR: "--gc_window" requires a numeric argument\n' @@ -479,7 +190,7 @@ case $ARGS in ;; --gc_step) if [ "$2" ]; then - GCSTEP=$2 + export GCSTEP=$2 shift 2 else echo -e '\nERROR: "--gc_step" requires a numeric argument\n' @@ -487,8 +198,8 @@ case $ARGS in fi ;; --show_intrachr) - INTRACHR_FILE="links_concatenated_colored.txt" - INTRACHR_SHOW="yes" + export INTRACHR_FILE="links_concatenated_colored.txt" + export INTRACHR_SHOW="yes" shift ;; *) @@ -502,50 +213,6 @@ done ################### ### Exec script ### ################### - -## remove existing results -rm -rf $RESULTS ; - -# Step 1 -echo " # Preparing inputs!" -filter ; - -# Step 2 -echo " # Writing karyotypes!" -karyotype ; - -# Step 3 -echo " # Finding links (all vs all blastn)!" -find_links ; -parse_links ; - -# Step 4 -echo " # Removing duplicate lines in conf files!" -dedup ; -check_links ; - -# Step 5 -echo " # Computing GC Skew!" -gc_skew ; - -# Step 6 -echo " # Wrinting circos conf file!" -write_circos > ${RESULTS}/conf/circos.conf ; - -# Step 7 -echo " # Plotting circos!" -plot_circos ; - -cat << EOF - - # Bye Bye - Now your plot is complete and must be available at: ${RESULTS}/conf/ - - All the required files for a minimal circos plot have been produced and stored at ${RESULTS}/conf/. - Now you can play with the ${RESULTS}/conf/circos.conf file in order to change the plot as you like. - - Remember to read the circos manual in order to understand the conf file. - - Have fun! - -EOF +source ${SCRIPT_DIR}/../src/workflow.sh +source ${SCRIPT_DIR}/../src/bye.sh +workflow diff --git a/recipe/src/bye.sh b/recipe/src/bye.sh new file mode 100644 index 0000000..50abd06 --- /dev/null +++ b/recipe/src/bye.sh @@ -0,0 +1,13 @@ +read -r -d '' BYE << EOM + + # Bye Bye + Now your plot is complete and must be available at: ${RESULTS}/conf/ + + All the required files for a minimal circos plot have been produced and stored at ${RESULTS}/conf/. + Now you can play with the ${RESULTS}/conf/circos.conf file in order to change the plot as you like. + + Remember to read the circos manual in order to understand the conf file. + + Have fun! + +EOM diff --git a/recipe/src/check_links.sh b/recipe/src/check_links.sh new file mode 100644 index 0000000..d758f9b --- /dev/null +++ b/recipe/src/check_links.sh @@ -0,0 +1,13 @@ +check_links() +{ +# get chrs with links +## chr source +cut -f 1 ${RESULTS}/conf/links_concatenated_colored.txt >> tmp.chrs +## chr target +cut -f 4 ${RESULTS}/conf/links_concatenated_colored.txt >> tmp.chrs + +# export LINE +CHRS=$(cat tmp.chrs | sort -u | tr '\n' ';') ; +rm tmp.chrs ; +export CUSTOM_CHR_LINE="chromosomes = "${CHRS} ; +} diff --git a/recipe/src/dedup.sh b/recipe/src/dedup.sh new file mode 100644 index 0000000..cc93348 --- /dev/null +++ b/recipe/src/dedup.sh @@ -0,0 +1,8 @@ +dedup() +{ +for file in ${RESULTS}/conf/*.txt; do + sort -u ${file} > tmp.txt ; + cat tmp.txt > ${file} ; + rm tmp.txt +done +} diff --git a/recipe/src/filter.sh b/recipe/src/filter.sh new file mode 100644 index 0000000..14348db --- /dev/null +++ b/recipe/src/filter.sh @@ -0,0 +1,17 @@ +filter() +{ +# create results dir +mkdir -p $RESULTS + +# create dir for files +mkdir -p ${RESULTS}/filtered + +# filter genomes +IFS=',' +while read -r FASTA FASTA_PREFIX FASTA_COLOR ; do + name="$(basename $FASTA)" ; + $CONDA_PREFIX/bin/perl $CONDA_PREFIX/bin/removesmalls.pl $MINLEN $FASTA >> ${RESULTS}/filtered/"$name" ; + continue +done<"$FOFN" + +} diff --git a/recipe/src/find_links.sh b/recipe/src/find_links.sh new file mode 100644 index 0000000..395d608 --- /dev/null +++ b/recipe/src/find_links.sh @@ -0,0 +1,22 @@ +find_links() +{ +# create dir +mkdir -p ${RESULTS}/all_vs_all_blast + +# concatenate genomes +cat ${RESULTS}/filtered/* >> ${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta ; +export CONCAT_FASTA=${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta +export BLAST_DB=${RESULTS}/all_vs_all_blast/blast_db + +# Run blast +makeblastdb -in $CONCAT_FASTA -dbtype nucl -out $BLAST_DB &> /dev/null ; +blastn -task blastn -perc_identity $MINID -query $CONCAT_FASTA -db $BLAST_DB \ + -outfmt "6 qseqid qstart qend sseqid sstart send pident length mismatch gapopen evalue bitscore stitle" \ + -out ${RESULTS}/all_vs_all_blast/tmp.blast -num_threads $THREADS + +# Filter blast +awk -F '\t' -v minid=$MINID '{ if ($7 >= minid) { print } }' ${RESULTS}/all_vs_all_blast/tmp.blast > ${RESULTS}/all_vs_all_blast/all_vs_all.blast + +# Remove tmp +rm ${RESULTS}/all_vs_all_blast/tmp.blast +} diff --git a/recipe/src/gc_skew.sh b/recipe/src/gc_skew.sh new file mode 100644 index 0000000..f37686b --- /dev/null +++ b/recipe/src/gc_skew.sh @@ -0,0 +1,6 @@ +gc_skew() +{ +# exec GCcalc.py +$CONDA_PREFIX/bin/python3 $CONDA_PREFIX/bin/GCcalc.py -f ${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta -w $GCWINDOW -s $GCSTEP | \ + cut -f 1,2,3,5 | awk '{ if ($4 > 0) print $0 "\t" "color=dblue"; else print $0 "\t" "color=red"}' > ${RESULTS}/conf/GC_skew.txt +} diff --git a/recipe/src/help.sh b/recipe/src/help.sh new file mode 100644 index 0000000..d8f1edc --- /dev/null +++ b/recipe/src/help.sh @@ -0,0 +1,53 @@ +Help() +{ +cat << EOF + +Simple script to create a circos plot between two FASTA files. +Copyright, Felipe Almeida , 2021 + + Syntax: plot_circos.sh [-h] [--fofn --outdir --minlen --minid + --linklen --show_intrachr --gc_window --gc_step + --labels ] + + Options: + + # Help + -h/--help Print this help + + # Threads for blastn + --threads Number of threads to use [Default: 1] + + # Output + --outdir Path to output directory [Default: ./results] + + # Input file of file names + # CSV: fasta path,prefix,color + --fofn File of file names contatining list of fastas to + draw circos plot. + + # Input min. length + --minlen Min size of contigs to consider for plot [Default: 10000] + + # Links (blastn) min. percentage id + --minid Min. percentage id to filter the results of blastn to draw links [Default: 85] + --linklen Min. link (blastn hit) length to display in plot [Default: 5000] + --show_intrachr Tells the program to create a conf file showing intra chr links [Default: false] + Mandatory if using only one FASTA, otherwise, links will not be shown. + + # GC skew config + --gc_window GC skew window size [Default: 5000] + --gc_step GC skew step size [Default: 5000] + + # Labels config + --labels TSV file containing the label definitions for plotting. The file must contain + 3 or 4 columns as shown at http://circos.ca/documentation/tutorials/2d_tracks/text_1/lesson, + "DATA FORMAT" section. The first column must be the name (ID) of the contig. + + # Tiles config + --tiles TSV file containing the tile definitions for plotting. The file must contain + 3 or 4 columns as shown at http://circos.ca/documentation/tutorials/configuration/data_files. + The first column must be the name (ID) of the contig. + + +EOF +} diff --git a/recipe/src/karyotypes.sh b/recipe/src/karyotypes.sh new file mode 100644 index 0000000..1f62d34 --- /dev/null +++ b/recipe/src/karyotypes.sh @@ -0,0 +1,15 @@ +karyotype() +{ +# create dir +mkdir -p ${RESULTS}/conf + +# write karyotypes +IFS=',' +while read -r FASTA FASTA_PREFIX FASTA_COLOR ; do + name="$(basename $FASTA)" ; + FILTERED_FASTA=${RESULTS}/filtered/"$name" ; + bioawk -c fastx -v p=$FASTA_PREFIX -v color=$FASTA_COLOR \ + '{ printf "chr - " substr($name,1) " " p":" substr($name,1) " " "0" " " length($seq) " " color"\n" }' \ + $FILTERED_FASTA >> ${RESULTS}/conf/circos.sequences.txt ; +done<"$FOFN" +} diff --git a/recipe/src/labels.sh b/recipe/src/labels.sh new file mode 100644 index 0000000..5511667 --- /dev/null +++ b/recipe/src/labels.sh @@ -0,0 +1,28 @@ +labels() +{ +# Create label definition +read -r -d '' LABELS_CONF << EOM +# Labels +# to understand it more read: http://circos.ca/documentation/tutorials/2d_tracks/text_1/lesson + +label_snuggle = yes +type = text +color = black +file = ${LABELS} + +r0 = 1r +r1 = 1r+200p + +show_links = yes +link_dims = 4p,4p,8p,4p,4p +link_thickness = 5p +link_color = black + +label_size = 30p + +padding = 0p +rpadding = 0p + + +EOM +} diff --git a/recipe/src/parse_links.sh b/recipe/src/parse_links.sh new file mode 100644 index 0000000..c06af46 --- /dev/null +++ b/recipe/src/parse_links.sh @@ -0,0 +1,21 @@ +parse_links() +{ +# create dir +mkdir -p ${RESULTS}/conf + +# Filter blocks with more then N bp hits +awk -v minlen=$MINLINKLEN '{ if ($8 >= minlen) { print } }' \ + ${RESULTS}/all_vs_all_blast/all_vs_all.blast | cut -f 1,2,3,4,5,6 >> ${RESULTS}/conf/links_concatenated.txt ; + +# get links comming from contigs and give it colors +IFS=',' +while read -r FASTA FASTA_PREFIX FASTA_COLOR ; do + bioawk -c fastx '{ printf $name"\n" }' $FASTA > tmp_names.fasta ; + awk -v color1=$FASTA_COLOR -F'\t' 'NR==FNR{c[$1]++;next};c[$1] > 0 {print $0 "\t" "color="color1}' \ + tmp_names.fasta ${RESULTS}/conf/links_concatenated.txt >> ${RESULTS}/conf/links_concatenated_colored.txt + rm tmp_names.fasta ; +done<"$FOFN" + +# create additional file whithout intrachr links +awk -F'\t' '{ if ($1 != $4) { print } }' ${RESULTS}/conf/links_concatenated_colored.txt > ${RESULTS}/conf/links_concatenated_colored_no_intrachr.txt ; +} diff --git a/recipe/src/plot_circos.sh b/recipe/src/plot_circos.sh new file mode 100644 index 0000000..6282fbb --- /dev/null +++ b/recipe/src/plot_circos.sh @@ -0,0 +1,14 @@ +plot_circos() +{ +# get current dir +CURRENT_DIR=$PWD + +# got to conf dir +cd ${RESULTS}/conf/ + +# draw +circos + +# go back +cd $CURRENT_DIR +} diff --git a/recipe/src/tiles.sh b/recipe/src/tiles.sh new file mode 100644 index 0000000..7ace359 --- /dev/null +++ b/recipe/src/tiles.sh @@ -0,0 +1,31 @@ +tiles() +{ +# Create label definition +read -r -d '' TILES_CONF << EOM +# Tiles +# to understand its configuration read: http://circos.ca/documentation/tutorials/2d_tracks/tiles/configuration + +type = tile +layers_overflow = grow +file = ${TILES} +r1 = 0.8r +r0 = 0.7r +orientation = out + +layers = 5 +margin = 0.02u +thickness = 15 +padding = 8 + +stroke_thickness = 1 +stroke_color = grey + + + +color = vvlgrey + + + + +EOM +} diff --git a/recipe/src/workflow.sh b/recipe/src/workflow.sh new file mode 100644 index 0000000..3008806 --- /dev/null +++ b/recipe/src/workflow.sh @@ -0,0 +1,58 @@ +workflow() +{ + ## remove existing results + rm -rf $RESULTS ; + + # Step 1 + echo " # Preparing inputs!" + filter ; + + # Step 2 + echo " # Writing karyotypes!" + karyotype ; + + # Step 3 + echo " # Finding links (all vs all blastn)!" + find_links ; + parse_links ; + + # Step 4 + echo " # Removing duplicate lines in conf files!" + dedup ; + check_links ; + + # Step 5 + echo " # Computing GC Skew!" + gc_skew ; + + # Step 6 + # Check for labels + if [ -z "$LABELS" ] + then + echo "" > /dev/null ; + else + echo " # Parsing labels!" ; + labels ; + fi + + # Step 7 + # Check for tiles + if [ -z "$TILES" ] + then + echo "" > /dev/null ; + else + echo " # Parsing tiles!" ; + tiles ; + fi + + # Step 6 + echo " # Wrinting circos conf file!" + write_circos > ${RESULTS}/conf/circos.conf ; + + # Step 7 + echo " # Plotting circos!" + plot_circos ; + + # Bye + echo ${BYE} +} diff --git a/recipe/src/write_circos.sh b/recipe/src/write_circos.sh new file mode 100644 index 0000000..dc56696 --- /dev/null +++ b/recipe/src/write_circos.sh @@ -0,0 +1,140 @@ +write_circos() +{ +cat << EOF +# MINIMUM CIRCOS CONFIGURATION + +# Defines unit length for ideogram and tick spacing, referenced +# using "u" prefix, e.g. 10u +chromosomes_units = 500000 + +# Show all chromosomes in karyotype file. By default, this is +# true. If you want to explicitly specify which chromosomes +# to draw, set this to 'no' and use the 'chromosomes' parameter. +chromosomes_display_default = no +${CUSTOM_CHR_LINE} + +# Chromosome name, size and color definition +karyotype = circos.sequences.txt + +<> + + + + +<> +# overwrite auto_alpha_steps from default value included in etc/image.conf +auto_alpha_steps* = 10 + + + + + +# spacing between ideograms +default = 0.005r + + +# ideogram position, thickness and fill +radius = 0.9r +thickness = 30p +fill = yes +show_label = no +label_font = default +label_size = 40 +label_radius = 1r + 75p +label_parallel = yes + + + + + + show = yes + ribbon = no + file = ${INTRACHR_FILE} + radius = 0.7r + tickness = 15 + + + + + # Do not show intra-chromossome links + condition = var(intrachr) + show = ${INTRACHR_SHOW} + + + + + + + +# Add plots + + +# GC Skew + +type = histogram +file = GC_skew.txt +r1 = 1.0r +r0 = 0.8r +thickness = 2 +max = 0.49999999999999173 +min = -0.47826086956521324 +extend_bin = yes +orientation = out + + +${LABELS_CONF} + +${TILES_CONF} + + + +# debugging, I/O an dother system parameters +<> # included from Circos distribution + +show_ticks = yes +show_tick_labels = yes + + + +skip_first_label = no +skip_last_label = no +radius = dims(ideogram,radius_outer) +tick_separation = 2p +label_separation = 5p +multiplier = 1e-6 +color = black +thickness = 4p +size = 20p + +# with you desired more ticks, add new tick inclusions as the one shown. See: http://circos.ca/documentation/tutorials/ticks_and_labels/labels/configuration + +spacing = 1u +show_label = yes +label_size = 30p +label_offset = 10p +format = %d +grid = yes +grid_color = black +grid_thickness = 1p +grid_start = 0.5r +grid_end = 0.999r + + + +skip_first_label = yes +spacing = 0.5u +show_label = yes +label_size = 30p +label_offset = 10p +format = %.2fMb +grid = yes +grid_color = black +grid_thickness = 1p +grid_start = 0.5r +grid_end = 0.999r + + + + +EOF +}