Skip to content

Commit

Permalink
Structural Change!
Browse files Browse the repository at this point in the history
Changing script structure to have functions in separate files!
  • Loading branch information
fmalmeida committed Jul 7, 2021
1 parent 25a13bc commit ee7f7da
Show file tree
Hide file tree
Showing 15 changed files with 510 additions and 404 deletions.
475 changes: 71 additions & 404 deletions recipe/bin/plot_circos

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions recipe/src/bye.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Farewell message; workflow() prints it as its very last step.
# The here-document delimiter is unquoted, so every ${RESULTS} below is
# expanded at the moment this statement runs, not when the text is printed.
# NOTE(review): `read -d ''` hits end of input before finding its delimiter,
# so it returns a non-zero status even though BYE is filled — keep that in
# mind if errexit (`set -e`) is ever enabled. How much surrounding whitespace
# is trimmed also depends on the IFS in effect at this point.
read -r -d '' BYE << EOM
# Bye Bye
Now your plot is complete and must be available at: ${RESULTS}/conf/
All the required files for a minimal circos plot have been produced and stored at ${RESULTS}/conf/.
Now you can play with the ${RESULTS}/conf/circos.conf file in order to change the plot as you like.
Remember to read the circos manual in order to understand the conf file.
Have fun!
EOM
13 changes: 13 additions & 0 deletions recipe/src/check_links.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#######################################
# Build the circos "chromosomes = ..." line from the contigs that take part
# in at least one coloured link.
# Globals (read):    RESULTS
# Globals (written): CHRS, CUSTOM_CHR_LINE (exported)
# Inputs: ${RESULTS}/conf/links_concatenated_colored.txt (tab separated;
#         column 1 = source contig, column 4 = target contig)
#######################################
check_links()
{
  local links_file="${RESULTS}/conf/links_concatenated_colored.txt"

  # Collect source (col 1) and target (col 4) ids in one stream. No scratch
  # file is used: the previous `>> tmp.chrs` picked up whatever a stale
  # tmp.chrs in the working directory contained and deleted it afterwards.
  CHRS=$(
    {
      cut -f 1 "${links_file}"
      cut -f 4 "${links_file}"
    } | sort -u | tr '\n' ';'
  )

  # export LINE
  export CUSTOM_CHR_LINE="chromosomes = ${CHRS}"
}
8 changes: 8 additions & 0 deletions recipe/src/dedup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#######################################
# Sort every ${RESULTS}/conf/*.txt file in place and drop duplicate lines.
# Globals (read): RESULTS
#######################################
dedup()
{
  local file
  for file in "${RESULTS}"/conf/*.txt; do
    # An unmatched glob stays literal; without this guard a file literally
    # named "*.txt" would be created in the conf directory.
    [[ -e "${file}" ]] || continue
    # `sort -o` may safely name one of its own inputs, so no scratch file
    # (formerly ./tmp.txt, which clobbered any user file of that name).
    sort -u -o "${file}" "${file}"
  done
}
17 changes: 17 additions & 0 deletions recipe/src/filter.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#######################################
# Run removesmalls.pl on every genome listed in the FOFN and store the result
# under ${RESULTS}/filtered/, keeping the basename of each input FASTA.
# Globals (read):    RESULTS, FOFN, MINLEN, CONDA_PREFIX
# Globals (written): IFS, FASTA, FASTA_PREFIX, FASTA_COLOR, name
# Inputs: FOFN is a CSV with one "fasta path,prefix,color" record per line.
#######################################
filter()
{
  # create results dir and the dir for the filtered files
  mkdir -p "${RESULTS}/filtered"

  # filter genomes
  # NOTE(review): IFS is deliberately left set to ',' on return, exactly as
  # before, because code outside this function may currently run with it.
  IFS=','
  # `|| [[ -n ... ]]` keeps the last record when FOFN has no trailing newline.
  while read -r FASTA FASTA_PREFIX FASTA_COLOR || [[ -n "${FASTA}" ]]; do
    # ignore blank lines instead of calling the filter without an input file
    [[ -n "${FASTA}" ]] || continue
    name="$(basename -- "${FASTA}")"
    # append: inputs that share a basename end up in the same filtered file
    "${CONDA_PREFIX}/bin/perl" "${CONDA_PREFIX}/bin/removesmalls.pl" \
      "${MINLEN}" "${FASTA}" >> "${RESULTS}/filtered/${name}"
  done < "${FOFN}"
}
22 changes: 22 additions & 0 deletions recipe/src/find_links.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#######################################
# Concatenate the filtered genomes and run an all-vs-all blastn on them.
# Globals (read):    RESULTS, MINID, THREADS
# Globals (written): CONCAT_FASTA, BLAST_DB (both exported)
# Outputs: ${RESULTS}/all_vs_all_blast/all_vs_all.blast — tabular hits whose
#          7th column (pident in the -outfmt below) is >= MINID.
#######################################
find_links()
{
  local blast_dir="${RESULTS}/all_vs_all_blast"

  # create dir
  mkdir -p "${blast_dir}"

  export CONCAT_FASTA="${blast_dir}/concatenated_genomes.fasta"
  export BLAST_DB="${blast_dir}/blast_db"

  # concatenate genomes
  # Truncate rather than append, so calling this twice does not leave every
  # sequence duplicated in the concatenated FASTA.
  cat "${RESULTS}/filtered"/* > "${CONCAT_FASTA}"

  # Run blast (makeblastdb output is discarded, as before)
  makeblastdb -in "${CONCAT_FASTA}" -dbtype nucl -out "${BLAST_DB}" &> /dev/null
  blastn -task blastn -perc_identity "${MINID}" -query "${CONCAT_FASTA}" -db "${BLAST_DB}" \
    -outfmt "6 qseqid qstart qend sseqid sstart send pident length mismatch gapopen evalue bitscore stitle" \
    -out "${blast_dir}/tmp.blast" -num_threads "${THREADS}"

  # Filter blast
  awk -F '\t' -v minid="${MINID}" '$7 >= minid' \
    "${blast_dir}/tmp.blast" > "${blast_dir}/all_vs_all.blast"

  # Remove tmp
  rm -- "${blast_dir}/tmp.blast"
}
6 changes: 6 additions & 0 deletions recipe/src/gc_skew.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#######################################
# Run GCcalc.py over the concatenated genomes and write a coloured track.
# Globals (read): CONDA_PREFIX, RESULTS, GCWINDOW, GCSTEP
# Outputs: ${RESULTS}/conf/GC_skew.txt — columns 1,2,3,5 of the GCcalc.py
#          output plus "color=dblue" when the kept 4th value is > 0 and
#          "color=red" otherwise.
# NOTE(review): column 5 is presumably the skew value — confirm against
# GCcalc.py. The conf directory must already exist.
#######################################
gc_skew()
{
  # exec GCcalc.py (every expansion quoted so paths with blanks survive)
  "${CONDA_PREFIX}/bin/python3" "${CONDA_PREFIX}/bin/GCcalc.py" \
    -f "${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta" \
    -w "${GCWINDOW}" -s "${GCSTEP}" \
    | cut -f 1,2,3,5 \
    | awk '{ if ($4 > 0) print $0 "\t" "color=dblue"; else print $0 "\t" "color=red"}' \
    > "${RESULTS}/conf/GC_skew.txt"
}
53 changes: 53 additions & 0 deletions recipe/src/help.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#######################################
# Print the command-line usage text to stdout.
# The here-document delimiter is quoted so the text is emitted literally
# (nothing in it is meant to be expanded by the shell).
#######################################
Help()
{
cat << 'EOF'
Simple script to create a circos plot between two FASTA files.
Copyright, Felipe Almeida <almeidafmarques@outlook.com>, 2021
Syntax: plot_circos.sh [-h] [--threads <int> --fofn <file> --outdir <outdir> --minlen <int> --minid <int>
--linklen <int> --show_intrachr --gc_window <int> --gc_step <int>
--labels <file> --tiles <file> ]
Options:
# Help
-h/--help Print this help
# Threads for blastn
--threads Number of threads to use [Default: 1]
# Output
--outdir Path to output directory [Default: ./results]
# Input file of file names
# CSV: fasta path,prefix,color
--fofn File of file names containing list of fastas to
draw circos plot.
# Input min. length
--minlen Min size of contigs to consider for plot [Default: 10000]
# Links (blastn) min. percentage id
--minid Min. percentage id to filter the results of blastn to draw links [Default: 85]
--linklen Min. link (blastn hit) length to display in plot [Default: 5000]
--show_intrachr Tells the program to create a conf file showing intra chr links [Default: false]
Mandatory if using only one FASTA, otherwise, links will not be shown.
# GC skew config
--gc_window GC skew window size [Default: 5000]
--gc_step GC skew step size [Default: 5000]
# Labels config
--labels TSV file containing the label definitions for plotting. The file must contain
3 or 4 columns as shown at http://circos.ca/documentation/tutorials/2d_tracks/text_1/lesson,
"DATA FORMAT" section. The first column must be the name (ID) of the contig.
# Tiles config
--tiles TSV file containing the tile definitions for plotting. The file must contain
3 or 4 columns as shown at http://circos.ca/documentation/tutorials/configuration/data_files.
The first column must be the name (ID) of the contig.
EOF
}
15 changes: 15 additions & 0 deletions recipe/src/karyotypes.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#######################################
# Write one circos karyotype line per sequence of every filtered genome:
#   chr - <seq name> <prefix>:<seq name> 0 <seq length> <color>
# Globals (read):    RESULTS, FOFN
# Globals (written): IFS, FASTA, FASTA_PREFIX, FASTA_COLOR, name,
#                    FILTERED_FASTA
# Outputs: appends to ${RESULTS}/conf/circos.sequences.txt
#######################################
karyotype()
{
  # create dir
  mkdir -p "${RESULTS}/conf"

  # write karyotypes
  # NOTE(review): IFS is deliberately left set to ',' on return, exactly as
  # before, because code outside this function may currently run with it.
  IFS=','
  # `|| [[ -n ... ]]` keeps the last record when FOFN has no trailing newline.
  while read -r FASTA FASTA_PREFIX FASTA_COLOR || [[ -n "${FASTA}" ]]; do
    # ignore blank lines
    [[ -n "${FASTA}" ]] || continue
    name="$(basename -- "${FASTA}")"
    FILTERED_FASTA="${RESULTS}/filtered/${name}"
    # `print` instead of `printf <data>`: a sequence name containing '%' must
    # not be interpreted as a format string.
    bioawk -c fastx -v p="${FASTA_PREFIX}" -v color="${FASTA_COLOR}" \
      '{ print "chr - " $name " " p ":" $name " 0 " length($seq) " " color }' \
      "${FILTERED_FASTA}" >> "${RESULTS}/conf/circos.sequences.txt"
  done < "${FOFN}"
}
28 changes: 28 additions & 0 deletions recipe/src/labels.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Build the circos <plot> block for a text-label track and store it in the
# global LABELS_CONF. workflow() calls this only when LABELS is non-empty.
# Globals (read):    LABELS  (expanded into "file = ..." when this runs)
# Globals (written): LABELS_CONF
labels()
{
# Create label definition
# Everything between the EOM markers is data handed to circos, not shell
# comments. The delimiter is unquoted, so ${LABELS} is substituted here.
# NOTE(review): `read -d ''` reaches end of input without finding its
# delimiter, so this function returns a non-zero status even on success.
read -r -d '' LABELS_CONF << EOM
# Labels
# to understand it more read: http://circos.ca/documentation/tutorials/2d_tracks/text_1/lesson
<plot>
label_snuggle = yes
type = text
color = black
file = ${LABELS}
r0 = 1r
r1 = 1r+200p
show_links = yes
link_dims = 4p,4p,8p,4p,4p
link_thickness = 5p
link_color = black
label_size = 30p
padding = 0p
rpadding = 0p
</plot>
EOM
}
21 changes: 21 additions & 0 deletions recipe/src/parse_links.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#######################################
# Turn the filtered blast hits into circos link files.
#   1. keep hits whose 8th column (alignment length) is >= MINLINKLEN and
#      append their first six columns to links_concatenated.txt
#   2. for every genome in the FOFN, copy the links whose first column is one
#      of that genome's sequence names, tagged with "color=<genome color>"
#   3. write a second file without links whose columns 1 and 4 are equal
# Globals (read):    RESULTS, MINLINKLEN, FOFN
# Globals (written): IFS, FASTA, FASTA_PREFIX, FASTA_COLOR
# Outputs (under ${RESULTS}/conf): links_concatenated.txt,
#   links_concatenated_colored.txt, links_concatenated_colored_no_intrachr.txt
#######################################
parse_links()
{
  local conf_dir="${RESULTS}/conf"
  local links="${conf_dir}/links_concatenated.txt"
  local colored="${conf_dir}/links_concatenated_colored.txt"

  # create dir
  mkdir -p "${conf_dir}"

  # Filter blocks with more than N bp hits
  awk -v minlen="${MINLINKLEN}" '{ if ($8 >= minlen) { print } }' \
    "${RESULTS}/all_vs_all_blast/all_vs_all.blast" \
    | cut -f 1,2,3,4,5,6 >> "${links}"

  # get links coming from contigs and give them colors
  # NOTE(review): IFS is deliberately left set to ',' on return, exactly as
  # before, because code outside this function may currently run with it.
  IFS=','
  # `|| [[ -n ... ]]` keeps the last record when FOFN has no trailing newline.
  while read -r FASTA FASTA_PREFIX FASTA_COLOR || [[ -n "${FASTA}" ]]; do
    # ignore blank lines
    [[ -n "${FASTA}" ]] || continue
    # Sequence names are streamed to awk on stdin ("-") instead of going
    # through ./tmp_names.fasta, which clobbered any file of that name.
    # `print` avoids using a sequence name as a printf format string.
    bioawk -c fastx '{ print $name }' "${FASTA}" \
      | awk -v color1="${FASTA_COLOR}" -F'\t' \
          'NR==FNR{c[$1]++;next};c[$1] > 0 {print $0 "\t" "color="color1}' \
          - "${links}" >> "${colored}"
  done < "${FOFN}"

  # create additional file without intrachr links
  awk -F'\t' '{ if ($1 != $4) { print } }' "${colored}" \
    > "${conf_dir}/links_concatenated_colored_no_intrachr.txt"
}
14 changes: 14 additions & 0 deletions recipe/src/plot_circos.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#######################################
# Run circos from inside ${RESULTS}/conf/ and leave the caller's working
# directory untouched.
# Globals (read):    RESULTS
# Globals (written): CURRENT_DIR (directory the function was called from)
# Returns: exit status of circos, or non-zero when the conf directory cannot
#          be entered (circos is then not started at all).
#######################################
plot_circos()
{
  # get current dir (still recorded for anything that reads it)
  CURRENT_DIR=$PWD

  # Go to the conf dir and draw inside a subshell: the directory change
  # cannot leak out, and circos never runs in the wrong place if cd fails.
  (
    cd "${RESULTS}/conf/" || exit 1
    circos
  )
}
31 changes: 31 additions & 0 deletions recipe/src/tiles.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Build the circos <plot> block for a tile track and store it in the global
# TILES_CONF. workflow() calls this only when TILES is non-empty.
# Globals (read):    TILES  (expanded into "file = ..." when this runs)
# Globals (written): TILES_CONF
tiles()
{
# Create tile definition
# Everything between the EOM markers is data handed to circos, not shell
# comments. The delimiter is unquoted, so ${TILES} is substituted here.
# NOTE(review): `read -d ''` reaches end of input without finding its
# delimiter, so this function returns a non-zero status even on success.
read -r -d '' TILES_CONF << EOM
# Tiles
# to understand its configuration read: http://circos.ca/documentation/tutorials/2d_tracks/tiles/configuration
<plot>
type = tile
layers_overflow = grow
file = ${TILES}
r1 = 0.8r
r0 = 0.7r
orientation = out
layers = 5
margin = 0.02u
thickness = 15
padding = 8
stroke_thickness = 1
stroke_color = grey
<backgrounds>
<background>
color = vvlgrey
</background>
</backgrounds>
</plot>
EOM
}
58 changes: 58 additions & 0 deletions recipe/src/workflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#######################################
# Run the whole pipeline, from input filtering to the final circos plot.
# Globals (read): RESULTS, LABELS, TILES, BYE
# Calls: filter, karyotype, find_links, parse_links, dedup, check_links,
#        gc_skew, labels, tiles, write_circos, plot_circos
# Outputs: progress messages and the BYE text on stdout;
#          ${RESULTS}/conf/circos.conf
# Returns: 1 (before touching anything) when RESULTS is empty or unset.
#######################################
workflow()
{
  # Never hand an empty path to `rm -rf`, and never let a path with blanks
  # be split into several unrelated targets.
  if [[ -z "${RESULTS:-}" ]]; then
    echo "workflow: RESULTS is empty; refusing to remove anything" >&2
    return 1
  fi

  ## remove existing results
  rm -rf -- "${RESULTS}"

  # Step 1
  echo " # Preparing inputs!"
  filter

  # Step 2
  echo " # Writing karyotypes!"
  karyotype

  # Step 3
  echo " # Finding links (all vs all blastn)!"
  find_links
  parse_links

  # Step 4
  echo " # Removing duplicate lines in conf files!"
  dedup
  check_links

  # Step 5
  echo " # Computing GC Skew!"
  gc_skew

  # Step 6 — labels are optional
  if [[ -n "${LABELS:-}" ]]; then
    echo " # Parsing labels!"
    labels
  fi

  # Step 7 — tiles are optional
  if [[ -n "${TILES:-}" ]]; then
    echo " # Parsing tiles!"
    tiles
  fi

  # Step 8
  echo " # Writing circos conf file!"
  write_circos > "${RESULTS}/conf/circos.conf"

  # Step 9
  echo " # Plotting circos!"
  plot_circos

  # Bye — quoted so the message keeps its line breaks whatever IFS is
  printf '%s\n' "${BYE}"
}
Loading

0 comments on commit ee7f7da

Please sign in to comment.