-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Changing script structure to have functions in separate files!
- Loading branch information
Showing
15 changed files
with
510 additions
and
404 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Farewell message printed at the end of the workflow.
# ${RESULTS} is expanded at the moment this statement runs, so RESULTS must
# already hold the output directory.
# `read -d ''` reads until end of input and therefore always returns a
# non-zero status; `|| true` keeps that expected status from aborting callers
# that run under `set -e`.
read -r -d '' BYE << EOM || true
# Bye Bye
Now your plot is complete and must be available at: ${RESULTS}/conf/
All the required files for a minimal circos plot have been produced and stored at ${RESULTS}/conf/.
Now you can play with the ${RESULTS}/conf/circos.conf file in order to change the plot as you like.
Remember to read the circos manual in order to understand the conf file.
Have fun!
EOM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
check_links()
{
  # Build the circos "chromosomes = ..." line from every contig ID that shows
  # up in the colored links file, either as link source (column 1) or as link
  # target (column 4).
  #
  # Globals:
  #   RESULTS         (read)    output directory
  #   CHRS            (written) unique IDs, each followed by ';'
  #   CUSTOM_CHR_LINE (written, exported) "chromosomes = <CHRS>"
  #
  # The IDs are streamed through a pipeline instead of being appended to a
  # fixed ./tmp.chrs file, so a stale file in the working directory can no
  # longer leak into the result (or get deleted by accident).
  local links_file="${RESULTS}/conf/links_concatenated_colored.txt"

  CHRS=$(
    {
      cut -f 1 -- "$links_file" # chr source
      cut -f 4 -- "$links_file" # chr target
    } | sort -u | tr '\n' ';'
  )

  export CUSTOM_CHR_LINE="chromosomes = ${CHRS}"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
dedup()
{
  # Sort every ${RESULTS}/conf/*.txt file and drop duplicate lines, in place.
  #
  # Globals:
  #   RESULTS (read) output directory
  #
  # `sort -o` may safely name one of its inputs as the output, so no scratch
  # file in the current directory is needed.
  local file
  for file in "${RESULTS}"/conf/*.txt; do
    # When nothing matches, the glob stays literal; skip it instead of
    # creating a file that is literally called "*.txt".
    [ -e "$file" ] || continue
    sort -u -o "$file" -- "$file"
  done
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
filter()
{
  # Run removesmalls.pl on every FASTA listed in the file of file names and
  # store the output under ${RESULTS}/filtered/<basename of the FASTA>.
  #
  # Globals:
  #   RESULTS      (read) output directory (created if missing)
  #   FOFN         (read) CSV file: fasta path,prefix,color
  #   MINLEN       (read) passed to removesmalls.pl as its first argument
  #   CONDA_PREFIX (read) location of perl and removesmalls.pl
  #   FASTA, FASTA_PREFIX, FASTA_COLOR (written) fields of the last line read
  local name

  # -p creates ${RESULTS} as well
  mkdir -p -- "${RESULTS}/filtered"

  # IFS is set for `read` only, so the comma separator does not leak into the
  # rest of the script. The `|| [ -n ... ]` part keeps a last line that has no
  # trailing newline.
  while IFS=',' read -r FASTA FASTA_PREFIX FASTA_COLOR || [ -n "$FASTA" ]; do
    [ -n "$FASTA" ] || continue # skip blank lines
    name=$(basename -- "$FASTA")
    "${CONDA_PREFIX}/bin/perl" "${CONDA_PREFIX}/bin/removesmalls.pl" "$MINLEN" "$FASTA" \
      >> "${RESULTS}/filtered/${name}"
  done < "$FOFN"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
find_links()
{
  # Concatenate the filtered genomes, run an all-vs-all blastn on them and
  # keep the hits whose identity (column 7 of the requested output format)
  # is at least ${MINID}.
  #
  # Globals:
  #   RESULTS     (read) output directory
  #   MINID       (read) minimum percentage identity
  #   THREADS     (read) threads handed to blastn
  #   CONCAT_FASTA, BLAST_DB (written, exported) paths used for the search
  # Output:
  #   ${RESULTS}/all_vs_all_blast/all_vs_all.blast
  # Returns:
  #   non-zero when makeblastdb or blastn fails
  local blast_dir="${RESULTS}/all_vs_all_blast"
  local raw_hits="${blast_dir}/tmp.blast"

  mkdir -p -- "$blast_dir"

  export CONCAT_FASTA="${blast_dir}/concatenated_genomes.fasta"
  export BLAST_DB="${blast_dir}/blast_db"

  # Truncate rather than append, so calling the function twice on the same
  # results directory does not duplicate every sequence.
  cat -- "${RESULTS}"/filtered/* > "$CONCAT_FASTA"

  # Run blast (makeblastdb output is deliberately silenced, as before)
  makeblastdb -in "$CONCAT_FASTA" -dbtype nucl -out "$BLAST_DB" &> /dev/null || {
    echo "find_links: makeblastdb failed for ${CONCAT_FASTA}" >&2
    return 1
  }
  blastn -task blastn -perc_identity "$MINID" -query "$CONCAT_FASTA" -db "$BLAST_DB" \
    -outfmt "6 qseqid qstart qend sseqid sstart send pident length mismatch gapopen evalue bitscore stitle" \
    -out "$raw_hits" -num_threads "$THREADS" || {
    echo "find_links: blastn failed" >&2
    return 1
  }

  # Filter blast
  awk -F '\t' -v minid="$MINID" '$7 >= minid' "$raw_hits" > "${blast_dir}/all_vs_all.blast"

  # Remove tmp
  rm -- "$raw_hits"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
gc_skew()
{
  # Run GCcalc.py over the concatenated genomes and write the circos GC skew
  # track to ${RESULTS}/conf/GC_skew.txt.
  #
  # Columns 1, 2, 3 and 5 of the GCcalc.py output are kept (presumably
  # contig, start, end and skew value -- confirm against GCcalc.py). Rows
  # whose kept 4th column is greater than 0 get "color=dblue", all other
  # rows get "color=red".
  #
  # Globals:
  #   CONDA_PREFIX (read) location of python3 and GCcalc.py
  #   RESULTS      (read) output directory
  #   GCWINDOW     (read) value for -w
  #   GCSTEP       (read) value for -s
  "${CONDA_PREFIX}/bin/python3" "${CONDA_PREFIX}/bin/GCcalc.py" \
    -f "${RESULTS}/all_vs_all_blast/concatenated_genomes.fasta" \
    -w "$GCWINDOW" -s "$GCSTEP" \
    | cut -f 1,2,3,5 \
    | awk '{ if ($4 > 0) print $0 "\t" "color=dblue"; else print $0 "\t" "color=red" }' \
      > "${RESULTS}/conf/GC_skew.txt"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
Help()
{
  # Print the usage message to stdout.
  # The heredoc delimiter is quoted so the text is emitted literally and can
  # never be altered by accidental parameter or command expansion.
  # The Syntax line lists --threads and --tiles, which the Options section
  # documents.
  cat << 'EOF'
Simple script to create a circos plot between two FASTA files.
Copyright, Felipe Almeida <almeidafmarques@outlook.com>, 2021
Syntax: plot_circos.sh [-h] [--fofn <file> --outdir <outdir> --threads <int> --minlen <int> --minid <int>
--linklen <int> --show_intrachr --gc_window <int> --gc_step <int>
--labels <file> --tiles <file> ]
Options:
# Help
-h/--help Print this help
# Threads for blastn
--threads Number of threads to use [Default: 1]
# Output
--outdir Path to output directory [Default: ./results]
# Input file of file names
# CSV: fasta path,prefix,color
--fofn File of file names containing list of fastas to
draw circos plot.
# Input min. length
--minlen Min size of contigs to consider for plot [Default: 10000]
# Links (blastn) min. percentage id
--minid Min. percentage id to filter the results of blastn to draw links [Default: 85]
--linklen Min. link (blastn hit) length to display in plot [Default: 5000]
--show_intrachr Tells the program to create a conf file showing intra chr links [Default: false]
Mandatory if using only one FASTA, otherwise, links will not be shown.
# GC skew config
--gc_window GC skew window size [Default: 5000]
--gc_step GC skew step size [Default: 5000]
# Labels config
--labels TSV file containing the label definitions for plotting. The file must contain
3 or 4 columns as shown at http://circos.ca/documentation/tutorials/2d_tracks/text_1/lesson,
"DATA FORMAT" section. The first column must be the name (ID) of the contig.
# Tiles config
--tiles TSV file containing the tile definitions for plotting. The file must contain
3 or 4 columns as shown at http://circos.ca/documentation/tutorials/configuration/data_files.
The first column must be the name (ID) of the contig.
EOF
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
karyotype()
{
  # Append one circos karyotype line per sequence of every filtered FASTA to
  # ${RESULTS}/conf/circos.sequences.txt, in the form
  #   chr - <seq name> <prefix>:<seq name> 0 <seq length> <color>
  #
  # Globals:
  #   RESULTS (read) output directory
  #   FOFN    (read) CSV file: fasta path,prefix,color
  #   FASTA, FASTA_PREFIX, FASTA_COLOR, FILTERED_FASTA (written) values of
  #     the last line processed
  local name

  mkdir -p -- "${RESULTS}/conf"

  # IFS is scoped to `read`, so the comma separator does not leak to callers.
  # The `|| [ -n ... ]` part keeps a last line without a trailing newline.
  while IFS=',' read -r FASTA FASTA_PREFIX FASTA_COLOR || [ -n "$FASTA" ]; do
    [ -n "$FASTA" ] || continue # skip blank lines
    name=$(basename -- "$FASTA")
    FILTERED_FASTA="${RESULTS}/filtered/${name}"
    # Sequence names are passed as printf arguments, not as the format
    # string, so a '%' in a name cannot corrupt the output.
    bioawk -c fastx -v "p=${FASTA_PREFIX}" -v "color=${FASTA_COLOR}" \
      '{ printf "chr - %s %s:%s 0 %d %s\n", $name, p, $name, length($seq), color }' \
      "$FILTERED_FASTA" >> "${RESULTS}/conf/circos.sequences.txt"
  done < "$FOFN"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
labels()
{
  # Store the circos <plot> block for the text-label track in LABELS_CONF.
  #
  # Globals:
  #   LABELS      (read)    path written into the "file =" entry
  #   LABELS_CONF (written) the generated configuration block
  # Returns:
  #   0. `read -d ''` always ends with a non-zero status at end of input, so
  #   it is neutralised with `|| true`; otherwise the function would report
  #   failure on every call.
  read -r -d '' LABELS_CONF << EOM || true
# Labels
# to understand it more read: http://circos.ca/documentation/tutorials/2d_tracks/text_1/lesson
<plot>
label_snuggle = yes
type = text
color = black
file = ${LABELS}
r0 = 1r
r1 = 1r+200p
show_links = yes
link_dims = 4p,4p,8p,4p,4p
link_thickness = 5p
link_color = black
label_size = 30p
padding = 0p
rpadding = 0p
</plot>
EOM
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
parse_links()
{
  # Turn the filtered blast table into circos link files.
  #
  # 1. Hits whose column 8 is >= ${MINLINKLEN} are reduced to their first six
  #    columns and appended to conf/links_concatenated.txt.
  # 2. For every FASTA in the file of file names, links whose first column is
  #    a sequence name of that FASTA are appended, with a trailing
  #    "color=<FASTA color>" column, to conf/links_concatenated_colored.txt.
  # 3. conf/links_concatenated_colored_no_intrachr.txt gets the colored links
  #    whose columns 1 and 4 differ.
  #
  # Globals:
  #   RESULTS    (read) output directory
  #   MINLINKLEN (read) minimum value of column 8
  #   FOFN       (read) CSV file: fasta path,prefix,color
  #   FASTA, FASTA_PREFIX, FASTA_COLOR (written) fields of the last line read
  # Returns:
  #   non-zero when the scratch file cannot be created
  local conf_dir="${RESULTS}/conf"
  local links="${conf_dir}/links_concatenated.txt"
  local colored="${conf_dir}/links_concatenated_colored.txt"
  local names_file

  mkdir -p -- "$conf_dir"

  # Filter blocks with more than N bp hits
  awk -v minlen="$MINLINKLEN" '$8 >= minlen' \
    "${RESULTS}/all_vs_all_blast/all_vs_all.blast" \
    | cut -f 1,2,3,4,5,6 >> "$links"

  # Private scratch file instead of a fixed name in the working directory.
  names_file=$(mktemp) || return 1

  # IFS is scoped to `read`, so the comma separator does not leak to callers.
  while IFS=',' read -r FASTA FASTA_PREFIX FASTA_COLOR || [ -n "$FASTA" ]; do
    [ -n "$FASTA" ] || continue # skip blank lines
    # `print` instead of `printf $name`: the name is data, not a format.
    bioawk -c fastx '{ print $name }' "$FASTA" > "$names_file"
    # First file: remember the sequence names. Second file: emit the links
    # that start on one of them, tagged with this FASTA's color.
    awk -v color1="$FASTA_COLOR" -F'\t' \
      'NR==FNR{c[$1]++;next};c[$1] > 0 {print $0 "\t" "color="color1}' \
      "$names_file" "$links" >> "$colored"
  done < "$FOFN"

  rm -f -- "$names_file"

  # create additional file without intrachr links
  awk -F'\t' '$1 != $4' "$colored" > "${conf_dir}/links_concatenated_colored_no_intrachr.txt"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
plot_circos()
{
  # Run circos from inside ${RESULTS}/conf/ and then return to the directory
  # the caller was in.
  #
  # Globals:
  #   RESULTS     (read)    output directory
  #   CURRENT_DIR (written) directory the function was called from
  # Returns:
  #   1 when the conf directory cannot be entered (circos is not started in
  #   that case, so it never runs against the wrong directory); otherwise the
  #   exit status of circos.
  local status

  # get current dir
  CURRENT_DIR=$PWD

  # go to conf dir
  cd -- "${RESULTS}/conf/" || {
    echo "plot_circos: cannot enter ${RESULTS}/conf/" >&2
    return 1
  }

  # draw
  circos
  status=$?

  # go back
  cd -- "$CURRENT_DIR" || return 1
  return "$status"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
tiles()
{
  # Store the circos <plot> block for the tile track in TILES_CONF.
  #
  # Globals:
  #   TILES      (read)    path written into the "file =" entry
  #   TILES_CONF (written) the generated configuration block
  # Returns:
  #   0. `read -d ''` always ends with a non-zero status at end of input, so
  #   it is neutralised with `|| true`; otherwise the function would report
  #   failure on every call.

  # Create tile definition
  read -r -d '' TILES_CONF << EOM || true
# Tiles
# to understand its configuration read: http://circos.ca/documentation/tutorials/2d_tracks/tiles/configuration
<plot>
type = tile
layers_overflow = grow
file = ${TILES}
r1 = 0.8r
r0 = 0.7r
orientation = out
layers = 5
margin = 0.02u
thickness = 15
padding = 8
stroke_thickness = 1
stroke_color = grey
<backgrounds>
<background>
color = vvlgrey
</background>
</backgrounds>
</plot>
EOM
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
workflow()
{
  # Run the whole pipeline, from input filtering to the final circos plot.
  #
  # Globals:
  #   RESULTS (read) output directory; it is wiped before anything else runs
  #   LABELS  (read) when non-empty, the labels step runs
  #   TILES   (read) when non-empty, the tiles step runs
  #   BYE     (read) farewell message printed at the end
  # Each step is a function defined in its own file; write_circos prints the
  # circos configuration, which is stored as ${RESULTS}/conf/circos.conf.

  ## remove existing results
  # ${RESULTS:?} aborts when RESULTS is empty or unset, so the recursive
  # delete can never run without an explicit target.
  rm -rf -- "${RESULTS:?RESULTS must be set}"

  # Step 1
  echo " # Preparing inputs!"
  filter

  # Step 2
  echo " # Writing karyotypes!"
  karyotype

  # Step 3
  echo " # Finding links (all vs all blastn)!"
  find_links
  parse_links

  # Step 4
  echo " # Removing duplicate lines in conf files!"
  dedup
  check_links

  # Step 5
  echo " # Computing GC Skew!"
  gc_skew

  # Step 6
  # Check for labels
  if [ -n "$LABELS" ]; then
    echo " # Parsing labels!"
    labels
  fi

  # Step 7
  # Check for tiles
  if [ -n "$TILES" ]; then
    echo " # Parsing tiles!"
    tiles
  fi

  # Step 8
  echo " # Wrinting circos conf file!"
  write_circos > "${RESULTS}/conf/circos.conf"

  # Step 9
  echo " # Plotting circos!"
  plot_circos

  # Bye
  # Quoted, so the multi-line message keeps its line breaks.
  printf '%s\n' "$BYE"
}
Oops, something went wrong.