Skip to content

Commit

Permalink
Merge branch 'master' of github.com:oushujun/EDTA into oushujun-master
Browse files Browse the repository at this point in the history
  • Loading branch information
jguhlin committed Dec 4, 2024
2 parents 3666e08 + 4f0424b commit 23d3d06
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 31 deletions.
10 changes: 5 additions & 5 deletions EDTA.pl
Original file line number Diff line number Diff line change
Expand Up @@ -686,8 +686,8 @@
print "$date\tEDTA final stage finished! You may check out:
The final EDTA TE library: $genome.EDTA.TElib.fa\n";
print " Family names of intact TEs have been updated by $HQlib: $genome.EDTA.intact.gff3\n" if $HQlib ne '';
print "\tComparing to the provided library, EDTA found these novel TEs: $genome.EDTA.TElib.novel.fa
\tThe provided library has been incorporated into the final library: $genome.EDTA.TElib.fa\n\n" if $HQlib ne '';
print " Comparing to the provided library, EDTA found these novel TEs: $genome.EDTA.TElib.novel.fa
The provided library has been incorporated into the final library: $genome.EDTA.TElib.fa\n\n" if $HQlib ne '';
chdir "..";


Expand Down Expand Up @@ -826,9 +826,9 @@
die "ERROR: TE annotation stats results not found in $genome.EDTA.TE.fa.stat!\n\n" unless -s "$genome.EDTA.TE.fa.stat";
chomp ($date = `date`);
print "$date\tEvaluation of TE annotation finished! Check out these files:\n
Overall: $genome.EDTA.TE.fa.stat.all.sum
Nested: $genome.EDTA.TE.fa.stat.nested.sum
Non-nested: $genome.EDTA.TE.fa.stat.redun.sum\n\n";
Overall: $genome.EDTA.TE.fa.stat.all.sum
Nested: $genome.EDTA.TE.fa.stat.nested.sum
Non-nested: $genome.EDTA.TE.fa.stat.redun.sum\n\n";
}

print "\t\tIf you want to learn more about the formatting and information of these files, please visit:
Expand Down
31 changes: 7 additions & 24 deletions EDTA_raw.pl
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,11 @@
my $trf = ''; #path to trf
my $GRF = ''; #path to GRF
my $annosine = ""; #path to the AnnoSINE program

# my $TIR_Learner = "$script_path/bin/TIR-Learner3/"; #tianyulu
# my $LTR_FINDER = "$script_path/bin/LTR_FINDER_parallel/LTR_FINDER_parallel"; #tianyulu
# my $LTR_HARVEST = "$script_path/bin/LTR_HARVEST_parallel/LTR_HARVEST_parallel"; #tianyulu
my $HelitronScanner_Runner = "$script_path/bin/run_helitron_scanner.sh";

my $TIR_Learner = ""; #path to TIR-Learner3 program #tianyulu
my $LTR_FINDER = ""; #path to LTR_FINDER_parallel program #tianyulu
my $LTR_HARVEST = ""; #path to LTR_HARVEST_parallel program #tianyulu
my $HelitronScanner = ""; #path to HelitronScanner program #tianyulu
my $HelitronScanner_Runner = "$script_path/bin/run_helitron_scanner.sh";

my $help = undef;

Expand Down Expand Up @@ -160,9 +155,6 @@
print STDERR "$date\tEDTA_raw: Check dependencies, prepare working directories.\n\n";

# check files and dependencies
# die "The LTR_FINDER_parallel is not found in $LTR_FINDER!\n" unless -s $LTR_FINDER; #tianyulu
# die "The LTR_HARVEST_parallel is not found in $LTR_HARVEST!\n" unless -s $LTR_HARVEST; #tianyulu
# die "The TIR_Learner is not found in $TIR_Learner!\n" unless -s "$TIR_Learner/bin/main.py"; #tianyulu
die "The script get_range.pl is not found in $get_range!\n" unless -s $get_range;
die "The script rename_LTR_skim.pl is not found in $rename_LTR!\n" unless -s $rename_LTR;
die "The script filter_gff3.pl is not found in $filter_gff!\n" unless -s $filter_gff;
Expand All @@ -171,7 +163,6 @@
die "The script rename_tirlearner.pl is not found in $rename_tirlearner!\n" unless -s $rename_tirlearner;
die "The script cleanup_tandem.pl is not found in $cleanup_tandem!\n" unless -s $cleanup_tandem;
die "The script get_ext_seq.pl is not found in $get_ext_seq!\n" unless -s $get_ext_seq;
# die "The HelitronScanner is not found in $HelitronScanner!\n" unless -s $HelitronScanner; #tianyulu
die "The script format_helitronscanner_out.pl is not found in $format_helitronscanner!\n" unless -s $format_helitronscanner;
die "The script flanking_filter.pl is not found in $flank_filter!\n" unless -s $flank_filter;
die "The script bed2gff.pl is not found in $bed2gff!\n" unless -s $bed2gff;
Expand All @@ -190,7 +181,6 @@
$repeatmasker = dirname($repeatmasker) unless -d $repeatmasker;
$repeatmasker="$repeatmasker/" if $repeatmasker ne '' and $repeatmasker !~ /\/$/;
die "Error: RepeatMasker is not found in the RepeatMasker path $repeatmasker!\n" unless -X "${repeatmasker}RepeatMasker";
# `cp $script_path/database/dummy060817.fa ./dummy060817.fa.$rand`;
`cp \"$script_path/database/dummy060817.fa\" ./dummy060817.fa.$rand`; #tianyulu
my $RM_test=`${repeatmasker}RepeatMasker -e ncbi -q -pa 1 -no_is -nolow dummy060817.fa.$rand -lib dummy060817.fa.$rand 2>/dev/null`;
die "Error: The RMblast engine is not installed in RepeatMasker!\n" unless $RM_test=~s/done//gi;
Expand Down Expand Up @@ -245,29 +235,17 @@
`${grfp}grf-main 2>/dev/null`;
die "Error: The Generic Repeat Finder (GRF) is not found in the GRF path: $grfp\n" if $?==32256;

# # TIR-Learner3
# chomp ($TIR_Learner = `which TIR-Learner 2>/dev/null`) if $TIR_Learner eq '';
# $TIR_Learner =~ s/\n$//;
# my $tirp= dirname ($TIR_Learner);
# $tirp =~ s/\n$//;
# `${tirp}TIR-Learner 2>/dev/null`;
# die "Error: TIR-Learner3 is not found in the TIR-Learner path: $tirp\n" if $?==32256;

# TIR-Learner #tianyuLu
# Remove any trailing whitespace
$TIR_Learner =~ s/\s+$//;
if ($TIR_Learner eq "") {
# Find TIR-Learner path and remove any trailing newline
chomp ($TIR_Learner=`command -v TIR-Learner 2>/dev/null`);
die "Error: TIR-Learner not installed!\n" if $TIR_Learner eq "";
# $TIR_Learner =~ s/\s+$//;
} else {
# # Extract directory name from path if path is not a directory
# $TIR_Learner = dirname($TIR_Learner) unless -d $TIR_Learner;
# If path is directory
if (-d $TIR_Learner) {
# # Add trailing slash if path not empty string and not already end with slash
# $TIR_Learner .= "/" if $TIR_Learner ne "" and $TIR_Learner !~ /\/$/;
# Add trailing slash if path not already end with slash
$TIR_Learner .= "/" if $TIR_Learner !~ /\/$/;
$TIR_Learner = "python3 $TIR_Learner/TIR-Learner.py";
Expand Down Expand Up @@ -715,7 +693,12 @@
print STDERR "$date\tIdentify Helitron candidates from scratch.\n\n";

# run HelitronScanner
`$HelitronScanner_Runner $genome $threads \"$HelitronScanner\"`;
if ($overwrite eq 0 and (-s "$genome.HelitronScanner.draw.hel.fa" and -s "$genome.HelitronScanner.draw.rc.hel.fa")){
#cat $genome.HelitronScanner.draw.hel.fa $genome.HelitronScanner.draw.rc.hel.fa
print STDERR "$date\tExisting HelitronScanner result files $genome.HelitronScanner.draw.hel.fa $genome.HelitronScanner.draw.rc.hel.fa found!\n\t\tWill keep these files without rerunning HelitronScanner\n\t\tPlease specify --overwrite 1 if you want to rerun this module.\n\n";
} else {
`$HelitronScanner_Runner $genome $threads \"$HelitronScanner\"`;
}

# filter candidates based on repeatness of flanking regions
`perl $format_helitronscanner -genome $genome -sitefilter 1 -minscore 12 -keepshorter 1 -extlen 30 -extout 1`;
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ The default `conda env` name is `EDTA2` specified by the first line of the yml f

2. Install by specifying all dependencies:

`mamba create -n EDTA2.2 -c conda-forge -c bioconda -c r annosine2 biopython cd-hit coreutils genericrepeatfinder genometools-genometools glob2 tir-learner ltr_finder_parallel ltr_retriever mdust multiprocess muscle openjdk perl perl-text-soundex r-base r-dplyr regex repeatmodeler r-ggplot2 r-here r-tidyr tesorter samtools bedtools LTR_HARVEST_parallel HelitronScanner`
`mamba create -n EDTA2.2 -c conda-forge -c bioconda annosine2 biopython cd-hit coreutils genericrepeatfinder genometools-genometools glob2 tir-learner ltr_finder_parallel ltr_retriever mdust multiprocess muscle openjdk perl perl-text-soundex r-base r-dplyr regex repeatmodeler r-ggplot2 r-here r-tidyr tesorter samtools bedtools LTR_HARVEST_parallel HelitronScanner`

</details>

Expand Down
2 changes: 1 addition & 1 deletion bin/TE_Sequence_Ontology.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ L1_LINE_retrotransposon SO:0002272 L1_LINE_retrotransposon,LINE/L1,LINE/RIL,nonL
I_LINE_retrotransposon SO:0002273 I_LINE_retrotransposon,LINE/I,LINE/RII,nonLTR/RII,LINE/I-Nimb,LINE/I_Nimb
RTE_LINE_retrotransposon SO:0002270 RTE_LINE_retrotransposon,LINE/RTE,LINE/RIT,nonLTR/RIT,RIT,LINE/RTEX,LINE/RTE-X,LINE/RTE-BovB,LINE/RTE_BovB,LINE/RTE_X,LINE/RTE-RTE,LINE/RTE_RTE
SINE_element SO:0000206 SINE_element,SINE/unknown,SINE,SINE/Unknown,SINE?,SINE?/NA,SINE/U,SINE/NA
tRNA_SINE_retrotransposon SO:0002274 tRNA_SINE_retrotransposon,SINE/tRNA,SINE/RST,nonLTR/RST,RST,tRNA,SINE2/tRNA,SINE/tRNA-Core-RTE,SINE/tRNA-V-CR1,tRNA,SINE/tRNA_CR1,SINE/tRNA_RTE,SINE/tRNA_V_RTE,SINE/tRNA-V-RTE,SINE/tRNA-CR1,SINE/tRNA_Core_RTE,SINE/tRNA_V_CR1,SINE/tRNA-RTE
tRNA_SINE_retrotransposon SO:0002274 tRNA_SINE_retrotransposon,SINE/tRNA,SINE/RST,nonLTR/RST,RST,tRNA,SINE2/tRNA,SINE/tRNA-Core-RTE,SINE/tRNA-V-CR1,tRNA,SINE/tRNA_CR1,SINE/tRNA_RTE,SINE/tRNA_V_RTE,SINE/tRNA-V-RTE,SINE/tRNA-CR1,SINE/tRNA_Core_RTE,SINE/tRNA_V_CR1,SINE/tRNA-RTE,tRNA/NA
5S_SINE_retrotransposon SO:0002276 5S_SINE_retrotransposon,SINE/5S,SINE/RSS,nonLTR/RSS,RSS,SINE3/5S,SINE/5S-Deu-L2,SINE/5S_Deu_L2
7SL_SINE_retrotransposon SO:0002275 7SL_SINE_retrotransposon,SINE/7SL,SINE/RSL,nonLTR/RSL,RSL,SINE1/7SL
YR_retrotransposon SO:0002286 YR_retrotransposon,YR/unknown,YR/Unknown
Expand Down

0 comments on commit 23d3d06

Please sign in to comment.