fix download paths for RefSeq non-redundant proteins

fix #280
bioinformatics-centre · Mar 2, 2024 · a3746da · a3746da
1 parent 8c4ffba
commit a3746da
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -102,7 +102,7 @@ respective index and for creating the index (in brackets).
 | Index name | Description | Sequences<sup>\*</sup> | RAM in GB (makedb)<sup>\*</sup> |
 | --- | --- | --- | --- |
 | `refseq` | Completely assembled and annotated reference genomes of Archaea, Bacteria, and viruses from the NCBI RefSeq database. |  127 M |  87 (112) |
-| `refseq_nr` | Sequences for Archaea, Bacteria, viruses and microbial eukaryotes from the NCBI RefSeq non-redundant protein collection. |  210 M |  116 (194) |
+| `refseq_nr` | Sequences for Archaea, Bacteria, viruses and microbial eukaryotes from the [NCBI RefSeq non-redundant protein collection](https://www.ncbi.nlm.nih.gov/refseq/about/nonredundantproteins/). |  210 M |  116 (194) |
 | `refseq_ref` | Protein sequences from representative assemblies of Archaea and bacteria from NCBI RefSeq plus viruses from NCBI RefSeq. |  69 M |  49 (63) |
 | `progenomes` |  Representative set of genomes from the [proGenomes](http://progenomes.embl.de/) database and viruses from the NCBI RefSeq database. |  141 M | 102 (120) |
 | `viruses` |  Only viruses from the NCBI RefSeq database. | 0.65 M | 0.5  (0.5) |

diff --git a/util/kaiju-makedb b/util/kaiju-makedb
@@ -300,8 +300,8 @@ then
 		echo "Downloading nonredundant protein files"
 		for i in `seq 1 999`
 		do
-			if ! wget -P $DB/source -nc -nv 'https://ftp.ncbi.nlm.nih.gov/refseq/release/complete/complete.nonredundant_protein.'$i'.protein.faa.gz'
-			then 
+			if ! wget -P $DB/source -nc -nv 'https://ftp.ncbi.nlm.nih.gov/refseq/release/complete/complete.wp_protein.'$i'.protein.faa.gz'
+			then
 				break
 			fi
 		done
@@ -312,7 +312,7 @@ then
 	if [ $index_only -eq 0 ]
 	then
 		echo "Converting RefSeq non-redundant proteins to Kaiju database"
-		gunzip -c $DB/source/complete.nonredundant_protein.*.protein.faa.gz | kaiju-convertRefSeq -l $SCRIPTDIR/kaiju-taxonlistEuk.tsv -m merged.dmp -t nodes.dmp -g $DB/prot.accession2taxid.FULL.gz -a -o $DB/kaiju_db_$DB.faa 2>log
+		gunzip -c $DB/source/complete.wp_protein.*.protein.faa.gz | kaiju-convertRefSeq -l $SCRIPTDIR/kaiju-taxonlistEuk.tsv -m merged.dmp -t nodes.dmp -g $DB/prot.accession2taxid.FULL.gz -a -o $DB/kaiju_db_$DB.faa 2>log
 	fi
 	[ -r $DB/kaiju_db_$DB.faa ] || { echo Missing file $DB/kaiju_db_$DB.faa; exit 1; }
 	echo Creating BWT from Kaiju database