diff --git a/iss/app.py b/iss/app.py index 10257ed..e7033aa 100644 --- a/iss/app.py +++ b/iss/app.py @@ -34,7 +34,9 @@ def generate_reads(args): logger.debug("Using verbose logger") logger.info("Starting iss generate") - error_model = load_error_model(args.mode, args.seed, args.model, args.fragment_length, args.fragment_length_sd, args.store_mutations) + error_model = load_error_model( + args.mode, args.seed, args.model, args.fragment_length, args.fragment_length_sd, args.store_mutations + ) genome_list, genome_file = load_genomes( args.genomes, args.draft, args.ncbi, args.n_genomes_ncbi, args.output, args.n_genomes @@ -54,6 +56,9 @@ def generate_reads(args): error_model, ) + if args.store_mutations: + logger.info(f"Storing inserted sequence errors in {args.output}.vcf") + logger.info("Using %s cpus for read generation" % args.cpus) if readcount_dic is not None: @@ -123,8 +128,8 @@ def generate_reads(args): if args.store_mutations: util.concatenate( temp_mut, - args.output + '.vcf', - "##fileformat=VCFv4.1\n" + "\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"]) + args.output + ".vcf", + "##fileformat=VCFv4.1\n" + "\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"]), ) full_tmp_list = temp_R1 + temp_R2 full_tmp_list.append(genome_file) @@ -135,7 +140,7 @@ def generate_reads(args): util.compress(args.output + "_R1.fastq") util.compress(args.output + "_R2.fastq") if args.store_mutations: - util.compress(args.output + '.vcf') + util.compress(args.output + ".vcf") logger.info("Read generation complete") @@ -392,11 +397,11 @@ def main(): help="Fragment length standard deviation for metagenomics sequencing (default: %(default)s).", ) parser_gen.add_argument( - '--store_mutations', - '-M', - action='store_true', + "--store_mutations", + "-M", + action="store_true", default=False, - help='Generates an additional VCF file with the mutations introduced in the reads', + help="Generates an additional VCF file with the mutations introduced in the reads", ) parser_gen._optionals.title = "arguments" parser_gen.set_defaults(func=generate_reads) diff --git a/iss/error_models/__init__.py b/iss/error_models/__init__.py index d39b098..17ea70f 100644 --- a/iss/error_models/__init__.py +++ b/iss/error_models/__init__.py @@ -96,14 +96,16 @@ def mut_sequence(self, record, orientation): np.random.choice(nucl_choices[position][nucl.upper()][0], p=nucl_choices[position][nucl.upper()][1]) ) if self.store_mutations and mutated_nuc != record.annotations["original"][position]: - record.annotations["mutations"].append({ + record.annotations["mutations"].append( + { "id": record.id, "position": position, "ref": mutable_seq[position], "alt": mutated_nuc, "quality": qual, "type": "snp", - }) + } + ) mutable_seq[position] = mutated_nuc position += 1 record.seq = Seq(mutable_seq) diff --git a/iss/generator.py b/iss/generator.py index 5de32c6..2387e54 100644 --- a/iss/generator.py +++ b/iss/generator.py @@ -72,7 +72,6 @@ def reads_generator(n_pairs, record, error_model, cpu_number, gc_bias, sequence_ i = 0 while i < n_pairs: try: - # forward, reverse = simulate_read(record, error_model, i, cpu_number, sequence_type) forward, reverse, mutations = simulate_read(record, error_model, i, cpu_number, sequence_type) except AssertionError: @@ -190,7 +189,7 @@ def simulate_read(record, error_model, i, cpu_number, sequence_type): reverse = error_model.introduce_error_scores(reverse, "reverse") reverse.seq = error_model.mut_sequence(reverse, "reverse") - return (forward, reverse) # mutations + return (forward, reverse, forward.annotations["mutations"] + reverse.annotations["mutations"]) def to_fastq(generator, output): @@ -602,13 +601,14 @@ def write_mutations(mutations, mutations_handle): "\t".join( [ str(vcf_dict["id"]), - str(vcf_dict["position"] + 1), # vcf files have 1-based index + str(vcf_dict["position"] + 1), # vcf files have 1-based index ".", vcf_dict["ref"], str(vcf_dict["alt"]), str(vcf_dict["quality"]), "", - "" + "", ] - ) + "\n" - ) \ No newline at end of file + ) + + "\n" + ) diff --git a/iss/test/test_generator.py b/iss/test/test_generator.py index 37d2811..eaca114 100644 --- a/iss/test/test_generator.py +++ b/iss/test/test_generator.py @@ -13,18 +13,17 @@ from iss.error_models import basic, kde from iss.util import cleanup -# due to inconsistent seeding between python 2 and 3, some of the following -# tests are disabled with python2 - - -def teardown_cleanup(): - cleanup(["data/.test.iss.tmp.my_genome.0_R1.fastq", "data/.test.iss.tmp.my_genome.0_R2.fastq"]) - @pytest.fixture def setup_and_teardown(): yield - cleanup(["data/.test.iss.tmp.my_genome.0_R1.fastq", "data/.test.iss.tmp.my_genome.0_R2.fastq"]) + cleanup( + [ + "data/.test.iss.tmp.my_genome.0_R1.fastq", + "data/.test.iss.tmp.my_genome.0_R2.fastq", + "data/.test.iss.tmp.my_genome.tsv", + ] + ) def test_cleanup_fail(): @@ -43,7 +42,10 @@ def test_simulate_and_save(setup_and_teardown): ref_genome = SeqRecord(Seq(str("AAAAACCCCC" * 100)), id="my_genome", description="test genome") forward_handle = open("data/.test.iss.tmp.my_genome.0_R1.fastq", "w") reverse_handle = open("data/.test.iss.tmp.my_genome.0_R2.fastq", "w") - generator.simulate_reads(ref_genome, err_mod, 1000, 0, forward_handle, reverse_handle, "metagenomics", gc_bias=True) + mutations_handle = open("data/.test.iss.tmp.my_genome.tsv", "w") + generator.simulate_reads( + ref_genome, err_mod, 1000, 0, forward_handle, reverse_handle, mutations_handle, "metagenomics", gc_bias=True + ) def test_simulate_and_save_short(setup_and_teardown): @@ -51,7 +53,10 @@ def test_simulate_and_save_short(setup_and_teardown): ref_genome = SeqRecord(Seq(str("AACCC" * 100)), id="my_genome", description="test genome") forward_handle = open("data/.test.iss.tmp.my_genome.0_R1.fastq", "w") reverse_handle = open("data/.test.iss.tmp.my_genome.0_R2.fastq", "w") - generator.simulate_reads(ref_genome, err_mod, 1000, 0, forward_handle, reverse_handle, "metagenomics", gc_bias=True) + mutations_handle = open("data/.test.iss.tmp.my_genome.tsv", "w") + generator.simulate_reads( + ref_genome, err_mod, 1000, 0, forward_handle, reverse_handle, mutations_handle, "metagenomics", gc_bias=True + ) def test_small_input(): @@ -88,7 +93,6 @@ def test_kde_short(with_seed): def test_amp(with_seed): err_mod = kde.KDErrorModel(os.path.join(os.path.dirname(__file__), "../profiles/MiSeq"), 1000, 10) - print(err_mod.read_length) ref_genome = SeqRecord( Seq( ( diff --git a/iss/util.py b/iss/util.py index 222f0dc..580ee21 100644 --- a/iss/util.py +++ b/iss/util.py @@ -210,7 +210,7 @@ def reservoir(records, record_list, n=None): yield record -def concatenate(file_list, output, header = None): +def concatenate(file_list, output, header=None): """Concatenate files together Args: