Skip to content

Commit

Permalink
format and fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ThijsMaas committed Feb 5, 2024
1 parent 8633b9f commit 968d6e7
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 28 deletions.
21 changes: 13 additions & 8 deletions iss/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def generate_reads(args):
logger.debug("Using verbose logger")
logger.info("Starting iss generate")

error_model = load_error_model(args.mode, args.seed, args.model, args.fragment_length, args.fragment_length_sd, args.store_mutations)
error_model = load_error_model(
args.mode, args.seed, args.model, args.fragment_length, args.fragment_length_sd, args.store_mutations
)

genome_list, genome_file = load_genomes(
args.genomes, args.draft, args.ncbi, args.n_genomes_ncbi, args.output, args.n_genomes
Expand All @@ -54,6 +56,9 @@ def generate_reads(args):
error_model,
)

if args.store_mutations:
logger.info(f"Storing inserted sequence errors in {args.output}.vcf")

logger.info("Using %s cpus for read generation" % args.cpus)

if readcount_dic is not None:
Expand Down Expand Up @@ -123,8 +128,8 @@ def generate_reads(args):
if args.store_mutations:
util.concatenate(
temp_mut,
args.output + '.vcf',
"##fileformat=VCFv4.1\n" + "\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"])
args.output + ".vcf",
"##fileformat=VCFv4.1\n" + "\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"]),
)
full_tmp_list = temp_R1 + temp_R2
full_tmp_list.append(genome_file)
Expand All @@ -135,7 +140,7 @@ def generate_reads(args):
util.compress(args.output + "_R1.fastq")
util.compress(args.output + "_R2.fastq")
if args.store_mutations:
util.compress(args.output + '.vcf')
util.compress(args.output + ".vcf")
logger.info("Read generation complete")


Expand Down Expand Up @@ -392,11 +397,11 @@ def main():
help="Fragment length standard deviation for metagenomics sequencing (default: %(default)s).",
)
parser_gen.add_argument(
'--store_mutations',
'-M',
action='store_true',
"--store_mutations",
"-M",
action="store_true",
default=False,
help='Generates an additional VCF file with the mutations introduced in the reads',
help="Generates an additional VCF file with the mutations introduced in the reads",
)
parser_gen._optionals.title = "arguments"
parser_gen.set_defaults(func=generate_reads)
Expand Down
6 changes: 4 additions & 2 deletions iss/error_models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,16 @@ def mut_sequence(self, record, orientation):
np.random.choice(nucl_choices[position][nucl.upper()][0], p=nucl_choices[position][nucl.upper()][1])
)
if self.store_mutations and mutated_nuc != record.annotations["original"][position]:
record.annotations["mutations"].append({
record.annotations["mutations"].append(
{
"id": record.id,
"position": position,
"ref": mutable_seq[position],
"alt": mutated_nuc,
"quality": qual,
"type": "snp",
})
}
)
mutable_seq[position] = mutated_nuc
position += 1
record.seq = Seq(mutable_seq)
Expand Down
12 changes: 6 additions & 6 deletions iss/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def reads_generator(n_pairs, record, error_model, cpu_number, gc_bias, sequence_
i = 0
while i < n_pairs:
try:
# forward, reverse = simulate_read(record, error_model, i, cpu_number, sequence_type)
forward, reverse, mutations = simulate_read(record, error_model, i, cpu_number, sequence_type)

except AssertionError:
Expand Down Expand Up @@ -190,7 +189,7 @@ def simulate_read(record, error_model, i, cpu_number, sequence_type):
reverse = error_model.introduce_error_scores(reverse, "reverse")
reverse.seq = error_model.mut_sequence(reverse, "reverse")

return (forward, reverse) # mutations
return (forward, reverse, forward.annotations["mutations"] + reverse.annotations["mutations"])


def to_fastq(generator, output):
Expand Down Expand Up @@ -602,13 +601,14 @@ def write_mutations(mutations, mutations_handle):
"\t".join(
[
str(vcf_dict["id"]),
str(vcf_dict["position"] + 1), # vcf files have 1-based index
str(vcf_dict["position"] + 1), # vcf files have 1-based index
".",
vcf_dict["ref"],
str(vcf_dict["alt"]),
str(vcf_dict["quality"]),
"",
""
"",
]
) + "\n"
)
)
+ "\n"
)
26 changes: 15 additions & 11 deletions iss/test/test_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,17 @@
from iss.error_models import basic, kde
from iss.util import cleanup

# due to inconsistent seeding between python 2 and 3, some of the following
# tests are disabled with python2


def teardown_cleanup():
cleanup(["data/.test.iss.tmp.my_genome.0_R1.fastq", "data/.test.iss.tmp.my_genome.0_R2.fastq"])


@pytest.fixture
def setup_and_teardown():
yield
cleanup(["data/.test.iss.tmp.my_genome.0_R1.fastq", "data/.test.iss.tmp.my_genome.0_R2.fastq"])
cleanup(
[
"data/.test.iss.tmp.my_genome.0_R1.fastq",
"data/.test.iss.tmp.my_genome.0_R2.fastq",
"data/.test.iss.tmp.my_genome.tsv",
]
)


def test_cleanup_fail():
Expand All @@ -43,15 +42,21 @@ def test_simulate_and_save(setup_and_teardown):
ref_genome = SeqRecord(Seq(str("AAAAACCCCC" * 100)), id="my_genome", description="test genome")
forward_handle = open("data/.test.iss.tmp.my_genome.0_R1.fastq", "w")
reverse_handle = open("data/.test.iss.tmp.my_genome.0_R2.fastq", "w")
generator.simulate_reads(ref_genome, err_mod, 1000, 0, forward_handle, reverse_handle, "metagenomics", gc_bias=True)
mutations_handle = open("data/.test.iss.tmp.my_genome.tsv", "w")
generator.simulate_reads(
ref_genome, err_mod, 1000, 0, forward_handle, reverse_handle, mutations_handle, "metagenomics", gc_bias=True
)


def test_simulate_and_save_short(setup_and_teardown):
err_mod = basic.BasicErrorModel()
ref_genome = SeqRecord(Seq(str("AACCC" * 100)), id="my_genome", description="test genome")
forward_handle = open("data/.test.iss.tmp.my_genome.0_R1.fastq", "w")
reverse_handle = open("data/.test.iss.tmp.my_genome.0_R2.fastq", "w")
generator.simulate_reads(ref_genome, err_mod, 1000, 0, forward_handle, reverse_handle, "metagenomics", gc_bias=True)
mutations_handle = open("data/.test.iss.tmp.my_genome.tsv", "w")
generator.simulate_reads(
ref_genome, err_mod, 1000, 0, forward_handle, reverse_handle, mutations_handle, "metagenomics", gc_bias=True
)


def test_small_input():
Expand Down Expand Up @@ -88,7 +93,6 @@ def test_kde_short(with_seed):

def test_amp(with_seed):
err_mod = kde.KDErrorModel(os.path.join(os.path.dirname(__file__), "../profiles/MiSeq"), 1000, 10)
print(err_mod.read_length)
ref_genome = SeqRecord(
Seq(
(
Expand Down
2 changes: 1 addition & 1 deletion iss/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def reservoir(records, record_list, n=None):
yield record


def concatenate(file_list, output, header = None):
def concatenate(file_list, output, header=None):
"""Concatenate files together
Args:
Expand Down

0 comments on commit 968d6e7

Please sign in to comment.