Skip to content

Commit

Permalink
Merge pull request #834 from uclahs-cds/czhu-add-fuzz-test-log
Browse files Browse the repository at this point in the history
Add fuzz test log
  • Loading branch information
zhuchcn authored Dec 23, 2023
2 parents e7a4096 + eea7e6d commit 18e8cda
Show file tree
Hide file tree
Showing 10 changed files with 31 additions and 34 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## [Unreleased]

## [1.2.2] - 2023-10-23
## [1.2.2] - 2023-12-22

### Fixed:

Expand Down
3 changes: 3 additions & 0 deletions docs/files/fuzz_test_history.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,6 @@ v1.2.0 aca093e 2023-09-21 comprehensive 184432 1 0 0:00:00.348405 1.459285447300
v1.2.0 8bb50b1 2023-10-03 snv 298799 0 0 0:00:00.148556 0.3663789187817885 0:00:56.622936 118.54456105047444
v1.2.0 8bb50b1 2023-10-03 indel 295046 0 0 0:00:00.200385 0.41735107691824685 0:00:42.615877 100.6975160415037
v1.2.0 8bb50b1 2023-10-03 comprehensive 562642 0 0 0:00:00.360436 1.536579515902589 0:00:41.751076 182.56285717474975
v1.2.1 e7a4096 2023-11-28 snv 277035 0 0 0:00:00.138178 0.36248703508333757 0:00:56.331649 118.07408019871977
v1.2.1 e7a4096 2023-11-28 indel 275106 0 0 0:00:00.196090 0.41617068589153344 0:00:41.637490 99.04928608751558
v1.2.1 e7a4096 2023-11-28 comprehensive 522929 0 0 0:00:00.378236 15.50100807513127 0:00:41.256473 179.70483365195142
6 changes: 3 additions & 3 deletions moPepGen/aa/AminoAcidSeqRecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,8 +412,8 @@ def update_peptides(peptide):

class AminoAcidSeqRecordWithCoordinates(AminoAcidSeqRecord):
""" Amino acid sequence record with coordinates """
def __init__(self, seq:Seq, locations:List[MatchedLocation],
*args, orf:FeatureLocation=None, **kwargs ):
def __init__(self, seq:Seq, *args,
locations:List[MatchedLocation]=None, orf:FeatureLocation=None, **kwargs ):
""" Construct an AminoAcidSeqRecordWithCoordinates object.
Args:
Expand All @@ -423,7 +423,7 @@ def __init__(self, seq:Seq, locations:List[MatchedLocation],
orf (FeatureLocation): The open reading frame start and end.
"""
super().__init__(seq=seq, *args, **kwargs)
self.locations = locations
self.locations = locations or []
# query index
self.orf = orf

Expand Down
6 changes: 3 additions & 3 deletions moPepGen/dna/DNASeqRecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ class DNASeqRecordWithCoordinates(DNASeqRecord):
sequence is aligned to.
orf (FeatureLocation): The open reading frame start and end.
"""
def __init__(self, seq:Seq, locations:List[MatchedLocation],
*args, orf:FeatureLocation=None, selenocysteine:List[FeatureLocation]=None,
def __init__(self, seq:Seq, *args, locations:List[MatchedLocation]=None,
orf:FeatureLocation=None, selenocysteine:List[FeatureLocation]=None,
**kwargs,):
""" Construct a DNASeqRecordWithCoordinates object.
Expand All @@ -289,7 +289,7 @@ def __init__(self, seq:Seq, locations:List[MatchedLocation],
orf (FeatureLocation): The open reading frame start and end.
"""
super().__init__(seq=seq, *args, **kwargs)
self.locations = locations
self.locations = locations or []
# query index
self.orf = orf
self.selenocysteine = selenocysteine or []
Expand Down
34 changes: 14 additions & 20 deletions moPepGen/fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,9 @@ def fake_transcript_model(n_exons:int, is_coding:bool, is_selenoprotein:bool,
attributes['tag'].append('mrna_end_NF')
for i in range(n_exons):
exon_len = random.randint(min_exon_size, max_exon_size)
loc = FeatureLocation(start=offset, end=offset + exon_len, seqname=chrom)
loc = FeatureLocation(start=offset, end=offset + exon_len, seqname=chrom, strand=strand)
exon = GTFSeqFeature(
location=loc, strand=strand, type='exon', id=tx_id,
attributes=attributes, chrom=chrom
location=loc, type='exon', id=tx_id, attributes=attributes, chrom=chrom
)
exon_set.append(exon)
if is_coding:
Expand Down Expand Up @@ -586,10 +585,9 @@ def fake_transcript_model(n_exons:int, is_coding:bool, is_selenoprotein:bool,
else:
cds_end = offset + exon_len

loc = FeatureLocation(start=cds_start, end=cds_end, seqname=chrom)
loc = FeatureLocation(start=cds_start, end=cds_end, seqname=chrom, strand=strand)
cds = GTFSeqFeature(
location=loc, strand=strand, type='CDS', id=tx_id,
attributes=attributes, chrom=chrom
location=loc, type='CDS', id=tx_id, attributes=attributes, chrom=chrom
)
cds_set.append(cds)

Expand Down Expand Up @@ -661,12 +659,12 @@ def fake_transcript_model(n_exons:int, is_coding:bool, is_selenoprotein:bool,

if strand == 1:
sec_pos = sec_pos_cds - k + cds.location.start
loc = FeatureLocation(sec_pos, sec_pos + 3)
loc = FeatureLocation(sec_pos, sec_pos + 3, strand=strand)
else:
sec_pos = cds.location.end - (sec_pos_cds - k)
loc = FeatureLocation(sec_pos - 3, sec_pos)
loc = FeatureLocation(sec_pos - 3, sec_pos, strand=strand)
sec = GTFSeqFeature(
location=loc, strand=strand, type='selenocysteine', id=tx_id,
location=loc, type='selenocysteine', id=tx_id,
attributes=attributes, chrom=chrom
)
sec_set.append(sec)
Expand All @@ -677,11 +675,10 @@ def fake_transcript_model(n_exons:int, is_coding:bool, is_selenoprotein:bool,
if cds_set[0].location.start != exon_set[0].location.start:
loc = FeatureLocation(
start=exon_set[0].location.start, end=cds_set[0].location.start,
seqname=chrom
seqname=chrom, strand=strand
)
utr = GTFSeqFeature(
location=loc, strand=strand, type='UTR', id=tx_id,
attributes=attributes, chrom=chrom
location=loc, type='UTR', id=tx_id, attributes=attributes, chrom=chrom
)
utr_set.append(utr)
if strand == 1:
Expand All @@ -692,11 +689,10 @@ def fake_transcript_model(n_exons:int, is_coding:bool, is_selenoprotein:bool,
if cds_set[-1].location.end != exon_set[-1].location.end:
loc = FeatureLocation(
start=cds_set[-1].location.end, end=exon_set[-1].location.end,
seqname=chrom
seqname=chrom, strand=strand
)
utr = GTFSeqFeature(
location=loc, strand=strand, type='UTR', id=tx_id,
attributes=attributes, chrom=chrom
location=loc, type='UTR', id=tx_id, attributes=attributes, chrom=chrom
)
utr_set.append(utr)
if strand == 1:
Expand All @@ -705,12 +701,11 @@ def fake_transcript_model(n_exons:int, is_coding:bool, is_selenoprotein:bool,
five_utr_set.append(utr)

loc = FeatureLocation(
start=exon_set[0].location.start,
end=exon_set[-1].location.end,
seqname=chrom
start=exon_set[0].location.start, end=exon_set[-1].location.end,
seqname=chrom, strand=strand
)
transcript = GTFSeqFeature(
location=loc, strand=strand, type='transcript', id=tx_id,
location=loc, type='transcript', id=tx_id,
attributes=attributes, chrom=chrom
)

Expand Down Expand Up @@ -761,7 +756,6 @@ def fake_genomic_annotation(n_genes:int, chrom:str, min_exons:int, max_exons:int
loc = FeatureLocation(start=gene_start, end=gene_end, seqname=chrom)
gene_model = GeneAnnotationModel(
location=loc, chrom=chrom, transcripts=[tx_id], type='gene',
strand=strand,
attributes=copy.deepcopy(tx_model.transcript.attributes)
)
anno.genes[gene_id] = gene_model
Expand Down
2 changes: 1 addition & 1 deletion moPepGen/util/brute_force.py
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@ def call_peptides_main(self, variants:seqvar.VariantRecordPool,

sec_positions = [] if is_circ_rna else \
self.get_sec_positions(variant_coordinates)
variant_effects = self.check_variant_effect(seq, variant_coordinates)
variant_effects = self.check_variant_effect(str(seq), variant_coordinates)
stop_lost, stop_gain, silent_mutation = variant_effects

if not (is_coding and is_mrna_end_nf):
Expand Down
2 changes: 1 addition & 1 deletion test/files/annotation_gene.idx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##source=GENCODE
##python=3.8.17
##moPepGen=1.2.0
##biopython=1.81
##biopython=1.82
ENSG00000128408.9 0 174 ENST00000614167.2,ENST00000614168.2
ENSG00000244486.9 12491 12694 ENST00000622235.5
ENSG00000099949.21 24586 24955 ENST00000642151.1
Expand Down
2 changes: 1 addition & 1 deletion test/files/annotation_tx.idx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##source=GENCODE
##python=3.8.17
##moPepGen=1.2.0
##biopython=1.81
##biopython=1.82
ENST00000614167.2 174 7260 True
ENST00000614168.2 7260 12491 True
ENST00000622235.5 12694 24586 True
Expand Down
4 changes: 2 additions & 2 deletions test/unit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def create_three_frame_tvg(nodes:Dict[int,list], seq:str, graph_id:str='') -> Ty
"""
node_list:Dict[int,svgraph.TVGNode] = {}
raw_seq = Seq(seq)
seq = dna.DNASeqRecordWithCoordinates(raw_seq, [])
seq = dna.DNASeqRecordWithCoordinates(raw_seq, locations=[])
graph = svgraph.ThreeFrameTVG(seq, _id=graph_id)
graph.cleavage_params = params.CleavageParams()
for edge in copy.copy(graph.root.out_edges):
Expand Down Expand Up @@ -220,7 +220,7 @@ def create_three_frame_tvg(nodes:Dict[int,list], seq:str, graph_id:str='') -> Ty
)
seq_locations.append(seq_location)

seq = dna.DNASeqRecordWithCoordinates(_seq, seq_locations)
seq = dna.DNASeqRecordWithCoordinates(_seq, locations=seq_locations)
node = svgraph.TVGNode(seq, variants,
reading_frame_index=orf_idx, subgraph_id=graph.id)
node_list[key] = node
Expand Down
4 changes: 2 additions & 2 deletions test/unit/test_cvg.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_init_three_frames(self):
circ_record = create_circ_model('ENST0001', [(0,8),(10,18)], 'CIRCXXX')
seq = Seq('AATTGGCCCCGGTTAA')
locations = []
seq = dna.DNASeqRecordWithCoordinates(seq, locations)
seq = dna.DNASeqRecordWithCoordinates(seq, locations=locations)
graph = svgraph.ThreeFrameCVG(seq, 'ENST0001', circ_record=circ_record)
graph.init_three_frames()
for root in graph.reading_frames:
Expand All @@ -40,7 +40,7 @@ def test_extend_loop(self):
circ_record = create_circ_model('ENST0001', [(0,8),(10,18)], 'CIRCXXX')
seq = Seq('AATTGGCCCCGGTTAA')
locations = []
seq = dna.DNASeqRecordWithCoordinates(seq, locations)
seq = dna.DNASeqRecordWithCoordinates(seq, locations=locations)
graph = svgraph.ThreeFrameCVG(seq, 'ENST0001', circ_record=circ_record)
graph.init_three_frames()
graph.extend_loop()
Expand Down

0 comments on commit 18e8cda

Please sign in to comment.