From 21873655c7bf961a707ec683614ace6527e3f536 Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Fri, 15 Dec 2023 14:13:30 +0000 Subject: [PATCH] update effect allele class --- pgscatalog_utils/scorefile/effectallele.py | 43 ++++++++++++++++++---- pgscatalog_utils/scorefile/qc.py | 2 +- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/pgscatalog_utils/scorefile/effectallele.py b/pgscatalog_utils/scorefile/effectallele.py index 6f0dfcb..a72e3d1 100644 --- a/pgscatalog_utils/scorefile/effectallele.py +++ b/pgscatalog_utils/scorefile/effectallele.py @@ -1,10 +1,31 @@ class EffectAllele: - _valid_bases = frozenset({"A", "C", "T", "G"}) - __slots__ = ("allele", "is_valid") + """A class that represents an effect allele found in PGS Catalog scoring files - def __init__(self, allele: str): - self.allele = allele - self.is_valid = self.is_valid_allele() + The allele whose dosage is counted (e.g. {0, 1, 2}) and multiplied by the variant's + weight (effect_weight) when calculating score. The effect allele is also known as + the 'risk allele'. + >>> simple_ea = EffectAllele("A") + >>> simple_ea + EffectAllele("A") + >>> simple_ea.is_snp + True + >>> str(simple_ea) + 'A' + >>> EffectAllele("AG") + EffectAllele("AG") + >>> hla_example = EffectAllele("+") + >>> hla_example + EffectAllele("+") + >>> hla_example.is_snp + False + """ + + _valid_snp_bases = frozenset({"A", "C", "T", "G"}) + __slots__ = ("allele", "is_snp") + + def __init__(self, allele): + self.allele = str(allele) + self.is_snp = self._is_snp() def __repr__(self): return f'{type(self).__name__}("{self.allele}")' @@ -12,5 +33,13 @@ def __repr__(self): def __str__(self): return self.allele - def is_valid_allele(self) -> bool: - return not frozenset(self.allele) - self._valid_bases + def _is_snp(self) -> bool: + """SNPs are the most common type of effect allele. More complex effect + alleles, like HLAs or APOE genes, often require extra work to represent in + genomes. 
Users should be warned about complex effect alleles. + >>> EffectAllele("+")._is_snp() + False + >>> EffectAllele("A")._is_snp() + True + """ + return not frozenset(self.allele) - self._valid_snp_bases diff --git a/pgscatalog_utils/scorefile/qc.py b/pgscatalog_utils/scorefile/qc.py index 50fcb52..526fda2 100644 --- a/pgscatalog_utils/scorefile/qc.py +++ b/pgscatalog_utils/scorefile/qc.py @@ -195,7 +195,7 @@ def check_effect_allele( ) -> typing.Generator[ScoreVariant, None, None]: n_bad = 0 for variant in variants: - if not variant.effect_allele.is_valid: + if not variant.effect_allele.is_snp: n_bad += 1 yield variant