Skip to content

Commit

Permalink
Merge pull request #293 from neherlab/fix/parsimony-reconstruction
Browse files Browse the repository at this point in the history
fix: reconstruction of ambiguous nucs in ancestral parsimony mode
  • Loading branch information
ivan-aksamentov authored Dec 6, 2024
2 parents dd992e4 + d7eeb0f commit f676392
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 30 deletions.
58 changes: 31 additions & 27 deletions packages/treetime/src/alphabet/alphabet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ pub enum AlphabetName {
}

pub type ProfileMap = IndexMap<char, Array1<f64>>;
pub type StateSetMap = IndexMap<char, StateSet>;
pub type CharToSet = IndexMap<char, StateSet>;
pub type SetToChar = IndexMap<StateSet, char>;

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Alphabet {
Expand All @@ -43,6 +46,11 @@ pub struct Alphabet {
treat_gap_as_unknown: bool,
profile_map: ProfileMap,

#[serde(skip)]
char_to_set: IndexMap<char, StateSet>,
#[serde(skip)]
set_to_char: IndexMap<StateSet, char>,

#[serde(skip)]
char_to_index: Vec<Option<usize>>,
#[serde(skip)]
Expand Down Expand Up @@ -124,6 +132,18 @@ impl Alphabet {
index_to_char.push(c);
}

let char_to_set = {
let mut char_to_set: CharToSet = canonical.iter().map(|c| (c, StateSet::from_char(c))).collect();
ambiguous.iter().for_each(|(key, chars)| {
char_to_set.insert(*key, StateSet::from_iter(chars));
});
char_to_set.insert(*gap, StateSet::from_char(*gap));
char_to_set.insert(*unknown, StateSet::from_char(*unknown));
char_to_set
};

let set_to_char: SetToChar = char_to_set.iter().map(|(&c, &s)| (s, c)).collect();

Ok(Self {
all,
char_to_index,
Expand All @@ -137,25 +157,11 @@ impl Alphabet {
gap: *gap,
treat_gap_as_unknown: *treat_gap_as_unknown,
profile_map,
char_to_set,
set_to_char,
})
}

/// Resolve possible ambiguity of the given character to the set of canonical chars
pub fn disambiguate(&self, c: char) -> StateSet {
// If unknown then could be any canonical (e.g. N => { A, C, G, T })
if self.is_unknown(c) {
self.canonical().collect()
}
// If ambiguous (e.g. R => { A, G })
else if let Some(resolutions) = self.ambiguous.get(&c) {
resolutions.iter().copied().collect()
}
// Otherwise it's not ambiguous and it's the char itself (incl. gap)
else {
once(c).collect()
}
}

#[inline]
pub fn get_profile(&self, c: char) -> &Array1<f64> {
self
Expand All @@ -178,7 +184,7 @@ impl Alphabet {
{
let mut profile = Array1::<f64>::zeros(self.n_canonical());
for c in chars {
let chars = self.disambiguate(*c.borrow());
let chars = self.char_to_set(*c.borrow());
for c in chars.iter() {
let index = self.index(c);
profile[index] = 1.0;
Expand Down Expand Up @@ -206,6 +212,14 @@ impl Alphabet {
Ok(prof)
}

pub fn set_to_char(&self, c: StateSet) -> char {
self.set_to_char[&c]
}

pub fn char_to_set(&self, c: char) -> StateSet {
self.char_to_set[&c]
}

/// All existing characters (including 'unknown' and 'gap')
pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
self.all.iter()
Expand Down Expand Up @@ -466,16 +480,6 @@ mod tests {
use indoc::indoc;
use pretty_assertions::assert_eq;

#[test]
fn test_disambiguate() -> Result<(), Report> {
let alphabet = Alphabet::new(AlphabetName::Nuc, false)?;
assert_eq!(stateset! {'A', 'G'}, alphabet.disambiguate('R'));
assert_eq!(stateset! {'A', 'C', 'G', 'T'}, alphabet.disambiguate('N'));
assert_eq!(stateset! {'C'}, alphabet.disambiguate('C'));
assert_eq!(stateset! {alphabet.gap()}, alphabet.disambiguate(alphabet.gap()));
Ok(())
}

#[test]
fn test_alphabet_nuc() -> Result<(), Report> {
let alphabet = Alphabet::new(AlphabetName::Nuc, false)?;
Expand Down
3 changes: 1 addition & 2 deletions packages/treetime/src/commands/ancestral/fitch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,8 +509,7 @@ pub fn ancestral_reconstruction_fitch(
}

for (&pos, &states) in &node.fitch.variable {
seq[pos] = states.first().unwrap();
// seq[pos] = alphabet.ambiguate(states).first().unwrap();
seq[pos] = alphabet.set_to_char(states);
}

node.sequence = seq.clone();
Expand Down
2 changes: 1 addition & 1 deletion packages/treetime/src/representation/graph_sparse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ impl SparseSeqNode {
.iter()
.enumerate()
.filter(|(_, &c)| alphabet.is_ambiguous(c))
.map(|(pos, &c)| (pos, alphabet.disambiguate(c)))
.map(|(pos, &c)| (pos, alphabet.char_to_set(c)))
.collect();

let seq_dis = ParsimonySeqDis {
Expand Down

0 comments on commit f676392

Please sign in to comment.