From 865345f4d047f13f5b1dca30783576cd0acaea70 Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Thu, 26 Dec 2024 13:17:41 +0100 Subject: [PATCH] fix: marginal reconstruction needs use the most likely state, not the fitch state --- .../src/commands/ancestral/marginal_sparse.rs | 31 ++++++------------- test_scripts/ancestral_sparse.py | 5 +-- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/packages/treetime/src/commands/ancestral/marginal_sparse.rs b/packages/treetime/src/commands/ancestral/marginal_sparse.rs index 06e46eee..69880d42 100644 --- a/packages/treetime/src/commands/ancestral/marginal_sparse.rs +++ b/packages/treetime/src/commands/ancestral/marginal_sparse.rs @@ -414,10 +414,12 @@ pub fn ancestral_reconstruction_marginal_sparse( return; } + dbg!(&node.payload.name); let seq: Seq = (0..n_partitions) .flat_map(|si| { let PartitionLikelihood { alphabet, .. } = &partitions[si]; let node_seq = &node.payload.sparse_partitions[si].seq; + let node_profile = &node.payload.sparse_partitions[si].profile; let mut seq = if node.is_root { node_seq.sequence.clone() @@ -433,11 +435,6 @@ pub fn ancestral_reconstruction_marginal_sparse( seq[m.pos()] = m.qry(); } - // Implant most likely state of variable sites - for (&pos, states) in &node.payload.sparse_partitions[si].seq.fitch.variable { - seq[pos] = states.get_one(); - } - // Implant indels for indel in &edge.indels { if indel.deletion { @@ -456,8 +453,10 @@ pub fn ancestral_reconstruction_marginal_sparse( seq[r.0..r.1].fill(ambig_char); } - for (pos, states) in &node_seq.fitch.variable { - seq[*pos] = states.get_one(); + // change variable sites to their most likely state + for (pos, states) in &node_profile.variable { + dbg!(pos, &states); + seq[*pos] = alphabet.char(states.dis.argmax().unwrap()); } node.payload.sparse_partitions[si].seq.sequence = seq.clone(); @@ -501,16 +500,10 @@ mod tests { let aln = read_many_fasta_str( indoc! {r#" - >root - ACAGCCATGTATTG-- - >AB - ACATCCCTGTA-TG-- >A ACATCGCCNNA--GAC >B GCATCCCTGTA-NG-- - >CD - CCGGCCATGTATTG-- >C CCGGCGATGTRTTG-- >D @@ -522,11 +515,11 @@ mod tests { let expected = read_many_fasta_str( indoc! {r#" >root - ACAGCCATGTATTG-- + TCGGCGCTGTATTG-- >AB - ACATCCCTGTA-TG-- + ACATCGCTGTA-TG-- >CD - CCGGCCATGTATTG-- + TCGGCGGTGTATTG-- "#}, &NUC_ALPHABET, )? @@ -570,16 +563,10 @@ mod tests { let aln = read_many_fasta_str( indoc! {r#" - >root - ACAGCCATGTATTG-- - >AB - ACATCCCTGTA-TG-- >A ACATCGCCNNA--GAC >B GCATCCCTGTA-NG-- - >CD - CCGGCCATGTATTG-- >C CCGGCGATGTRTTG-- >D diff --git a/test_scripts/ancestral_sparse.py b/test_scripts/ancestral_sparse.py index bac755d2..db069dd3 100644 --- a/test_scripts/ancestral_sparse.py +++ b/test_scripts/ancestral_sparse.py @@ -224,11 +224,8 @@ def ancestral_sparse(graph: Graph) -> float: return logLH def tests(): - aln = {"root":"ACAGCCATGTATTG--", - "AB":"ACATCCCTGTA-TG--", - "A":"ACATCGCCNNA--GAC", + aln = {"A":"ACATCGCCNNA--GAC", "B":"GCATCCCTGTA-NG--", - "CD":"CCGGCCATGTATTG--", "C":"CCGGCGATGTRTTG--", "D":"TCGGCCGTGTRTTG--"}