Merge pull request #58 from rmnldwg/release-1.0.0.a3

rmnldwg · web-flow · commit d905b687294e · 2023-12-06T12:17:14.000+01:00
Release 1.0.0.a3
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,30 @@
 All notable changes to this project will be documented in this file.
 
 
+<a name="1.0.0.a3"></a>
+## [1.0.0.a3] - 2023-12-06
+
+Fourth alpha release. [@YoelPH](https://github.com/YoelPH) noticed some more bugs that have now been fixed. Most notably, the risk prediction raised exceptions because a matrix transpose (`.T`) was missing.
+
+### Bug Fixes
+
+- Raise `ValueError` if diagnose time parameters are invalid (Fixes [#53])
+- Use names of LNLs in unilateral `comp_encoding()` (Fixes [#56])
+- Wrong shape in unilateral posterior computation (missing `.T`) (Fixes [#57])
+- Wrong shape in bilateral joint posterior computation (missing `.T`) (Fixes [#57])
+
+### Documentation
+
+- Add info on diagnose time distribution's `ValueError`
+
+### Testing
+
+- `ValueError` raised in diagnose time distribution's `set_params`
+- Check `comp_encoding_diagnoses()` for shape and dtype
+- Test unilateral posterior state distribution for shape and sum
+- Test bilateral posterior joint state distribution for shape and sum
+
+
 <a name="1.0.0.a2"></a>
 ## [1.0.0.a2] - 2023-09-15
 
@@ -160,7 +184,8 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
 - add pre-commit hook to check commit msg
 
 
-[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a2...HEAD
+[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a3...HEAD
+[1.0.0.a3]: https://github.com/rmnldwg/lymph/compare/1.0.0.a2...1.0.0.a3
 [1.0.0.a2]: https://github.com/rmnldwg/lymph/compare/1.0.0.a1...1.0.0.a2
 [1.0.0.a1]: https://github.com/rmnldwg/lymph/compare/1.0.0.a0...1.0.0.a1
 [1.0.0.a0]: https://github.com/rmnldwg/lymph/compare/0.4.3...1.0.0.a0
@@ -169,6 +194,9 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
 [0.4.1]: https://github.com/rmnldwg/lymph/compare/0.4.0...0.4.1
 [0.4.0]: https://github.com/rmnldwg/lymph/compare/0.3.10...0.4.0
 
+[#57]: https://github.com/rmnldwg/lymph/issues/57
+[#56]: https://github.com/rmnldwg/lymph/issues/56
+[#53]: https://github.com/rmnldwg/lymph/issues/53
 [#46]: https://github.com/rmnldwg/lymph/issues/46
 [#45]: https://github.com/rmnldwg/lymph/issues/45
 [#41]: https://github.com/rmnldwg/lymph/issues/41
diff --git a/docs/source/quickstart_unilateral.ipynb b/docs/source/quickstart_unilateral.ipynb
@@ -322,6 +322,8 @@
     "\n",
     "Here, it's important that the first argument is the support of the probability mass function, i.e., the discrete time-steps from 0 to `max_time`. Also, all parameters must have default values. Otherwise, there would be cases when such a stored distribution cannot be accessed.\n",
     "\n",
+    "Lastly, if some parameters have bounds, e.g. the binomial distribution's `p`, the function should raise a `ValueError` when they are violated. This exception is propagated upwards and causes the `likelihood` method to simply return `-np.inf`. That way, the invalid sample will be seamlessly rejected during an MCMC sampling round.\n",
+    "\n",
     "Let's look at a concrete, binomial example:"
    ]
   },
@@ -336,6 +338,7 @@
     "def binom_pmf(k: np.ndarray, n: int, p: float):\n",
     "    \"\"\"Binomial PMF\"\"\"\n",
     "    if p > 1. or p < 0.:\n",
+    "        # This value error is important to enable seamless sampling!\n",
     "        raise ValueError(\"Binomial prob must be btw. 0 and 1\")\n",
     "    q = (1. - p)\n",
     "    binom_coeff = factorial(n) / (factorial(k) * factorial(n - k))\n",
diff --git a/lymph/diagnose_times.py b/lymph/diagnose_times.py
@@ -44,7 +44,9 @@ def __init__(
         function must return a list of probabilities for each diagnose time.
 
         Note:
-            All arguments except ``support`` must have default values.
+            All arguments except ``support`` must have default values and if some
+            parameters have bounds (like the binomial distribution's ``p``), the
+            function must raise a ``ValueError`` if the parameter is invalid.
 
         Since ``max_time`` specifies the support of the distribution (ranging from 0 to
         ``max_time``), it must be provided if a parametrized function is passed. If a
@@ -180,12 +182,29 @@ def get_params(
 
 
     def set_params(self, **kwargs) -> None:
-        """Update distribution by setting its parameters and storing the frozen PMF."""
+        """Update distribution by setting its parameters and storing the frozen PMF.
+
+        To work during inference using e.g. MCMC sampling, it needs to throw a
+        ``ValueError`` if the parameters are invalid. To this end, it expects the
+        underlying function to raise a ``ValueError`` if one of the parameters is
+        invalid. If the parameters are valid, the frozen PMF is stored and can be
+        retrieved via the :py:meth:`distribution` property.
+        """
         params_to_set = set(kwargs.keys()).intersection(self._kwargs.keys())
         if self.is_updateable:
-            if hasattr(self, "_frozen"):
-                del self._frozen
-            self._kwargs.update({p: kwargs[p] for p in params_to_set})
+            new_kwargs = self._kwargs.copy()
+            new_kwargs.update({p: kwargs[p] for p in params_to_set})
+
+            try:
+                self._frozen = self.normalize(
+                    self._func(self.support, **new_kwargs)
+                )
+            except ValueError as val_err:
+                raise ValueError(
+                    "Invalid parameter(s) provided to distribution over diagnose times"
+                ) from val_err
+
+            self._kwargs = new_kwargs
         else:
             warnings.warn("Distribution is not updateable, skipping...")
 
diff --git a/lymph/models/bilateral.py b/lymph/models/bilateral.py
@@ -519,7 +519,7 @@ def comp_posterior_joint_state_dist(
             )
             observation_matrix = getattr(self, side).observation_matrix
             # vector with P(Z=z|X) for each state X. A data matrix for one "patient"
-            diagnose_given_state[side] = diagnose_encoding @ observation_matrix
+            diagnose_given_state[side] = diagnose_encoding @ observation_matrix.T
 
         joint_state_dist = self.comp_joint_state_dist(t_stage=t_stage, mode=mode)
         # matrix with P(Zi=zi,Zc=zc|Xi,Xc) * P(Xi,Xc) for all states Xi,Xc.
diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py
@@ -819,7 +819,7 @@ def comp_diagnose_encoding(
             diagnose_encoding = np.kron(
                 diagnose_encoding,
                 matrix.compute_encoding(
-                    lnls=[lnl.name for lnl in self.graph.lnls],
+                    lnls=self.graph.lnls.keys(),
                     pattern=given_diagnoses.get(modality, {}),
                 ),
             )
@@ -873,7 +873,7 @@ def comp_posterior_state_dist(
 
         diagnose_encoding = self.comp_diagnose_encoding(given_diagnoses)
         # vector containing P(Z=z|X). Essentially a data matrix for one patient
-        diagnose_given_state = diagnose_encoding @ self.observation_matrix
+        diagnose_given_state = diagnose_encoding @ self.observation_matrix.T
 
         # vector P(X=x) of probabilities of arriving in state x (marginalized over time)
         state_dist = self.comp_state_dist(t_stage, mode=mode)
diff --git a/tests/binary_bilateral_test.py b/tests/binary_bilateral_test.py
@@ -122,3 +122,36 @@ def test_compute_likelihood_twice(self):
         first_llh = self.model.likelihood(log=True)
         second_llh = self.model.likelihood(log=True)
         self.assertEqual(first_llh, second_llh)
+
+
+class RiskTestCase(fixtures.BilateralModelMixin, unittest.TestCase):
+    """Exercise the posterior (risk) computation of the bilateral model."""
+
+    def setUp(self):
+        super().setUp()
+        self.model.modalities = fixtures.MODALITIES
+
+    def create_random_diagnoses(self):
+        """Return a random ``{modality: {lnl: True | False | None}}`` mapping."""
+        outcomes = [True, False, None]
+        return {
+            modality: {
+                lnl: self.rng.choice(outcomes)
+                for lnl in self.model.ipsi.graph.lnls.keys()
+            }
+            for modality in self.model.modalities
+        }
+
+    def test_posterior_state_dist(self):
+        """Joint posterior must be a normalized float array over state pairs."""
+        side_size = len(self.model.ipsi.state_list)
+        params = self.create_random_params()
+        diagnoses = self.create_random_diagnoses()
+
+        joint_posterior = self.model.comp_posterior_joint_state_dist(
+            given_param_kwargs=params,
+            given_diagnoses=diagnoses,
+        )
+        self.assertEqual(joint_posterior.shape, (side_size, side_size))
+        self.assertEqual(joint_posterior.dtype, float)
+        self.assertTrue(np.isclose(joint_posterior.sum(), 1.))
diff --git a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py
@@ -317,3 +317,44 @@ def test_likelihood_invalid_params_isinf(self):
             mode="HMM",
         )
         self.assertEqual(likelihood, -np.inf)
+
+
+class RiskTestCase(fixtures.BinaryUnilateralModelMixin, unittest.TestCase):
+    """Test anything related to the risk computation."""
+
+    def setUp(self):
+        """Set modalities, diagnose time distributions, and random params."""
+        super().setUp()
+        self.model.modalities = fixtures.MODALITIES
+        self.init_diag_time_dists(early="frozen", late="parametric")
+        self.model.assign_params(**self.create_random_params())
+
+    def create_random_diagnoses(self):
+        """Store a random diagnosis per modality and LNL and also return it."""
+        self.diagnoses = {}
+        for modality in self.model.modalities:
+            self.diagnoses[modality] = {}
+            for lnl in self.model.graph.lnls.keys():
+                self.diagnoses[modality][lnl] = self.rng.choice([True, False, None])
+        return self.diagnoses
+
+    def test_comp_diagnose_encoding(self):
+        """Check shape and dtype of the encoding computed from the diagnoses."""
+        self.create_random_diagnoses()
+        num_lnls, num_mods = len(self.model.graph.lnls), len(self.model.modalities)
+        num_possible_diagnoses = 2**(num_lnls * num_mods)
+
+        diagnose_encoding = self.model.comp_diagnose_encoding(self.diagnoses)
+        self.assertEqual(diagnose_encoding.shape, (num_possible_diagnoses,))
+        self.assertEqual(diagnose_encoding.dtype, bool)
+
+    def test_posterior_state_dist(self):
+        """Check shape, dtype and normalization of the posterior state dist."""
+        posterior_state_dist = self.model.comp_posterior_state_dist(
+            given_param_kwargs=self.create_random_params(),
+            given_diagnoses=self.create_random_diagnoses(),
+            t_stage=self.rng.choice(["early", "late"]),
+        )
+        self.assertEqual(posterior_state_dist.shape, (2**len(self.model.graph.lnls),))
+        self.assertEqual(posterior_state_dist.dtype, float)
+        self.assertTrue(np.isclose(np.sum(posterior_state_dist), 1.))
diff --git a/tests/distribution_test.py b/tests/distribution_test.py
@@ -11,10 +11,27 @@
 class FixtureMixin:
     """Mixin that provides fixtures for the tests."""
 
+    @staticmethod
+    def binom_pmf(
+        support: np.ndarray,
+        max_time: int = 10,
+        p: float = 0.5,
+    ) -> np.ndarray:
+        """Binomial probability mass function."""
+        if max_time <= 0:
+            raise ValueError("max_time must be a positive integer.")
+        if len(support) != max_time + 1:
+            raise ValueError("support must have length max_time + 1.")
+        if not 0. <= p <= 1.:
+            raise ValueError("p must be between 0 and 1.")
+
+        return sp.stats.binom.pmf(support, max_time, p)
+
+
     def setUp(self):
         self.max_time = 10
         self.array_arg = np.random.uniform(size=self.max_time + 1, low=0., high=10.)
-        self.func_arg = lambda support, p=0.5: sp.stats.binom.pmf(support, self.max_time, p)
+        self.func_arg = lambda support, p=0.5: self.binom_pmf(support, self.max_time, p)
 
 
 class DistributionTestCase(FixtureMixin, unittest.TestCase):
@@ -58,6 +75,12 @@ def test_updateable_distribution_with_max_time(self):
         self.assertTrue(len(dist.distribution) == self.max_time + 1)
         self.assertTrue(np.allclose(sum(dist.distribution), 1.))
 
+    def test_updateable_distribution_raises_value_error(self):
+        """Setting an invalid ``p`` on an updateable distribution must fail."""
+        updateable = Distribution(self.func_arg, max_time=self.max_time)
+        self.assertTrue(updateable.is_updateable)
+        self.assertRaises(ValueError, lambda: updateable.set_params(p=1.5))
+
 
 class DistributionDictTestCase(FixtureMixin, unittest.TestCase):
     """Test the distribution dictionary."""

Original file line number	Diff line number	Diff line change
`@@ -519,7 +519,7 @@ def comp_posterior_joint_state_dist(`
`519`	`519`	`)`
`520`	`520`	`observation_matrix = getattr(self, side).observation_matrix`
`521`	`521`	`# vector with P(Z=z\|X) for each state X. A data matrix for one "patient"`
`522`		`- diagnose_given_state[side] = diagnose_encoding @ observation_matrix`
	`522`	`+ diagnose_given_state[side] = diagnose_encoding @ observation_matrix.T`
`523`	`523`
`524`	`524`	`joint_state_dist = self.comp_joint_state_dist(t_stage=t_stage, mode=mode)`
`525`	`525`	`# matrix with P(Zi=zi,Zc=zc\|Xi,Xc) * P(Xi,Xc) for all states Xi,Xc.`