From 2d22bda4477bb4c0a01a096f1a0a76e679d45c20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Paul=20M=C3=BCller?= <dev@craban.de>
Date: Fri, 10 Nov 2023 09:28:25 +0100
Subject: [PATCH] fix: bad data caused TypeError in numpy.polynomial (close
 #25)

---
 CHANGELOG             |  1 +
 nanite/preproc.py     |  7 +++++--
 tests/test_preproc.py | 31 +++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index f2cb5c3..eadc30f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,7 @@
  - BREAKING CHANGE: Due to the new imputation of nan-valued features,
    rating inference might change slightly. For the "zef18" dataset,
    inference was off by about 0.1 on average.
+ - fix: bad data caused TypeError in numpy.polynomial (#25)
  - enh: impute nan-valued feature data if corresponding response was 0
  - enh: allow empty-valued groups in rating HDF5 file
  - enh: when encountering inf values in a training set, replace them
diff --git a/nanite/preproc.py b/nanite/preproc.py
index 451787e..383734f 100644
--- a/nanite/preproc.py
+++ b/nanite/preproc.py
@@ -143,7 +143,9 @@ def find_turning_point(tip_position, force, contact_point_index):
     # Flip and normalize tip position so that maximum is at minimum
     # z-position (set to 1) which coincides with maximum indentation.
     x -= x[idp]
-    x /= x.min()
+    xmin = x.min()
+    if xmin != 0:
+        x /= xmin
     x[x < 0] = 0
 
     # Flip and normalize force so that maximum force is set to 1.
@@ -336,7 +338,7 @@ def preproc_correct_force_slope(apret, region="baseline", strategy="shift",
     force = apret["force"]
 
     # Get the current contact point position computed by "correct_tip_offset".
-    idp = np.argmin(np.abs(tip_position))
+    idp = max(2, np.argmin(np.abs(tip_position)))
     # Determine whether we want to do temporal or spatial correction:
     # Fit a linear slope to the baseline part (all data up until idp)
     mod = lmfit.models.LinearModel()
@@ -362,6 +364,7 @@ def preproc_correct_force_slope(apret, region="baseline", strategy="shift",
         idturn = find_turning_point(tip_position=tip_position,
                                     force=force_edit,
                                     contact_point_index=idp)
+        idturn = max(2, idturn)
         # Extend the best fit towards the turning point.
         best_fit_approach = mod.eval(out.params, x=abscissa[:idturn])
         force_edit[:idturn] -= best_fit_approach - best_fit_approach[-1]
diff --git a/tests/test_preproc.py b/tests/test_preproc.py
index aedb1b5..a885536 100644
--- a/tests/test_preproc.py
+++ b/tests/test_preproc.py
@@ -78,6 +78,37 @@ def test_get_steps_required():
     assert req_act == req_exp
 
 
+def test_preproc_correct_force_slope_bad_monotonic_data_issue_25():
+    fd = IndentationGroup(
+        data_path
+        / "fmt-jpk-fd_single_tilted-baseline-drift-"
+          "mitotic_2021-01-29.jpk-force")[0]
+    # Inject a bad tip position (zeros, then a rising ramp) via raw-data hack.
+    s = len(fd["force"])
+    raw_data = {}
+    for col in fd.columns_innate:
+        raw_data[col] = fd[col]
+    tippos = np.linspace(0, 5e-9, s)
+    tippos = np.roll(tippos, s//2)
+    tippos[:s//2] = 0
+    raw_data["tip position"] = tippos
+    fd._raw_data = raw_data
+    # Sanity check: the override is what the dataset now reports.
+    assert np.all(fd["tip position"] == tippos)
+    assert "tip position" in fd.columns_innate
+    # Regression check (issue #25): in nanite 3.7.3 this call raised
+    # "TypeError: expected non-empty vector for x" from np.polynomial.
+    fd.apply_preprocessing(
+        ["compute_tip_position", "correct_tip_offset", "correct_force_slope",
+         "correct_force_offset"],
+        options={
+            "correct_tip_offset": {"method": "deviation_from_baseline"},
+            "correct_force_slope": {"region": "approach",
+                                    "strategy": "drift"},
+        },
+        ret_details=True)
+
+
 def test_preproc_correct_force_slope_drift_approach():
     fd = IndentationGroup(
         data_path