From 81d0bb9203af87e159ab0572214c0ded1d27ff86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Srokosz?= <pawel.srokosz@cert.pl>
Date: Tue, 12 Mar 2024 12:00:09 +0100
Subject: [PATCH] Fix negated filters logic: non-boolean AND/OR (#247)

---
 docs/requirements.txt          |   2 +-
 docs/task_headers_payloads.rst |  12 ++++
 karton/core/task.py            |  71 +++++++++++++++-------
 tests/test_task_filters.py     | 105 +++++++++++++++++++++++++++++++++
 4 files changed, 167 insertions(+), 23 deletions(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index c73e84ef..a9962384 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,4 +1,4 @@
-Sphinx==4.5.0
+Sphinx==5.3.0
 sphinx-autodoc-annotation==1.0.post1
 sphinx-autodoc-typehints==1.10.3
 sphinx-rtd-theme==0.5.0
diff --git a/docs/task_headers_payloads.rst b/docs/task_headers_payloads.rst
index 466bed36..38fbe862 100644
--- a/docs/task_headers_payloads.rst
+++ b/docs/task_headers_payloads.rst
@@ -107,6 +107,18 @@ Filter logic can be used to fulfill specific use-cases:
 ``[{"foo": "!*"}]``                   'foo' header must be not defined.
 ====================================  ==============================================================================
 
+Excluding (negated) filters come with specific corner-cases. Regular filters require specific value to be defined in header, while
+negated filters are accepting all possible values except specified in filter.
+
+==================================================================================  =============================================================================================================================================
+   ``filters`` value                                                                  Meaning
+----------------------------------------------------------------------------------  ---------------------------------------------------------------------------------------------------------------------------------------------
+``[{"type": "sample", "stage": "!*"}]``                                             matches only tasks that have type 'sample' but no 'stage' key
+``[{"platform": "!linux"}, {"platform": "!windows"}]``                              matches **all** tasks (even with no headers) but not these with platform 'linux' or 'windows'
+``[{"foo": "bar", "platform": "!linux"}, {"foo": "bar", "platform": "!windows"}]``  'foo' header is required and must have 'bar' value, but platform can't be 'linux' or 'windows'
+``[{"foo": "bar", "platform": "!linux"}, {"foo": "baz", "platform": "!windows"}]``  'foo' header is required and must have 'bar' value and no 'linux' in platform key, or foo must be 'baz', but then platform can't be 'windows'
+==================================================================================  =============================================================================================================================================
+
 .. warning::
 
     It's recommended to use only strings in filter and header values
diff --git a/karton/core/task.py b/karton/core/task.py
index 51cba9aa..c8e2dddd 100644
--- a/karton/core/task.py
+++ b/karton/core/task.py
@@ -212,38 +212,65 @@ def matches_filters(self, filters: List[Dict[str, Any]]) -> bool:
         :meta private:
         """
 
-        matches = False
-        for task_filter in filters:
-            matched = []
-            for filter_key, filter_value in task_filter.items():
-                # Coerce to string for comparison
-                header_value = self.headers.get(filter_key)
+        def test_filter(headers: Dict[str, Any], filter: Dict[str, Any]) -> int:
+            """
+            Filter match follows AND logic, but it's non-boolean because filters may be
+            negated (task:!platform).
+
+            Result values are as follows:
+            - 1  - positive match, no mismatched values in headers
+                   (all matched)
+            - 0  - no match, found value that doesn't match to the filter
+                   (some are not matched)
+            - -1 - negative match, found value that matches negated filter value
+                   (all matched but found negative matches)
+            """
+            matches = 1
+            for filter_key, filter_value in filter.items():
+                # Coerce filter value to string
                 filter_value_str = str(filter_value)
-                header_value_str = str(header_value)
-
                 negated = False
                 if filter_value_str.startswith("!"):
                     negated = True
                     filter_value_str = filter_value_str[1:]
 
-                if header_value is None:
-                    matched.append(negated)
-                    continue
+                # If expected key doesn't exist in headers
+                if filter_key not in headers:
+                    # Negated filter ignores non-existent values
+                    if negated:
+                        continue
+                    # But positive filter doesn't
+                    return 0
 
+                # Coerce header value to string
+                header_value_str = str(headers[filter_key])
                 # fnmatch is great for handling simple wildcard patterns (?, *, [abc])
                 match = fnmatch.fnmatchcase(header_value_str, filter_value_str)
-                # if matches, but it's negated then we can return straight away
-                # since no matter the other filters
+                # If matches, but it's negated: it's negative match
                 if match and negated:
-                    return False
-
-                # else, apply a XOR logic to take care of negation matching
-                matched.append(match != negated)
-
-            # set the flag if all consumer filter fields match the task header.
-            # It will be set to True only if at least one filter matches the header
-            matches |= all(matched)
-
+                    matches = -1
+                # If doesn't match but filter is not negated: it's not a match
+                if not match and not negated:
+                    return 0
+            # If there are no mismatched values: filter is matched
+            return matches
+
+        # List of filter matches follow OR logic, but -1 is special
+        # If there is any -1, result is False
+        #   (any matched, but it's negative match)
+        # If there is any 1, but no -1's: result is True
+        #   (any matched, no negative match)
+        # If there are only 0's: result is False
+        #   (none matched)
+        matches = False
+        for task_filter in filters:
+            match_result = test_filter(self.headers, task_filter)
+            if match_result == -1:
+                # Any negative match results in False
+                return False
+            if match_result == 1:
+                # Any positive match but without negative matches results in True
+                matches = True
         return matches
 
     def set_task_parent(self, parent: "Task"):
diff --git a/tests/test_task_filters.py b/tests/test_task_filters.py
index 035b5ae6..2bd8599e 100644
--- a/tests/test_task_filters.py
+++ b/tests/test_task_filters.py
@@ -57,6 +57,14 @@ def test_catch_all_filter(self):
         task_without_headers = Task(headers={})
         self.assertTrue(task_without_headers.matches_filters(filters))
 
+    def test_catch_none_filter(self):
+        filters = []
+        task_with_headers = Task(headers={"foo": "bar", "bar": "baz"})
+        self.assertFalse(task_with_headers.matches_filters(filters))
+
+        task_without_headers = Task(headers={})
+        self.assertFalse(task_without_headers.matches_filters(filters))
+
     def test_negated_filter(self):
         filters = [
             {
@@ -129,6 +137,68 @@ def test_negate_header_existence(self):
         })
         self.assertTrue(task_noplatform.matches_filters(filters))
 
+    def test_negate_header_existence_but_catch_all(self):
+        filters = [
+            {
+                "type": "sample",
+                "platform": "!*"
+            },
+            {}
+        ]
+        task_linux = Task(headers={
+            "type": "sample",
+            "kind": "runnable",
+            "platform": "linux"
+        })
+        self.assertFalse(task_linux.matches_filters(filters))
+
+        task_empty_string = Task(headers={
+            "type": "sample",
+            "kind": "runnable",
+            "platform": ""
+        })
+        self.assertFalse(task_empty_string.matches_filters(filters))
+
+        task_noplatform = Task(headers={
+            "type": "sample",
+            "kind": "runnable",
+        })
+        self.assertTrue(task_noplatform.matches_filters(filters))
+
+        # Platform requirement is defined for type=sample
+        task_different_type = Task(headers={
+            "type": "different",
+            "platform": "hehe",
+        })
+        self.assertTrue(task_different_type.matches_filters(filters))
+
+    def test_require_header_existence(self):
+        filters = [
+            {
+                "type": "sample",
+                "platform": "*"
+            }
+        ]
+        task_linux = Task(headers={
+            "type": "sample",
+            "kind": "runnable",
+            "platform": "linux"
+        })
+        self.assertTrue(task_linux.matches_filters(filters))
+
+        task_empty_string = Task(headers={
+            "type": "sample",
+            "kind": "runnable",
+            "platform": ""
+        })
+        self.assertTrue(task_empty_string.matches_filters(filters))
+
+        task_noplatform = Task(headers={
+            "type": "sample",
+            "kind": "runnable",
+        })
+        self.assertFalse(task_noplatform.matches_filters(filters))
+
     def test_non_string_headers(self):
         # It's not recommended but was allowed by earlier versions
         filters = [
@@ -156,3 +226,38 @@ def test_non_string_headers(self):
         })
         self.assertFalse(task_different_value.matches_filters(filters))
 
+    def test_negated_filter_for_different_type(self):
+        filters = [
+            {
+                "type": "sample",
+                "platform": "win32"
+            },
+            {
+                "type": "different",
+                "platform": "!win32"
+            }
+        ]
+
+        task_sample = Task(headers={
+            "type": "sample",
+            "platform": "win32"
+        })
+        self.assertTrue(task_sample.matches_filters(filters))
+
+        task_different_win32 = Task(headers={
+            "type": "different",
+            "platform": "win32"
+        })
+        self.assertFalse(task_different_win32.matches_filters(filters))
+
+        task_different_win64 = Task(headers={
+            "type": "different",
+            "platform": "win64"
+        })
+        self.assertTrue(task_different_win64.matches_filters(filters))
+
+        task_sample_win64 = Task(headers={
+            "type": "sample",
+            "platform": "win64"
+        })
+        self.assertFalse(task_sample_win64.matches_filters(filters))