refactor(forms): rewrite structure / jexl evaluator

The new structure / jexl evaluator works a bit differently: Instead of trying to replace evaluation contexts during recursive evaluation (for example is_hidden checks), we now have a local JEXL runtime for each field. Also, the JEXL expressions (or their results, rather) are heavily cached and should speed things up significantly. Note this is WIP, many tests are still failing, but many are already succeeding as well. We're trying to keep the test cases 100% unchanged - the only modifications currently are some improved assertion messages, so debugging becomes easier.
projectcaluma · Feb 13, 2025 · d64cbb8 · d64cbb8
1 parent cb4e2f3
commit d64cbb8
Show file tree

Hide file tree

Showing 13 changed files with 1,372 additions and 408 deletions.
diff --git a/caluma/caluma_core/jexl.py b/caluma/caluma_core/jexl.py
@@ -167,6 +167,11 @@ def _length_transform(self, value, *options):
             return None
 
     def evaluate(self, expression, context=None):
+        # log.info(
+        #    "JEXL: evaluating expression <<< %s >>> in context: %s",
+        #    str(expression),
+        #    str(dict(context)),
+        # )
         self._expr_stack.append(expression)
         try:
             return super().evaluate(expression, context)

diff --git a/caluma/caluma_form/domain_logic.py b/caluma/caluma_form/domain_logic.py
@@ -1,4 +1,5 @@
 from graphlib import TopologicalSorter
+from logging import getLogger
 from typing import Optional
 
 from django.db import transaction
@@ -7,11 +8,13 @@
 
 from caluma.caluma_core.models import BaseModel
 from caluma.caluma_core.relay import extract_global_id
-from caluma.caluma_form import models, structure, utils, validators
+from caluma.caluma_form import models, structure2, utils, validators
 from caluma.caluma_form.utils import update_or_create_calc_answer
 from caluma.caluma_user.models import BaseUser
 from caluma.utils import update_model
 
+log = getLogger(__name__)
+
 
 class BaseLogic:
     @staticmethod
@@ -153,17 +156,47 @@ def post_save(answer: models.Answer) -> models.Answer:
         return answer
 
     @staticmethod
-    def update_calc_dependents(answer):
-        if not answer.question.calc_dependents:
+    def update_calc_dependents(answer, update_info):
+        is_table = update_info and answer.question.type == models.Question.TYPE_TABLE
+        if not is_table and not answer.question.calc_dependents:
             return
+        log.debug("update_calc_dependents(%s)", answer)
 
         root_doc = utils.prefetch_document(answer.document.family_id)
-        struc = structure.FieldSet(root_doc, root_doc.form)
+        struc = structure2.FieldSet(root_doc)
+
+        field = struc.find_field_by_answer(answer)
+        if is_table:
+            if not field:
+                breakpoint()
+            # Find all children, and if any are of type calc (and are in the "created"
+            # update info bit), recalculate them.
+            for child in field.get_all_fields():
+                if (
+                    child.question.type == models.Question.TYPE_CALCULATED_FLOAT
+                    and child.parent._document.pk in update_info["created"]
+                ):
+                    update_or_create_calc_answer(
+                        child.question, child.parent._document, struc, field=child
+                    )
+
+        # TODO: if we're in a table row, how do we know that a dependant is
+        # *only* inside the table and therefore only *our* row needs recalculating?
 
-        for question in models.Question.objects.filter(
-            pk__in=answer.question.calc_dependents
-        ):
-            update_or_create_calc_answer(question, root_doc, struc)
+        for dep_slug in field.question.calc_dependents or []:
+            # ... maybe this is enough? Cause this will find the closest "match",
+            # going only to the outer context from a table if the field is not found
+            # inside of it
+            dep_field = field.get_field(dep_slug)
+
+            log.debug(
+                "update_calc_dependents(%s): updating question %s",
+                answer,
+                dep_field.question.pk,
+            )
+            update_or_create_calc_answer(
+                dep_field.question, dep_field.parent._document, struc, field=dep_field
+            )
 
     @classmethod
     @transaction.atomic
@@ -179,10 +212,11 @@ def create(
         if validated_data["question"].type == models.Question.TYPE_FILES:
             cls.update_answer_files(answer, files)
 
+        update_info = None
         if answer.question.type == models.Question.TYPE_TABLE:
-            answer.create_answer_documents(documents)
+            update_info = answer.create_answer_documents(documents)
 
-        cls.update_calc_dependents(answer)
+        cls.update_calc_dependents(answer, update_info)
 
         return answer
 
@@ -198,10 +232,11 @@ def update(cls, answer, validated_data, user: Optional[BaseUser] = None):
 
         BaseLogic.update(answer, validated_data, user)
 
+        update_info = None
         if answer.question.type == models.Question.TYPE_TABLE:
-            answer.create_answer_documents(documents)
+            update_info = answer.create_answer_documents(documents)
 
-        cls.update_calc_dependents(answer)
+        cls.update_calc_dependents(answer, update_info)
 
         answer.refresh_from_db()
         return answer
@@ -307,9 +342,11 @@ def _initialize_calculated_answers(document):
         sort them topoligically, and then update their answer.
         """
         root_doc = utils.prefetch_document(document.family_id)
-        struc = structure.FieldSet(root_doc, root_doc.form)
+        struc = structure2.FieldSet(root_doc)
 
         calculated_questions = (
+            # TODO we could fetch those as fields from the structure. Minus a DB query,
+            # and we'd already have the fields as well
             models.Form.get_all_questions([(document.family or document).form_id])
             .filter(type=models.Question.TYPE_CALCULATED_FLOAT)
             .values("slug", "calc_dependents")
@@ -328,9 +365,14 @@ def _initialize_calculated_answers(document):
         _questions = models.Question.objects.in_bulk(sorted_question_slugs)
         for slug in sorted_question_slugs:
             print("question", slug)
-            update_or_create_calc_answer(
-                _questions[slug], document, struc, update_dependents=False
-            )
+            for field in struc.find_all_fields_by_slug(slug):
+                update_or_create_calc_answer(
+                    _questions[slug],
+                    field.parent._document,
+                    struc,
+                    update_dependents=False,
+                    field=field,
+                )
 
         return document
 

diff --git a/caluma/caluma_form/jexl2.py b/caluma/caluma_form/jexl2.py
@@ -0,0 +1,122 @@
+import weakref
+from collections import ChainMap
+from functools import partial
+
+from pyjexl.analysis import ValidatingAnalyzer
+
+from caluma.caluma_form.jexl import QuestionMissing
+
+from ..caluma_core.jexl import (
+    JEXL,
+    ExtractTransformArgumentAnalyzer,
+    ExtractTransformSubjectAnalyzer,
+    ExtractTransformSubjectAndArgumentsAnalyzer,
+)
+from .structure import Field
+
+"""
+Rewrite of the JEXL handling code.
+
+Design principles:
+
+* The JEXL classes do not deal with context switching between questions anymore
+* The QuestionJexl class only sets up the "runtime", any context is used from the
+  structure2 code
+* We only deal with the *evaluation*, no transform/extraction is happening here - that code
+  is mostly fine and doesn't need a rewrite
+* Caching is done by the structure2 code, not here
+* JEXL evaluation happens lazily, but the results are cached.
+"""
+
+
+class QuestionValidatingAnalyzer(ValidatingAnalyzer):
+    def visit_Transform(self, transform):
+        if transform.name == "answer" and not isinstance(transform.subject.value, str):
+            yield f"{transform.subject.value} is not a valid question slug."
+
+        yield from super().visit_Transform(transform)
+
+
+class QuestionJexl2(JEXL):
+    def __init__(self, field, **kwargs):
+        super().__init__(**kwargs)
+        self.field = weakref.proxy(field)
+
+        self.add_transform("answer", self.answer_transform)
+
+    def answer_transform(self, question_slug, *args):
+        field = self.field.get_field(question_slug)
+
+        def _default_or_empty():
+            if len(args):
+                return args[0]
+            return field.question.empty_value()
+
+        if not field:
+            if args:
+                return args[0]
+            else:
+                # No default arg, so we must raise an exception
+                raise QuestionMissing(
+                    f"Question `{question_slug}` could not be found in form {self.field.get_form()}"
+                )
+
+        if field.is_hidden():
+            # Hidden fields *always* return the empty value, even if we have
+            # a default
+            return field.question.empty_value()
+        elif field.is_empty():
+            # not hidden, but empty
+            return _default_or_empty()
+
+        return field.get_value()
+
+    def validate(self, expression, **kwargs):
+        return super().validate(expression, QuestionValidatingAnalyzer)
+
+    def extract_referenced_questions(self, expr):
+        transforms = ["answer"]
+        yield from self.analyze(
+            expr, partial(ExtractTransformSubjectAnalyzer, transforms=transforms)
+        )
+
+    def extract_referenced_questions_with_arguments(self, expr):
+        transforms = ["answer"]
+        yield from self.analyze(
+            expr,
+            partial(ExtractTransformSubjectAndArgumentsAnalyzer, transforms=transforms),
+        )
+
+    def extract_referenced_mapby_questions(self, expr):
+        transforms = ["mapby"]
+        yield from self.analyze(
+            expr, partial(ExtractTransformArgumentAnalyzer, transforms=transforms)
+        )
+
+    def _get_referenced_fields(self, field: Field, expr: str):
+        deps = list(self.extract_referenced_questions_with_arguments(expr))
+        referenced_fields = [self._structure.get_field(slug) for slug, _ in deps]
+
+        referenced_slugs = [ref.question.slug for ref in referenced_fields if ref]
+
+        for slug, args in deps:
+            required = len(args) == 0
+            if slug not in referenced_slugs and required:
+                raise QuestionMissing(
+                    f"Question `{slug}` could not be found in form {field.form}"
+                )
+
+        return [field for field in referenced_fields if field]
+
+    def evaluate(self, expr, raise_on_error=True):
+        try:
+            return super().evaluate(expr, ChainMap(self.context))
+        except (
+            TypeError,
+            ValueError,
+            ZeroDivisionError,
+            AttributeError,
+        ):
+            if raise_on_error:
+                raise
+            return None
diff --git a/caluma/caluma_form/models.py b/caluma/caluma_form/models.py
@@ -518,8 +518,14 @@ def delete(self, *args, **kwargs):
         super().delete(args, kwargs)
 
     def create_answer_documents(self, documents):
+        """Create AnswerDocuments for this table question, and attach them.
+
+        Return a dict with two keys: "created", and "updated", each
+        containing a list of document IDs that were either created or kept.
+        """
         family = getattr(self.document, "family", None)
         document_ids = [document.pk for document in documents]
+        res = {"updated": [], "created": []}
 
         for sort, document_id in enumerate(reversed(document_ids), start=1):
             ans_doc, created = AnswerDocument.objects.get_or_create(
@@ -532,6 +538,10 @@ def create_answer_documents(self, documents):
                 # Already-existing documents are already in the family,
                 # so we're updating only the newly attached rows
                 ans_doc.document.set_family(family)
+                res["created"].append(document_id)
+            else:
+                res["updated"].append(document_id)
+        return res
 
     def unlink_unused_rows(self, docs_to_keep):
         existing = AnswerDocument.objects.filter(answer=self).exclude(