Skip to content

Commit

Permalink
refactor(forms): rewrite structure / jexl evaluator
Browse files Browse the repository at this point in the history
The new structure / jexl evaluator works a bit differently: Instead of
trying to replace evaluation contexts during recursive evaluation (for
example is_hidden checks), we now have a local JEXL runtime for each
field. Also, the JEXL expressions (or their results, rather) are heavily
cached and should speed things up significantly.

Note this is WIP, many tests are still failing, but many are already
succeeding as well.

We're trying to keep the test cases 100% unchanged - the only
modifications currently are some improved assertion messages, so
debugging becomes easier.
  • Loading branch information
winged authored and Your Name committed Feb 13, 2025
1 parent cb4e2f3 commit d64cbb8
Show file tree
Hide file tree
Showing 13 changed files with 1,372 additions and 408 deletions.
5 changes: 5 additions & 0 deletions caluma/caluma_core/jexl.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,11 @@ def _length_transform(self, value, *options):
return None

def evaluate(self, expression, context=None):
# log.info(
# "JEXL: evaluating expression <<< %s >>> in context: %s",
# str(expression),
# str(dict(context)),
# )
self._expr_stack.append(expression)
try:
return super().evaluate(expression, context)
Expand Down
74 changes: 58 additions & 16 deletions caluma/caluma_form/domain_logic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from graphlib import TopologicalSorter
from logging import getLogger
from typing import Optional

from django.db import transaction
Expand All @@ -7,11 +8,13 @@

from caluma.caluma_core.models import BaseModel
from caluma.caluma_core.relay import extract_global_id
from caluma.caluma_form import models, structure, utils, validators
from caluma.caluma_form import models, structure2, utils, validators
from caluma.caluma_form.utils import update_or_create_calc_answer
from caluma.caluma_user.models import BaseUser
from caluma.utils import update_model

log = getLogger(__name__)


class BaseLogic:
@staticmethod
Expand Down Expand Up @@ -153,17 +156,47 @@ def post_save(answer: models.Answer) -> models.Answer:
return answer

@staticmethod
def update_calc_dependents(answer):
if not answer.question.calc_dependents:
def update_calc_dependents(answer, update_info):
is_table = update_info and answer.question.type == models.Question.TYPE_TABLE
if not is_table and not answer.question.calc_dependents:
return
log.debug("update_calc_dependents(%s)", answer)

root_doc = utils.prefetch_document(answer.document.family_id)
struc = structure.FieldSet(root_doc, root_doc.form)
struc = structure2.FieldSet(root_doc)

field = struc.find_field_by_answer(answer)
if is_table:
if not field:
breakpoint()
# Find all children, and if any are of type calc (and are in the "created"
# update info bit), recalculate them.
for child in field.get_all_fields():
if (
child.question.type == models.Question.TYPE_CALCULATED_FLOAT
and child.parent._document.pk in update_info["created"]
):
update_or_create_calc_answer(
child.question, child.parent._document, struc, field=child
)

# TODO: if we're in a table row, how do we know that a dependant is
# *only* inside the table and therefore only *our* row needs recalculating?

for question in models.Question.objects.filter(
pk__in=answer.question.calc_dependents
):
update_or_create_calc_answer(question, root_doc, struc)
for dep_slug in field.question.calc_dependents or []:
# ... maybe this is enough? Cause this will find the closest "match",
# going only to the outer context from a table if the field is not found
# inside of it
dep_field = field.get_field(dep_slug)

log.debug(
"update_calc_dependents(%s): updating question %s",
answer,
dep_field.question.pk,
)
update_or_create_calc_answer(
dep_field.question, dep_field.parent._document, struc, field=dep_field
)

@classmethod
@transaction.atomic
Expand All @@ -179,10 +212,11 @@ def create(
if validated_data["question"].type == models.Question.TYPE_FILES:
cls.update_answer_files(answer, files)

update_info = None
if answer.question.type == models.Question.TYPE_TABLE:
answer.create_answer_documents(documents)
update_info = answer.create_answer_documents(documents)

cls.update_calc_dependents(answer)
cls.update_calc_dependents(answer, update_info)

return answer

Expand All @@ -198,10 +232,11 @@ def update(cls, answer, validated_data, user: Optional[BaseUser] = None):

BaseLogic.update(answer, validated_data, user)

update_info = None
if answer.question.type == models.Question.TYPE_TABLE:
answer.create_answer_documents(documents)
update_info = answer.create_answer_documents(documents)

cls.update_calc_dependents(answer)
cls.update_calc_dependents(answer, update_info)

answer.refresh_from_db()
return answer
Expand Down Expand Up @@ -307,9 +342,11 @@ def _initialize_calculated_answers(document):
sort them topoligically, and then update their answer.
"""
root_doc = utils.prefetch_document(document.family_id)
struc = structure.FieldSet(root_doc, root_doc.form)
struc = structure2.FieldSet(root_doc)

calculated_questions = (
# TODO we could fetch those as fields from the structure. Minus a DB query,
# and we'd already have the fields as well
models.Form.get_all_questions([(document.family or document).form_id])
.filter(type=models.Question.TYPE_CALCULATED_FLOAT)
.values("slug", "calc_dependents")
Expand All @@ -328,9 +365,14 @@ def _initialize_calculated_answers(document):
_questions = models.Question.objects.in_bulk(sorted_question_slugs)
for slug in sorted_question_slugs:
print("question", slug)
update_or_create_calc_answer(
_questions[slug], document, struc, update_dependents=False
)
for field in struc.find_all_fields_by_slug(slug):
update_or_create_calc_answer(
_questions[slug],
field.parent._document,
struc,
update_dependents=False,
field=field,
)

return document

Expand Down
122 changes: 122 additions & 0 deletions caluma/caluma_form/jexl2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import weakref
from collections import ChainMap
from functools import partial

from pyjexl.analysis import ValidatingAnalyzer

from caluma.caluma_form.jexl import QuestionMissing

from ..caluma_core.jexl import (
JEXL,
ExtractTransformArgumentAnalyzer,
ExtractTransformSubjectAnalyzer,
ExtractTransformSubjectAndArgumentsAnalyzer,
)
from .structure import Field

"""
Rewrite of the JEXL handling code.
Design principles:
* The JEXL classes do not deal with context switching between questions anymore
* The QuestionJexl class only sets up the "runtime", any context is used from the
structure2 code
* We only deal with the *evaluation*, no transform/extraction is happening here - that code
is mostly fine and doesn't need a rewrite
* Caching is done by the structure2 code, not here
* JEXL evaluation happens lazily, but the results are cached.
"""


class QuestionValidatingAnalyzer(ValidatingAnalyzer):
def visit_Transform(self, transform):
if transform.name == "answer" and not isinstance(transform.subject.value, str):
yield f"{transform.subject.value} is not a valid question slug."

yield from super().visit_Transform(transform)


class QuestionJexl2(JEXL):
def __init__(self, field, **kwargs):
super().__init__(**kwargs)
self.field = weakref.proxy(field)

self.add_transform("answer", self.answer_transform)

def answer_transform(self, question_slug, *args):
field = self.field.get_field(question_slug)

def _default_or_empty():
if len(args):
return args[0]
return field.question.empty_value()

if not field:
if args:
return args[0]
else:
# No default arg, so we must raise an exception
raise QuestionMissing(
f"Question `{question_slug}` could not be found in form {self.field.get_form()}"
)

if field.is_hidden():
# Hidden fields *always* return the empty value, even if we have
# a default
return field.question.empty_value()
elif field.is_empty():
# not hidden, but empty
return _default_or_empty()

return field.get_value()

def validate(self, expression, **kwargs):
return super().validate(expression, QuestionValidatingAnalyzer)

def extract_referenced_questions(self, expr):
transforms = ["answer"]
yield from self.analyze(
expr, partial(ExtractTransformSubjectAnalyzer, transforms=transforms)
)

def extract_referenced_questions_with_arguments(self, expr):
transforms = ["answer"]
yield from self.analyze(
expr,
partial(ExtractTransformSubjectAndArgumentsAnalyzer, transforms=transforms),
)

def extract_referenced_mapby_questions(self, expr):
transforms = ["mapby"]
yield from self.analyze(
expr, partial(ExtractTransformArgumentAnalyzer, transforms=transforms)
)

def _get_referenced_fields(self, field: Field, expr: str):
deps = list(self.extract_referenced_questions_with_arguments(expr))
referenced_fields = [self._structure.get_field(slug) for slug, _ in deps]

referenced_slugs = [ref.question.slug for ref in referenced_fields if ref]

for slug, args in deps:
required = len(args) == 0
if slug not in referenced_slugs and required:
raise QuestionMissing(
f"Question `{slug}` could not be found in form {field.form}"
)

return [field for field in referenced_fields if field]

def evaluate(self, expr, raise_on_error=True):
try:
return super().evaluate(expr, ChainMap(self.context))
except (
TypeError,
ValueError,
ZeroDivisionError,
AttributeError,
):
if raise_on_error:
raise
return None
10 changes: 10 additions & 0 deletions caluma/caluma_form/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,8 +518,14 @@ def delete(self, *args, **kwargs):
super().delete(args, kwargs)

def create_answer_documents(self, documents):
"""Create AnswerDocuments for this table question, and attach them.
Return a dict with two keys: "created", and "updated", each
containing a list of document IDs that were either created or kept.
"""
family = getattr(self.document, "family", None)
document_ids = [document.pk for document in documents]
res = {"updated": [], "created": []}

for sort, document_id in enumerate(reversed(document_ids), start=1):
ans_doc, created = AnswerDocument.objects.get_or_create(
Expand All @@ -532,6 +538,10 @@ def create_answer_documents(self, documents):
# Already-existing documents are already in the family,
# so we're updating only the newly attached rows
ans_doc.document.set_family(family)
res["created"].append(document_id)
else:
res["updated"].append(document_id)
return res

def unlink_unused_rows(self, docs_to_keep):
existing = AnswerDocument.objects.filter(answer=self).exclude(
Expand Down
Loading

0 comments on commit d64cbb8

Please sign in to comment.