Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restore: Added ordering_key to the Opinions index #5005

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cl/lib/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime:
"snippet": lambda x: (
x["snippet"] if x.get("snippet") else x["result"].plain_text or ""
),
"ordering_key": lambda x: x["result"].ordering_key,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Semgrep identified an issue in your code:

return only makes sense inside a function

To resolve this comment:

🔧 No guidance has been designated for this issue. Fix according to your organization's approved methods.

💬 Ignore this finding

Leave a nosemgrep comment directly above or at the end of line 193 like so // nosemgrep: python.lang.maintainability.return.return-not-in-function

Take care to validate that this is not a true positive finding before ignoring it.
Learn more about ignoring code, files and folders here.

You can view more details about this finding in the Semgrep AppSec Platform.

}

opinion_cluster_v3_fields = opinion_cluster_v3_v4_common_fields.copy()
Expand Down
1 change: 1 addition & 0 deletions cl/search/api_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ class Meta:
"local_path",
"sha1",
"cites",
"ordering_key",
)


Expand Down
1 change: 1 addition & 0 deletions cl/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -1633,6 +1633,7 @@ class OpinionDocument(OpinionBaseDocument):
joined_by_ids = fields.ListField(
fields.IntegerField(multi=True),
)
ordering_key = fields.IntegerField(attr="ordering_key")

class Django:
model = Opinion
Expand Down
15 changes: 14 additions & 1 deletion cl/search/management/commands/cl_index_parent_and_child_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,13 @@ def add_arguments(self, parser):
action="store_true",
help="Use this flag to only index documents missing in the index.",
)
parser.add_argument(
"--non-null-field",
type=str,
required=False,
choices=["ordering_key"],
help="Include only documents where this field is not Null.",
)

def handle(self, *args, **options):
super().handle(*args, **options)
Expand All @@ -363,6 +370,7 @@ def handle(self, *args, **options):
)
start_date: date | None = options.get("start_date", None)
end_date: date | None = options.get("end_date", None)
non_null_field: str | None = options.get("non_null_field", None)

es_document = None
match search_type:
Expand Down Expand Up @@ -414,8 +422,13 @@ def handle(self, *args, **options):

case SEARCH_TYPES.OPINION:
if document_type == "child":
filters = {"pk__gte": pk_offset}
# If non_null_field is not None use it as a filter
if non_null_field:
filters[f"{non_null_field}__isnull"] = False

queryset = (
Opinion.objects.filter(pk__gte=pk_offset)
Opinion.objects.filter(**filters)
.order_by("pk")
.values_list("pk", "cluster_id")
)
Expand Down
1 change: 1 addition & 0 deletions cl/search/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3215,6 +3215,7 @@ class Opinion(AbstractDateTimeModel):
"html",
"plain_text",
"sha1",
"ordering_key",
]
)
ordering_key = models.IntegerField(null=True, blank=True)
Expand Down
1 change: 1 addition & 0 deletions cl/search/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@
"html": ["text"],
"plain_text": ["text"],
"sha1": ["sha1"],
"ordering_key": ["ordering_key"],
},
},
},
Expand Down
60 changes: 60 additions & 0 deletions cl/search/tests/tests_es_opinion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3111,6 +3111,45 @@ def test_opinions_indexing_missing_flag(self):
s.count(), 6, msg="Wrong number of Opinions returned."
)

def test_opinions_indexing_non_null_field(self):
"""Confirm that the indexing command properly filters out instances to
be indexed based on the non-null field value provided as a parameter.
"""

s = OpinionClusterDocument.search().query("match_all")
self.assertEqual(s.count(), 0)

opinion = OpinionFactory.create(
extracted_by_ocr=False,
author=self.person_2,
plain_text="my plain text secret word for queries",
cluster=self.opinion_cluster_1,
local_path="test/search/opinion_doc.doc",
per_curiam=False,
type="020lead",
ordering_key=5,
)

# Call cl_index_parent_and_child_docs command for Opinion.
call_command(
"cl_index_parent_and_child_docs",
search_type=SEARCH_TYPES.OPINION,
queue="celery",
pk_offset=0,
document_type="child",
testing_mode=True,
non_null_field="ordering_key",
)

# Confirm 1 Opinions is indexed.
s = OpinionClusterDocument.search()
s = s.query("parent_id", type="opinion", id=self.opinion_cluster_1.pk)
self.assertEqual(
s.count(), 1, msg="Wrong number of Opinions returned."
)
es_doc = OpinionDocument.get(ES_CHILD_ID(opinion.pk).OPINION)
self.assertEqual(es_doc.ordering_key, opinion.ordering_key)


class EsOpinionsIndexingTest(
CountESTasksTestCase, ESIndexTestCase, TransactionTestCase
Expand Down Expand Up @@ -3285,12 +3324,17 @@ def test_child_document_update_properly(self) -> None:
local_path="test/search/opinion_doc.doc",
per_curiam=False,
type="020lead",
ordering_key=1,
)

# Two es_save_document task should be called on creation, one for
# opinion and one for opinion_cluster
self.reset_and_assert_task_count(expected=2)

# Confirm the new ordering_key field is indexed upon Opinion creation.
es_doc = OpinionDocument.get(ES_CHILD_ID(opinion.pk).OPINION)
self.assertEqual(es_doc.ordering_key, opinion.ordering_key)

with mock.patch(
"cl.lib.es_signal_processor.update_es_document.si",
side_effect=lambda *args, **kwargs: self.count_task_calls(
Expand All @@ -3303,6 +3347,22 @@ def test_child_document_update_properly(self) -> None:
# One update_es_document task should be called on tracked field update.
self.reset_and_assert_task_count(expected=1)

with mock.patch(
"cl.lib.es_signal_processor.update_es_document.si",
side_effect=lambda *args, **kwargs: self.count_task_calls(
update_es_document, True, *args, **kwargs
),
):
# Update the ordering_key field in the opinion record.
opinion.ordering_key = None
opinion.save()

# One update_es_document task should be called on tracked field update.
self.reset_and_assert_task_count(expected=1)
# Confirm the ordering_key has been updated.
es_doc = OpinionDocument.get(ES_CHILD_ID(opinion.pk).OPINION)
self.assertEqual(es_doc.ordering_key, None)

# Update an opinion untracked field.
with mock.patch(
"cl.lib.es_signal_processor.update_es_document.si",
Expand Down