diff --git a/cl/lib/test_helpers.py b/cl/lib/test_helpers.py index 04068a4601..13e6887c37 100644 --- a/cl/lib/test_helpers.py +++ b/cl/lib/test_helpers.py @@ -190,6 +190,7 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: "snippet": lambda x: ( x["snippet"] if x.get("snippet") else x["result"].plain_text or "" ), + "ordering_key": lambda x: x["result"].ordering_key, } opinion_cluster_v3_fields = opinion_cluster_v3_v4_common_fields.copy() diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py index 6728e60617..1a49535080 100644 --- a/cl/search/api_serializers.py +++ b/cl/search/api_serializers.py @@ -564,6 +564,7 @@ class Meta: "local_path", "sha1", "cites", + "ordering_key", ) diff --git a/cl/search/documents.py b/cl/search/documents.py index fcae4baaf6..8fb50bc5af 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -1633,6 +1633,7 @@ class OpinionDocument(OpinionBaseDocument): joined_by_ids = fields.ListField( fields.IntegerField(multi=True), ) + ordering_key = fields.IntegerField(attr="ordering_key") class Django: model = Opinion diff --git a/cl/search/management/commands/cl_index_parent_and_child_docs.py b/cl/search/management/commands/cl_index_parent_and_child_docs.py index 366d9fe13e..42121456e6 100644 --- a/cl/search/management/commands/cl_index_parent_and_child_docs.py +++ b/cl/search/management/commands/cl_index_parent_and_child_docs.py @@ -342,6 +342,13 @@ def add_arguments(self, parser): action="store_true", help="Use this flag to only index documents missing in the index.", ) + parser.add_argument( + "--non-null-field", + type=str, + required=False, + choices=["ordering_key"], + help="Include only documents where this field is not Null.", + ) def handle(self, *args, **options): super().handle(*args, **options) @@ -363,6 +370,7 @@ def handle(self, *args, **options): ) start_date: date | None = options.get("start_date", None) end_date: date | None = options.get("end_date", None) + non_null_field: str | None = options.get("non_null_field", None) es_document = None match search_type: @@ -414,8 +422,13 @@ def handle(self, *args, **options): case SEARCH_TYPES.OPINION: if document_type == "child": + filters = {"pk__gte": pk_offset} + # If non_null_field is not None use it as a filter + if non_null_field: + filters[f"{non_null_field}__isnull"] = False + queryset = ( - Opinion.objects.filter(pk__gte=pk_offset) + Opinion.objects.filter(**filters) .order_by("pk") .values_list("pk", "cluster_id") ) diff --git a/cl/search/models.py b/cl/search/models.py index 92ae30c2f4..ec6f819ded 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -3215,6 +3215,7 @@ class Opinion(AbstractDateTimeModel): "html", "plain_text", "sha1", + "ordering_key", ] ) ordering_key = models.IntegerField(null=True, blank=True) diff --git a/cl/search/signals.py b/cl/search/signals.py index abfe8d448c..bf19cb1678 100644 --- a/cl/search/signals.py +++ b/cl/search/signals.py @@ -425,6 +425,7 @@ "html": ["text"], "plain_text": ["text"], "sha1": ["sha1"], + "ordering_key": ["ordering_key"], }, }, }, diff --git a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py index 7014aaa8c8..82f6734e57 100644 --- a/cl/search/tests/tests_es_opinion.py +++ b/cl/search/tests/tests_es_opinion.py @@ -3111,6 +3111,45 @@ def test_opinions_indexing_missing_flag(self): s.count(), 6, msg="Wrong number of Opinions returned." ) + def test_opinions_indexing_non_null_field(self): + """Confirm that the indexing command properly filters out instances to + be indexed based on the non-null field value provided as a parameter. + """ + + s = OpinionClusterDocument.search().query("match_all") + self.assertEqual(s.count(), 0) + + opinion = OpinionFactory.create( + extracted_by_ocr=False, + author=self.person_2, + plain_text="my plain text secret word for queries", + cluster=self.opinion_cluster_1, + local_path="test/search/opinion_doc.doc", + per_curiam=False, + type="020lead", + ordering_key=5, + ) + + # Call cl_index_parent_and_child_docs command for Opinion. + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.OPINION, + queue="celery", + pk_offset=0, + document_type="child", + testing_mode=True, + non_null_field="ordering_key", + ) + + # Confirm 1 Opinions is indexed. + s = OpinionClusterDocument.search() + s = s.query("parent_id", type="opinion", id=self.opinion_cluster_1.pk) + self.assertEqual( + s.count(), 1, msg="Wrong number of Opinions returned." + ) + es_doc = OpinionDocument.get(ES_CHILD_ID(opinion.pk).OPINION) + self.assertEqual(es_doc.ordering_key, opinion.ordering_key) + class EsOpinionsIndexingTest( CountESTasksTestCase, ESIndexTestCase, TransactionTestCase @@ -3285,12 +3324,17 @@ def test_child_document_update_properly(self) -> None: local_path="test/search/opinion_doc.doc", per_curiam=False, type="020lead", + ordering_key=1, ) # Two es_save_document task should be called on creation, one for # opinion and one for opinion_cluster self.reset_and_assert_task_count(expected=2) + # Confirm the new ordering_key field is indexed upon Opinion creation. + es_doc = OpinionDocument.get(ES_CHILD_ID(opinion.pk).OPINION) + self.assertEqual(es_doc.ordering_key, opinion.ordering_key) + with mock.patch( "cl.lib.es_signal_processor.update_es_document.si", side_effect=lambda *args, **kwargs: self.count_task_calls( @@ -3303,6 +3347,22 @@ def test_child_document_update_properly(self) -> None: # One update_es_document task should be called on tracked field update. self.reset_and_assert_task_count(expected=1) + with mock.patch( + "cl.lib.es_signal_processor.update_es_document.si", + side_effect=lambda *args, **kwargs: self.count_task_calls( + update_es_document, True, *args, **kwargs + ), + ): + # Update the ordering_key field in the opinion record. + opinion.ordering_key = None + opinion.save() + + # One update_es_document task should be called on tracked field update. + self.reset_and_assert_task_count(expected=1) + # Confirm the ordering_key has been updated. + es_doc = OpinionDocument.get(ES_CHILD_ID(opinion.pk).OPINION) + self.assertEqual(es_doc.ordering_key, None) + # Update an opinion untracked field. with mock.patch( "cl.lib.es_signal_processor.update_es_document.si",