Skip to content

Commit

Permalink
Merge pull request #468 from hellohaptik/cj_revert_es_sorting_crf_data
Browse files Browse the repository at this point in the history
fix: revert sorting on elasticsearch side for crf data and do it in memory
  • Loading branch information
chiragjn authored Feb 18, 2022
2 parents 0e57edc + 6fdd20f commit 68dea20
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions datastore/elastic_search/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,12 +580,10 @@ def get_crf_data_for_entity_name(connection, index_name, doc_type, entity_name,
]
}
"""

# TODO: Enable sorting on the ES side after verifying that the mappings are set up correctly.
# Currently, datastore.elastic_search.create.create_crf_index does not add keyword fields to
# `language_script` and `sentence`. We also need to add integration tests for these.
data = {
"sort": [
{"language_script.keyword": {"order": "asc"}},
{"sentence.keyword": {"order": "asc"}},
],
"query": {
"bool": {
"must": [
Expand Down Expand Up @@ -619,8 +617,10 @@ def get_crf_data_for_entity_name(connection, index_name, doc_type, entity_name,
# Parse hits
results = search_results['hits']['hits']

language_mapped_results = collections.defaultdict(list)
# TODO: Remove this in-memory sort and switch to sorting on the ES side once the mappings are set up correctly
results.sort(key=lambda _doc: (_doc['_source']['language_script'], _doc['_source']['sentence']))

language_mapped_results = collections.defaultdict(list)
for result in results:
language_mapped_results[result['_source']['language_script']].append(
{
Expand Down

0 comments on commit 68dea20

Please sign in to comment.