From 9d96c72d7bd3f3b5543922c5004e4ffb9a3cd1f2 Mon Sep 17 00:00:00 2001
From: chiragjn <jain.chirag925@gmail.com>
Date: Fri, 12 Apr 2019 02:28:39 +0530
Subject: [PATCH 01/12] Add a get_timezone utility function to set timezone
 correctly when passed on to another constructor

---
 .../detectors/temporal/date/date_detection.py | 19 +++-------
 .../temporal/date/en/date_detection.py        | 24 +++++-------
 .../temporal/date/standard_date_regex.py      | 19 +++-------
 .../temporal/time/en/time_detection.py        |  3 +-
 .../temporal/time/standard_time_regex.py      | 11 +-----
 .../detectors/temporal/time/time_detection.py |  3 +-
 ner_v2/detectors/temporal/utils.py            | 38 ++++++++++++++++++-
 7 files changed, 65 insertions(+), 52 deletions(-)

diff --git a/ner_v2/detectors/temporal/date/date_detection.py b/ner_v2/detectors/temporal/date/date_detection.py
index 3f219d06e..04e52a210 100644
--- a/ner_v2/detectors/temporal/date/date_detection.py
+++ b/ner_v2/detectors/temporal/date/date_detection.py
@@ -5,19 +5,17 @@
 import os
 import re
 
-import pytz
-
 import models.crf.constant as model_constant
 import ner_v2.detectors.temporal.constant as temporal_constant
-from chatbot_ner.config import ner_logger
 from language_utilities.constant import ENGLISH_LANG, TRANSLATED_TEXT
 from language_utilities.utils import translate_text
 from models.crf.models import Models
-from ner_constants import FROM_MESSAGE, FROM_MODEL_VERIFIED, FROM_MODEL_NOT_VERIFIED, FROM_STRUCTURE_VALUE_VERIFIED, \
-    FROM_STRUCTURE_VALUE_NOT_VERIFIED, FROM_FALLBACK_VALUE
+from ner_constants import (FROM_MESSAGE, FROM_MODEL_VERIFIED, FROM_MODEL_NOT_VERIFIED, FROM_STRUCTURE_VALUE_VERIFIED,
+                           FROM_STRUCTURE_VALUE_NOT_VERIFIED, FROM_FALLBACK_VALUE)
 from ner_v2.detectors.base_detector import BaseDetector
-from ner_v2.detectors.temporal.constant import TYPE_EXACT, TYPE_EVERYDAY, TYPE_PAST, \
-    TYPE_NEXT_DAY, TYPE_REPEAT_DAY
+from ner_v2.detectors.temporal.constant import (TYPE_EXACT, TYPE_EVERYDAY, TYPE_PAST,
+                                                TYPE_NEXT_DAY, TYPE_REPEAT_DAY)
+from ner_v2.detectors.temporal.utils import get_timezone
 from ner_v2.detectors.utils import get_lang_data_path
 
 
@@ -765,12 +763,7 @@ def __init__(self, entity_name, language=ENGLISH_LANG, timezone='UTC', past_date
         self.original_date_text = []
         self.entity_name = entity_name
         self.tag = '__' + entity_name + '__'
-        try:
-            self.timezone = pytz.timezone(timezone)
-        except Exception as e:
-            ner_logger.debug('Timezone error: %s ' % e)
-            self.timezone = pytz.timezone('UTC')
-            ner_logger.debug('Default timezone passed as "UTC"')
+        self.timezone = get_timezone(timezone)
         self.now_date = datetime.datetime.now(tz=self.timezone)
         self.bot_message = None
         self.language = language
diff --git a/ner_v2/detectors/temporal/date/en/date_detection.py b/ner_v2/detectors/temporal/date/en/date_detection.py
index 12036b6f0..3ce428f67 100644
--- a/ner_v2/detectors/temporal/date/en/date_detection.py
+++ b/ner_v2/detectors/temporal/date/en/date_detection.py
@@ -2,13 +2,13 @@
 import datetime
 import re
 
-import pytz
-
-from chatbot_ner.config import ner_logger
-from ner_v2.detectors.temporal.constant import TYPE_EXACT, TYPE_EVERYDAY, TYPE_TODAY, TYPE_TOMORROW, TYPE_YESTERDAY, \
-    TYPE_DAY_AFTER, TYPE_DAY_BEFORE, TYPE_N_DAYS_AFTER, TYPE_NEXT_DAY, TYPE_THIS_DAY, TYPE_POSSIBLE_DAY, WEEKDAYS, \
-    REPEAT_WEEKDAYS, WEEKENDS, REPEAT_WEEKENDS, TYPE_REPEAT_DAY, MONTH_DICT, DAY_DICT, ORDINALS_MAP
-from ner_v2.detectors.temporal.utils import get_weekdays_for_month, get_next_date_with_dd, get_previous_date_with_dd
+from ner_v2.detectors.temporal.constant import (TYPE_EXACT, TYPE_EVERYDAY, TYPE_TODAY, TYPE_TOMORROW, TYPE_YESTERDAY,
+                                                TYPE_DAY_AFTER, TYPE_DAY_BEFORE, TYPE_N_DAYS_AFTER, TYPE_NEXT_DAY,
+                                                TYPE_THIS_DAY, TYPE_POSSIBLE_DAY, WEEKDAYS,
+                                                REPEAT_WEEKDAYS, WEEKENDS, REPEAT_WEEKENDS, TYPE_REPEAT_DAY,
+                                                MONTH_DICT, DAY_DICT, ORDINALS_MAP)
+from ner_v2.detectors.temporal.utils import (get_weekdays_for_month, get_next_date_with_dd, get_previous_date_with_dd,
+                                             get_timezone)
 
 
 class DateDetector(object):
@@ -90,12 +90,7 @@ def __init__(self, entity_name, timezone='UTC', past_date_referenced=False):
         self.day_dictionary = {}
         self.entity_name = entity_name
         self.tag = '__' + entity_name + '__'
-        try:
-            self.timezone = pytz.timezone(timezone)
-        except Exception as e:
-            ner_logger.debug('Timezone error: %s ' % e)
-            self.timezone = pytz.timezone('UTC')
-            ner_logger.debug('Default timezone passed as "UTC"')
+        self.timezone = get_timezone(timezone)
         self.now_date = datetime.datetime.now(tz=self.timezone)
         self.month_dictionary = MONTH_DICT
         self.day_dictionary = DAY_DICT
@@ -875,7 +870,8 @@ def _yesterdays_date(self, date_list=None, original_list=None):
             original_list = []
         if date_list is None:
             date_list = []
-        regex_pattern = re.compile(r'\b((yesterday|sterday|yesterdy|yestrdy|yestrday|previous day|prev day|prevday))\b')
+        regex_pattern = re.compile(
+            r'\b((yesterday|sterday|yesterdy|yestrdy|yestrday|previous day|prev day|prevday))\b')
         patterns = regex_pattern.findall(self.processed_text.lower())
         for pattern in patterns:
             original = pattern[0]
diff --git a/ner_v2/detectors/temporal/date/standard_date_regex.py b/ner_v2/detectors/temporal/date/standard_date_regex.py
index c4dceb475..efcea1aa4 100644
--- a/ner_v2/detectors/temporal/date/standard_date_regex.py
+++ b/ner_v2/detectors/temporal/date/standard_date_regex.py
@@ -3,15 +3,13 @@
 import datetime
 import re
 
-import pytz
 from dateutil.relativedelta import relativedelta
 
-from chatbot_ner.config import ner_logger
-from ner_v2.detectors.temporal.constant import TYPE_EXACT
-from ner_v2.detectors.temporal.constant import DATE_CONSTANT_FILE, DATETIME_CONSTANT_FILE, \
-    RELATIVE_DATE, DATE_LITERAL_TYPE, MONTH_LITERAL_TYPE, WEEKDAY_TYPE, \
-    MONTH_TYPE, ADD_DIFF_DATETIME_TYPE, MONTH_DATE_REF_TYPE, NUMERALS_CONSTANT_FILE
-from ner_v2.detectors.temporal.utils import next_weekday, nth_weekday, get_tuple_dict
+from ner_v2.detectors.temporal.constant import (DATE_CONSTANT_FILE, DATETIME_CONSTANT_FILE,
+                                                RELATIVE_DATE, DATE_LITERAL_TYPE, MONTH_LITERAL_TYPE, WEEKDAY_TYPE,
+                                                MONTH_TYPE, ADD_DIFF_DATETIME_TYPE, MONTH_DATE_REF_TYPE,
+                                                NUMERALS_CONSTANT_FILE, TYPE_EXACT)
+from ner_v2.detectors.temporal.utils import next_weekday, nth_weekday, get_tuple_dict, get_timezone
 
 
 class BaseRegexDate(object):
@@ -32,12 +30,7 @@ def __init__(self, entity_name, data_directory_path, timezone='UTC', past_date_r
         self.original_date_text = []
         self.entity_name = entity_name
         self.tag = '__' + entity_name + '__'
-        try:
-            self.timezone = pytz.timezone(timezone)
-        except Exception as e:
-            ner_logger.debug('Timezone error: %s ' % e)
-            self.timezone = pytz.timezone('UTC')
-            ner_logger.debug('Default timezone passed as "UTC"')
+        self.timezone = get_timezone(timezone)
 
         self.now_date = datetime.datetime.now(tz=self.timezone)
         self.bot_message = None
diff --git a/ner_v2/detectors/temporal/time/en/time_detection.py b/ner_v2/detectors/temporal/time/en/time_detection.py
index f1463bd50..707199065 100644
--- a/ner_v2/detectors/temporal/time/en/time_detection.py
+++ b/ner_v2/detectors/temporal/time/en/time_detection.py
@@ -4,6 +4,7 @@
 import pytz
 
 from ner_v2.detectors.temporal.constant import AM_MERIDIEM, PM_MERIDIEM, TWELVE_HOUR, EVERY_TIME_TYPE
+from ner_v2.detectors.temporal.utils import get_timezone
 
 
 class TimeDetector(object):
@@ -79,7 +80,7 @@ def __init__(self, entity_name, timezone='UTC'):
         self.original_time_text = []
         self.tag = '__' + entity_name + '__'
         self.bot_message = None
-        self.timezone = timezone or 'UTC'
+        self.timezone = get_timezone(timezone)
 
     def set_bot_message(self, bot_message):
         """
diff --git a/ner_v2/detectors/temporal/time/standard_time_regex.py b/ner_v2/detectors/temporal/time/standard_time_regex.py
index 9b94c0e80..c9644ba7d 100644
--- a/ner_v2/detectors/temporal/time/standard_time_regex.py
+++ b/ner_v2/detectors/temporal/time/standard_time_regex.py
@@ -5,14 +5,12 @@
 import os
 import re
 
-import pytz
-
 from chatbot_ner.config import ner_logger
 from ner_v2.detectors.temporal.constant import (DATETIME_CONSTANT_FILE, ADD_DIFF_DATETIME_TYPE, NUMERALS_CONSTANT_FILE,
                                                 TIME_CONSTANT_FILE, REF_DATETIME_TYPE, HOUR_TIME_TYPE,
                                                 MINUTE_TIME_TYPE, DAYTIME_MERIDIEM, AM_MERIDIEM, PM_MERIDIEM,
                                                 TWELVE_HOUR)
-from ner_v2.detectors.temporal.utils import get_tuple_dict, get_hour_min_diff
+from ner_v2.detectors.temporal.utils import get_tuple_dict, get_hour_min_diff, get_timezone
 
 
 class BaseRegexTime(object):
@@ -29,12 +27,7 @@ def __init__(self, entity_name, data_directory_path, timezone='UTC'):
         self.processed_text = ''
         self.entity_name = entity_name
         self.tag = '__' + entity_name + '__'
-        try:
-            self.timezone = pytz.timezone(timezone)
-        except Exception as e:
-            ner_logger.debug('Timezone error: %s ' % e)
-            self.timezone = pytz.timezone('UTC')
-            ner_logger.debug('Default timezone passed as "UTC"')
+        self.timezone = get_timezone(timezone)
         self.now_date = datetime.datetime.now(tz=self.timezone)
         self.bot_message = None
 
diff --git a/ner_v2/detectors/temporal/time/time_detection.py b/ner_v2/detectors/temporal/time/time_detection.py
index 509c14407..e7df5c386 100644
--- a/ner_v2/detectors/temporal/time/time_detection.py
+++ b/ner_v2/detectors/temporal/time/time_detection.py
@@ -4,6 +4,7 @@
 
 from language_utilities.constant import ENGLISH_LANG
 from ner_v2.detectors.base_detector import BaseDetector
+from ner_v2.detectors.temporal.utils import get_timezone
 from ner_v2.detectors.utils import get_lang_data_path
 
 
@@ -60,7 +61,7 @@ def __init__(self, entity_name='time', timezone='UTC', language=ENGLISH_LANG):
         self.time = []
         self.original_time_text = []
         self.tag = '__' + entity_name + '__'
-        self.timezone = timezone or 'UTC'
+        self.timezone = get_timezone(timezone)
         self.language = language
 
         try:
diff --git a/ner_v2/detectors/temporal/utils.py b/ner_v2/detectors/temporal/utils.py
index bb224cc18..264d5f8fb 100644
--- a/ner_v2/detectors/temporal/utils.py
+++ b/ner_v2/detectors/temporal/utils.py
@@ -1,8 +1,11 @@
 import calendar
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, tzinfo  # FIXME: Change import to `import datetime`
 
 import pandas as pd
+import pytz
+import six
 
+from chatbot_ner.config import ner_logger
 from ner_v2.detectors.temporal.constant import POSITIVE_TIME_DIFF, NEGATIVE_TIME_DIFF, CONSTANT_FILE_KEY
 
 
@@ -260,3 +263,36 @@ def get_next_date_with_dd(dd, after_datetime):
         mm, yy = get_next_month_number(mm=mm, yy=yy)
 
     return None, None, None
+
+
+def get_timezone(timezone, ignore_errors=True):
+    # type: (Union[datetime.tzinfo, str, unicode], bool) -> datetime.tzinfo
+    """
+    Return a datetime.tzinfo (pytz timezone object). If `timezone` is a str, try constructing a pytz
+    timezone object with it. If an invalid timezone is given and `ignore_errors` is True, a UTC timezone object
+    will be returned. If `timezone` is already a pytz-style tzinfo object (has `localize`), it is returned as is
+
+    Args:
+        timezone (str or datetime.tzinfo): Either a valid timezone string or datetime.tzinfo object
+        ignore_errors (bool, optional): when set to True, ignore errors and return a pytz.UTC when error occurs. When
+            set to False, raise exception when invalid timezone is given. Defaults to True.
+
+    Returns:
+        datetime.tzinfo: A pytz timezone object
+
+    """
+    if (not isinstance(timezone, six.string_types) and
+            isinstance(timezone, tzinfo) and
+            hasattr(timezone, 'localize')):
+        return timezone
+
+    try:
+        timezone = pytz.timezone(timezone)
+    except Exception as e:
+        if ignore_errors:
+            ner_logger.debug('Timezone error: %s ' % e)
+            timezone = pytz.timezone('UTC')
+            ner_logger.debug('Using "UTC" as default timezone')
+        else:
+            raise
+    return timezone

From e55564672e87373ae7a32d7657ab22e6b7aefb29 Mon Sep 17 00:00:00 2001
From: chiragjn <jain.chirag925@gmail.com>
Date: Fri, 12 Apr 2019 02:41:51 +0530
Subject: [PATCH 02/12] fix a date initialisation error

---
 ner_v2/detectors/temporal/time/en/time_detection.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/ner_v2/detectors/temporal/time/en/time_detection.py b/ner_v2/detectors/temporal/time/en/time_detection.py
index 707199065..ae7291ce7 100644
--- a/ner_v2/detectors/temporal/time/en/time_detection.py
+++ b/ner_v2/detectors/temporal/time/en/time_detection.py
@@ -1,8 +1,5 @@
 import re
-from datetime import datetime
-
-import pytz
-
+import datetime
 from ner_v2.detectors.temporal.constant import AM_MERIDIEM, PM_MERIDIEM, TWELVE_HOUR, EVERY_TIME_TYPE
 from ner_v2.detectors.temporal.utils import get_timezone
 
@@ -81,6 +78,7 @@ def __init__(self, entity_name, timezone='UTC'):
         self.tag = '__' + entity_name + '__'
         self.bot_message = None
         self.timezone = get_timezone(timezone)
+        self.now_date = datetime.datetime.now(self.timezone)
 
     def set_bot_message(self, bot_message):
         """
@@ -1135,7 +1133,7 @@ def _get_meridiem(self, hours, mins):
         Returns
             meridiem type (str): returns the meridiem type whether its am and pm
         """
-        current_datetime = datetime.now(pytz.timezone(self.timezone))
+        current_datetime = self.now_date
         current_hour = current_datetime.hour
         current_min = current_datetime.minute
         if hours == 0 or hours >= TWELVE_HOUR:

From 784e22bf352c481b56a9321bfcf9fcd4be867c93 Mon Sep 17 00:00:00 2001
From: Prathmesh Ghadge <prathmesh071@gmail.com>
Date: Tue, 16 Apr 2019 10:54:24 +0530
Subject: [PATCH 03/12] Exclude "miscellaneous" pull requests from release
 notes

Exclude "miscellaneous" pull requests from release notes
---
 .github/release-drafter.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml
index 5026bfca6..047479fbd 100644
--- a/.github/release-drafter.yml
+++ b/.github/release-drafter.yml
@@ -19,3 +19,5 @@ categories:
     label: packages-updated
   - title: 👺 Miscellaneous 
     label: miscellaneous
+exclude-labels:
+  - miscellaneous

From e973adf8edb218a9ba33e52236fc7958bb85ac65 Mon Sep 17 00:00:00 2001
From: viraj <anchan.viraj@gmail.com>
Date: Tue, 16 Apr 2019 15:11:52 +0530
Subject: [PATCH 04/12] add shopping quantity units for hindi

add shopping quantity units for hindi
---
 ner_v2/detectors/numeral/number/hi/data/units.csv | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ner_v2/detectors/numeral/number/hi/data/units.csv b/ner_v2/detectors/numeral/number/hi/data/units.csv
index e7bc47fed..53b9c82ca 100644
--- a/ner_v2/detectors/numeral/number/hi/data/units.csv
+++ b/ner_v2/detectors/numeral/number/hi/data/units.csv
@@ -1,3 +1,9 @@
 unit_type,unit_value,unit_variants
 currency,rupees,rupees | rupee | rs | rupya | rupaya | rupaye | rupye | rupay | paisa | paise | inr | रूपीस | रुपया | रूपए| पैसा| पैसे| ₹  
 currency,dollar,Dollar | usd | डॉलर | $
+package_metric_unit,mg,mg | milligram | milligrams | mgs | मिलीग्राम | मिलिग्राम | मिल्लीग्राम | मिलीग्राम्स | मिल्लीग्राम्स | मिलिग्रामस
+package_metric_unit,gms,gms | grams | gram | gm | g | ग्राम | ग्राम्स
+package_metric_unit,kg,kilogram | kilograms | kg | kilo | kgs | किलोग्राम | किलोग्राम्स | किलो
+package_metric_unit,ml,ml | milliliter | millilitre | milliliters | millilitres | मिलीलीटर | मिलिलिटर | मिललिलिटर | मिली लीटर
+package_metric_unit,ltr,ltr | litre | liter | litres | liters | l | लीटर | लिटर
+package_metric_unit,pcs,pcs | pc | pieces | piece | पीस | पिस | टुकड़े | टुकड़ा

From 92d82a00649ac8b2f9c0cce88fc114eaea425fc5 Mon Sep 17 00:00:00 2001
From: chiragjn <jain.chirag925@gmail.com>
Date: Thu, 18 Apr 2019 13:26:36 +0530
Subject: [PATCH 05/12] Skip next from-to pair if a pair of dates is detected
 in current pair

---
 ner_v2/detectors/temporal/date/date_detection.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/ner_v2/detectors/temporal/date/date_detection.py b/ner_v2/detectors/temporal/date/date_detection.py
index 04e52a210..d1de987d3 100644
--- a/ner_v2/detectors/temporal/date/date_detection.py
+++ b/ner_v2/detectors/temporal/date/date_detection.py
@@ -1,4 +1,5 @@
 # coding=utf-8
+import six
 import copy
 import datetime
 import importlib
@@ -215,9 +216,13 @@ def _detect_range(self):
                     date_dicts[1][temporal_constant.DATE_END_RANGE_PROPERTY] = True
                     date_dict_list.extend(date_dicts)
         else:
-            parts = iter(re.split(r'\s+(?:\-|to|till|se)\s+', self.processed_text))
+            parts = re.split(r'\s+(?:\-|to|till|se)\s+', self.processed_text)
+            skip_next_pair = False
             _day_of_week_types = [temporal_constant.TYPE_THIS_DAY, temporal_constant.TYPE_NEXT_DAY]
-            for start_part, end_part in zip(parts, parts):  # Consumes 2 items at a time from parts
+            for start_part, end_part in six.moves.zip(parts, parts[1:]):
+                if skip_next_pair:
+                    continue
+                skip_next_pair = False
                 start_date_list = self._date_dict_from_text(text=start_part, start_range_property=True)
                 end_date_list = self._date_dict_from_text(text=end_part, end_range_property=True)
                 if start_date_list and end_date_list:
@@ -235,6 +240,7 @@ def _detect_range(self):
                         end_date_list = [possible_end_date]
                     date_dict_list.extend(start_date_list)
                     date_dict_list.extend(end_date_list)
+                    skip_next_pair = True
         return date_dict_list
 
     def _fix_day_range(self, start_date_dict, end_date_dict):

From 1dbe3f8ccaaf7d32e12ba1f7fb0fd057b0f4a13b Mon Sep 17 00:00:00 2001
From: chiragjn <jain.chirag925@gmail.com>
Date: Thu, 18 Apr 2019 13:38:58 +0530
Subject: [PATCH 06/12] Split by conjunctions first before finding date pairs

---
 .../detectors/temporal/date/date_detection.py | 51 ++++++++++---------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/ner_v2/detectors/temporal/date/date_detection.py b/ner_v2/detectors/temporal/date/date_detection.py
index d1de987d3..815d0f3fd 100644
--- a/ner_v2/detectors/temporal/date/date_detection.py
+++ b/ner_v2/detectors/temporal/date/date_detection.py
@@ -1,4 +1,6 @@
 # coding=utf-8
+import itertools
+
 import six
 import copy
 import datetime
@@ -216,31 +218,32 @@ def _detect_range(self):
                     date_dicts[1][temporal_constant.DATE_END_RANGE_PROPERTY] = True
                     date_dict_list.extend(date_dicts)
         else:
-            parts = re.split(r'\s+(?:\-|to|till|se)\s+', self.processed_text)
-            skip_next_pair = False
-            _day_of_week_types = [temporal_constant.TYPE_THIS_DAY, temporal_constant.TYPE_NEXT_DAY]
-            for start_part, end_part in six.moves.zip(parts, parts[1:]):
-                if skip_next_pair:
-                    continue
+            for sentence_part in re.split(r'\s+(?:and|aur|&|or)\s+', self.processed_text):
+                parts = re.split(r'\s+(?:\-|to|till|se)\s+', sentence_part)
                 skip_next_pair = False
-                start_date_list = self._date_dict_from_text(text=start_part, start_range_property=True)
-                end_date_list = self._date_dict_from_text(text=end_part, end_range_property=True)
-                if start_date_list and end_date_list:
-                    possible_start_date = start_date_list[0]
-                    possible_end_date = end_date_list[-1]
-                    start_date_type = possible_start_date[temporal_constant.DATE_VALUE]['type']
-                    end_date_type = possible_end_date[temporal_constant.DATE_VALUE]['type']
-                    if start_date_type in _day_of_week_types and end_date_type in _day_of_week_types:
-                        start_date_list, end_date_list = self._fix_day_range(start_date_dict=possible_start_date,
-                                                                             end_date_dict=possible_end_date)
-                    else:
-                        # FIXME: Assumes end_date > start_date. Also can return dates in past when date detector
-                        # returns dates in the past
-                        start_date_list = [possible_start_date]
-                        end_date_list = [possible_end_date]
-                    date_dict_list.extend(start_date_list)
-                    date_dict_list.extend(end_date_list)
-                    skip_next_pair = True
+                _day_of_week_types = [temporal_constant.TYPE_THIS_DAY, temporal_constant.TYPE_NEXT_DAY]
+                for start_part, end_part in six.moves.zip(parts, parts[1:]):
+                    if skip_next_pair:
+                        skip_next_pair = False
+                        continue
+                    start_date_list = self._date_dict_from_text(text=start_part, start_range_property=True)
+                    end_date_list = self._date_dict_from_text(text=end_part, end_range_property=True)
+                    if start_date_list and end_date_list:
+                        possible_start_date = start_date_list[0]
+                        possible_end_date = end_date_list[-1]
+                        start_date_type = possible_start_date[temporal_constant.DATE_VALUE]['type']
+                        end_date_type = possible_end_date[temporal_constant.DATE_VALUE]['type']
+                        if start_date_type in _day_of_week_types and end_date_type in _day_of_week_types:
+                            start_date_list, end_date_list = self._fix_day_range(start_date_dict=possible_start_date,
+                                                                                 end_date_dict=possible_end_date)
+                        else:
+                            # FIXME: Assumes end_date > start_date. Also can return dates in past when date detector
+                            # returns dates in the past
+                            start_date_list = [possible_start_date]
+                            end_date_list = [possible_end_date]
+                        date_dict_list.extend(start_date_list)
+                        date_dict_list.extend(end_date_list)
+                        skip_next_pair = True
         return date_dict_list
 
     def _fix_day_range(self, start_date_dict, end_date_dict):

From 20d4af0f8283aef721f52c8fb48cdb86c8e8826b Mon Sep 17 00:00:00 2001
From: chiragjn <jain.chirag925@gmail.com>
Date: Thu, 18 Apr 2019 13:45:14 +0530
Subject: [PATCH 07/12] Fix lint errors

---
 ner_v2/detectors/temporal/date/date_detection.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ner_v2/detectors/temporal/date/date_detection.py b/ner_v2/detectors/temporal/date/date_detection.py
index 815d0f3fd..20214e041 100644
--- a/ner_v2/detectors/temporal/date/date_detection.py
+++ b/ner_v2/detectors/temporal/date/date_detection.py
@@ -1,13 +1,14 @@
 # coding=utf-8
-import itertools
+from __future__ import absolute_import
 
-import six
 import copy
 import datetime
 import importlib
 import os
 import re
 
+import six
+
 import models.crf.constant as model_constant
 import ner_v2.detectors.temporal.constant as temporal_constant
 from language_utilities.constant import ENGLISH_LANG, TRANSLATED_TEXT

From a6fb328897e86035de5fc445c7dde35d61dd723f Mon Sep 17 00:00:00 2001
From: chiragjn <jain.chirag925@gmail.com>
Date: Thu, 25 Apr 2019 13:15:46 +0530
Subject: [PATCH 08/12] Consider only the number of digits in integer part of
 floating point number when validating

---
 .../numeral/number/number_detection.py        | 23 +++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/ner_v2/detectors/numeral/number/number_detection.py b/ner_v2/detectors/numeral/number/number_detection.py
index d2aec851b..cbfb0ebb8 100644
--- a/ner_v2/detectors/numeral/number/number_detection.py
+++ b/ner_v2/detectors/numeral/number/number_detection.py
@@ -1,8 +1,9 @@
 import importlib
+import math
 import os
 
-from ner_v2.detectors.base_detector import BaseDetector
 from language_utilities.constant import ENGLISH_LANG
+from ner_v2.detectors.base_detector import BaseDetector
 from ner_v2.detectors.numeral.constant import NUMBER_DETECTION_RETURN_DICT_VALUE, NUMBER_DETECTION_RETURN_DICT_UNIT
 from ner_v2.detectors.utils import get_lang_data_path
 
@@ -50,6 +51,7 @@ class NumberDetector(BaseDetector):
         max_digit: maximum digit that a number can take
 
     """
+
     @staticmethod
     def get_supported_languages():
         """
@@ -136,7 +138,7 @@ def detect_entity(self, text, **kwargs):
         for number_value_dict, original_text in zip(number_data[0], number_data[1]):
             number_value = number_value_dict[NUMBER_DETECTION_RETURN_DICT_VALUE]
             number_unit = number_value_dict[NUMBER_DETECTION_RETURN_DICT_UNIT]
-            if self.min_digit <= len(number_value) <= self.max_digit:
+            if self.min_digit <= self._num_digits(number_value) <= self.max_digit:
                 if self.unit_type and (number_unit is None or
                                        self.language_number_detector.units_map[number_unit].type != self.unit_type):
                     continue
@@ -165,3 +167,20 @@ def set_min_max_digits(self, min_digit, max_digit):
         """
         self.min_digit = min_digit
         self.max_digit = max_digit
+
+    @staticmethod
+    def _num_digits(value):
+        """
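+        Calculate the number of digits in the integer part of the given number
+
+        Args:
+            value (str or float or int): number (or numeric string) whose integer part digits are counted
+
+        Returns:
+            int: number of digits in the integer part of the given number (1 when the integer part is 0)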
+
+        Raises:
+            ValueError: if the given string cannot be cast to float
+        """
+        v = abs(float(value))
+        return 1 if int(v) == 0 else (1 + int(math.log10(v)))

From 07953e34b11c9e588712323cf899ad01abe6ddd8 Mon Sep 17 00:00:00 2001
From: chiragjn <jain.chirag925@gmail.com>
Date: Fri, 26 Apr 2019 15:24:33 +0530
Subject: [PATCH 09/12] Fetch value only in source_ and add deprecation
 warnings

---
 datastore/constants.py            |  1 +
 datastore/datastore.py            |  8 +++
 datastore/elastic_search/query.py | 95 +++++++++++++++++++------------
 3 files changed, 68 insertions(+), 36 deletions(-)

diff --git a/datastore/constants.py b/datastore/constants.py
index 41e656c36..f7f76da19 100644
--- a/datastore/constants.py
+++ b/datastore/constants.py
@@ -7,6 +7,7 @@
 ELASTICSEARCH = 'elasticsearch'
 ELASTICSEARCH_SEARCH_SIZE = ES_SEARCH_SIZE
 ELASTICSEARCH_BULK_HELPER_MESSAGE_SIZE = ES_BULK_MSG_SIZE
+ELASTICSEARCH_VALUES_SEARCH_SIZE = 300000
 
 # settings dictionary key constants
 ENGINE = 'engine'
diff --git a/datastore/datastore.py b/datastore/datastore.py
index 7799f383b..6f5680400 100644
--- a/datastore/datastore.py
+++ b/datastore/datastore.py
@@ -1,3 +1,5 @@
+import warnings
+
 import elastic_search
 from chatbot_ner.config import ner_logger, CHATBOT_NER_DATASTORE
 from lib.singleton import Singleton
@@ -120,6 +122,7 @@ def create(self, **kwargs):
                     **kwargs
                 )
 
+    # FIXME: populate does not consider language of the variants
     def populate(self, entity_data_directory_path=DEFAULT_ENTITY_DATA_DIRECTORY, csv_file_paths=None, **kwargs):
         """
         Populates the datastore from csv files stored in directory path indicated by entity_data_directory_path and
@@ -181,6 +184,7 @@ def delete(self, **kwargs):
                                                ignore=[400, 404],
                                                **kwargs)
 
+    # FIXME: Deprecated, remove
     def get_entity_dictionary(self, entity_name, **kwargs):
         """
         Args:
@@ -214,6 +218,7 @@ def get_entity_dictionary(self, entity_name, **kwargs):
                 ...
                 u'koramangala': [u'koramangala']}
         """
+        warnings.warn("get_entity_dictionary() is deprecated; Please use get_entity_data()", DeprecationWarning)
         if self._client_or_connection is None:
             self._connect()
         results_dictionary = {}
@@ -308,6 +313,7 @@ def delete_entity(self, entity_name, **kwargs):
                                                           ignore=[400, 404],
                                                           **kwargs)
 
+    # FIXME: repopulate does not consider language of the variants
     def repopulate(self, entity_data_directory_path=DEFAULT_ENTITY_DATA_DIRECTORY, csv_file_paths=None, **kwargs):
         """
         Deletes the existing data and repopulates it for entities from csv files stored in directory path indicated by
@@ -378,6 +384,7 @@ def exists(self):
 
         return False
 
+    # FIXME: Deprecated, remove
     def update_entity_data(self, entity_name, entity_data, language_script, **kwargs):
         """
         This method is used to populate the the entity dictionary
@@ -389,6 +396,7 @@ def update_entity_data(self, entity_name, entity_data, language_script, **kwargs
                 For Elasticsearch:
                 Refer http://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.bulk
         """
+        warnings.warn("update_entity_data() is deprecated; Please use add_entity_data()", DeprecationWarning)
         if self._client_or_connection is None:
             self._connect()
 
diff --git a/datastore/elastic_search/query.py b/datastore/elastic_search/query.py
index c26569e5d..da49d982c 100644
--- a/datastore/elastic_search/query.py
+++ b/datastore/elastic_search/query.py
@@ -1,21 +1,24 @@
 from __future__ import absolute_import
 
+import collections
 # std imports
 import copy
-from six import string_types
+import json
 import re
-import collections
+import warnings
+
+from six import string_types
 
 # Local imports
 from datastore import constants
 from external_api.constants import SENTENCE_LIST, ENTITY_LIST
 from language_utilities.constant import ENGLISH_LANG
 from lib.nlp.const import TOKENIZER
-import json
 
 log_prefix = 'datastore.elastic_search.query'
 
 
+# Deprecated
 def dictionary_query(connection, index_name, doc_type, entity_name, **kwargs):
     """
     Get all variants data for a entity stored in the index as a dictionary
@@ -32,6 +35,7 @@ def dictionary_query(connection, index_name, doc_type, entity_name, **kwargs):
         dictionary, search results of the 'term' query on entity_name, mapping keys to lists containing
         synonyms/variants of the key
     """
+    warnings.warn("dictionary_query() is deprecated; Please use get_entity_data()", DeprecationWarning)
     results_dictionary = {}
     data = {
         'query': {
@@ -197,7 +201,7 @@ def get_entity_unique_values(connection, index_name, doc_type, entity_name, valu
             "unique_values": {
                 "terms": {
                     "field": "value.keyword",
-                    "size": 300000
+                    "size": constants.ELASTICSEARCH_VALUES_SEARCH_SIZE,
                 }
             }
         },
@@ -283,12 +287,14 @@ def full_text_query(connection, index_name, doc_type, entity_name, sentences, fu
          u'mumbai': u'mumbai',
          u'pune': u'pune'}
     """
-    index = {'index': index_name, 'type': doc_type}
+    index_header = json.dumps({'index': index_name, 'type': doc_type})
     data = []
-    for sentence_ in sentences:
-        query = _generate_es_search_dictionary(entity_name, sentence_, fuzziness_threshold,
+    for sentence in sentences:
+        query = _generate_es_search_dictionary(entity_name=entity_name,
+                                               text=sentence,
+                                               fuzziness_threshold=fuzziness_threshold,
                                                language_script=search_language_script)
-        data.extend([json.dumps(index), json.dumps(query)])
+        data.extend([index_header, json.dumps(query)])
     data = '\n'.join(data)
 
     kwargs = dict(kwargs, body=data, doc_type=doc_type, index=index_name)
@@ -359,17 +365,25 @@ def _get_dynamic_fuzziness_threshold(fuzzy_setting):
     return fuzzy_setting
 
 
-def _generate_es_search_dictionary(entity_name, text, fuzziness_threshold, language_script=None):
+def _generate_es_search_dictionary(entity_name, text,
+                                   fuzziness_threshold=1,
+                                   language_script=ENGLISH_LANG,
+                                   size=constants.ELASTICSEARCH_SEARCH_SIZE,
+                                   as_json=False):
     """
     Generates compound elasticsearch boolean search query dictionary for the sentence. The query generated
     searches for entity_name in the index and returns search results for the matched word (of sentence)
      only if entity_name is found.
 
     Args:
-        entity_name: name of the entity to perform a 'term' query on
-        text: The text on which we need to identify the enitites.
-        fuzziness_threshold: fuzziness_threshold for elasticsearch match query 'fuzziness' parameter
-        language_script: language of documents to be searched, optional, defaults to None
+        entity_name (str): name of the entity to perform a 'term' query on
+        text (str): The text on which we need to identify the entities.
+        fuzziness_threshold (int, optional): fuzziness_threshold for elasticsearch match query 'fuzziness' parameter.
+            Defaults to 1
+        language_script (str, optional): language of documents to be searched, defaults to 'en'
+        size (int, optional): number of records to return, defaults to `ELASTICSEARCH_SEARCH_SIZE`
+        as_json (bool, optional): Return the generated query as a JSON string. Useful for debugging.
+            Defaults to False
 
     Returns:
         dictionary, the search query for the text
@@ -386,24 +400,18 @@ def _generate_es_search_dictionary(entity_name, text, fuzziness_threshold, langu
     must_terms.append(term_dict_entity_name)
 
     # search on language_script, add english as default search
-    if language_script is not None:
-        term_dict_language = {
-            'terms': {
-                'language_script': [language_script, ENGLISH_LANG]
-            }
+    term_dict_language = {
+        'terms': {
+            'language_script': [ENGLISH_LANG]
         }
-        must_terms.append(term_dict_language)
-
-    data = {
-        'query': {
-            'bool': {
-                'must': must_terms,
-                'should': [],
-                'minimum_should_match': 1
-            }
-        }, 'size': constants.ELASTICSEARCH_SEARCH_SIZE
     }
-    query_should_data = []
+
+    if language_script != ENGLISH_LANG:
+        term_dict_language['terms']['language_script'].append(language_script)
+
+    must_terms.append(term_dict_language)
+
+    should_terms = []
     query = {
         'match': {
             'variants': {
@@ -413,15 +421,30 @@ def _generate_es_search_dictionary(entity_name, text, fuzziness_threshold, langu
             }
         }
     }
-    query_should_data.append(query)
-    data['query']['bool']['should'] = query_should_data
-    data['highlight'] = {
-        'fields': {
-            'variants': {}
+    should_terms.append(query)
+
+    data = {
+        '_source': ['value'],
+        'query': {
+            'bool': {
+                'must': must_terms,
+                'should': should_terms,
+                'minimum_should_match': 1
+            },
+            'highlight': {
+                'fields': {
+                    'variants': {}
+                },
+                'order': 'score',
+                'number_of_fragments': 20
+            }
         },
-        'order': 'score',
-        'number_of_fragments': 20
+        'size': size
     }
+
+    if as_json:
+        data = json.dumps(data)
+
     return data
 
 

From c9592b13b1f17b07efdcbe9a5de31f3b35b98f92 Mon Sep 17 00:00:00 2001
From: chiragjn <jain.chirag925@gmail.com>
Date: Fri, 26 Apr 2019 15:30:18 +0530
Subject: [PATCH 10/12] Fix lint errors

---
 datastore/datastore.py               | 13 ++++++++-----
 datastore/elastic_search/__init__.py |  2 +-
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/datastore/datastore.py b/datastore/datastore.py
index 6f5680400..33e7f59d3 100644
--- a/datastore/datastore.py
+++ b/datastore/datastore.py
@@ -1,12 +1,15 @@
+from __future__ import absolute_import
+
 import warnings
 
-import elastic_search
 from chatbot_ner.config import ner_logger, CHATBOT_NER_DATASTORE
+from datastore import elastic_search
+from datastore.constants import (ELASTICSEARCH, ENGINE, ELASTICSEARCH_INDEX_NAME, DEFAULT_ENTITY_DATA_DIRECTORY,
+                                 ELASTICSEARCH_DOC_TYPE, ELASTICSEARCH_CRF_DATA_INDEX_NAME,
+                                 ELASTICSEARCH_CRF_DATA_DOC_TYPE)
+from datastore.exceptions import (DataStoreSettingsImproperlyConfiguredException, EngineNotImplementedException,
+                                  EngineConnectionException, NonESEngineTransferException, IndexNotFoundException)
 from lib.singleton import Singleton
-from .constants import (ELASTICSEARCH, ENGINE, ELASTICSEARCH_INDEX_NAME, DEFAULT_ENTITY_DATA_DIRECTORY,
-                        ELASTICSEARCH_DOC_TYPE, ELASTICSEARCH_CRF_DATA_INDEX_NAME, ELASTICSEARCH_CRF_DATA_DOC_TYPE)
-from .exceptions import (DataStoreSettingsImproperlyConfiguredException, EngineNotImplementedException,
-                         EngineConnectionException, NonESEngineTransferException, IndexNotFoundException)
 
 
 class DataStore(object):
diff --git a/datastore/elastic_search/__init__.py b/datastore/elastic_search/__init__.py
index 20b6d27e0..34654dbfb 100644
--- a/datastore/elastic_search/__init__.py
+++ b/datastore/elastic_search/__init__.py
@@ -2,4 +2,4 @@
 import create
 import populate
 import query
-import transfer
\ No newline at end of file
+import transfer

From 243f621e4025bbaf1715bf54a7da22fd7306edde Mon Sep 17 00:00:00 2001
From: chiragjn <jain.chirag925@gmail.com>
Date: Fri, 26 Apr 2019 15:41:27 +0530
Subject: [PATCH 11/12] Add highlight at the correct level in ES query

---
 datastore/elastic_search/query.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/datastore/elastic_search/query.py b/datastore/elastic_search/query.py
index da49d982c..aaf69bb8d 100644
--- a/datastore/elastic_search/query.py
+++ b/datastore/elastic_search/query.py
@@ -294,7 +294,8 @@ def full_text_query(connection, index_name, doc_type, entity_name, sentences, fu
                                                text=sentence,
                                                fuzziness_threshold=fuzziness_threshold,
                                                language_script=search_language_script)
-        data.extend([index_header, json.dumps(query)])
+        data.append(index_header)
+        data.append(json.dumps(query))
     data = '\n'.join(data)
 
     kwargs = dict(kwargs, body=data, doc_type=doc_type, index=index_name)
@@ -431,13 +432,13 @@ def _generate_es_search_dictionary(entity_name, text,
                 'should': should_terms,
                 'minimum_should_match': 1
             },
-            'highlight': {
-                'fields': {
-                    'variants': {}
-                },
-                'order': 'score',
-                'number_of_fragments': 20
-            }
+        },
+        'highlight': {
+            'fields': {
+                'variants': {}
+            },
+            'order': 'score',
+            'number_of_fragments': 20
         },
         'size': size
     }

From e4a4dae519532d85fe8349d5f08bbbfa5f796603 Mon Sep 17 00:00:00 2001
From: chiragjn <jain.chirag925@gmail.com>
Date: Fri, 26 Apr 2019 16:29:32 +0530
Subject: [PATCH 12/12] Switch to unified highlighter for faster search on
 larger documents

---
 datastore/elastic_search/query.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/datastore/elastic_search/query.py b/datastore/elastic_search/query.py
index aaf69bb8d..b6cbbac14 100644
--- a/datastore/elastic_search/query.py
+++ b/datastore/elastic_search/query.py
@@ -435,7 +435,9 @@ def _generate_es_search_dictionary(entity_name, text,
         },
         'highlight': {
             'fields': {
-                'variants': {}
+                'variants': {
+                    'type': 'unified'  # experimental in 5.x, default in 6.x and 7.x. Faster than 'plain'
+                }
             },
             'order': 'score',
             'number_of_fragments': 20