From 297eb9baea57e85e99d64ec2f0843ca863759122 Mon Sep 17 00:00:00 2001
From: harjinder7 <harjinder7@haptik.ai>
Date: Tue, 8 Nov 2022 05:46:30 +0000
Subject: [PATCH] Remove debug logs and clean up the code

---
 language_utilities/constant.py                |  2 +-
 ner_v2/api.py                                 |  5 +-
 ner_v2/detectors/base_detector.py             | 12 ----
 .../numeral/number/number_detection.py        |  7 +--
 .../number/standard_number_detector.py        |  1 +
 .../numeral/number/zh-TW/number_detection.py  | 30 ++++------
 .../phone_number/phone_number_detection.py    | 59 ++++++-------------
 7 files changed, 34 insertions(+), 82 deletions(-)

diff --git a/language_utilities/constant.py b/language_utilities/constant.py
index e1fda55a..5e91a061 100644
--- a/language_utilities/constant.py
+++ b/language_utilities/constant.py
@@ -30,7 +30,7 @@
 PORTUGUESE_LANG = 'pt'
 TURKISH_LANG = 'tr'
 
-CHINESE_LANG = 'zh-TW'
+CHINESE_TRADITIONAL_LANG = 'zh-TW'
 
 # language translation status
 TRANSLATED_TEXT = 'translated_text'
diff --git a/ner_v2/api.py b/ner_v2/api.py
index bfb0e0f7..f80485f5 100644
--- a/ner_v2/api.py
+++ b/ner_v2/api.py
@@ -10,7 +10,7 @@
 
 from chatbot_ner.config import ner_logger
 from datastore.exceptions import DataStoreRequestException
-from language_utilities.constant import ENGLISH_LANG
+from language_utilities.constant import ENGLISH_LANG, CHINESE_TRADITIONAL_LANG
 from ner_constants import PARAMETER_MESSAGE, PARAMETER_ENTITY_NAME, PARAMETER_STRUCTURED_VALUE, \
     PARAMETER_FALLBACK_VALUE, \
     PARAMETER_BOT_MESSAGE, PARAMETER_TIMEZONE, PARAMETER_LANGUAGE_SCRIPT, PARAMETER_SOURCE_LANGUAGE, \
@@ -634,7 +634,7 @@ def phone_number(request):
         ner_logger.debug('Entity Name %s' % entity_name)
         ner_logger.debug('Source Language %s' % language)
 
-        if language == 'zh-TW':
+        if language == CHINESE_TRADITIONAL_LANG:
             phone_number_detection = ChinesePhoneDetector(entity_name=entity_name, language=language,
                                                            locale=parameters_dict[PARAMETER_LOCALE])
         else:
@@ -644,7 +644,6 @@ def phone_number(request):
 
         ner_logger.debug(parameters_dict)
         if isinstance(message, six.string_types):
-            ner_logger.debug(f'++ API msg : {message}')
             entity_output = phone_number_detection.detect(message=message,
                                                           structured_value=parameters_dict[PARAMETER_STRUCTURED_VALUE],
                                                           fallback_value=parameters_dict[PARAMETER_FALLBACK_VALUE],
diff --git a/ner_v2/detectors/base_detector.py b/ner_v2/detectors/base_detector.py
index 7bd5ae8b..8fc9c6ba 100644
--- a/ner_v2/detectors/base_detector.py
+++ b/ner_v2/detectors/base_detector.py
@@ -10,7 +10,6 @@
 from ner_constants import (FROM_STRUCTURE_VALUE_VERIFIED, FROM_STRUCTURE_VALUE_NOT_VERIFIED, FROM_MESSAGE,
                            FROM_FALLBACK_VALUE, ORIGINAL_TEXT, ENTITY_VALUE, DETECTION_METHOD,
                            DETECTION_LANGUAGE, ENTITY_VALUE_DICT_KEY)
-from chatbot_ner.config import ner_logger
 
 class BaseDetector(object):
     """
@@ -32,9 +31,7 @@ def __init__(self, language=ENGLISH_LANG, translation_enabled=False):
              language (str): ISO 639 language code of language of original query
              translation_enabled (bool): Decides to either enable or disable translation API
         """
-        ner_logger.debug(f'-= BASE : {language}')
         self._language = language
-        ner_logger.debug(f'-= PHONE : {self._language}')
         self._processing_language = ENGLISH_LANG
         self._translation_enabled = translation_enabled
         self._set_language_processing_script()
@@ -59,7 +56,6 @@ def detect_entity(self, text, **kwargs):
             tuple: Two lists of same length containing detected values and original substring from text which is used
             to derive the detected value respectively
         """
-        ner_logger.debug(f'>>> base detector detect entity')
         return [], []
 
     def _set_language_processing_script(self):
@@ -67,7 +63,6 @@ def _set_language_processing_script(self):
         This method is used to decide the language in which detector should run it's logic based on
         supported language and query language for which subclass is initialized
         """
-        ner_logger.debug(f'-+-+ {self._language} , {self.supported_languages}')
         if self._language in self.supported_languages:
             self._processing_language = self._language
         elif ENGLISH_LANG in self.supported_languages and self._translation_enabled:
@@ -135,8 +130,6 @@ def detect(self, message=None, structured_value=None, fallback_value=None, **kwa
                     >> [{'detection': 'message', 'original_text': 'inferno', 'entity_value': {'value': u'Inferno'}}]
 
         """
-
-        ner_logger.debug(f'==== M :{message}')
         if self._language != self._processing_language and self._translation_enabled:
             if structured_value:
                 translation_output = translate_text(structured_value, self._language,
@@ -148,23 +141,18 @@ def detect(self, message=None, structured_value=None, fallback_value=None, **kwa
                 message = translation_output[TRANSLATED_TEXT] if translation_output['status'] else None
 
         text = structured_value if structured_value else message
-        ner_logger.debug(f'==== M :{message}')
         entity_list, original_text_list = self.detect_entity(text=text, **kwargs)
         if structured_value:
-            ner_logger.debug(f'structured ==== {entity_list}, {original_text_list}')
             if entity_list:
                 value, method, original_text = entity_list, FROM_STRUCTURE_VALUE_VERIFIED, original_text_list
             else:
                 value, method, original_text = [structured_value], FROM_STRUCTURE_VALUE_NOT_VERIFIED, \
                                                [structured_value]
         elif entity_list:
-            ner_logger.debug(f'entity list ==== {entity_list}, {original_text_list}')
             value, method, original_text = entity_list, FROM_MESSAGE, original_text_list
         elif fallback_value:
-            ner_logger.debug(f'fallback value ==== {entity_list}, {original_text_list}')
             value, method, original_text = [fallback_value], FROM_FALLBACK_VALUE, [fallback_value]
         else:
-            ner_logger.debug(f'None ==== {entity_list}, {original_text_list}')
             return None
 
         return self.output_entity_dict_list(entity_value_list=value, original_text_list=original_text,
diff --git a/ner_v2/detectors/numeral/number/number_detection.py b/ner_v2/detectors/numeral/number/number_detection.py
index bc3b2eb7..ee97925d 100644
--- a/ner_v2/detectors/numeral/number/number_detection.py
+++ b/ner_v2/detectors/numeral/number/number_detection.py
@@ -23,8 +23,6 @@
 from ner_v2.detectors.numeral.constant import NUMBER_DETECTION_RETURN_DICT_VALUE, NUMBER_DETECTION_RETURN_DICT_UNIT
 from ner_v2.detectors.utils import get_lang_data_path
 
-from chatbot_ner.config import ner_logger
-
 COMMON_NON_NUMERIC_PUNCTUATIONS = re.escape('!"#%&\'()*/;<=>?@[\\]^_`{|}~।')
 
 
@@ -113,12 +111,10 @@ def __init__(self, entity_name, language=ENGLISH_LANG, unit_type=None, detect_wi
         self.detect_without_unit = detect_without_unit
         self.punctuations_to_filter = re.compile(f'[{COMMON_NON_NUMERIC_PUNCTUATIONS}]')
         try:
-            ner_logger.debug(f'MODEL LOADING FOR : {self.language}')
             number_detector_module = importlib.import_module(
                 'ner_v2.detectors.numeral.number.{0}.number_detection'.format(self.language))
             self.language_number_detector = number_detector_module.NumberDetector(entity_name=self.entity_name,
                                                                                   unit_type=self.unit_type)
-            ner_logger.debug(f'MODEL LOADED FOR : {self.language}')
         except ImportError:
             standard_number_regex = importlib.import_module(
                 'ner_v2.detectors.numeral.number.standard_number_detector'
@@ -131,6 +127,9 @@ def __init__(self, entity_name, language=ENGLISH_LANG, unit_type=None, detect_wi
             )
 
     def get_language_number_detector(self):
+        """
+        Return the language-specific number detector used by this number detector.
+        """
         return self.language_number_detector
 
     @property
diff --git a/ner_v2/detectors/numeral/number/standard_number_detector.py b/ner_v2/detectors/numeral/number/standard_number_detector.py
index 4dae7f91..32fc1774 100644
--- a/ner_v2/detectors/numeral/number/standard_number_detector.py
+++ b/ner_v2/detectors/numeral/number/standard_number_detector.py
@@ -318,6 +318,7 @@ def _detect_number_from_digit(self, number_list=None, original_list=None):
         start_span = 0
         end_span = -1
         spanned_text = self.processed_text
+
         regex_numeric_patterns = re.compile(r'(([\d,]+\.?[\d]*)\s?(' + self.scale_map_choices + r'))[\s\-\:]' +
                                             r'|([\d,]+\.?[\d]*)', re.UNICODE)
         patterns = regex_numeric_patterns.findall(processed_text)
diff --git a/ner_v2/detectors/numeral/number/zh-TW/number_detection.py b/ner_v2/detectors/numeral/number/zh-TW/number_detection.py
index 4ab4a6a3..a1404833 100644
--- a/ner_v2/detectors/numeral/number/zh-TW/number_detection.py
+++ b/ner_v2/detectors/numeral/number/zh-TW/number_detection.py
@@ -15,18 +15,6 @@
     NUMBER_DETECTION_RETURN_DICT_UNIT, NUMBER_DETECTION_RETURN_DICT_VALUE
 from ner_v2.detectors.numeral.number.standard_number_detector import BaseNumberDetector
 
-from chatbot_ner.config import ner_logger
-
-
-"""
-mapping some special character for chinese (traditional)
-use to replace in text string
-"""
-special_chars_mapping = {
-    ',' : '、', # comma character
-    '.' : '點' #dian ( period )
-}
-
 
 class NumberDetector(BaseNumberDetector):
     """
@@ -51,7 +39,11 @@ def __init__(self, entity_name='number', unit_type=None):
         self.detector_preferences = [
             self._detect_number_from_text
         ]
-        ner_logger.debug(f'-=-= CHINESE NUMBER DETECTOR')
+
+        self.special_chars_mapping = {
+            ',': '、',  # Chinese comma character
+            '.': '點'  # dian (period / decimal point)
+        }
 
     def _get_base_map_choices(self, base_map):
         number_set = set()
@@ -86,7 +78,7 @@ def _have_digits_only(self, text=None, scale_map=None):
     
     def replace_special_chars(self, text=None):
         text = text or ''
-        for _char, _native_char in special_chars_mapping.items():
+        for _char, _native_char in self.special_chars_mapping.items():
             text = text.replace(_native_char, _char)
         return text
             
@@ -109,13 +101,13 @@ def _detect_number_from_text(self, number_list=None, original_list=None):
         
         rgx_pattern = r'([{}]+)({}?([{}]*))'.format(
             self.base_numbers_map_full,
-            special_chars_mapping.get('.', '\.'),
+            self.special_chars_mapping.get('.', '\.'),
             self.base_numbers_map_full
         )
         regex_digit_patterns = re.compile(rgx_pattern)
         patterns = regex_digit_patterns.findall(self.processed_text)
         for pattern in patterns:
-            full_number = number, after_decimal, original_text =  None, None, None
+            full_number, number, original_text =  None, None, None
             if pattern[0].strip():
                 original_text = pattern[0].strip()
                 span = re.search(original_text, spanned_text).span()
@@ -129,7 +121,6 @@ def _detect_number_from_text(self, number_list=None, original_list=None):
                     
                 if number.isnumeric():
                     full_number = number
-            
 
             if full_number:
                 _pattern = re.compile(re.escape(original_text), flags=_re_flags)
@@ -144,9 +135,8 @@ def _detect_number_from_text(self, number_list=None, original_list=None):
         return number_list, original_list
     
     def extract_digits_only(self, text, with_scale=False):
-        ner_logger.debug(f'++++ extracting')
         text = text or ''
-        rgx_pattern = r'[\s-.+{}]+'
+        rgx_pattern = r'[-,.+\s{}]+'
         if not with_scale:
             rgx_pattern = re.compile(rgx_pattern.format(self.base_numbers_map_choices))
         else:
@@ -158,5 +148,5 @@ def get_number_digit_by_digit(self, text=''):
         
     def get_number_with_digit_scaling(self, text=''):
         # change the below logic to work with scaling
-        return ''
+        return text
         
\ No newline at end of file
diff --git a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py
index ba8f90af..b26aa0c5 100644
--- a/ner_v2/detectors/pattern/phone_number/phone_number_detection.py
+++ b/ner_v2/detectors/pattern/phone_number/phone_number_detection.py
@@ -13,12 +13,10 @@
 import phonenumbers
 from six.moves import zip
 
-from language_utilities.constant import ENGLISH_LANG, CHINESE_LANG
+from language_utilities.constant import ENGLISH_LANG, CHINESE_TRADITIONAL_LANG
 from ner_v2.detectors.base_detector import BaseDetector
 from ner_v2.detectors.numeral.number.number_detection import NumberDetector
 
-from chatbot_ner.config import ner_logger
-
 
 class PhoneDetector(BaseDetector):
     """
@@ -40,10 +38,8 @@ def __init__(self, entity_name, language=ENGLISH_LANG, locale=None):
             locale(str, optional): locale of the country from which you are dialing. Ex: 'en-IN'
         """
         self._supported_languages = NumberDetector.get_supported_languages()
-        ner_logger.debug(f'-= PHONE : {language}')
         super(PhoneDetector, self).__init__(language, locale)
         self.language = language
-        ner_logger.debug(f'-= PHONE : {self.language}')
         self.locale = locale or 'en-IN'
         if _regex_available:
             # This will replace all types of dashes(em or en) by hyphen.
@@ -105,7 +101,6 @@ def detect_entity(self, text, **kwargs):
         """
         self.text = " " + text.lower().strip() + " "
         self.phone, self.original_phone_text = [], []
-        ner_logger.debug(f'### PH :{self.text} {self.country_code}')
         for match in phonenumbers.PhoneNumberMatcher(self.text, self.country_code, leniency=0):
             if match.number.country_code == phonenumbers.country_code_for_region(self.country_code):
                 self.phone.append(self.check_for_country_code(str(match.number.national_number)))
@@ -115,7 +110,6 @@ def detect_entity(self, text, **kwargs):
                 self.phone.append({"country_calling_code": str(match.number.country_code),
                                    "value": str(match.number.national_number)})
                 self.original_phone_text.append(self.text[match.start:match.end])
-            ner_logger.info(f'### {self.phone} {self.original_phone_text}')
         self.phone, self.original_phone_text = self.check_for_alphas()
         return self.phone, self.original_phone_text
 
@@ -164,7 +158,7 @@ class ChinesePhoneDetector(PhoneDetector):
     This method is used to detect phone numbers present in chinese text.
     """
 
-    def __init__(self, entity_name, language=CHINESE_LANG, locale=None):
+    def __init__(self, entity_name, language=CHINESE_TRADITIONAL_LANG, locale=None):
         """
         Args:
             entity_name (str): A string by which the detected numbers would be replaced with
@@ -173,20 +167,7 @@ def __init__(self, entity_name, language=CHINESE_LANG, locale=None):
             locale(str, optional): locale of the country from which you are dialing. Ex: 'en-IN'
         """
         self._supported_languages = NumberDetector.get_supported_languages()
-        ner_logger.debug(f'-= CHINESE : {language}')
         super(ChinesePhoneDetector, self).__init__(entity_name, language, locale)
-        self.language = language
-        ner_logger.debug(f'-= CHINESE : {self.language}')
-        self.locale = locale or CHINESE_LANG
-        if _regex_available:
-            # This will replace all types of dashes(em or en) by hyphen.
-            self.locale = regex.sub('\\p{Pd}', '-', self.locale)
-
-        self.text = ''
-        self.phone, self.original_phone_text = [], []
-        self.country_code = self.get_country_code_from_locale()
-        self.entity_name = entity_name
-        self.tag = '__' + self.entity_name + '__'
         
         # Using Chinese number detector here
         self.number_detector = NumberDetector(self.entity_name, language=self.language)
@@ -201,31 +182,25 @@ def _text_list_for_detection(self, text=None):
         return : list[string]
         """
         text = text or ''
-        ner_logger.debug(f'<<< Sanitizeing text : {text}')
         matches = self.language_number_detector.extract_digits_only(text)
         return matches
 
     def detect_entity(self, text, **kwargs):
-        ner_logger.debug(f'<<< chinese phone number detect entity')
-        
+        """
+        Detect phone numbers in the text by mapping Chinese digits to their numeric values.
+        """
         number_matches = self._text_list_for_detection(text)
         self.phone, self.original_phone_text = [], []
-        try:
-            for _text in number_matches:
-                original_text = " " + _text.lower().strip() + " "
-                sanitized_text = self.language_number_detector.get_number_digit_by_digit(original_text)
-
-                ner_logger.debug(f'### PH : {sanitized_text} {self.country_code} {original_text}')
-                for match in phonenumbers.PhoneNumberMatcher(sanitized_text, self.country_code, leniency=0):
-                    if match.number.country_code == phonenumbers.country_code_for_region(self.country_code):
-                        self.phone.append(self.check_for_country_code(str(match.number.national_number)))
-                        self.original_phone_text.append(original_text[match.start:match.end])
-                    else:
-                        # This means our detector has detected some other country code.
-                        self.phone.append({"country_calling_code": str(match.number.country_code),
-                                        "value": str(match.number.national_number)})
-                        self.original_phone_text.append(original_text[match.start:match.end])
-        except Exception as exp:
-            ner_logger.error(f'Exception in detect_entity for ChinesePhoneDetector, {str(exp)}')
-        ner_logger.debug(f'==== {self.phone}, {self.original_phone_text}')
+        for _text in number_matches:
+            original_text = " " + _text.lower().strip() + " "
+            sanitized_text = self.language_number_detector.get_number_digit_by_digit(original_text)
+            for match in phonenumbers.PhoneNumberMatcher(sanitized_text, self.country_code, leniency=0):
+                if match.number.country_code == phonenumbers.country_code_for_region(self.country_code):
+                    self.phone.append(self.check_for_country_code(str(match.number.national_number)))
+                    self.original_phone_text.append(original_text[match.start:match.end])
+                else:
+                    # This means our detector has detected some other country code.
+                    self.phone.append({"country_calling_code": str(match.number.country_code),
+                                    "value": str(match.number.national_number)})
+                    self.original_phone_text.append(original_text[match.start:match.end])
         return self.phone, self.original_phone_text
\ No newline at end of file