Skip to content

Commit

Permalink
removed the logs and cleaned the code
Browse files Browse the repository at this point in the history
  • Loading branch information
harjinder7 committed Nov 8, 2022
1 parent 6340fe0 commit 297eb9b
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 82 deletions.
2 changes: 1 addition & 1 deletion language_utilities/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
PORTUGUESE_LANG = 'pt'
TURKISH_LANG = 'tr'

CHINESE_LANG = 'zh-TW'
CHINESE_TRADITIONAL_LANG = 'zh-TW'

# language translation status
TRANSLATED_TEXT = 'translated_text'
5 changes: 2 additions & 3 deletions ner_v2/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from chatbot_ner.config import ner_logger
from datastore.exceptions import DataStoreRequestException
from language_utilities.constant import ENGLISH_LANG
from language_utilities.constant import ENGLISH_LANG, CHINESE_TRADITIONAL_LANG
from ner_constants import PARAMETER_MESSAGE, PARAMETER_ENTITY_NAME, PARAMETER_STRUCTURED_VALUE, \
PARAMETER_FALLBACK_VALUE, \
PARAMETER_BOT_MESSAGE, PARAMETER_TIMEZONE, PARAMETER_LANGUAGE_SCRIPT, PARAMETER_SOURCE_LANGUAGE, \
Expand Down Expand Up @@ -634,7 +634,7 @@ def phone_number(request):
ner_logger.debug('Entity Name %s' % entity_name)
ner_logger.debug('Source Language %s' % language)

if language == 'zh-TW':
if language == CHINESE_TRADITIONAL_LANG:
phone_number_detection = ChinesePhoneDetector(entity_name=entity_name, language=language,
locale=parameters_dict[PARAMETER_LOCALE])
else:
Expand All @@ -644,7 +644,6 @@ def phone_number(request):

ner_logger.debug(parameters_dict)
if isinstance(message, six.string_types):
ner_logger.debug(f'++ API msg : {message}')
entity_output = phone_number_detection.detect(message=message,
structured_value=parameters_dict[PARAMETER_STRUCTURED_VALUE],
fallback_value=parameters_dict[PARAMETER_FALLBACK_VALUE],
Expand Down
12 changes: 0 additions & 12 deletions ner_v2/detectors/base_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from ner_constants import (FROM_STRUCTURE_VALUE_VERIFIED, FROM_STRUCTURE_VALUE_NOT_VERIFIED, FROM_MESSAGE,
FROM_FALLBACK_VALUE, ORIGINAL_TEXT, ENTITY_VALUE, DETECTION_METHOD,
DETECTION_LANGUAGE, ENTITY_VALUE_DICT_KEY)
from chatbot_ner.config import ner_logger

class BaseDetector(object):
"""
Expand All @@ -32,9 +31,7 @@ def __init__(self, language=ENGLISH_LANG, translation_enabled=False):
language (str): ISO 639 language code of language of original query
translation_enabled (bool): Decides to either enable or disable translation API
"""
ner_logger.debug(f'-= BASE : {language}')
self._language = language
ner_logger.debug(f'-= PHONE : {self._language}')
self._processing_language = ENGLISH_LANG
self._translation_enabled = translation_enabled
self._set_language_processing_script()
Expand All @@ -59,15 +56,13 @@ def detect_entity(self, text, **kwargs):
tuple: Two lists of same length containing detected values and original substring from text which is used
to derive the detected value respectively
"""
ner_logger.debug(f'>>> base detector detect entity')
return [], []

def _set_language_processing_script(self):
"""
This method is used to decide the language in which the detector should run its logic, based on
the supported languages and the query language for which the subclass is initialized
"""
ner_logger.debug(f'-+-+ {self._language} , {self.supported_languages}')
if self._language in self.supported_languages:
self._processing_language = self._language
elif ENGLISH_LANG in self.supported_languages and self._translation_enabled:
Expand Down Expand Up @@ -135,8 +130,6 @@ def detect(self, message=None, structured_value=None, fallback_value=None, **kwa
>> [{'detection': 'message', 'original_text': 'inferno', 'entity_value': {'value': u'Inferno'}}]
"""

ner_logger.debug(f'==== M :{message}')
if self._language != self._processing_language and self._translation_enabled:
if structured_value:
translation_output = translate_text(structured_value, self._language,
Expand All @@ -148,23 +141,18 @@ def detect(self, message=None, structured_value=None, fallback_value=None, **kwa
message = translation_output[TRANSLATED_TEXT] if translation_output['status'] else None

text = structured_value if structured_value else message
ner_logger.debug(f'==== M :{message}')
entity_list, original_text_list = self.detect_entity(text=text, **kwargs)
if structured_value:
ner_logger.debug(f'structured ==== {entity_list}, {original_text_list}')
if entity_list:
value, method, original_text = entity_list, FROM_STRUCTURE_VALUE_VERIFIED, original_text_list
else:
value, method, original_text = [structured_value], FROM_STRUCTURE_VALUE_NOT_VERIFIED, \
[structured_value]
elif entity_list:
ner_logger.debug(f'entity list ==== {entity_list}, {original_text_list}')
value, method, original_text = entity_list, FROM_MESSAGE, original_text_list
elif fallback_value:
ner_logger.debug(f'fallback value ==== {entity_list}, {original_text_list}')
value, method, original_text = [fallback_value], FROM_FALLBACK_VALUE, [fallback_value]
else:
ner_logger.debug(f'None ==== {entity_list}, {original_text_list}')
return None

return self.output_entity_dict_list(entity_value_list=value, original_text_list=original_text,
Expand Down
7 changes: 3 additions & 4 deletions ner_v2/detectors/numeral/number/number_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
from ner_v2.detectors.numeral.constant import NUMBER_DETECTION_RETURN_DICT_VALUE, NUMBER_DETECTION_RETURN_DICT_UNIT
from ner_v2.detectors.utils import get_lang_data_path

from chatbot_ner.config import ner_logger

COMMON_NON_NUMERIC_PUNCTUATIONS = re.escape('!"#%&\'()*/;<=>?@[\\]^_`{|}~।')


Expand Down Expand Up @@ -113,12 +111,10 @@ def __init__(self, entity_name, language=ENGLISH_LANG, unit_type=None, detect_wi
self.detect_without_unit = detect_without_unit
self.punctuations_to_filter = re.compile(f'[{COMMON_NON_NUMERIC_PUNCTUATIONS}]')
try:
ner_logger.debug(f'MODEL LOADING FOR : {self.language}')
number_detector_module = importlib.import_module(
'ner_v2.detectors.numeral.number.{0}.number_detection'.format(self.language))
self.language_number_detector = number_detector_module.NumberDetector(entity_name=self.entity_name,
unit_type=self.unit_type)
ner_logger.debug(f'MODEL LOADED FOR : {self.language}')
except ImportError:
standard_number_regex = importlib.import_module(
'ner_v2.detectors.numeral.number.standard_number_detector'
Expand All @@ -131,6 +127,9 @@ def __init__(self, entity_name, language=ENGLISH_LANG, unit_type=None, detect_wi
)

def get_language_number_detector(self):
"""
Return the language number detector being used by the current number detector
"""
return self.language_number_detector

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ def _detect_number_from_digit(self, number_list=None, original_list=None):
start_span = 0
end_span = -1
spanned_text = self.processed_text

regex_numeric_patterns = re.compile(r'(([\d,]+\.?[\d]*)\s?(' + self.scale_map_choices + r'))[\s\-\:]' +
r'|([\d,]+\.?[\d]*)', re.UNICODE)
patterns = regex_numeric_patterns.findall(processed_text)
Expand Down
30 changes: 10 additions & 20 deletions ner_v2/detectors/numeral/number/zh-TW/number_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,6 @@
NUMBER_DETECTION_RETURN_DICT_UNIT, NUMBER_DETECTION_RETURN_DICT_VALUE
from ner_v2.detectors.numeral.number.standard_number_detector import BaseNumberDetector

from chatbot_ner.config import ner_logger


"""
mapping some special character for chinese (traditional)
use to replace in text string
"""
special_chars_mapping = {
',' : '、', # comma character
'.' : '點' #dian ( period )
}


class NumberDetector(BaseNumberDetector):
"""
Expand All @@ -51,7 +39,11 @@ def __init__(self, entity_name='number', unit_type=None):
self.detector_preferences = [
self._detect_number_from_text
]
ner_logger.debug(f'-=-= CHINESE NUMBER DETECTOR')

self.special_chars_mapping = {
',': '、', # comma character
'.': '點' #dian ( period )
}

def _get_base_map_choices(self, base_map):
number_set = set()
Expand Down Expand Up @@ -86,7 +78,7 @@ def _have_digits_only(self, text=None, scale_map=None):

def replace_special_chars(self, text=None):
text = text or ''
for _char, _native_char in special_chars_mapping.items():
for _char, _native_char in self.special_chars_mapping.items():
text = text.replace(_native_char, _char)
return text

Expand All @@ -109,13 +101,13 @@ def _detect_number_from_text(self, number_list=None, original_list=None):

rgx_pattern = r'([{}]+)({}?([{}]*))'.format(
self.base_numbers_map_full,
special_chars_mapping.get('.', '\.'),
self.special_chars_mapping.get('.', '\.'),
self.base_numbers_map_full
)
regex_digit_patterns = re.compile(rgx_pattern)
patterns = regex_digit_patterns.findall(self.processed_text)
for pattern in patterns:
full_number = number, after_decimal, original_text = None, None, None
full_number, number, original_text = None, None, None
if pattern[0].strip():
original_text = pattern[0].strip()
span = re.search(original_text, spanned_text).span()
Expand All @@ -129,7 +121,6 @@ def _detect_number_from_text(self, number_list=None, original_list=None):

if number.isnumeric():
full_number = number


if full_number:
_pattern = re.compile(re.escape(original_text), flags=_re_flags)
Expand All @@ -144,9 +135,8 @@ def _detect_number_from_text(self, number_list=None, original_list=None):
return number_list, original_list

def extract_digits_only(self, text, with_scale=False):
ner_logger.debug(f'++++ extracting')
text = text or ''
rgx_pattern = r'[\s-.+{}]+'
rgx_pattern = r'[-,.+\s{}]+'
if not with_scale:
rgx_pattern = re.compile(rgx_pattern.format(self.base_numbers_map_choices))
else:
Expand All @@ -158,5 +148,5 @@ def get_number_digit_by_digit(self, text=''):

def get_number_with_digit_scaling(self, text=''):
# change the below logic to work with scaling
return ''
return text

59 changes: 17 additions & 42 deletions ner_v2/detectors/pattern/phone_number/phone_number_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@
import phonenumbers
from six.moves import zip

from language_utilities.constant import ENGLISH_LANG, CHINESE_LANG
from language_utilities.constant import ENGLISH_LANG, CHINESE_TRADITIONAL_LANG
from ner_v2.detectors.base_detector import BaseDetector
from ner_v2.detectors.numeral.number.number_detection import NumberDetector

from chatbot_ner.config import ner_logger


class PhoneDetector(BaseDetector):
"""
Expand All @@ -40,10 +38,8 @@ def __init__(self, entity_name, language=ENGLISH_LANG, locale=None):
locale(str, optional): locale of the country from which you are dialing. Ex: 'en-IN'
"""
self._supported_languages = NumberDetector.get_supported_languages()
ner_logger.debug(f'-= PHONE : {language}')
super(PhoneDetector, self).__init__(language, locale)
self.language = language
ner_logger.debug(f'-= PHONE : {self.language}')
self.locale = locale or 'en-IN'
if _regex_available:
# This will replace all types of dashes(em or en) by hyphen.
Expand Down Expand Up @@ -105,7 +101,6 @@ def detect_entity(self, text, **kwargs):
"""
self.text = " " + text.lower().strip() + " "
self.phone, self.original_phone_text = [], []
ner_logger.debug(f'### PH :{self.text} {self.country_code}')
for match in phonenumbers.PhoneNumberMatcher(self.text, self.country_code, leniency=0):
if match.number.country_code == phonenumbers.country_code_for_region(self.country_code):
self.phone.append(self.check_for_country_code(str(match.number.national_number)))
Expand All @@ -115,7 +110,6 @@ def detect_entity(self, text, **kwargs):
self.phone.append({"country_calling_code": str(match.number.country_code),
"value": str(match.number.national_number)})
self.original_phone_text.append(self.text[match.start:match.end])
ner_logger.info(f'### {self.phone} {self.original_phone_text}')
self.phone, self.original_phone_text = self.check_for_alphas()
return self.phone, self.original_phone_text

Expand Down Expand Up @@ -164,7 +158,7 @@ class ChinesePhoneDetector(PhoneDetector):
This class is used to detect phone numbers present in Chinese text.
"""

def __init__(self, entity_name, language=CHINESE_LANG, locale=None):
def __init__(self, entity_name, language=CHINESE_TRADITIONAL_LANG, locale=None):
"""
Args:
entity_name (str): A string by which the detected numbers would be replaced with
Expand All @@ -173,20 +167,7 @@ def __init__(self, entity_name, language=CHINESE_LANG, locale=None):
locale(str, optional): locale of the country from which you are dialing. Ex: 'en-IN'
"""
self._supported_languages = NumberDetector.get_supported_languages()
ner_logger.debug(f'-= CHINESE : {language}')
super(ChinesePhoneDetector, self).__init__(entity_name, language, locale)
self.language = language
ner_logger.debug(f'-= CHINESE : {self.language}')
self.locale = locale or CHINESE_LANG
if _regex_available:
# This will replace all types of dashes(em or en) by hyphen.
self.locale = regex.sub('\\p{Pd}', '-', self.locale)

self.text = ''
self.phone, self.original_phone_text = [], []
self.country_code = self.get_country_code_from_locale()
self.entity_name = entity_name
self.tag = '__' + self.entity_name + '__'

# Using Chinese number detector here
self.number_detector = NumberDetector(self.entity_name, language=self.language)
Expand All @@ -201,31 +182,25 @@ def _text_list_for_detection(self, text=None):
return : list[string]
"""
text = text or ''
ner_logger.debug(f'<<< Sanitizeing text : {text}')
matches = self.language_number_detector.extract_digits_only(text)
return matches

def detect_entity(self, text, **kwargs):
ner_logger.debug(f'<<< chinese phone number detect entity')

"""
Detect phone numbers in text by mapping Chinese digits to numeric values
"""
number_matches = self._text_list_for_detection(text)
self.phone, self.original_phone_text = [], []
try:
for _text in number_matches:
original_text = " " + _text.lower().strip() + " "
sanitized_text = self.language_number_detector.get_number_digit_by_digit(original_text)

ner_logger.debug(f'### PH : {sanitized_text} {self.country_code} {original_text}')
for match in phonenumbers.PhoneNumberMatcher(sanitized_text, self.country_code, leniency=0):
if match.number.country_code == phonenumbers.country_code_for_region(self.country_code):
self.phone.append(self.check_for_country_code(str(match.number.national_number)))
self.original_phone_text.append(original_text[match.start:match.end])
else:
# This means our detector has detected some other country code.
self.phone.append({"country_calling_code": str(match.number.country_code),
"value": str(match.number.national_number)})
self.original_phone_text.append(original_text[match.start:match.end])
except Exception as exp:
ner_logger.error(f'Exception in detect_entity for ChinesePhoneDetector, {str(exp)}')
ner_logger.debug(f'==== {self.phone}, {self.original_phone_text}')
for _text in number_matches:
original_text = " " + _text.lower().strip() + " "
sanitized_text = self.language_number_detector.get_number_digit_by_digit(original_text)
for match in phonenumbers.PhoneNumberMatcher(sanitized_text, self.country_code, leniency=0):
if match.number.country_code == phonenumbers.country_code_for_region(self.country_code):
self.phone.append(self.check_for_country_code(str(match.number.national_number)))
self.original_phone_text.append(original_text[match.start:match.end])
else:
# This means our detector has detected some other country code.
self.phone.append({"country_calling_code": str(match.number.country_code),
"value": str(match.number.national_number)})
self.original_phone_text.append(original_text[match.start:match.end])
return self.phone, self.original_phone_text

0 comments on commit 297eb9b

Please sign in to comment.