Skip to content

Commit

Permalink
[COSM] removal of PYTHON3 in utf8.py (#259)
Browse files Browse the repository at this point in the history
* [COSM] Removal of PYTHON3 version checks in utf8.py

* PYTHON3 version checks have been removed

* Encode2Unicode.py PYTHON3 version handling has successfully passed all unit tests
  • Loading branch information
HariharanUmapathi authored Nov 9, 2024
1 parent 991dc6f commit 0188003
Show file tree
Hide file tree
Showing 19 changed files with 25 additions and 68 deletions.
4 changes: 0 additions & 4 deletions examples/solpattiyal.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@
import tamil
from transliterate import *

PYTHON3 = sys.version[0] > "2"
if not PYTHON3:
sys.stdout = codecs.getwriter("utf-8")(sys.stdout)


# use generators for better memory footprint -- 04/04/15
class WordFrequency(object):
Expand Down
10 changes: 3 additions & 7 deletions examples/tamilwordgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@
import codecs
from math import sqrt

PYTHON3 = sys.version > "3"


# Vertical / Horizontal Word Grids
class Solver:
def __init__(self, wordgrid):
Expand Down Expand Up @@ -140,10 +137,9 @@ def precompute(self):
)
self.grid_size = 3 + int(self.max_word_len)
# sort words in order
if PYTHON3:
self.words = sorted(self.words, key=len)
else:
self.words.sort(cmp=WordGrid.sorter)
self.words = sorted(self.words, key=len)
#else:
# self.words.sort(cmp=WordGrid.sorter)
# prepare a random grid of dim [#words x #max-word-length]
# len(self.words)
for itr_r in range(self.grid_size):
Expand Down
2 changes: 1 addition & 1 deletion examples/wordlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import tamil

LINUX = not hasattr(sys, "getwindowsversion")
PYTHON3 = sys.version > "3"



# compute word intersection graph of the a wordlist
Expand Down
3 changes: 0 additions & 3 deletions examples/wordxsec.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@

import tamil

PYTHON3 = version > "3"


# compute word intersection graph of the a wordlist
# optimized for using the symmetry in computation but not space
class WordXSec:
Expand Down
8 changes: 2 additions & 6 deletions solthiruthi/Ezhimai.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
from . import WordSpeller
from . import resources

PYTHON3 = sys.version > "3"


class PattiyalThiruthi(WordSpeller.ISpeller):
def __init__(self, option):
""" spell checker based on whitelist agarathi """
Expand All @@ -37,9 +34,8 @@ def loadWordFile(filename):
# words will be loaded from the file into the Trie structure
with codecs.open(filename, "r", "utf-8") as fp:
data = map(lambda word: word.strip(), fp.readlines())
if PYTHON3:
return frozenset(data)
return set(data)
return frozenset(data)
#return set(data)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion solthiruthi/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from tamil import utf8

PYTHON3 = sys.version[0] == "3"



class Queue(list):
Expand Down
2 changes: 1 addition & 1 deletion solthiruthi/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from . import datastore
from . import resources

PYTHON3 = sys.version[0] == "3"



# specify dictionary interface without specifying storage
Expand Down
3 changes: 0 additions & 3 deletions solthiruthi/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@

from tamil import utf8

PYTHON3 = sys.version[0] == "3"


def get_letters(word):
if isinstance(word, list):
chars = word
Expand Down
5 changes: 1 addition & 4 deletions solthiruthi/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@

from tamil import utf8
from . import resources

PYTHON3 = sys.version[0] == "3"
if PYTHON3:
from functools import reduce
from functools import reduce


class NGStats:
Expand Down
2 changes: 1 addition & 1 deletion solthiruthi/solthiruthi.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

# from pprint import pprint

PYTHON3 = sys.version[0] == "3"



class Solthiruthi:
Expand Down
3 changes: 0 additions & 3 deletions spell/spell.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,6 @@

# Make Bi-Lingual dictionary

PYTHON3 = sys.version_info[0] == 3
assert PYTHON3, "சொல்திருத்தி செயலி பைத்தான் 3-இல் மற்றுமே இயங்கும்!"

_DEBUG = False


Expand Down
3 changes: 0 additions & 3 deletions tamil/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
from datetime import datetime as datetime_cpy
from typing import Type

PYTHON3 = sys.version > "3"
assert PYTHON3, "This module requires Python 3"

TA_WEEKDAYS_SHORT = [
"திங்கள்",
"செவ்வாய்",
Expand Down
2 changes: 0 additions & 2 deletions tamil/numeral.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
import re
import sys

PYTHON3 = sys.version > "3"
assert PYTHON3, "Python3 or larger required for this module"
SPACE = re.compile("\s+")


Expand Down
10 changes: 3 additions & 7 deletions tamil/tscii2utf8.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@

import tamil

PYTHON3 = version[0] > "2"


def usage():
return u"tscii2utf8.py <filename-1> <filename-2> ... "

Expand All @@ -23,10 +20,9 @@ def usage():
try:
with codecs.open(fname, "r", "utf-8") as fileHandle:
output = tamil.tscii.convert_to_unicode(fileHandle.read())
if PYTHON3:
print(output)
else:
print(output.encode("utf-8"))
print(output)
# else:
# print(output.encode("utf-8"))
except Exception as fileOrConvException:
print(
u"tscii2utf8 error - file %s could not be processed due to - %s"
Expand Down
20 changes: 6 additions & 14 deletions tamil/txt2unicode/encode2unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,7 @@
# <http://www.gnu.org/licenses/>. #
# #
##############################################################################
from sys import version

PYTHON3 = version > "3"
del version

try:
# python 2
from .orddic import OrderedDict
except ImportError as ime:
# python 3
from collections import OrderedDict
from collections import OrderedDict

from .encode2utf8 import (
anjal2utf8,
Expand Down Expand Up @@ -281,8 +271,9 @@ def _get_unique_ch(text, all_common_encodes):
special_chars = [".", ",", ";", ":", "", " ", "\r", "\t", "=", "\n"]
for line in text:
for word in line.split(" "):
if not PYTHON3:
word = word.decode("utf-8")
# This place is a little trickier: how can we change it for Python 3?
# if not PYTHON3:
# word = word.decode("utf-8")
for ch in all_common_encodes:
if ch in word:
word = word.replace(ch, "")
Expand Down Expand Up @@ -330,8 +321,9 @@ def _get_unique_common_encodes():
_all_common_encodes_single_char_ = set([])

for name, encode in _all_encodes_.items():
# This place has python3 unicode handling?
encode_utf8 = set(
[PYTHON3 and ch or ch.decode("utf-8") for ch in encode.keys()]
[ ch for ch in encode.keys()]
)
_all_unicode_encodes_[name] = encode_utf8
_all_unique_encodes_full_ = _all_unicode_encodes_.copy()
Expand Down
2 changes: 0 additions & 2 deletions tamil/utf8.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
from copy import copy
from sys import version

PYTHON3 = version > "3"
assert PYTHON3, "PYTHON3 required to operate Open-Tamil library"
import functools

## constants
Expand Down
1 change: 1 addition & 0 deletions tests/opentamiltests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
WINDOWS = sys.platform.find("win") != -1
LINUX = not WINDOWS


import tamil

import transliterate
Expand Down
1 change: 0 additions & 1 deletion tests/solthiruthi_suffixremoval.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def test_basic_plural_stripper(self):
for w, x in zip(words_list, expected):
rval = obj.removeSuffix(w)
self.assertTrue(rval[1])
# if not PYTHON3: print(utf8.get_letters(w),u'->',rval[1])
self.assertEqual(rval[0], x)
return

Expand Down
10 changes: 5 additions & 5 deletions webapp/opentamilapp/tamilwordgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import tamil

PYTHON3 = sys.version > "3"



# Vertical / Horizontal Word Grids
Expand Down Expand Up @@ -129,10 +129,10 @@ def precompute(self):
)
self.grid_size = 3 + int(self.max_word_len)
# sort words in order
if PYTHON3:
self.words = sorted(self.words, key=len)
else:
self.words.sort(cmp=WordGrid.sorter)

self.words = sorted(self.words, key=len)
#else:
# self.words.sort(cmp=WordGrid.sorter)
# prepare a random grid of dim [#words x #max-word-length]
# len(self.words)
for itr_r in range(self.grid_size):
Expand Down

0 comments on commit 0188003

Please sign in to comment.