-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwordref.py
executable file
·141 lines (122 loc) · 4.3 KB
/
wordref.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Author: Nicolas Chevalier
# https://github.com/shawone/sowordref.git
#
# Wordreference parsing
#! /bin/sh
""":"
exec python $0 ${1+"$@"}
"""
import urllib2
import contextlib
import sys
import cStringIO
import argparse
from HTMLParser import HTMLParser
from pprint import pprint
@contextlib.contextmanager
def nostdout():
    """Temporarily silence ``sys.stdout``.

    While the ``with`` body runs, ``sys.stdout`` is replaced by an in-memory
    ``cStringIO`` buffer, so whatever is written to it is discarded.  The
    previous stream is put back on exit, including when the body raises.
    """
    saved_stdout = sys.stdout
    sys.stdout = cStringIO.StringIO()
    try:
        yield
    finally:
        # Without the finally clause an exception raised inside the ``with``
        # body would leave stdout redirected for the rest of the program.
        sys.stdout = saved_stdout
class MyHTMLParser(HTMLParser):
    """Print selected table-row text from a WordReference result page.

    The whole page is parsed from the constructor.  Text is collected while
    the most recent attributed ``<tr>`` carried an ``id`` attribute, and is
    written to ``sys.stdout`` each time a ``</tr>`` is seen:

    * text inside ``<strong>`` is wrapped in the ANSI red escape sequence;
    * the first text seen while both ``<em>`` and ``<span>`` are open after
      such a ``<strong>`` text is wrapped in the ANSI blue escape sequence;
    * other text outside ``<em>`` is stored unchanged.
    """

    # State flags; the class-level values are the defaults for a new parser.
    flagtranslation = None  # True when the last <tr> with attributes had "id"
    flagstrong = None       # True between <strong> and </strong>
    flagabbr = None         # True between <em> and </em>
    flagnewline = None      # not read anywhere in this class
    flagspan = None         # True between <span> and </span>
    splitfirstword = None   # False after a <strong> text, True after a blue one
    retline = None          # True when a newline must precede the next output

    def __init__(self, websrc):
        """Parse *websrc* immediately.

        websrc: object whose ``read()`` method returns the HTML text to parse.
        """
        HTMLParser.__init__(self)
        # Fragments of the row being collected.  Created per instance (and
        # before feed() runs): a class-level list would be shared by every
        # parser object until the first </tr> rebinds it.
        self.listeline = []
        self.feed(websrc.read())

    def handle_starttag(self, tag, attrs):
        """Update the state flags for an opening tag."""
        if tag == 'tr':
            # A <tr> without attributes leaves the flag untouched, as before.
            # The "id" test is a plain membership check so the outcome cannot
            # depend on the order in which the attributes are iterated.
            if attrs:
                has_id = any(name == 'id' for name, _ in attrs)
                self.flagtranslation = True if has_id else None
        elif tag == 'em':
            self.flagabbr = True
        elif tag == 'span':
            self.flagspan = True
        elif tag == 'strong':
            self.flagstrong = True

    def handle_endtag(self, tag):
        """Clear the flag of a closing tag; flush the row on ``</tr>``."""
        if tag == 'em':
            self.flagabbr = None
        elif tag == 'span':
            self.flagspan = None
        elif tag == 'strong':
            self.flagstrong = None
        elif tag == 'tr':
            self._flush_row()

    def _flush_row(self):
        """Write the collected fragments to stdout and start a new row."""
        for fragment in self.listeline:
            if self.retline:
                sys.stdout.write('\n')
                self.retline = False
            # Two-character fragments are dropped.  The original had an extra
            # test for them (len(k[0]) != 1 and len(k[1]) != 1) which can
            # never be true for a string, so they were never written.
            if len(fragment) != 2:
                sys.stdout.write(fragment)
        self.listeline = []

    def handle_data(self, data):
        """Store *data* for the current row according to the state flags."""
        if self.flagtranslation and self.flagabbr is None:
            if self.flagstrong:
                self.splitfirstword = False
                self.listeline.append("\033[31m" + data + "\033[0m ")
                self.retline = True
            else:
                # Text starting with "Next" ends the program (footer + exit).
                if data.startswith("Next"):
                    endprogram(False)
                self.listeline.append(data)
        if self.flagabbr and self.flagspan and self.splitfirstword is False:
            self.listeline.append("\033[34m" + data + "\033[0m ")
            self.splitfirstword = True
def endprogram(state):
    """Print the source footer, then exit when *state* equals ``False``.

    state: when it compares equal to ``False`` the process terminates with
    exit status 0 right after the footer is printed; otherwise the function
    simply returns.
    """
    # Single parenthesised argument: prints the same text as the original
    # print statement (string followed by a newline).
    print("\n\nsource: http://www.wordreference.com\n(END)\n")
    # ``== False`` is kept on purpose so that only False-equal values exit
    # (None, for instance, does not).
    if state == False:
        # sys.exit() instead of the exit() helper, which is injected by the
        # ``site`` module and intended for interactive use.
        sys.exit(0)
def argparser():
    """Parse the command line, fetch the WordReference page and print it.

    Command line: a positional ``word`` plus the optional ``-i/--langsource``
    and ``-o/--langdestination`` codes, which must be given together; when
    both are omitted the translation goes from "fr" to "en".

    The page ``http://www.wordreference.com/<src><dst>/<word>`` is requested
    and handed to ``MyHTMLParser``, which prints the result; the footer is
    printed last by ``endprogram``.  If the request raises
    ``urllib2.URLError`` a message is written to stderr and the process exits
    with status 1.
    """
    # Function-scope import: quote() is only needed to build the URL below.
    import urllib

    # Python 2 only: makes UTF-8 the implicit str/unicode codec.
    # NOTE(review): process-wide setting; presumably needed for non-ASCII
    # words and page text -- confirm before porting.
    reload(sys)
    sys.setdefaultencoding("UTF8")

    langsrc = "fr"
    langdst = "en"

    parser = argparse.ArgumentParser()
    parser.add_argument("word", help="word to translate", type=str)
    parser.add_argument("-i", "--langsource",
                        help="language source (ex: en) iso 639-1 https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes")
    parser.add_argument("-o", "--langdestination",
                        help="language destination (ex: fr) iso 639-1 https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes")
    args = parser.parse_args()

    # Exactly one of the two language options given -> usage error.
    if bool(args.langsource) != bool(args.langdestination):
        parser.error("Please use -i <language> and -o <language> together (fr => eng by default)")
    if args.langsource and args.langdestination:
        langsrc = args.langsource
        langdst = args.langdestination

    # Bound unconditionally: binding it only for a non-empty word left the
    # name undefined (NameError) when an empty string was passed.
    word = args.word

    # Percent-encode the word so spaces and other reserved characters give a
    # valid request line.
    url = "http://www.wordreference.com/" + langsrc + langdst + "/" + urllib.quote(word)

    # NOTE(review): debuglevel=1 presumably makes the HTTP layer print the
    # exchange, which would be why the request is wrapped in nostdout().
    opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=1))
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    response = None
    failure = None
    with nostdout():
        try:
            response = opener.open(url)
        except urllib2.URLError as exc:
            # Reported after the with block, once stdout is usable again.
            failure = exc
    if failure is not None:
        sys.stderr.write("Request Http Error, please check your connection\n")
        # Non-zero status so callers can detect the failure.
        sys.exit(1)

    print("========= Wordreference Translation of " + word + ": " + langsrc + " => " + langdst + " ===========")
    try:
        # The parser reads and prints the page from its constructor.
        MyHTMLParser(response)
    finally:
        # Also runs when the parser ends the program through SystemExit.
        response.close()
    endprogram(True)
def main():
    """Script entry point: hand control to the command-line handler."""
    argparser()


# Only run the translator when the file is executed as a script.
if __name__ == "__main__":
    main()