Skip to content

Commit 2fbc1a1

Browse files
authored
Merge pull request #105 from mdevolde/master
Adding doc, removing unused parts, modernizing
2 parents 1ab1a30 + ac724f2 commit 2fbc1a1

11 files changed

+901
-440
lines changed

language_tool_python/__init__.py

-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
"""LanguageTool through server mode.
2-
3-
migration URL: https://languagetool.org/http-api/migration.php
4-
"""
5-
61
from .language_tag import LanguageTag
72
from .match import Match
83
from .server import LanguageTool, LanguageToolPublicAPI

language_tool_python/__main__.py

+69-16
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,27 @@
44
import locale
55
import re
66
import sys
7-
from importlib.metadata import version
7+
from importlib.metadata import version, PackageNotFoundError
88
import toml
9+
from typing import Any, Optional, Set, Union
910

1011
from .server import LanguageTool
1112
from .utils import LanguageToolError
1213

1314
try:
1415
__version__ = version("language_tool_python")
15-
except PackageNotFoundError:
16+
except PackageNotFoundError: # If the package is not installed in the environment, read the version from pyproject.toml
1617
with open("pyproject.toml", "rb") as f:
1718
__version__ = toml.loads(f.read().decode('utf-8'))["project"]["version"]
1819

1920

20-
def parse_args():
21+
def parse_args() -> argparse.Namespace:
22+
"""
23+
Parse command line arguments.
24+
25+
:return: parsed arguments
26+
:rtype: argparse.Namespace
27+
"""
2128
parser = argparse.ArgumentParser(
2229
description=__doc__.strip() if __doc__ else None,
2330
prog='language_tool_python')
@@ -42,7 +49,7 @@ def parse_args():
4249
help='If set, additional rules will be activated.')
4350
parser.add_argument(
4451
'--version', action='version',
45-
version='%(prog)s {}'.format(__version__),
52+
version=f'%(prog)s {__version__}',
4653
help='show version')
4754
parser.add_argument('-a', '--apply', action='store_true',
4855
help='automatically apply suggestions if available')
@@ -69,22 +76,71 @@ def parse_args():
6976

7077

7178
class RulesAction(argparse.Action):
    """
    Custom argparse action that accumulates rule IDs into a set.

    Every time the option is encountered on the command line, the values
    parsed for it are merged into the set stored in the namespace under
    ``self.dest``.

    Attributes:
        dest (str): the destination attribute to update
    """
    def __call__(self, parser: argparse.ArgumentParser, namespace: Any, values: Any, option_string: Optional[str] = None) -> None:
        """
        Merge the parsed values into the set of rules held by the namespace.

        argparse stores the option's ``default`` object itself on the namespace,
        so updating that object in place would also modify the parser's default
        and leak rules into later ``parse_args()`` calls. A fresh set is therefore
        built and assigned instead of mutating the existing one.

        :param parser: The ArgumentParser object which contains this action.
        :type parser: argparse.ArgumentParser
        :param namespace: The namespace object that will be returned by parse_args().
        :type namespace: Any
        :param values: The argument values associated with the action (an iterable of rule IDs).
        :type values: Any
        :param option_string: The option string that was used to invoke this action.
        :type option_string: Optional[str]
        """
        current = getattr(namespace, self.dest, None)
        # A missing or None destination is treated as "no rules collected yet".
        merged = set(current) if current else set()
        merged.update(values)
        setattr(namespace, self.dest, merged)
74104

75105

def get_rules(rules: str) -> Set[str]:
    """
    Parse a string of rules and return a set of upper-cased rule IDs.

    A rule ID is any maximal run of word characters (letters, digits,
    underscore) and hyphens; every other character acts as a separator.
    For example, ``"en-rule, foo_bar"`` yields ``{"EN-RULE", "FOO_BAR"}``.

    :param rules: A string containing rule IDs separated by characters other
        than word characters and hyphens (e.g. commas or spaces).
    :type rules: str
    :return: A set of rule IDs, converted to upper case.
    :rtype: Set[str]
    """
    return {rule.upper() for rule in re.findall(r"[\w\-]+", rules)}
78116

79117

def get_text(filename: Union[str, int], encoding: Optional[str], ignore: Optional[str]) -> str:
    """
    Read the content of a file and return it as a string, optionally blanking lines that match a regular expression.

    A line whose beginning matches ``ignore`` is replaced by a single newline,
    so the number of lines in the result stays the same as in the file.

    :param filename: The name of the file to read or file descriptor.
    :type filename: Union[str, int]
    :param encoding: The encoding to use for reading the file.
    :type encoding: Optional[str]
    :param ignore: A regular expression pattern to match lines that should be ignored.
        ``None`` or an empty string disables ignoring.
    :type ignore: Optional[str]
    :return: The content of the file as a string.
    :rtype: str
    """
    with open(filename, encoding=encoding) as f:
        if not ignore:
            return f.read()
        # Compile once instead of looking the pattern up for every line.
        ignored = re.compile(ignore)
        # Iterate the file lazily rather than materializing a list via readlines().
        return ''.join('\n' if ignored.match(line) else line for line in f)
85135

86136

87-
def main():
137+
def main() -> int:
138+
"""
139+
Main function to parse arguments, process files, and check text using LanguageTool.
140+
141+
:return: Exit status code
142+
:rtype: int
143+
"""
88144
args = parse_args()
89145

90146
status = 0
@@ -106,7 +162,7 @@ def main():
106162
if args.remote_host is not None:
107163
remote_server = args.remote_host
108164
if args.remote_port is not None:
109-
remote_server += ':{}'.format(args.remote_port)
165+
remote_server += f':{args.remote_port}'
110166
lang_tool = LanguageTool(
111167
language=args.language,
112168
motherTongue=args.mother_tongue,
@@ -116,7 +172,7 @@ def main():
116172
try:
117173
text = get_text(filename, encoding, ignore=args.ignore_lines)
118174
except UnicodeError as exception:
119-
print('{}: {}'.format(filename, exception), file=sys.stderr)
175+
print(f'{filename}: {exception}', file=sys.stderr)
120176
continue
121177

122178
if not args.spell_check:
@@ -137,7 +193,7 @@ def main():
137193
rule_id = match.ruleId
138194

139195
replacement_text = ', '.join(
140-
"'{}'".format(word)
196+
f"'{word}'"
141197
for word in match.replacements).strip()
142198

143199
message = match.message
@@ -147,14 +203,11 @@ def main():
147203
if replacement_text and not message.endswith(('.', '?')):
148204
message += '; suggestions: ' + replacement_text
149205

150-
print('{}: {}: {}'.format(
151-
filename,
152-
rule_id,
153-
message))
206+
print(f'{filename}: {rule_id}: {message}')
154207

155208
status = 2
156209
except LanguageToolError as exception:
157-
print('{}: {}'.format(filename, exception), file=sys.stderr)
210+
print(f'{filename}: {exception}', file=sys.stderr)
158211
continue
159212

160213
return status

language_tool_python/config_file.py

+23-61
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import tempfile
66

7+
# Allowed configuration keys for LanguageTool.
78
ALLOWED_CONFIG_KEYS = {
89
'maxTextLength', 'maxTextHardLength', 'maxCheckTimeMillis', 'maxErrorsPerWordRate',
910
'maxSpellingSuggestions', 'maxCheckThreads', 'cacheSize', 'cacheTTLSeconds', 'requestLimit',
@@ -12,10 +13,25 @@
1213
'blockedReferrers', 'premiumOnly', 'disabledRuleIds', 'pipelineCaching', 'maxPipelinePoolSize',
1314
'pipelineExpireTimeInSeconds', 'pipelinePrewarming'
1415
}
16+
1517
class LanguageToolConfig:
18+
"""
19+
Configuration class for LanguageTool.
20+
21+
:param config: Dictionary containing configuration keys and values.
22+
:type config: Dict[str, Any]
23+
24+
Attributes:
25+
config (Dict[str, Any]): Dictionary containing configuration keys and values.
26+
path (str): Path to the temporary file storing the configuration.
27+
"""
1628
config: Dict[str, Any]
1729
path: str
30+
1831
def __init__(self, config: Dict[str, Any]):
32+
"""
33+
Initialize the LanguageToolConfig object.
34+
"""
1935
assert set(config.keys()) <= ALLOWED_CONFIG_KEYS, f"unexpected keys in config: {set(config.keys()) - ALLOWED_CONFIG_KEYS}"
2036
assert len(config), "config cannot be empty"
2137
self.config = config
@@ -31,9 +47,15 @@ def __init__(self, config: Dict[str, Any]):
3147
self.path = self._create_temp_file()
3248

3349
def _create_temp_file(self) -> str:
50+
"""
51+
Create a temporary file to store the configuration.
52+
53+
:return: Path to the temporary file.
54+
:rtype: str
55+
"""
3456
tmp_file = tempfile.NamedTemporaryFile(delete=False)
3557

36-
# WRite key=value entries as lines in temporary file.
58+
# Write key=value entries as lines in temporary file.
3759
for key, value in self.config.items():
3860
next_line = f'{key}={value}\n'
3961
tmp_file.write(next_line.encode())
@@ -43,63 +65,3 @@ def _create_temp_file(self) -> str:
4365
atexit.register(lambda: os.unlink(tmp_file.name))
4466

4567
return tmp_file.name
46-
47-
48-
49-
"""
50-
❯ /usr/bin/java -cp /Users/johnmorris/.cache/language_tool_python/LanguageTool-5.6/languagetool-server.jar org.languagetool.server.HTTPServer --help
51-
Usage: HTTPServer [--config propertyFile] [--port|-p port] [--public]
52-
--config FILE a Java property file (one key=value entry per line) with values for:
53-
'maxTextLength' - maximum text length, longer texts will cause an error (optional)
54-
'maxTextHardLength' - maximum text length, applies even to users with a special secret 'token' parameter (optional)
55-
'secretTokenKey' - secret JWT token key, if set by user and valid, maxTextLength can be increased by the user (optional)
56-
'maxCheckTimeMillis' - maximum time in milliseconds allowed per check (optional)
57-
'maxErrorsPerWordRate' - checking will stop with error if there are more rules matches per word (optional)
58-
'maxSpellingSuggestions' - only this many spelling errors will have suggestions for performance reasons (optional,
59-
affects Hunspell-based languages only)
60-
'maxCheckThreads' - maximum number of threads working in parallel (optional)
61-
'cacheSize' - size of internal cache in number of sentences (optional, default: 0)
62-
'cacheTTLSeconds' - how many seconds sentences are kept in cache (optional, default: 300 if 'cacheSize' is set)
63-
'requestLimit' - maximum number of requests per requestLimitPeriodInSeconds (optional)
64-
'requestLimitInBytes' - maximum aggregated size of requests per requestLimitPeriodInSeconds (optional)
65-
'timeoutRequestLimit' - maximum number of timeout request (optional)
66-
'requestLimitPeriodInSeconds' - time period to which requestLimit and timeoutRequestLimit applies (optional)
67-
'languageModel' - a directory with '1grams', '2grams', '3grams' sub directories which contain a Lucene index
68-
each with ngram occurrence counts; activates the confusion rule if supported (optional)
69-
'word2vecModel' - a directory with word2vec data (optional), see
70-
https://github.com/languagetool-org/languagetool/blob/master/languagetool-standalone/CHANGES.md#word2vec
71-
'fasttextModel' - a model file for better language detection (optional), see
72-
https://fasttext.cc/docs/en/language-identification.html
73-
'fasttextBinary' - compiled fasttext executable for language detection (optional), see
74-
https://fasttext.cc/docs/en/support.html
75-
'maxWorkQueueSize' - reject request if request queue gets larger than this (optional)
76-
'rulesFile' - a file containing rules configuration, such as .langugagetool.cfg (optional)
77-
'warmUp' - set to 'true' to warm up server at start, i.e. run a short check with all languages (optional)
78-
'blockedReferrers' - a comma-separated list of HTTP referrers (and 'Origin' headers) that are blocked and will not be served (optional)
79-
'premiumOnly' - activate only the premium rules (optional)
80-
'disabledRuleIds' - a comma-separated list of rule ids that are turned off for this server (optional)
81-
'pipelineCaching' - set to 'true' to enable caching of internal pipelines to improve performance
82-
'maxPipelinePoolSize' - cache size if 'pipelineCaching' is set
83-
'pipelineExpireTimeInSeconds' - time after which pipeline cache items expire
84-
'pipelinePrewarming' - set to 'true' to fill pipeline cache on start (can slow down start a lot)
85-
Spellcheck-only languages: You can add simple spellcheck-only support for languages that LT doesn't
86-
support by defining two optional properties:
87-
'lang-xx' - set name of the language, use language code instead of 'xx', e.g. lang-tr=Turkish
88-
'lang-xx-dictPath' - absolute path to the hunspell .dic file, use language code instead of 'xx', e.g.
89-
lang-tr-dictPath=/path/to/tr.dic. Note that the same directory also needs to
90-
contain a common_words.txt file with the most common 10,000 words (used for better language detection)
91-
--port, -p PRT port to bind to, defaults to 8081 if not specified
92-
--public allow this server process to be connected from anywhere; if not set,
93-
it can only be connected from the computer it was started on
94-
--allow-origin [ORIGIN] set the Access-Control-Allow-Origin header in the HTTP response,
95-
used for direct (non-proxy) JavaScript-based access from browsers.
96-
Example: --allow-origin "https://my-website.org"
97-
Don't set a parameter for `*`, i.e. access from all websites.
98-
--verbose, -v in case of exceptions, log the input text (up to 500 characters)
99-
--languageModel a directory with '1grams', '2grams', '3grams' sub directories (per language)
100-
which contain a Lucene index (optional, overwrites 'languageModel'
101-
parameter in properties files)
102-
--word2vecModel a directory with word2vec data (optional), see
103-
https://github.com/languagetool-org/languagetool/blob/master/languagetool-standalone/CHANGES.md#word2vec
104-
--premiumAlways activate the premium rules even when user has no username/password - useful for API servers
105-
"""

language_tool_python/console_mode.py

-63
This file was deleted.

0 commit comments

Comments
 (0)