4
4
import os
5
5
import tempfile
6
6
7
# Allowed configuration keys for LanguageTool.
# The names follow the entries that `HTTPServer --help` lists for the
# `--config` property file (one key=value entry per line).
# NOTE(review): 'secretTokenKey' and the per-language 'lang-xx' /
# 'lang-xx-dictPath' entries from that listing are not accepted here --
# confirm whether that is intentional before adding them.
ALLOWED_CONFIG_KEYS = {
    # Text size, time and error limits.
    'maxTextLength',
    'maxTextHardLength',
    'maxCheckTimeMillis',
    'maxErrorsPerWordRate',
    'maxSpellingSuggestions',
    'maxCheckThreads',
    # Sentence cache.
    'cacheSize',
    'cacheTTLSeconds',
    # Request throttling.
    'requestLimit',
    'requestLimitInBytes',
    'timeoutRequestLimit',
    'requestLimitPeriodInSeconds',
    'maxWorkQueueSize',
    # Optional models and rule files.
    'languageModel',
    'word2vecModel',
    'fasttextModel',
    'fasttextBinary',
    'rulesFile',
    'warmUp',
    # Access control and rule selection.
    'blockedReferrers',
    'premiumOnly',
    'disabledRuleIds',
    # Pipeline caching.
    'pipelineCaching',
    'maxPipelinePoolSize',
    'pipelineExpireTimeInSeconds',
    'pipelinePrewarming',
}
16
+
15
17
class LanguageToolConfig :
18
+ """
19
+ Configuration class for LanguageTool.
20
+
21
+ :param config: Dictionary containing configuration keys and values.
22
+ :type config: Dict[str, Any]
23
+
24
+ Attributes:
25
+ config (Dict[str, Any]): Dictionary containing configuration keys and values.
26
+ path (str): Path to the temporary file storing the configuration.
27
+ """
16
28
config : Dict [str , Any ]
17
29
path : str
30
+
18
31
def __init__ (self , config : Dict [str , Any ]):
32
+ """
33
+ Initialize the LanguageToolConfig object.
34
+ """
19
35
assert set (config .keys ()) <= ALLOWED_CONFIG_KEYS , f"unexpected keys in config: { set (config .keys ()) - ALLOWED_CONFIG_KEYS } "
20
36
assert len (config ), "config cannot be empty"
21
37
self .config = config
@@ -31,9 +47,15 @@ def __init__(self, config: Dict[str, Any]):
31
47
self .path = self ._create_temp_file ()
32
48
33
49
def _create_temp_file(self) -> str:
    """
    Create a temporary file to store the configuration.

    Every entry of ``self.config`` is written as one ``key=value`` line,
    encoded with the default ``str.encode()`` codec. The file is closed
    before the path is returned, so the content is flushed and can be read
    by another process straight away. Removal of the file is registered
    with ``atexit``.

    :return: Path to the temporary file.
    :rtype: str
    """
    # delete=False keeps the file on disk once the handle is closed;
    # leaving the ``with`` block flushes and closes it.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        # Write key=value entries as lines in temporary file.
        tmp_file.writelines(
            f'{key}={value}\n'.encode()
            for key, value in self.config.items()
        )
        config_path = tmp_file.name

    # Remove file when program exits. Registering the path (rather than a
    # closure over the file object) avoids keeping the handle alive.
    atexit.register(os.unlink, config_path)

    return config_path
46
-
47
-
48
-
49
- """
50
- ❯ /usr/bin/java -cp /Users/johnmorris/.cache/language_tool_python/LanguageTool-5.6/languagetool-server.jar org.languagetool.server.HTTPServer --help
51
- Usage: HTTPServer [--config propertyFile] [--port|-p port] [--public]
52
- --config FILE a Java property file (one key=value entry per line) with values for:
53
- 'maxTextLength' - maximum text length, longer texts will cause an error (optional)
54
- 'maxTextHardLength' - maximum text length, applies even to users with a special secret 'token' parameter (optional)
55
- 'secretTokenKey' - secret JWT token key, if set by user and valid, maxTextLength can be increased by the user (optional)
56
- 'maxCheckTimeMillis' - maximum time in milliseconds allowed per check (optional)
57
- 'maxErrorsPerWordRate' - checking will stop with error if there are more rules matches per word (optional)
58
- 'maxSpellingSuggestions' - only this many spelling errors will have suggestions for performance reasons (optional,
59
- affects Hunspell-based languages only)
60
- 'maxCheckThreads' - maximum number of threads working in parallel (optional)
61
- 'cacheSize' - size of internal cache in number of sentences (optional, default: 0)
62
- 'cacheTTLSeconds' - how many seconds sentences are kept in cache (optional, default: 300 if 'cacheSize' is set)
63
- 'requestLimit' - maximum number of requests per requestLimitPeriodInSeconds (optional)
64
- 'requestLimitInBytes' - maximum aggregated size of requests per requestLimitPeriodInSeconds (optional)
65
- 'timeoutRequestLimit' - maximum number of timeout request (optional)
66
- 'requestLimitPeriodInSeconds' - time period to which requestLimit and timeoutRequestLimit applies (optional)
67
- 'languageModel' - a directory with '1grams', '2grams', '3grams' sub directories which contain a Lucene index
68
- each with ngram occurrence counts; activates the confusion rule if supported (optional)
69
- 'word2vecModel' - a directory with word2vec data (optional), see
70
- https://github.com/languagetool-org/languagetool/blob/master/languagetool-standalone/CHANGES.md#word2vec
71
- 'fasttextModel' - a model file for better language detection (optional), see
72
- https://fasttext.cc/docs/en/language-identification.html
73
- 'fasttextBinary' - compiled fasttext executable for language detection (optional), see
74
- https://fasttext.cc/docs/en/support.html
75
- 'maxWorkQueueSize' - reject request if request queue gets larger than this (optional)
76
- 'rulesFile' - a file containing rules configuration, such as .langugagetool.cfg (optional)
77
- 'warmUp' - set to 'true' to warm up server at start, i.e. run a short check with all languages (optional)
78
- 'blockedReferrers' - a comma-separated list of HTTP referrers (and 'Origin' headers) that are blocked and will not be served (optional)
79
- 'premiumOnly' - activate only the premium rules (optional)
80
- 'disabledRuleIds' - a comma-separated list of rule ids that are turned off for this server (optional)
81
- 'pipelineCaching' - set to 'true' to enable caching of internal pipelines to improve performance
82
- 'maxPipelinePoolSize' - cache size if 'pipelineCaching' is set
83
- 'pipelineExpireTimeInSeconds' - time after which pipeline cache items expire
84
- 'pipelinePrewarming' - set to 'true' to fill pipeline cache on start (can slow down start a lot)
85
- Spellcheck-only languages: You can add simple spellcheck-only support for languages that LT doesn't
86
- support by defining two optional properties:
87
- 'lang-xx' - set name of the language, use language code instead of 'xx', e.g. lang-tr=Turkish
88
- 'lang-xx-dictPath' - absolute path to the hunspell .dic file, use language code instead of 'xx', e.g.
89
- lang-tr-dictPath=/path/to/tr.dic. Note that the same directory also needs to
90
- contain a common_words.txt file with the most common 10,000 words (used for better language detection)
91
- --port, -p PRT port to bind to, defaults to 8081 if not specified
92
- --public allow this server process to be connected from anywhere; if not set,
93
- it can only be connected from the computer it was started on
94
- --allow-origin [ORIGIN] set the Access-Control-Allow-Origin header in the HTTP response,
95
- used for direct (non-proxy) JavaScript-based access from browsers.
96
- Example: --allow-origin "https://my-website.org"
97
- Don't set a parameter for `*`, i.e. access from all websites.
98
- --verbose, -v in case of exceptions, log the input text (up to 500 characters)
99
- --languageModel a directory with '1grams', '2grams', '3grams' sub directories (per language)
100
- which contain a Lucene index (optional, overwrites 'languageModel'
101
- parameter in properties files)
102
- --word2vecModel a directory with word2vec data (optional), see
103
- https://github.com/languagetool-org/languagetool/blob/master/languagetool-standalone/CHANGES.md#word2vec
104
- --premiumAlways activate the premium rules even when user has no username/password - useful for API servers
105
- """
0 commit comments