-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtlon-fix.el
427 lines (378 loc) · 14.4 KB
/
tlon-fix.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
;;; tlon-fix.el --- Manual & auto fix functionality -*- lexical-binding: t; fill-column: 80 -*-
;; Copyright (C) 2025
;; Author: Pablo Stafforini
;; URL: https://github.com/tlon-team/tlon
;; This file is NOT part of GNU Emacs.
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; Manual & auto fix functionality
;;; Code:
(require 'tlon-core)
;;;;;; Variables
(defconst tlon-fix-french-translation
'((") - \\[" . ") • [")
(" - " . " — ")
(" — " . " - ")
("## Pour savoir plus" . "## Pour en savoir plus")
("type :" . "type:")
("title :" . "title:")
("\ntitre :" . "\ntitle:")
("\ntitre:" . "\ntitle:")
("\\*\\*L’" . "L’**")
("\\*\\*L'" . "L’**")
("\\*\\*La " . "La **")
("\\*\\*Le " . "Le **")
("\\*\\*Les " . "Les **")
("\\*\\*Un " . "Un **")
("\\*\\*Une " . "Une **")
("## Entrées associées \\." . "## Entrées associées")
("## Entrées associées\\." . "## Entrées associées")
(" \"" . " « ")
;; ("\" " . " » ")
(" » />" . "\" />")
(" » />" . "\" />")
(" » short />" . "\" short />")
(" » court />" . "\" short />")
("\" court />" . "\" short />")
("=« " . "=\"")
(" » />" . "\" />")
("\"\\." . " ».")
("\"\\?" . " »?")
("\"!" . " »!")
("\":" . " »:")
("\";" . " »;")
("\"," . " »,")
("\")" . " »)")
("(\"" . "(« ")
("(\"" . "(« ")
("] : <Footnote />" . "]: <Footnote />")
("] : <Sidenote />" . "]: <Sidenote />")
("] (\\." . "](.")
("] (htt" . "](htt")
("(\\. /" . "(./")
("\\. md)" . ".md)")
("\\. \\./" . "../")
("\\.\\. /" . "../")
(" :" . " :")
(" ;" . " ;")
(" \\?" . " ?")
(" !" . " !")
(" %" . " %")
("\\[\\[" . "[")
("\\]\\]" . "]")
("{“" . "\"")
("{« " . "\"")
("{«" . "\"")
("”}" . "\"")
(" »}" . "\"")
("»}" . "\"")
(">\\. />" . " />")
("\"})\\. />". "\" />")
("\"} })\\. />". "\" />")
("={" . "=")
("} />" . " />")
("<Note />" . "<Footnote />")
("\"}\\. />" . "\" />")
("\"\\[\\^" . " »[^")
("« " . "« ")
(" »" . " »")
("«" . "« ")
("»" . " »")
(" " . " "))
"Search and replace pairs for fixing common issues in French translations.")
(defconst tlon-fix-italian-translation
'((") - \\[" . ") • [")
(" - " . " — ")
(" — " . " - ")
;; ("tipo:" . "type:")
;; ("titolo:" . "title:")
;; ("\ntitre:" . "\ntitle:")
;; ("\\*\\*Il " . "Il **")
;; ("\\*\\*L’" . "L’**")
;; ("\\*\\*L'" . "L’**")
;; ("\\*\\*Un’" . "Un’**")
;; ("\\*\\*Un'" . "Un’**")
;; ("\\*\\*La " . "La **")
;; ("\\*\\*Le " . "Le **")
;; ("\\*\\*Gli " . "Gli **")
;; ("\\*\\*Un " . "Un **")
;; ("\\*\\*Una " . "Una **")
("## Voci correlate \\." . "## Voci correlate")
("## Voci correlate\\." . "## Voci correlate")
("] (\\." . "](.")
("] (htt" . "](htt")
("\\. md)" . ".md)")
("\\. \\./" . "../")
("\\.\\. /" . "../")
("(\\. /" . "(./")
("\\[\\[" . "[")
("\\]\\]" . "]")
("{“" . "{\"")
("\”}" . "\"}")
(">\\. />" . " />")
("\"})\\. />". "\"} />")
("\"} })\\. />". "\"} />")
("<Nota />" . "<Footnote />")
("\"}\\. />" . "\"} />"))
"Search and replace pairs for fixing common issues in Italian translations.")
(defconst tlon-fix-translations
`(("fr" . ,tlon-fix-french-translation)
("it" . ,tlon-fix-italian-translation)
;; ("es" . ,tlon-fix-spanish-translation)
;; ("de" . ,tlon-fix-german-translation)
)
"Alist of language codes and their corresponding fix variable.")
(defconst tlon-fix-numerals-sans-separator
"\\([ \\$£€][[:digit:]]\\{1,3\\}\\)\\([[:digit:]]\\{3\\}[\\.,:;!\\? ]\\)"
"Regular expression matching numerals without thousands separators.")
;;;; Functions
;;;;; autofix
(defun tlon-autofix (regexp-list newtext)
"Replace matches in REGEXP-LIST with NEWTEXT."
(widen)
(save-excursion
(dolist (regexp regexp-list)
(goto-char (point-min))
(while (re-search-forward regexp nil t)
(replace-match newtext)))))
(defun tlon-autofix-curly-quotes ()
"Replace straight quotes with curly quotes when appropriate."
(tlon-autofix '("\\([^\\.\\?]\"\\)\\[")
"\\1["))
(defun tlon-autofix-footnote-punctuation ()
"Place footnotes after punctuation mark."
(let ((language (tlon-get-language-in-file)))
(when (member language '("en" "es")) ; usage varies across languages
(tlon-autofix '("\\(.\\)\\(\\[\\^[[:digit:]]\\{1,3\\}\\]\\)\\([,\\.:;\\?!]\\)")
"\\1\\3\\2")
(tlon-autofix-footnote-punctuation-amend))))
(defun tlon-autofix-footnote-punctuation-amend ()
"Reverse undesired effects of `tlon-autofix-footnote-punctuation'.
Ideally the function should be amended so that it doesn’t introduce these
effects to begin with."
(tlon-autofix '("\\[\\[\\^\\([0-9]+\\)\\]\\^\\([0-9]+\\)\\]" ; fixes `[[^1]^2]'
"\\[\\^\\[\\^\\([0-9]+\\)\\]\\([0-9]+\\)\\]") ; fixes `[^[^1]2]'
"[^\\1][^\\2]"))
(defun tlon-autofix-periods-in-headings ()
"Remove periods at the end of headings."
(tlon-autofix '("^\\(#\\{2,6\\}.*\\)\\.$")
"\\1"))
(defun tlon-autofix-percent-signs ()
"Add non-breaking space before percent sign.
The character used is U+202F NARROW NO-BREAK SPACE."
(tlon-autofix '("\\([[:digit:],()]+\\)%\\([^\";[:alnum:]]\\)"
"\\([[:digit:],()]+\\) %\\([^\";[:alnum:]]\\)")
"\\1 %\\2"))
(defun tlon-autofix-thin-spaces ()
"Replace thin spaces with narrow spaces.
This replaces the character U+2009 THIN SPACE with U+202F NARROW NO-BREAK SPACE.
Thin and narrow spaces have exactly the same width, but only the latter are
non-breaking."
(tlon-autofix '(" ")
" "))
(defun tlon-autofix-superscripts ()
"Replace carets with `<sup'> tags."
(tlon-autofix '("\\^\\(?1:[[:digit:]]+\\)\\^")
"<sup>\\1</sup>"))
(defvar markdown-regex-link-inline)
(defvar ffap-url-regexp)
(autoload 'tlon-md-get-tag-pattern "tlon-md")
(defun tlon-autofix-replace-thousands-separators (&optional separator)
"Replace thousands SEPARATOR (typically, a comma or a period) with narrow spaces.
Do not perform these replacements if the terms occur in math formulae, links, or
match certain words that should not be altered, such as \"80,000 Hours\"."
(interactive)
(require 'ffap)
(let* ((exclusions `(,(tlon-md-get-tag-pattern "Math")
;; ,markdown-regex-link-inline ; why is this included?
,ffap-url-regexp
"80,000 Hours"))
(exclusion-patterns (mapconcat #'identity exclusions "\\|"))
protected-ranges)
(save-excursion
(goto-char (point-min))
(while (re-search-forward exclusion-patterns nil t)
(push (cons (match-beginning 0) (match-end 0)) protected-ranges))
(let ((separator (or separator (tlon-get-thousands-separator))))
(tlon-number-separators-perform-replacements separator protected-ranges)))))
(defun tlon-number-separators-perform-replacements (separator protected-ranges)
"Replace thousands SEPARATOR with default separator, except in PROTECTED-RANGES."
(goto-char (point-min))
(let* ((digit-pattern (tlon-get-number-separator-pattern nil separator nil 'bounded)))
(while (re-search-forward digit-pattern nil t)
(unless (tlon-is-in-protected-range-p (match-beginning 0) (match-end 0) protected-ranges)
(replace-match (replace-regexp-in-string (regexp-quote separator)
tlon-default-thousands-separator
(match-string-no-properties 1)))))))
(defun tlon-is-in-protected-range-p (start end protected-ranges)
"Check if range from START to END overlaps with any PROTECTED-RANGES."
(cl-loop for (pstart . pend) in protected-ranges
thereis (not (or (< end pstart) (> start pend)))))
;;;###autoload
(defun tlon-autofix-all ()
"Run all the `tlon-autofix' commands."
(interactive)
(tlon-autofix-curly-quotes)
(tlon-autofix-footnote-punctuation)
(tlon-autofix-periods-in-headings)
(tlon-autofix-percent-signs)
(tlon-autofix-replace-thousands-separators)
(tlon-autofix-thin-spaces)
(tlon-autofix-superscripts)
(let ((after-save-hook (remove #'tlon-autofix-all after-save-hook)))
(save-buffer)
(add-hook 'after-save-hook #'tlon-autofix-all nil t)))
;;;;; manual-fix
(defun tlon-manual-fix (regexp-list newtext &optional keep-case)
"Prompt user to replace matches in REGEXP-LIST with NEWTEXT.
If KEEP-CASE is non-nil, keep the case of the matched text."
(widen)
(save-excursion
(point-min)
(dolist (regexp regexp-list)
(goto-char (point-min))
(let ((case-replace keep-case))
(query-replace-regexp regexp newtext nil (point-min) (point-max))))))
(defun tlon-manual-fix-em-dashes ()
"Prompt the user to replace hyphens with em dashes, when appropriate."
(tlon-manual-fix '("\\([^ ][ ,)]\\)-\\([(\"[:alnum:]]\\)" ; opening dash
"\\([)\\.%\"[:alnum:]]\\)-\\([ ,(]\\)" ; closing dash
"\\([^ >)] \\)-\\( \\)")
"\\1—\\2"))
(defun tlon-manual-fix-number-ranges ()
"Prompt the user to replace hyphens with em dashes, when appropriate."
(tlon-manual-fix '("\\([ \\[]\\)\\([[:digit:]]\\{1,12\\}\\)-\\([[:digit:]]\\{1,12\\}\\)\\([,.:;?! ]\\)")
"\\1\\2–\\3\\4"))
(defun tlon-manual-fix-roman-numerals ()
"Prompt the user to add small caps tags to roman numerals."
(tlon-manual-fix '(" \\b\\([IVXLCDM]+\\)\\b")
" <abbr>\\1</abbr>"))
(defun tlon-manual-fix-add-thousands-separators ()
"Prompt the user to add thousands separators to numerals that lack them.
Unlike `tlon-autofix-replace-thousands-separators', which replaces an existing
separator, this function adds a separator where it is missing, and has to be run
manually because some numerals, such as dates, should not be separated."
(interactive)
(tlon-manual-fix `(,tlon-fix-numerals-sans-separator)
"\\1 \\2"))
(defun tlon-manual-fix-narrow-spaces ()
"Prompt user to add a narrow space between abbreviations followed by a period.
The character used is U+202F NARROW NO-BREAK SPACE."
(tlon-manual-fix '("\\([A-Z]\\.\\)\\([A-Z]\\)")
"\\1 \\2"))
(defun tlon-manual-fix-solo ()
"Prompt the user to replace `sólo' with `solo'."
(tlon-manual-fix '("sólo")
"solo"
'keep-case))
(defun tlon-manual-fix-podcast ()
"Prompt the user to replace `podcast' with `pódcast'.
Enchant/Aspell do not make the correct suggestion, so it's easier to use a
dedicated function."
(tlon-manual-fix '(" podcast")
" pódcast"
'keep-case))
(defvar markdown-regex-italic)
(declare-function tlon-md-return-tag "tlon-md")
(defun tlon-manual-fix-emphasis ()
"Prompt the user to add an `emphasis' tag around text enclosed in quotes."
(interactive)
(save-excursion
(goto-char (point-min))
(let (content replacement replace-p)
(while (re-search-forward markdown-regex-italic nil t)
(save-match-data
(setq content (match-string-no-properties 0))
(setq replacement (match-string-no-properties 2))
(if (yes-or-no-p (format "Enclose '%s' in `emphasis' tag?" content))
(setq replace-p t)
(setq replace-p nil)))
(when replace-p
(replace-match (format "%s%s" (match-string-no-properties 1)
(tlon-md-return-tag "emphasis" '("moderate") replacement 'get-values))
t t nil))))))
(declare-function thing-at-point-looking-at "thingatpt")
(defun tlon-manual-fix-quote ()
"Prompt the user to add a `quote' tag around text enclosed in quotes."
(interactive)
(goto-char (point-min))
(let (content replacement replace-p)
(while (re-search-forward "[\"“'‘\\*]\\(?1:.*?\\)[\"”'’\\*]" nil t)
(save-match-data
(setq content (match-string-no-properties 0))
(setq replacement (match-string-no-properties 1))
(unless (thing-at-point-looking-at "<.**>")
(if (yes-or-no-p (format "Enclose '%s' in `q' tag?" content))
(setq replace-p t)
(setq replace-p nil))))
(when replace-p
(replace-match (tlon-md-return-tag "q" nil replacement 'get-values)
t t nil)))))
;; TODO: write function
(defun tlon-manual-fix-foreign-words ()
"Prompt the user to set the language of foreign words."
(interactive))
(defun tlon-manual-fix-all ()
"Run all the `tlon-manual-fix' commands."
(interactive)
(tlon-manual-fix-em-dashes)
(tlon-manual-fix-number-ranges)
(tlon-manual-fix-roman-numerals)
(tlon-manual-fix-add-thousands-separators)
(tlon-manual-fix-narrow-spaces)
(tlon-manual-fix-solo)
(tlon-manual-fix-podcast)
(tlon-manual-fix-emphasis)
(tlon-manual-fix-quote)
(tlon-manual-fix-foreign-words))
(defun tlon-fix-internet-archive-urls ()
"Convert Internet Archive URLs in the current buffer into their originals."
(interactive)
(save-excursion
(goto-char (point-min))
(let ((cnt 0))
(while (re-search-forward "https://web\.archive\.org/web/[[:digit:]]*?/" nil t)
(replace-match "")
(setq cnt (1+ cnt)))
(message "Done. %d URLs were fixed." cnt))))
;;;;;; Language-specific
(declare-function dired-get-marked-files "dired")
(defun tlon-fix-translation (lang)
"Fix common issues in translations in LANG."
(if-let ((files (or (dired-get-marked-files) (list (buffer-file-name)))))
(dolist (file files)
(with-current-buffer (find-file-noselect file)
(tlon-fix-translation-in-file lang)
(message "Fixed translation in %s" file)))
(user-error "Buffer is not visiting a file")))
(defun tlon-fix-translation-in-file (lang)
"Fix common issues in translations in LANG."
(let ((var (alist-get lang tlon-fix-translations nil nil #'string=))
(case-fold-search nil)) ;; Make sure searches are case-sensitive
(dolist (cons var)
(let ((search (car cons))
(replace (cdr cons)))
(tlon-autofix (list search) replace)))))
;;;;;;; French
(defun tlon-fix-french-translation ()
"Fix common issues in French translations."
(interactive)
(tlon-fix-translation "fr"))
;;;;;;; Italian
(defun tlon-fix-italian-translation ()
"Fix common issues in Italian translations."
(interactive)
(tlon-fix-translation "it"))
(provide 'tlon-fix)
;;; tlon-fix.el ends here