Skip to content

Commit

Permalink
CI: modernize
Browse files Browse the repository at this point in the history
  • Loading branch information
BoboTiG committed Jan 20, 2024
1 parent 4215ad7 commit e77d814
Show file tree
Hide file tree
Showing 56 changed files with 237 additions and 604 deletions.
5 changes: 2 additions & 3 deletions check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#
# Small script to ensure quality checks pass before submitting a commit/PR.
#
python -m isort wikidict tests scripts
python -m black wikidict tests scripts
python -m flake8 wikidict tests scripts
python -m ruff format wikidict tests scripts
python -m ruff --fix wikidict tests scripts
python -m mypy wikidict scripts tests
21 changes: 0 additions & 21 deletions mypy.ini

This file was deleted.

54 changes: 54 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
[tool.mypy]
# Ensure we know what we do
warn_redundant_casts = true
warn_unused_ignores = true
warn_unused_configs = true

# Imports management
ignore_missing_imports = true
follow_imports = "normal"

# Ensure full coverage
disallow_untyped_defs = true
disallow_incomplete_defs = true
disallow_untyped_calls = true

# Restrict dynamic typing (a little)
# e.g. `x: List[Any]` or `x: List`
disallow_any_generics = true

# From functions not declared to return Any
warn_return_any = true

[tool.pytest.ini_options]
addopts = """
--cov-report=term-missing:skip-covered
--cov=wikidict
--strict-markers
-vvv
"""

[tool.ruff]
exclude = [
"data",
"docs",
".git",
".github",
".mypy_cache",
".pytest_cache",
".ruff_cache",
"venv",
]
line-length = 120
indent-width = 4
target-version = "py311"

[tool.ruff.lint]
extend-select = ["I"]
fixable = ["ALL"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
skip-magic-trailing-comma = false
line-ending = "auto"
5 changes: 1 addition & 4 deletions requirements-tests.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
-r requirements.txt
black==23.12.1
coverage!=6.3.0
flake8==7.0.0
isort==5.13.2
mypy==1.8.0
pytest==7.4.4
pytest-cov==4.1.0
pytest-dependency==0.6.0
responses==0.24.1
ruff==0.1.14
types-requests==2.31.0.20240106
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
setuptools>=36.2.1
beautifulsoup4==4.12.3
docopt==0.6.2
Jinja2==3.1.3
Expand Down
4 changes: 1 addition & 3 deletions scripts/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ def replace(file: str, data: str) -> bool:
if start == -1 or end == -1:
return False

path.write_text(
f"{original_content[:start]}# START\n{data}{original_content[end:]}"
)
path.write_text(f"{original_content[:start]}# START\n{data}{original_content[end:]}")
return True


Expand Down
4 changes: 1 addition & 3 deletions scripts/all-namespaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@
if kind == "namespaces":
result_discard_last.extend(data[str(id_)]["*"] for id_ in ids)
else:
result_discard_last.extend(
namespace["*"] for namespace in data if namespace["id"] in ids
)
result_discard_last.extend(namespace["*"] for namespace in data if namespace["id"] in ids)
results[locale] = sorted(result_discard_last)

print("namespaces =", end=" ")
Expand Down
4 changes: 1 addition & 3 deletions scripts/ca-labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@

from scripts_utils import get_content

url = (
"https://ca.wiktionary.org/w/index.php?title=M%C3%B2dul:etiquetes/dades&action=raw"
)
url = "https://ca.wiktionary.org/w/index.php?title=M%C3%B2dul:etiquetes/dades&action=raw"
text = get_content(url)

text = text.replace("local ", "")
Expand Down
4 changes: 1 addition & 3 deletions scripts/de-langs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ def process_page(page_url: str, languages: Dict[str, str]) -> str:
li_url = ROOT_URL + link
key = li.text.split(":")[1]
sub_soup = get_soup(li_url)
content = sub_soup.find("div", {"class": "mw-parser-output"}).find(
"p", recursive=False
)
content = sub_soup.find("div", {"class": "mw-parser-output"}).find("p", recursive=False)
value = content.text.strip()
languages[key] = value
a_url = ALIAS_URL.format(li.text)
Expand Down
11 changes: 10 additions & 1 deletion scripts/el-langs.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,16 @@
script += line

exec(script)
languages = {key: {"name": Languages[key].get("name", ""), "frm": Languages[key].get("frm", ""), "from": Languages[key].get("from", ""), "apo": Languages[key].get("apo", ""), "family": Languages[key].get("family", "")} for key in Languages.keys()} # type: ignore # noqa
languages = {
key: {
"name": Languages[key].get("name", ""), # type: ignore[name-defined] # noqa
"frm": Languages[key].get("frm", ""), # type: ignore[name-defined] # noqa
"from": Languages[key].get("from", ""), # type: ignore[name-defined] # noqa
"apo": Languages[key].get("apo", ""), # type: ignore[name-defined] # noqa
"family": Languages[key].get("family", ""), # type: ignore[name-defined] # noqa
}
for key in Languages.keys() # type: ignore[name-defined] # noqa
}
print("from typing import Dict, Union")
print("langs:Dict[str, Dict[str, Union[str, bool]]] = {")
for key, value in sorted(languages.items()):
Expand Down
4 changes: 1 addition & 3 deletions scripts/en-labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,7 @@ def dialect_handler(text: str) -> Dict[str, str]:
text_dialect = text_dialect.replace('"] =', '" :')
text_dialect = text_dialect.replace('"}', '"]')
for r in ["alts", "link", "plain_categories"]:
text_dialect = re.sub(
rf"[ \t]+{r}[\s]*= ", f' "{r}":', text_dialect
)
text_dialect = re.sub(rf"[ \t]+{r}[\s]*= ", f' "{r}":', text_dialect)
text_dialect = text_dialect.replace('{"', '["')
exec(text_dialect, globals())
results: Dict[str, str] = {}
Expand Down
12 changes: 3 additions & 9 deletions scripts/en-langs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
def read_all_lines_etym(lines: List[str]) -> Dict[str, Dict[str, str]]:
# remove aliases
lua_code = "\n".join(lines)
lua_code = re.sub(
r"aliases\s*=\s*{([^}]*)}", "", lua_code, 0, re.MULTILINE | re.DOTALL
)
lua_code = re.sub(r"aliases\s*=\s*{([^}]*)}", "", lua_code, 0, re.MULTILINE | re.DOTALL)
lines = lua_code.split("\n")

pattern = re.compile(r"(\w*)\s*=\s*([{|\"].*[}|\"])")
Expand Down Expand Up @@ -82,9 +80,7 @@ def read_all_lines_lang(lines: List[str]) -> Dict[str, str]:
def get_content(url: str) -> List[str]:
soup = get_soup(url)
content_div = soup.find("div", "mw-parser-output")
content_div = content_div.findChild(
"div", {"class": "mw-highlight"}, recursive=False
)
content_div = content_div.findChild("div", {"class": "mw-highlight"}, recursive=False)
return str(content_div.text).split("\n")


Expand All @@ -104,9 +100,7 @@ def process_lang_page(url: str) -> Dict[str, str]:
languages[key] = list(val.keys())[0]

languages |= process_lang_page("https://en.wiktionary.org/wiki/Module:languages/data/2")
languages |= process_lang_page(
"https://en.wiktionary.org/wiki/Module:languages/data/exceptional"
)
languages |= process_lang_page("https://en.wiktionary.org/wiki/Module:languages/data/exceptional")

for letter in "abcdefghijklmnopqrstuvwxyz":
url = f"https://en.wiktionary.org/wiki/Module:languages/data/3/{letter}"
Expand Down
4 changes: 1 addition & 3 deletions scripts/es-campos-semanticos.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

from scripts_utils import get_soup

START_URL = (
"https://es.wiktionary.org/wiki/Categor%C3%ADa:Plantillas_de_campo_sem%C3%A1ntico"
)
START_URL = "https://es.wiktionary.org/wiki/Categor%C3%ADa:Plantillas_de_campo_sem%C3%A1ntico"
ROOT_URL = "https://es.wiktionary.org/"
ALIAS_URL = "https://es.wiktionary.org/w/index.php?title=Especial:LoQueEnlazaAqu%C3%AD/{}&hidetrans=1&hidelinks=1"
NEXTPAGE_TEXT = "página siguiente"
Expand Down
4 changes: 1 addition & 3 deletions scripts/fr-domain-templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from scripts_utils import get_soup

ROOT = "https://fr.wiktionary.org"
START_URL = (
"https://fr.wiktionary.org/wiki/Cat%C3%A9gorie:Mod%C3%A8les_de_th%C3%A9matique"
)
START_URL = "https://fr.wiktionary.org/wiki/Cat%C3%A9gorie:Mod%C3%A8les_de_th%C3%A9matique"
NEXTPAGE_TEXT = "page suivante"
ALIAS_URL = "https://fr.wiktionary.org/w/index.php?title=Sp%C3%A9cial:Pages_li%C3%A9es/Mod%C3%A8le:{}&limit=10&hidetrans=1&hidelinks=1" # noqa

Expand Down
7 changes: 5 additions & 2 deletions scripts/pt-langs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@

pattern = r"<li><a[^>]+>([^\<]+)</a>: <a[^>]+>([^\<]+)</a>"
matches = re.findall(pattern, content)
seen = set()
print("langs = {")
for lang, iso in sorted(matches, key=itemgetter(1)):
print(f' "{iso}": "{lang}",')
print(f"}} # {len(matches):,}")
if iso not in seen:
print(f' "{iso}": "{lang}",')
seen.add(iso)
print(f"}} # {len(seen):,}")
4 changes: 1 addition & 3 deletions scripts/scripts_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
from requests.exceptions import HTTPError, RequestException


def get_content(
url: str, max_retries: int = 5, sleep_time: int = 5, as_json: bool = False
) -> str | Dict[str, Any]:
def get_content(url: str, max_retries: int = 5, sleep_time: int = 5, as_json: bool = False) -> str | Dict[str, Any]:
"""Fetch given *url* content with retries mechanism."""
retry = 0
while retry < max_retries:
Expand Down
27 changes: 0 additions & 27 deletions setup.cfg

This file was deleted.

4 changes: 1 addition & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@
os.environ["CWD"] = str(Path(__file__).parent)


XML = (
'<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.10/" xml:lang="{locale}">'
)
XML = '<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.10/" xml:lang="{locale}">'
PAGE_XML = """<page>
<title>{word}</title>
<ns>0</ns>
Expand Down
8 changes: 2 additions & 6 deletions tests/test_2_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ def test_render_word(page: Callable[[str, str], str]) -> None:
assert words["π"]


def test_render_word_sv_with_almost_empty_definition(
page: Callable[[str, str], str]
) -> None:
def test_render_word_sv_with_almost_empty_definition(page: Callable[[str, str], str]) -> None:
word = ["Götet", page("Götet", "sv")]
words: Words = {}
render.render_word(word, words, "sv")
Expand Down Expand Up @@ -65,9 +63,7 @@ def test_render_word_with_empty_subdefinition(page: Callable[[str, str], str]) -
assert subsubdefs[0]


def test_find_section_definitions_and_es_replace_defs_list_with_numbered_lists() -> (
None
):
def test_find_section_definitions_and_es_replace_defs_list_with_numbered_lists() -> None:
section = Section(
"=== {{sustantivo propio|es|género=femenino}} ===\n"
";1 archipiélago de 2&nbsp;000 peñascos.\n"
Expand Down
20 changes: 5 additions & 15 deletions tests/test_3_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
WORDS = {
"empty": Word.empty(),
"foo": Word(["pron"], ["gender"], ["etyl"], ["def 1", ("sdef 1",)], []),
"foos": Word(
["pron"], ["gender"], ["etyl"], ["def 1", ("sdef 1", ("ssdef 1",))], ["baz"]
),
"foos": Word(["pron"], ["gender"], ["etyl"], ["def 1", ("sdef 1", ("ssdef 1",))], ["baz"]),
"baz": Word(["pron"], ["gender"], ["etyl"], ["def 1", ("sdef 1",)], ["foobar"]),
"empty1": Word([], [], [], [], ["foo"]),
"empty2": Word([], [], [], [], ["empty1"]),
Expand Down Expand Up @@ -158,9 +156,7 @@ def test_no_json_file() -> None:
(convert.KoboFormat, "dicthtml-fr-fr-noetym.zip", False),
],
)
def test_generate_primary_dict(
formatter: Type[convert.BaseFormat], filename: str, include_etymology: bool
) -> None:
def test_generate_primary_dict(formatter: Type[convert.BaseFormat], filename: str, include_etymology: bool) -> None:
output_dir = Path(os.environ["CWD"]) / "data" / "fr"
variants = convert.make_variants(WORDS)
convert.run_formatter(
Expand Down Expand Up @@ -191,9 +187,7 @@ def test_generate_primary_dict(
"test_generate_primary_dict[DictFileFormat-dict-fr-fr-noetym.df]",
]
)
def test_generate_secondary_dict(
formatter: Type[convert.BaseFormat], filename: str, include_etymology: bool
) -> None:
def test_generate_secondary_dict(formatter: Type[convert.BaseFormat], filename: str, include_etymology: bool) -> None:
output_dir = Path(os.environ["CWD"]) / "data" / "fr"
convert.run_formatter(
formatter,
Expand Down Expand Up @@ -255,10 +249,6 @@ def test_word_rendering(
include_etymology=include_etymology,
)

kwargs = (
{"name": "mu", "words": WORDS} if isinstance(cls, convert.KoboFormat) else {}
)
content = next(
cls.handle_word("Multiple Etymologies", WORDS["Multiple Etymologies"], **kwargs)
)
kwargs = {"name": "mu", "words": WORDS} if isinstance(cls, convert.KoboFormat) else {}
content = next(cls.handle_word("Multiple Etymologies", WORDS["Multiple Etymologies"], **kwargs))
assert content == expected
4 changes: 1 addition & 3 deletions tests/test_4_check_word.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@


@pytest.fixture
def craft_urls(
html: Callable[[str, str], str], page: Callable[[str, str], str]
) -> Callable[[str, str], str]:
def craft_urls(html: Callable[[str, str], str], page: Callable[[str, str], str]) -> Callable[[str, str], str]:
def _craft_urls(locale: str, word: str) -> str:
responses.add(
responses.GET,
Expand Down
5 changes: 1 addition & 4 deletions tests/test_4_check_words.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@ def test_errors() -> None:
word_count = 39
return_value = 42
with patch.object(check_words, "local_check", return_value=return_value):
assert (
check_words.main("fr", word_count, True, "", "")
== return_value * word_count
)
assert check_words.main("fr", word_count, True, "", "") == return_value * word_count


def test_no_json_file() -> None:
Expand Down
4 changes: 1 addition & 3 deletions tests/test_el.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@
"λαμβάνω",
["/laɱˈva.no/"],
[],
[
"<b>λαμβάνω</b> < (διαχρονικό δάνειο) <i>αρχαία ελληνική</i> λαμβάνω < <i>(Ετυμ)</i> *<i>sleh₂gʷ</i>-"
],
["<b>λαμβάνω</b> < (διαχρονικό δάνειο) <i>αρχαία ελληνική</i> λαμβάνω < <i>(Ετυμ)</i> *<i>sleh₂gʷ</i>-"],
[
"παίρνω, δέχομαι",
"εντοπίζω επιθυμητό σήμα (όπως από ασύρματο)",
Expand Down
Loading

0 comments on commit e77d814

Please sign in to comment.