Skip to content

Commit

Permalink
Merge pull request #540 from Ktyby21/feature/add-import-sorting
Browse files: browse the repository at this point in the history
Add isort for import sorting + EOF fixes
  • Loading branch information
andrewtavis authored Jan 4, 2025
2 parents cb2777b + 6dcc747 commit d49edce
Show file tree
Hide file tree
Showing 12 changed files with 83 additions and 73 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/pr_ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ jobs:
echo PATH=$PATH >> $GITHUB_ENV
- name: Run ruff format - Formatting check
run: ruff check .
run: ruff check . --fix

- name: Run ruff - Linting and import sorting check
if: always()
- name: Run Ruff linting - Import sorting and linting
run: ruff check .
15 changes: 8 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,6 @@ repos:
- id: check-yaml
# - id: check-added-large-files

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.0
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- id: ruff-format

- repo: https://github.com/tcort/markdown-link-check
rev: v3.13.6
hooks:
Expand All @@ -24,3 +17,11 @@ repos:
rev: v1.0.0
hooks:
- id: sphinx-lint

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.5
hooks:
- id: ruff
args: [--fix]

- id: ruff-format
4 changes: 1 addition & 3 deletions src/scribe_data/cli/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
import os
from pathlib import Path

from scribe_data.cli.cli_utils import (
correct_data_type,
)
from scribe_data.cli.cli_utils import correct_data_type
from scribe_data.utils import (
LANGUAGE_DATA_EXTRACTION_DIR,
format_sublanguage_name,
Expand Down
17 changes: 10 additions & 7 deletions src/scribe_data/load/data_to_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,16 @@ def data_to_sqlite(
) -> None:
PATH_TO_SCRIBE_DATA = Path(__file__).parent.parent

with open(
PATH_TO_SCRIBE_DATA / "resources" / "language_metadata.json",
encoding="utf-8",
) as f_languages, open(
PATH_TO_SCRIBE_DATA / "resources" / "data_type_metadata.json",
encoding="utf-8",
) as f_data_types:
with (
open(
PATH_TO_SCRIBE_DATA / "resources" / "language_metadata.json",
encoding="utf-8",
) as f_languages,
open(
PATH_TO_SCRIBE_DATA / "resources" / "data_type_metadata.json",
encoding="utf-8",
) as f_data_types,
):
current_language_data = json.load(f_languages)
data_types = json.load(f_data_types).keys()

Expand Down
9 changes: 2 additions & 7 deletions src/scribe_data/unicode/process_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,8 @@

from tqdm.auto import tqdm

from scribe_data.unicode.unicode_utils import (
get_emoji_codes_to_ignore,
)
from scribe_data.utils import (
DEFAULT_JSON_EXPORT_DIR,
get_language_iso,
)
from scribe_data.unicode.unicode_utils import get_emoji_codes_to_ignore
from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR, get_language_iso

emoji_codes_to_ignore = get_emoji_codes_to_ignore()

Expand Down
8 changes: 5 additions & 3 deletions src/scribe_data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,11 @@ def _load_json(package_path: str, file_name: str) -> Any:
-------
A python entity representing the JSON content.
"""
with resources.files(package_path).joinpath(file_name).open(
encoding="utf-8"
) as in_stream:
with (
resources.files(package_path)
.joinpath(file_name)
.open(encoding="utf-8") as in_stream
):
return json.load(in_stream)


Expand Down
12 changes: 7 additions & 5 deletions src/scribe_data/wikipedia/gen_autosuggestions.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"\n",
"from tqdm.auto import tqdm\n",
"from IPython.core.display import display, HTML\n",
"\n",
"display(HTML(\"<style>.container { width:99% !important; }</style>\"))"
]
},
Expand Down Expand Up @@ -134,8 +135,8 @@
"files = download_wiki(\n",
" language=language,\n",
" target_dir=f\"./{language_abbr}wiki_dump\",\n",
" file_limit=None, # None is all files\n",
" dump_id=\"20220920\"\n",
" file_limit=None, # None is all files\n",
" dump_id=\"20220920\",\n",
")\n",
"print(f\"Number of files: {len(files)}\")"
]
Expand All @@ -156,7 +157,7 @@
" output_path=f\"./{language_abbr}wiki.ndjson\",\n",
" input_dir=f\"./{language_abbr}wiki_dump\",\n",
" partitions_dir=f\"./{language_abbr}wiki_partitions\",\n",
" article_limit=None, # None is all articles\n",
" article_limit=None, # None is all articles\n",
" delete_parsed_files=True,\n",
" multicore=True,\n",
" verbose=True,\n",
Expand Down Expand Up @@ -184,7 +185,8 @@
"source": [
"with open(f\"./{language_abbr}wiki.ndjson\", \"r\") as fin:\n",
" article_texts = [\n",
" json.loads(lang)[1] for lang in tqdm(fin, desc=\"Articles added\", unit=\"articles\")\n",
" json.loads(lang)[1]\n",
" for lang in tqdm(fin, desc=\"Articles added\", unit=\"articles\")\n",
" ]\n",
"\n",
"print(f\"Number of articles: {len(article_texts)}\")"
Expand Down Expand Up @@ -252,7 +254,7 @@
" num_words=1000,\n",
" ignore_words=None,\n",
" update_local_data=True,\n",
" verbose=True\n",
" verbose=True,\n",
")"
]
},
Expand Down
5 changes: 1 addition & 4 deletions src/scribe_data/wikipedia/process_wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@
import regex
from tqdm.auto import tqdm

from scribe_data.utils import (
DEFAULT_JSON_EXPORT_DIR,
get_language_qid,
)
from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR, get_language_qid
from scribe_data.wikidata.wikidata_utils import sparql

warnings.filterwarnings("ignore", message=r"Passing", category=FutureWarning)
Expand Down
77 changes: 44 additions & 33 deletions tests/cli/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,10 @@ def test_convert_to_json_standard_csv(self, mock_path_class):

mocked_open = mock_open()

with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
mock_mkdir.return_value = None
convert_to_json(
language="English",
Expand Down Expand Up @@ -220,9 +221,10 @@ def test_convert_to_json_with_multiple_keys(self, mock_path_class):
)

mocked_open = mock_open()
with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
mock_mkdir.return_value = None
convert_to_json(
language="English",
Expand Down Expand Up @@ -257,9 +259,10 @@ def test_convert_to_json_with_complex_structure(self, mock_path_class):
)

mocked_open = mock_open()
with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
mock_mkdir.return_value = None
convert_to_json(
language="English",
Expand Down Expand Up @@ -319,9 +322,10 @@ def test_convert_to_csv_or_tsv_standarddict_to_csv(self, mock_path_class):

mocked_open = mock_open()

with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
mock_mkdir.return_value = None

convert_to_csv_or_tsv(
Expand Down Expand Up @@ -361,9 +365,10 @@ def test_convert_to_csv_or_tsv_standarddict_to_tsv(self, mock_path_class):

mocked_open = mock_open()

with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
mock_mkdir.return_value = None
convert_to_csv_or_tsv(
language="English",
Expand Down Expand Up @@ -402,9 +407,10 @@ def test_convert_to_csv_or_tsv_nesteddict_to_csv(self, mock_path_class):
)

mocked_open = mock_open()
with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
mock_mkdir.return_value = None
convert_to_csv_or_tsv(
language="English",
Expand Down Expand Up @@ -443,9 +449,10 @@ def test_convert_to_csv_or_tsv_nesteddict_to_tsv(self, mock_path_class):
)

mocked_open = mock_open()
with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
mock_mkdir.return_value = None
convert_to_csv_or_tsv(
language="English",
Expand Down Expand Up @@ -484,9 +491,10 @@ def test_convert_to_csv_or_tsv_listofdicts_to_csv(self, mock_path_class):
)

mocked_open = mock_open()
with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
mock_mkdir.return_value = None
convert_to_csv_or_tsv(
language="English",
Expand Down Expand Up @@ -525,9 +533,10 @@ def test_convert_to_csv_or_tsv_listofdicts_to_tsv(self, mock_path_class):
)

mocked_open = mock_open()
with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
# Prevent actual directory creation
mock_mkdir.return_value = None
convert_to_csv_or_tsv(
Expand Down Expand Up @@ -568,9 +577,10 @@ def test_convert_to_csv_or_tsv_liststrings_to_csv(self, mock_path_class):

mocked_open = mock_open()

with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
mock_mkdir.return_value = None
convert_to_csv_or_tsv(
language="English",
Expand Down Expand Up @@ -610,9 +620,10 @@ def test_convert_to_csv_or_tsv_liststrings_to_tsv(self, mock_path_class):

mocked_open = mock_open()

with patch("pathlib.Path.open", mocked_open), patch(
"pathlib.Path.mkdir"
) as mock_mkdir:
with (
patch("pathlib.Path.open", mocked_open),
patch("pathlib.Path.mkdir") as mock_mkdir,
):
mock_mkdir.return_value = None
convert_to_csv_or_tsv(
language="English",
Expand Down
1 change: 1 addition & 0 deletions tests/cli/test_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from unittest.mock import patch

import pkg_resources

from scribe_data.cli.version import (
get_latest_version,
get_local_version,
Expand Down
2 changes: 1 addition & 1 deletion tests/resources/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
-->
"""

from unittest import TestCase
import pathlib
from unittest import TestCase

BASE_DIR = pathlib.Path(__file__).resolve().parent.parent.parent
LANGUAGE_METADATA_PATH = (
Expand Down
1 change: 1 addition & 0 deletions tests/wikidata/test_check_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from urllib.error import HTTPError

import pytest

from scribe_data.wikidata.check_query.check import (
all_queries,
changed_queries,
Expand Down

0 comments on commit d49edce

Please sign in to comment.