Skip to content

Commit

Permalink
Update non-English kaikki JSON file download link
Browse files: browse the repository at this point in the history
  • Loading branch information
xxyzz committed Feb 22, 2024
1 parent cdec50a commit 4cdb830
Showing 1 changed file with 4 additions and 9 deletions.
13 changes: 4 additions & 9 deletions src/proficiency/extract_kaikki.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,17 +62,12 @@ def download_kaikki_json(lang: str) -> Path:
def download_kaikki_non_en_json(gloss_lang: str) -> Path:
from .split_jsonl import split_kaikki_non_en_jsonl

jsonl_path = Path(f"build/{gloss_lang}-extract.json")
gz_path = jsonl_path.with_suffix(".json.gz")
url = f"https://kaikki.org/{gloss_lang}wiktionary/raw-wiktextract-data.json.gz"
gz_path = Path(f"build/{url.rsplit('/', 1)[1]}")
jsonl_path = gz_path.with_suffix(".json")
if not gz_path.exists() and not jsonl_path.exists():
subprocess.run(
[
"wget",
"-nv",
"-P",
"build",
f"https://kaikki.org/dictionary/downloads/{gloss_lang}/{gloss_lang}-extract.json.gz",
],
["wget", "-nv", "-P", "build", url],
check=True,
capture_output=True,
text=True,
Expand Down

0 comments on commit 4cdb830

Please sign in to comment.