diff --git a/scripts/crowdin/.env.sample b/scripts/crowdin/.env.sample new file mode 100644 index 0000000000..23d4493403 --- /dev/null +++ b/scripts/crowdin/.env.sample @@ -0,0 +1,2 @@ +CROWDIN_API_TOKEN= +STEP_CROWDIN_PROJECT_ID=10716 diff --git a/scripts/crowdin/.gitignore b/scripts/crowdin/.gitignore new file mode 100644 index 0000000000..c35eaf4756 --- /dev/null +++ b/scripts/crowdin/.gitignore @@ -0,0 +1,184 @@ +# This project specific stuff: +.env +.env* +!.env.sample +step_project.json +all_projects.json +tmp/*/*.properties +tmp/unzipped/** + + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc diff --git a/scripts/crowdin/README.md b/scripts/crowdin/README.md new file mode 100644 index 0000000000..1ac3e63bbe --- /dev/null +++ b/scripts/crowdin/README.md @@ -0,0 +1,110 @@ +# Description +## Purpose +To be ran in + +## What it does +Executes process to: +1) Download zip file from Crowdin with translation files for all languages +2) Unzip crowdin file +3) Rename files to match expected filenames for our STEP Bible process + + +# Instructions +## 1) Setup env +### 1a) Acquire API Key +See here to learn more: https://support.crowdin.com/enterprise/account-settings/#access-tokens + +Basically you just need to sign in and create a token. 
I named mine "STEP-Bible" +- https://crowdin.com/settings#api-key + +#### Required Permissions for your API Token: +I created a token with read-only access to the following (probably more than I needed): +- `Projects (List, Get, Create, Edit)` +- `Translation Status` (probably not needed) + +AND read AND write access to the following: +- `Translations` (needed, so the script can execute a `build` on the translations) + +This is the token you'll use in step `1b` below. + +### 1b) Set Env Vars + + +#### OPTION #1: Set directly in your terminal + +``` +export CROWDIN_API_TOKEN= +export STEP_CROWDIN_PROJECT_ID=10716 +``` + +NOTE: Exported variables only exist in the terminal session where you set them, so run the script from that same session. They are lost when you open a new terminal or restart your computer; to avoid re-exporting them each time, use Option #2 instead: + +#### OPTION #2: Use .env file +- First, copy the sample .env file + ``` + cp .env.sample .env + ``` +- Next, set the appropriate values + - `CROWDIN_API_TOKEN` for your crowdin API token from the previous step. + +(The scripts load this `.env` file automatically via the `python-dotenv` library.) + +### 1c) Install Python Pre-reqs +**If you don't have it already**, install `virtualenv`: +``` +pip3 install virtualenv +``` + +Create and activate a virtual env for this subproject +``` +python3 -m venv venv +source venv/bin/activate +``` + +Then install the requirements +``` +pip3 install -r requirements.txt +``` + +## 2) Run the script +``` +# probably just `python download_bundle.py` should work, but it didn't on my machine, so this uses the full path to the venv python. +./venv/bin/python download_bundle.py +``` + +### Options: +#### Option: Skip Download +Will not download zip file, but just use existing zip file instead. 
If this is set, will not run a build either +``` +./venv/bin/python download_bundle.py --skip-download +``` + +#### Option: Force Build +Will not check for existing builds, will just build a new one. Cannot be combined with `--skip-download`. +``` +./venv/bin/python download_bundle.py --force-build +``` + +#### Option: Path +Specify a path to put `*.properties` files. Defaults to `step/scripts/crowdin/tmp/bundle_out` + +``` +./venv/bin/python download_bundle.py --path ../../step-core/src/main/resources/ +``` + +(This is what we would do during our build process) + +#### Option: Skip Existing File Check +Doesn't error out if there's a file in target dir already +``` +./venv/bin/python download_bundle.py --skip-existing-file-check +``` + + +### Use Case: Use updated `*.properties` files for project build +Basically make sure to skip existing file check and specify the `resources` dir path, and then the script will leave files that don't have replacements, but replace all `*.properties` files that do have replacements. +``` +./venv/bin/python download_bundle.py --path ../../step-core/src/main/resources/ --skip-existing-file-check +``` + +# TODOs +- [ ] See download_bundle.py and all the TODOs there diff --git a/scripts/crowdin/download_bundle.py b/scripts/crowdin/download_bundle.py new file mode 100755 index 0000000000..64abd7dfae --- /dev/null +++ b/scripts/crowdin/download_bundle.py @@ -0,0 +1,452 @@ +from crowdin_api import CrowdinClient +from dotenv import load_dotenv +import os +import json +import time +import requests +import zipfile +import glob +import sys +import shutil +import argparse +import re + + +from tqdm import tqdm +import pytz +utc=pytz.UTC +from datetime import datetime, timedelta + +from pathlib import Path + +load_dotenv() # take environment variables from .env. 
class DownloadAndMoveJob:
    """Fetch the Crowdin translation bundle and copy it into STEP's file layout.

    The job tracks one Crowdin build (``buildId``/``status``/``progress``).
    The zip for that build lives in ``./tmp`` and is extracted to
    ``./tmp/unzipped`` (both relative to the current working directory);
    ``move`` then copies the ``*.properties`` files to a target directory
    under the file names STEP expects.
    """

    # Crowdin folder name -> language code used in STEP file names. Anything
    # not listed here is reduced to the part before the first "-".
    # "he" is Hebrew and "id" is Indonesian; "iw"/"in" look like Java's legacy
    # locale codes for those languages -- NOTE(review): confirm that is the
    # reason STEP expects them.
    LANGUAGE_CODE_OVERRIDES = {
        "zh-TW": "zh_TW",
        "he": "iw",
        "id": "in",
    }

    # Bundles that are not copied as their own file but appended onto the
    # language's InteractiveBundle file.
    APPENDED_BUNDLE_PREFIXES = ("LangSpecificBundle", "MorphologyBundle")

    # Build statuses after which polling can never reach 100% progress.
    UNRECOVERABLE_BUILD_STATUSES = ("canceled", "failed")

    # A finished build is reused only if it finished within this many minutes.
    REUSABLE_BUILD_MAX_AGE_MINUTES = 30

    def __init__(self, skip_existing_file_check=False):
        """
        @param skip_existing_file_check when True, ``move`` overwrites files
               that already exist in the target directory instead of exiting.
        """
        self.projectId = STEP_CROWDIN_PROJECT_ID
        self.buildId = None
        self.progress = None
        self.status = "not yet started"
        self.download_url = None
        self.existing_builds = None
        self.skip_existing_file_check = skip_existing_file_check

    @staticmethod
    def _dump_json(payload, filename):
        """Write an API response to ``filename`` (working directory) for debugging."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(payload, f,
                      ensure_ascii=False,
                      indent=4,
                      # for the datetime obj
                      default=str)

    def run_build(self):
        """
        Executes a "project build", which creates a snapshot of translations at current point of time, in preparation for a download.
        - Currently, we're doing all target languages, so don't need to specify.
        - https://support.crowdin.com/developer/api/v2/#tag/Translations/operation/api.projects.translations.builds.post
        - Stores the new build's id, status and progress on this job; the id is what the download step uses.
        """
        print("\n***********")
        print("running a build for STEP Bible project (project id:", STEP_CROWDIN_PROJECT_ID, ")...")
        # setting this initial status, then after that, all statuses will be from CrowdIn's API
        self.status = "about to request build"

        build_result = client.translations.build_crowdin_project_translation(
            projectId=self.projectId,
            # NOTE skipUntranslatedFiles overrides the effect of skipUntranslatedStrings,
            # and the Python lib requires skipUntranslatedStrings to not be set at all
            # when this one is set.
            skipUntranslatedFiles=True,
            # exportApprovedOnly (not set) would require files and strings to be
            # translated AND approved; it also overrides skipUntranslatedStrings.
        )

        print(build_result)
        self._dump_json(build_result, 'tmp.build-result.json')

        data = build_result["data"]
        print(data)

        self.buildId = data["id"]
        self.status = data["status"]
        self.progress = data["progress"]

        print("done.")

    def build_is_done(self):
        """Return True once the tracked build reports 100 (percent) progress."""
        return str(self.progress) == "100"

    def check_build_status(self):
        """
        Refreshes ``status`` and ``progress`` of the tracked build from Crowdin.
        - ``download_zip`` keeps calling this until the build is complete and ready to download
        - https://support.crowdin.com/developer/api/v2/#tag/Translations/operation/api.projects.translations.builds.get
        """
        print("\n***********")
        print("checking build status ...(Build id:", self.buildId, ")")

        result = client.translations.check_project_build_status(
            buildId=self.buildId,
            projectId=self.projectId,
        )
        self._dump_json(result, "tmp.build-status.json")

        data = result["data"]

        # will return one of: "created" "inProgress" "canceled" "failed" "finished"
        self.status = data["status"]
        print("Status:", self.status)
        # percentage
        self.progress = data["progress"]
        print("Progress:", self.progress, "%")

    def get_download_url(self):
        """
        returns link to download the zip of all translations for all languages from Crowdin's File API (v2)
        @return str download_url to download the zip
        """
        print("\n***********")
        print("downloading translations for STEP Bible project...(id:", STEP_CROWDIN_PROJECT_ID, ")")

        result = client.translations.download_project_translations(
            buildId=self.buildId,
            projectId=self.projectId
        )
        print(result)
        self.download_url = result["data"]["url"]

        return self.download_url

    def get_zip_filename_base(self):
        """Return the zip file name without extension; it embeds the build id."""
        return f"crowdin-step.build_{self.buildId}"

    def get_zip_filename(self):
        """Return the zip file name for the tracked build."""
        return f"{self.get_zip_filename_base()}.zip"

    def get_zip_dir_path(self):
        """Return the download directory: ``tmp`` under the current working directory."""
        # just using a tmp dir
        return Path().resolve() / "tmp"

    def get_zip_filepath(self):
        """Return the full path of the zip for the tracked build."""
        return self.get_zip_dir_path() / self.get_zip_filename()

    def get_unzipped_dir_path(self):
        """Return the directory the zip is extracted into."""
        return self.get_zip_dir_path() / "unzipped"

    def download(self):
        """
        downloads the zip of all translations for all languages from ``self.download_url``
        into ``get_zip_filepath()``.

        Raises ``requests.HTTPError`` when the server answers with an error
        status, so an error page is never saved as if it were the zip.
        """
        zip_path = self.get_zip_filepath()
        zip_path.parent.mkdir(parents=True, exist_ok=True)

        print(f"now downloading to {zip_path}\n")
        with requests.get(self.download_url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(zip_path, "wb") as handle:
                # iter_content() defaults to 1-byte chunks, which is very slow;
                # ask for 64 KiB at a time.
                for chunk in tqdm(response.iter_content(chunk_size=64 * 1024)):
                    handle.write(chunk)

        print("\n")

    def unzip(self):
        """Extract the zip of the tracked build into ``get_unzipped_dir_path()``."""
        print(f"now unzipping to {self.get_unzipped_dir_path()}")

        with zipfile.ZipFile(self.get_zip_filepath(), 'r') as zip_ref:
            zip_ref.extractall(self.get_unzipped_dir_path())

    @classmethod
    def _step_language_code(cls, lang_folder_name):
        """Translate a Crowdin language folder name into STEP's language code."""
        if lang_folder_name in cls.LANGUAGE_CODE_OVERRIDES:
            return cls.LANGUAGE_CODE_OVERRIDES[lang_folder_name]
        # for the rest, just take the first part, don't need specific dialect
        return lang_folder_name.split("-")[0]

    @staticmethod
    def _append_bundle(source_path, destination_path):
        """Append the bytes of ``source_path`` plus a newline to ``destination_path``."""
        # Binary mode keeps the bytes exactly as exported, like shutil.copyfile
        # does for the files that are copied rather than appended.
        with open(source_path, "rb") as source:
            payload = source.read()

        # If the destination does not end with a newline, the first appended
        # property would be glued onto its last line.
        needs_separator = False
        if os.path.getsize(destination_path) > 0:
            with open(destination_path, "rb") as destination:
                destination.seek(-1, os.SEEK_END)
                needs_separator = destination.read(1) != b"\n"

        with open(destination_path, "ab") as destination:
            if needs_separator:
                destination.write(b"\n")
            destination.write(payload)
            destination.write(b"\n")

    def move(self, newPath):
        """
        copy unzipped files to target folder (final step!)

        Each ``<lang>/<Prefix>_*.properties`` file under the unzipped dir becomes
        ``<newPath>/<Prefix>_<step lang code>.properties``, except for the
        bundles in ``APPENDED_BUNDLE_PREFIXES``, which are appended onto that
        language's InteractiveBundle file.

        @param newPath target directory (``Path`` or str); created if missing.
        Exits the program with a non-zero status when a target file already
        exists (unless ``skip_existing_file_check`` is set) or when there is
        no InteractiveBundle file to append to.
        """
        target_dir = Path(newPath)
        target_dir.mkdir(parents=True, exist_ok=True)

        export_glob = os.path.join(self.get_unzipped_dir_path(), "*")
        print("checking", export_glob)

        for lang_folder_path in sorted(glob.glob(export_glob)):
            if not os.path.isdir(lang_folder_path):
                continue

            # the top level dir name is the Crowdin language code
            langName = self._step_language_code(os.path.basename(lang_folder_path))

            print("\n*****")
            print("langName", langName)
            print("*****")

            # sorting, particularly to make sure MorphologyBundle comes AFTER InteractiveBundle,
            # since the MorphologyBundle needs to be appended onto the InteractiveBundle
            property_files = sorted(
                glob.glob(os.path.join(lang_folder_path, "*.properties")),
                key=str.lower,
            )

            for property_file_path in property_files:
                print("now copying .properties file", property_file_path, "to target directory")
                # Our java code is looking for a different file naming system,
                # so renaming when we copy the file to match that.
                targetFilePrefix = Path(property_file_path).stem.split("_")[0]
                targetPath = target_dir / f"{targetFilePrefix}_{langName}.properties"

                if targetPath.exists() and not self.skip_existing_file_check:
                    sys.exit(f"ERROR !! File already exists, something went wrong {targetPath}\nExiting program...")

                if targetFilePrefix in self.APPENDED_BUNDLE_PREFIXES:
                    appendToFile = target_dir / f"InteractiveBundle_{langName}.properties"
                    print("Found", property_file_path, "will append to", appendToFile)
                    if not appendToFile.is_file():
                        sys.exit("Cannot find correspond InteractiveBundle file")
                    self._append_bundle(property_file_path, appendToFile)
                else:
                    shutil.copyfile(property_file_path, targetPath)

    def list_builds(self):
        """
        List builds for STEP Crowdin project and keep them in ``self.existing_builds``
        - Used to find a build that can be reused for the download
        - https://support.crowdin.com/developer/api/v2/#tag/Translations/operation/api.projects.translations.builds.getMany
        """
        print("\n***********")
        print("listing builds for STEP Bible project...(id:", STEP_CROWDIN_PROJECT_ID, ")")

        builds = client.translations.list_project_builds(self.projectId)
        self._dump_json(builds, 'tmp.builds.json')

        self.existing_builds = builds["data"]

    def check_builds_for_existing(self):
        """
        Look through ``self.existing_builds`` for a finished build that is
        recent enough to reuse, and start tracking the newest such build.

        @return the build's data dict, or None when no build qualifies.
        """
        # Local import: the script's module-level imports only bring in
        # ``datetime`` and ``timedelta``.
        from datetime import timezone

        # Compare against the real current UTC time. (Labelling the local
        # wall-clock time as UTC shifts the window by the machine's UTC offset.)
        cutoff = datetime.now(timezone.utc) - timedelta(minutes=self.REUSABLE_BUILD_MAX_AGE_MINUTES)

        newest = None
        for build in self.existing_builds or []:
            data = build["data"]
            print("checking build:", data)

            # the client lib should already hand us a datetime obj; builds that
            # never finished have no usable finish time
            completed_at = data.get("finishedAt")
            if data.get("status") != "finished" or not isinstance(completed_at, datetime):
                print("checking next build...")
                continue

            if completed_at.tzinfo is None:
                # assumes naive timestamps are UTC -- TODO confirm
                completed_at = completed_at.replace(tzinfo=timezone.utc)

            if completed_at <= cutoff:
                print("checking next build...")
                continue

            if newest is None or completed_at > newest[0]:
                newest = (completed_at, data)

        if newest is None:
            print("\n***\nno build found from last 30 minutes")
            return None

        data = newest[1]
        self.status = data["status"]
        self.progress = data["progress"]
        self.buildId = data["id"]

        print("found available build")
        print(data)
        return data

    def set_build(self, force_build=False):
        """
        either run a new build or find existing build for this run.

        @param force_build when True, always request a new build.
        """
        if force_build:
            self.run_build()
            return

        self.list_builds()
        if self.check_builds_for_existing():
            print("found available build", self.buildId)
            print("not making a new build, just using previous build")
        else:
            self.run_build()

    def use_latest_zip(self):
        """
        - goes through the zip files in the download dir and finds the latest one (i.e., zip with largest build number)
        - Main thing we need here is the build id, so we can use that and identify which zip to unzip in a later step.

        Exits the program with a non-zero status when no bundle zip is found.
        """
        zip_glob = os.path.join(self.get_zip_dir_path(), "*.zip")
        print("finding zips in dir using glob", zip_glob)

        name_pattern = re.compile(r"crowdin-step\.build_(\d+)\.zip")
        candidates = []
        for zip_path in glob.iglob(zip_glob):
            match = name_pattern.fullmatch(os.path.basename(zip_path))
            if match:
                # compare build ids as numbers: as text, "9" sorts after "10"
                candidates.append((int(match.group(1)), match.group(1)))

        if not candidates:
            sys.exit("ERROR no zips found. Try again without --skip-download")

        print(sorted(candidates))
        # keep the id exactly as written in the file name so that
        # get_zip_filename() rebuilds the same name
        self.buildId = max(candidates)[1]

    def download_zip(self, poll_interval_seconds=3):
        """
        Waits until the tracked build is done, then downloads its zip.

        @param poll_interval_seconds pause between two status checks.
        Exits the program with a non-zero status when the build was canceled
        or failed, since such a build would never become downloadable.
        """
        while not self.build_is_done():
            if self.status in self.UNRECOVERABLE_BUILD_STATUSES:
                sys.exit(f"ERROR build {self.buildId} ended with status '{self.status}'")

            print(f"not yet done, waiting {poll_interval_seconds} seconds...")
            time.sleep(poll_interval_seconds)
            self.check_build_status()

        self.get_download_url()

        self.download()
# Load the .env file that sits next to this script, so the lookup does not
# depend on the directory the script is started from. Uses os.path because
# bare `join`/`dirname` are never imported in this file.
dotenv_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '.env')
load_dotenv(dotenv_path)  # take environment variables from .env.

# Raises KeyError when the variable is set neither in .env nor in the environment.
STEP_CROWDIN_PROJECT_ID = os.environ['STEP_CROWDIN_PROJECT_ID']
def unzip(newPath=None):
    """
    Placeholder: extraction is not implemented in this script.

    The original stub had no body at all, which made the whole file a syntax
    error. ``newPath`` is accepted (and optional) so both ``unzip()`` and
    ``unzip(newPath)`` are valid calls; nothing is done with it.
    """
    # TODO implement, or extract the Crowdin zip into ./crowdin by hand before running
    return None


def move(newPath):
    """
    Copy the ``*.properties`` files found in ``crowdin/<lang>/`` (relative to
    the current working directory) into ``newPath``, renamed to
    ``<Prefix>_<lang code>.properties``.

    - Language codes: "zh-TW" -> "zh_TW", "he" -> "iw", "id" -> "in",
      anything else -> the part before the first "-".
    - LangSpecificBundle and MorphologyBundle files are not copied as their
      own file; they are appended onto that language's InteractiveBundle file.

    @param newPath target directory (str or Path); created if missing.
    Exits the program with status 1 when a target file already exists or when
    there is no InteractiveBundle file to append to.
    """
    language_code_overrides = {"zh-TW": "zh_TW", "he": "iw", "id": "in"}
    appended_bundle_prefixes = ("LangSpecificBundle", "MorphologyBundle")

    target_dir = Path(newPath)
    target_dir.mkdir(parents=True, exist_ok=True)

    # os.path.join instead of hard-coded backslashes, so this is not Windows-only
    for lang_folder_path in sorted(glob.glob(os.path.join("crowdin", "*"))):
        if not os.path.isdir(lang_folder_path):
            continue

        folder_name = os.path.basename(lang_folder_path)
        lang_name = language_code_overrides.get(folder_name, folder_name.split("-")[0])

        # sorted so that InteractiveBundle is copied before the bundles that
        # get appended onto it
        property_files = sorted(
            glob.glob(os.path.join(lang_folder_path, "*.properties")),
            key=str.lower,
        )

        for property_file_path in property_files:
            target_file_prefix = os.path.basename(property_file_path).split("_")[0]
            target_path = target_dir / f"{target_file_prefix}_{lang_name}.properties"

            if target_path.exists():
                print("already exist", target_path, "Exit program")
                sys.exit(1)

            if target_file_prefix in appended_bundle_prefixes:
                append_to_file = target_dir / f"InteractiveBundle_{lang_name}.properties"
                print("Found", property_file_path, "will append to", append_to_file)
                if not append_to_file.is_file():
                    print("Cannot find correspond InteractiveBundle file")
                    sys.exit(1)
                # binary mode: bytes are appended exactly as exported
                with open(property_file_path, "rb") as source, open(append_to_file, "ab") as destination:
                    destination.write(source.read())
                    destination.write(b"\n")
            else:
                print("copying", property_file_path, "to", target_path)
                shutil.copyfile(property_file_path, target_path)