Skip to content

Commit

Permalink
make dictionaries updatable (#117)
Browse files Browse the repository at this point in the history
* updatable metadata and files

* mvp

* release test

* fixes

* fix folder

* try npm installing

* echo and fix

* fix mb

* axlr8

* try printf

* last fix of the night

* try curl

* fix mb

* fix maybe

* fix maybe

* try fix

* make curl follow redirects

* fix merge-ipa mkdir

* cleanup
  • Loading branch information
StefanVukovic99 authored Jul 20, 2024
1 parent cf96143 commit 1374083
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 51 deletions.
62 changes: 28 additions & 34 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
- name: Load Languages
id: load-languages
run: |
languages=$(jq -r '.[].language' languages.json | jq -R -s -c 'split("\n") | map(select(. != ""))')
# Names of the languages that have their own edition, as a compact single-line JSON array
# (the value is written as one name=value line to $GITHUB_OUTPUT below).
# `jq -c` compacts directly, so no second identity pass through jq is needed.
languages=$(jq -c '[.[] | select(.hasEdition == true) | .language]' languages.json)
echo "languages=$languages" >> $GITHUB_OUTPUT
isos=$(jq -r '.[].iso' languages.json | jq -R -s -c 'split("\n") | map(select(. != ""))')
echo "isos=$isos" >> $GITHUB_OUTPUT
Expand All @@ -56,21 +56,12 @@ jobs:
run: |
./tools/generate-downloads-table.sh
- name: Create pull request
- name: Create pull request if necessary
uses: peter-evans/create-pull-request@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
title: "Update downloads.md with list of .zip files"
body: "This PR updates the downloads.md file with a table listing all .zip files."

- name: Release
uses: softprops/action-gh-release@v2
with:
name: ${{ steps.tag.outputs.calver }}
tag_name: ${{ steps.tag.outputs.tag }}
prerelease: true
generate_release_notes: true

body: "This PR updates the downloads.md file with a table listing all .zip files."

convert:
needs: prepare
Expand Down Expand Up @@ -106,15 +97,6 @@ jobs:

- name: Run auto.sh script
run: ./auto.sh "${{ matrix.edition_language }}" ? ?

- name: Generate list of .zip files
id: generate_file_list
run: |
find data/language -type f -name '*.zip' > zip_files.txt
cat zip_files.txt
echo "zip_files<<EOF" >> $GITHUB_OUTPUT
cat zip_files.txt >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT

- name: Upload dictionary files
uses: softprops/action-gh-release@v2
Expand All @@ -123,7 +105,16 @@ jobs:
tag_name: ${{ needs.prepare.outputs.tag }}
prerelease: true
generate_release_notes: true
files: ${{ steps.generate_file_list.outputs.zip_files }}
files: "data/language/**/*.zip"

# Attaches every "<title>-index.json" found under data/language/ to the release identified
# by the prepare job's calver/tag outputs.
# NOTE(review): the release is created with `prerelease: true`, while the generated index
# files link to `/releases/latest/download/...`; GitHub's "latest" alias appears to skip
# prereleases — confirm the release is promoted after publishing.
- name: Upload index.json files
uses: softprops/action-gh-release@v2
with:
name: ${{ needs.prepare.outputs.calver }}
tag_name: ${{ needs.prepare.outputs.tag }}
prerelease: true
generate_release_notes: true
files: "data/language/**/*-index.json"

merge-ipa:
needs: ["prepare", "convert"]
Expand All @@ -136,10 +127,13 @@ jobs:
uses: actions/setup-node@v3
with:
node-version: '20'

- name: Install npm dependencies
run: npm install

- name: Download IPA dicts
run: |
mapfile -t iso_array < <(echo "$isos" | jq -r '.[]')
mapfile -t iso_array < <(printf '%s' '${{needs.prepare.outputs.isos}}' | jq -r '.[]')
supported_editions="de en es fr ru zh"
for source_iso in "${iso_array[@]}"; do
for target_iso in "${iso_array[@]}"; do
Expand All @@ -152,7 +146,7 @@ jobs:
continue
fi
url="https://github.com/themoeway/kaikki-to-yomitan/releases/download/${{needs.prepare.outputs.tag}}/kty-${source_iso}-${target_iso}-ipa.zip"
wget -nv "$url"
curl -s -O -f -L "$url" || echo "Skipping $url due to an error."
done
done
Expand All @@ -161,15 +155,6 @@ jobs:

- name: Delete downloaded IPA files
run: rm *.zip

- name: Generate list of .zip files
id: generate_file_list
run: |
find data/language -type f -name '*.zip' > zip_files.txt
cat zip_files.txt
echo "zip_files<<EOF" >> $GITHUB_OUTPUT
cat zip_files.txt >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT

- name: Upload dictionary files
uses: softprops/action-gh-release@v2
Expand All @@ -178,4 +163,13 @@ jobs:
tag_name: ${{ needs.prepare.outputs.tag }}
prerelease: true
generate_release_notes: true
files: ${{ steps.generate_file_list.outputs.zip_files }}
files: "data/language/**/*.zip"

# Attaches every "<title>-index.json" found under data/language/ (here: the merged IPA
# dictionaries' indexes) to the release identified by the prepare job's calver/tag outputs.
# NOTE(review): `prerelease: true` combined with `/releases/latest/download/...` links in
# the index files — confirm the release is promoted so those links resolve.
- name: Upload index.json files
uses: softprops/action-gh-release@v2
with:
name: ${{ needs.prepare.outputs.calver }}
tag_name: ${{ needs.prepare.outputs.tag }}
prerelease: true
generate_release_notes: true
files: "data/language/**/*-index.json"
10 changes: 8 additions & 2 deletions 4-make-yomitan.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ const {
temp_folder: writeFolder
} = process.env;

// Base URL for assets of the project's latest GitHub release; writeIndex() appends
// "<title>-index.json" and "<title>.zip" to it to build the index's update links.
const latestDownloadLink = 'https://github.com/themoeway/kaikki-to-yomitan/releases/latest/download/';

const indexJson = {
format: 3,
revision: currentDate,
Expand Down Expand Up @@ -438,10 +440,14 @@ function getTagStyles(folder){
}

/**
 * Writes the dictionary's `index.json` into `${writeFolder}/${folder}/`.
 *
 * The title is `${DICT_NAME}-${source_iso}-${target_iso}`, with an `-ipa` suffix for any
 * folder other than 'dict'. On top of the shared `indexJson` fields the index is marked
 * updatable, with `indexUrl` / `downloadUrl` pointing at `<title>-index.json` and
 * `<title>.zip` under `latestDownloadLink`.
 *
 * @param {string} folder - Sub-folder of `writeFolder` to write into; 'dict' selects the
 *   main dictionary title, any other value gets the `-ipa` title.
 * @returns {void}
 */
function writeIndex(folder) {
    const suffix = folder === 'dict' ? '' : '-ipa';
    const title = `${DICT_NAME}-${source_iso}-${target_iso}${suffix}`;

    const index = {
        ...indexJson,
        title,
        isUpdatable: true,
        indexUrl: `${latestDownloadLink}${title}-index.json`,
        downloadUrl: `${latestDownloadLink}${title}.zip`,
    };

    writeFileSync(`${writeFolder}/${folder}/index.json`, JSON.stringify(index));
}

function processTags(lemmaTags, senseTags, parenthesesTags, pos) {
Expand Down
22 changes: 18 additions & 4 deletions auto.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ convertMainDict(){
export temp_folder="data/temp"
temp_dict_folder="$temp_folder/dict"
temp_ipa_folder="$temp_folder/ipa"
dict_file="${DICT_NAME}-$source_iso-$target_iso.zip"
ipa_file="${DICT_NAME}-$source_iso-$target_iso-ipa.zip"
# Shared base name for this language pair; the dictionary and IPA archive names derive from it.
dict_title="${DICT_NAME}-${source_iso}-${target_iso}"
dict_file="$dict_title.zip"
ipa_file="$dict_title-ipa.zip"

# Step 5: Create Yomitan files
if \
Expand All @@ -49,6 +50,14 @@ convertMainDict(){
mv "$ipa_file" "data/language/$source_iso/$target_iso/"
fi

# Move each generated index.json into the language-pair folder, renamed to
# "<title>-index.json" (dictionary) or "<title>-ipa-index.json" (IPA).
if test -f "${temp_dict_folder}/index.json"; then
    mv "${temp_dict_folder}/index.json" "data/language/${source_iso}/${target_iso}/${dict_title}-index.json"
fi

if test -f "${temp_ipa_folder}/index.json"; then
    mv "${temp_ipa_folder}/index.json" "data/language/${source_iso}/${target_iso}/${dict_title}-ipa-index.json"
fi

echo "----------------------------------------------------------------------------------"
return 0
}
Expand All @@ -60,7 +69,8 @@ convertGlossary(){
export target_iso="$gloss_iso"
export temp_folder="data/temp"

dict_file="${DICT_NAME}-$source_iso-$target_iso-gloss.zip"
dict_title="${DICT_NAME}-$source_iso-$target_iso-gloss"
dict_file="${dict_title}.zip"

# Step 4: Create Yomitan files
echo "Creating Yomitan dict files"
Expand All @@ -79,6 +89,10 @@ convertGlossary(){
if [ -f "$dict_file" ]; then
mv "$dict_file" "$output_folder"
fi

# Move the glossary's index.json into the output folder, renamed to "<title>-index.json".
if test -f "${temp_folder}/dict/index.json"; then
    mv "${temp_folder}/dict/index.json" "${output_folder}/${dict_title}-index.json"
fi
}

source .env
Expand Down Expand Up @@ -255,7 +269,7 @@ for edition_lang in "${languages[@]}"; do
export gloss_iso=$(echo "${gloss_lang}" | jq -r '.iso')
gloss_lang_name=$(echo "${gloss_lang}" | jq -r '.language')

if [ "$gloss_lang" = "$edition_name" ]; then
if [ "$gloss_lang_name" = "$edition_name" ]; then
continue
fi

Expand Down
11 changes: 9 additions & 2 deletions make-glossary.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const LineByLineReader = require('line-by-line');
const { consoleOverwrite, clearConsoleLine, logProgress, findPartOfSpeech, loadJsonArray, writeInBatches, currentDate } = require('./util/util');
const { readdirSync, unlinkSync, writeFileSync } = require('fs');
const { isUint16Array } = require('util/types');

const {
source_iso: sourceIso,
Expand Down Expand Up @@ -41,17 +42,23 @@ function processTranslations(translations, glosses, senses, sense){
const partsOfSpeech = loadJsonArray(`data/language/target-language-tags/en/parts_of_speech.json`);
const skippedPartsOfSpeech = {};

// Index metadata for the glossary dictionary (title ends in "-gloss").
// isUpdatable / indexUrl / downloadUrl point at "<title>-index.json" and "<title>.zip"
// among the assets of the repository's latest release.
// Each key is listed exactly once; the key order is the order they are serialised in.
const url = 'https://github.com/themoeway/kaikki-to-yomitan';
const latestReleaseUrl = `${url}/releases/latest/download/`;
const title = `kty-${sourceIso}-${targetIso}-gloss`;
const indexJson = {
    title,
    format: 3,
    revision: currentDate,
    sequenced: true,
    author: 'Kaikki-to-Yomitan contributors',
    url,
    description: 'Dictionaries for various language pairs generated from Wiktionary data, via Kaikki and Kaikki-to-Yomitan.',
    attribution: 'https://kaikki.org/',
    sourceLanguage: sourceIso,
    targetLanguage: targetIso,
    isUpdatable: true,
    indexUrl: `${latestReleaseUrl}${title}-index.json`,
    downloadUrl: `${latestReleaseUrl}${title}.zip`,
};

const ymtLemmas = [];
Expand Down
31 changes: 22 additions & 9 deletions merge-ipa.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ const { writeInBatches } = require('./util/util');
const date = require('date-and-time');
const now = new Date();

// Scratch folder the merged IPA dictionary files are written to before zipping.
// Created when the module loads (recursive: true makes this a no-op if it already exists),
// so main() can list and clear it even when nothing has created it beforehand.
const tempFolder = 'data/temp/ipa';
mkdirSync(tempFolder, { recursive: true });

async function main(){
const languages = JSON.parse(readFileSync('languages.json', 'utf8'));

Expand Down Expand Up @@ -76,26 +79,36 @@ async function main(){
if(globalIpaLength) console.log("globalIpa", globalIpaLength);
const globalTagsLength = globalTags.length;
if(globalTagsLength) console.log("globalTags", globalTagsLength);


// Index metadata for the merged per-language IPA dictionary.
// Note: latestReleaseUrl already ends in the dictionary title, so only the
// "-index.json" / ".zip" suffix is appended below.
const url = 'https://github.com/themoeway/kaikki-to-yomitan';
const title = `kty-${sourceIso}-ipa`;
const latestReleaseUrl = `${url}/releases/latest/download/${title}`;
const globalIndex = {
    "format": 3,
    "revision": date.format(now, 'YYYY.MM.DD'),
    "sequenced": true,
    title,
    url,
    "isUpdatable": true,
    "indexUrl": `${latestReleaseUrl}-index.json`,
    "downloadUrl": `${latestReleaseUrl}.zip`,
};

if(globalIpaLength){

for (const file of readdirSync('data/temp/ipa')) {
unlinkSync(`data/temp/ipa/${file}`);
for (const file of readdirSync(tempFolder)) {
unlinkSync(`${tempFolder}/${file}`);
}

writeFileSync(`data/temp/ipa/index.json`, JSON.stringify(globalIndex, null, 4));
writeInBatches('data/temp/ipa', Object.values(globalIpa), 'term_meta_bank_', 500000);
writeInBatches('data/temp/ipa', globalTags, 'tag_bank_', 50000);

writeFileSync(`${tempFolder}/index.json`, JSON.stringify(globalIndex, null, 4));
writeInBatches(tempFolder, Object.values(globalIpa), 'term_meta_bank_', 500000);
writeInBatches(tempFolder, globalTags, 'tag_bank_', 50000);

mkdirSync(`data/language/${sourceIso}`, { recursive: true });
execSync(`zip -j data/language/${sourceIso}/kty-${sourceIso}-ipa.zip data/temp/ipa/*`);
// Publish the zipped dictionary and a standalone copy of its index side by side in the
// per-language output folder; both files are named after the dictionary title.
// Declared with `const`: a bare assignment would create an implicit global (and throws a
// ReferenceError in strict mode). No trailing slash, so the joined paths contain no "//".
// The zip command goes through a shell because it relies on the `*` glob.
const outputFolder = `data/language/${sourceIso}`;
mkdirSync(outputFolder, { recursive: true });
execSync(`zip -j ${outputFolder}/${title}.zip ${tempFolder}/*`);
writeFileSync(`${outputFolder}/${title}-index.json`, JSON.stringify(globalIndex, null, 4));
}
}
}
Expand Down

0 comments on commit 1374083

Please sign in to comment.