-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathproject.yml
67 lines (61 loc) · 2.04 KB
/
project.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Human-readable project metadata.
title: "Essay Grammar Checker"
description: >
  Essay Grammar Checker trained on Russian Error-Annotated Learner English
  Corpus (REALEC).
# Values substituted elsewhere in this file through ${vars.<name>}.
vars:
  config: "default"
  lang: "en"
  # Corpus name; used as the folder name under assets/ and, once the
  # preprocess command has moved it, at the project root.
  corpus: "realec"
  train_name: "train"
  dev_name: "dev"
  package_name: "grammar_checker"
  package_version: "1.0.2"
  # Space-separated error categories passed as arguments to the train,
  # evaluate and save_configs shell scripts.
  errors: "grammar_major grammar_minor vocabulary spelling punctuation articles"
  gpu: 0
# Directories the project expects to exist.
directories:
- "assets"
- "metrics"
- "configs"
- "training"
- "packages"
# Remote data to download: the REALEC archive, stored under
# assets/<corpus>/.
assets:
- dest: "assets/${vars.corpus}/data_realec.tar.bz2"
  url: "https://www.dropbox.com/s/sde5lfe56m348u8/data_realec.tar.bz2?dl=1"
workflows:
  # End-to-end run: every command defined in this file, in execution order.
  all:
  - preprocess
  - generate_configs
  - train_pipelines
  - evaluate_pipelines
  - assemble_pipelines
  - package
commands:
# Unpack the downloaded REALEC archive, run scripts/spans_to_dict.py over the
# extracted folder, then move that folder to the project root, where the
# generate_configs command lists it as a dependency.
- name: preprocess
  help: "Convert data to spaCy format required for SpanCategorizer"
  script:
  - tar xvf assets/${vars.corpus}/data_realec.tar.bz2 -C assets/${vars.corpus}
  - python scripts/spans_to_dict.py assets/${vars.corpus} ${vars.lang}
  # NOTE(review): the next two steps delete the archive listed under deps and
  # move its folder out of assets/. Presumably the asset has to be fetched
  # again before this command can be re-run -- confirm that is intended.
  - rm assets/${vars.corpus}/data_realec.tar.bz2
  - mv -v assets/${vars.corpus} ./
  deps:
  - "assets/${vars.corpus}/data_realec.tar.bz2"
  # The conversion script is an input too: track it so edits are detected.
  - "scripts/spans_to_dict.py"
  # Declare what the command produces so the data flow into generate_configs
  # is explicit and tracked in the project lockfile.
  outputs:
  - "${vars.corpus}"
# Runs scripts/generate_configs.py against the base config
# configs/default.cfg; per the help text this updates class weights.
# NOTE(review): the script is handed the literal path "assets", while the
# declared dependency is the corpus folder at the project root -- confirm the
# argument is what the script expects.
- name: generate_configs
  help: "Config class weights update"
  script:
  - "python scripts/generate_configs.py configs/default.cfg assets"
  deps:
  - "${vars.corpus}"
  - "configs/default.cfg"
# Invokes scripts/train.sh with the error categories from vars.errors as its
# arguments.
- name: train_pipelines
  help: "Training"
  script:
  - "sh scripts/train.sh ${vars.errors}"
# Invokes scripts/evaluate.sh with the error categories from vars.errors as
# its arguments.
- name: evaluate_pipelines
  help: "Evaluate pipelines"
  script:
  - "sh scripts/evaluate.sh ${vars.errors}"
# Two steps: scripts/save_configs.sh receives the error categories, then
# scripts/assemble_pipelines.py is called with "grammar_checker_nlp", the
# same path the package command declares as its dependency.
- name: assemble_pipelines
  help: "Model Assembly"
  script:
  - "sh scripts/save_configs.sh ${vars.errors}"
  - "python scripts/assemble_pipelines.py grammar_checker_nlp"
# Packages the assembled pipeline in grammar_checker_nlp into the packages/
# directory as a wheel, using the name and version from vars and bundling
# scripts/custom_factories.py via --code.
- name: package
  help: "Package an assembled model"
  script:
  # Folded scalar: the lines below join with single spaces into one command.
  - >-
    python -m spacy package grammar_checker_nlp packages
    --name ${vars.package_name} --version ${vars.package_version}
    --code scripts/custom_factories.py --force --build wheel
  deps:
  - "grammar_checker_nlp"