diff --git a/CHANGELOG.md b/CHANGELOG.md index d0e694f93..f47f1c94d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,12 @@ Note that Sockeye has checks in place to not translate with an old model that wa Each version section may have have subsections for: _Added_, _Changed_, _Removed_, _Deprecated_, and _Fixed_. +## [2.1.26] + +### Fixed + +- Fixes a backwards incompatibility introduced in 2.1.17, which would prevent models trained with prior versions from being used for inference. + ## [2.1.25] ### Changed diff --git a/MANIFEST.in b/MANIFEST.in index f8ba0012b..e307a5fa7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,6 +7,12 @@ include pylintrc include .flake8 include typechecked-files include test/data/config_with_missing_attributes.yaml +include test/data/model_2.1.x/config +include test/data/model_2.1.x/params.best +include test/data/model_2.1.x/model_input +include test/data/model_2.1.x/vocab* +include test/data/model_2.1.x/version +include test/data/model_2.1.x/README.md include sockeye/git_version.py include *.bib recursive-include .github * diff --git a/sockeye/__init__.py b/sockeye/__init__.py index e2782f453..34f6d9f04 100644 --- a/sockeye/__init__.py +++ b/sockeye/__init__.py @@ -11,4 +11,4 @@ # express or implied. See the License for the specific language governing # permissions and limitations under the License. -__version__ = '2.1.25' +__version__ = '2.1.26' diff --git a/sockeye/transformer.py b/sockeye/transformer.py index d764b0ec5..fdc5fc799 100644 --- a/sockeye/transformer.py +++ b/sockeye/transformer.py @@ -158,6 +158,15 @@ def __init__(self, dropout=config.dropout_prepost, prefix=self.autoregr_layer.prefix + "post_", num_hidden=config.model_size) + # TODO (tdomhan): Remove with next major version bump. + # For backwards compatibility with versions prior to 2.1.17 we also store the layers under the previous + # attribute name. 
This way parameters can be loaded as either decoder.layers.0.autoregr_layer.ff_out.weight + # or decoder.layers.0.self_attention.ff_out.weight. Parameter deduplication makes sure parameters are stored + # and loaded once only. + if self.decoder_type == C.TRANSFORMER_TYPE: + self.self_attention = self.autoregr_layer + self.pre_self_attention = self.pre_autoregr_layer + self.post_self_attention = self.post_autoregr_layer self.pre_enc_attention = TransformerProcessBlock(sequence=config.preprocess_sequence, dropout=config.dropout_prepost, diff --git a/test/data/model_2.1.x/README.md b/test/data/model_2.1.x/README.md new file mode 100644 index 000000000..8b78b6b06 --- /dev/null +++ b/test/data/model_2.1.x/README.md @@ -0,0 +1,22 @@ +The model was generated with the following command: +``` +-s docs/tutorials/seqcopy/data/dev.source +-t docs/tutorials/seqcopy/data/dev.target +-vs docs/tutorials/seqcopy/data/dev.source +-vt docs/tutorials/seqcopy/data/dev.target +--transformer-model-size 16 +--num-layers 1:1 +--transformer-attention-heads 2 +--transformer-feed-forward-num-hidden 16 +--overwrite-output +--use-cpu +--batch-type sentence +--batch-size 32 +--decode-and-evaluate 400 +--checkpoint-interval 500 +--initial-learning-rate 0.01 +--max-num-checkpoint-not-improved 4 +-o model +``` + +The model_input is just `head dev.source`. 
diff --git a/test/data/model_2.1.x/config b/test/data/model_2.1.x/config new file mode 100644 index 000000000..2d644b413 --- /dev/null +++ b/test/data/model_2.1.x/config @@ -0,0 +1,157 @@ +!ModelConfig +config_data: !DataConfig + data_statistics: !DataStatistics + average_len_target_per_bucket: + - null + - 13.44736842105263 + - 20.568421052631578 + - 28.053672316384176 + - null + - null + - null + - null + - null + - null + - null + - null + buckets: + - !!python/tuple + - 8 + - 8 + - !!python/tuple + - 16 + - 16 + - !!python/tuple + - 24 + - 24 + - !!python/tuple + - 32 + - 32 + - !!python/tuple + - 40 + - 40 + - !!python/tuple + - 48 + - 48 + - !!python/tuple + - 56 + - 56 + - !!python/tuple + - 64 + - 64 + - !!python/tuple + - 72 + - 72 + - !!python/tuple + - 80 + - 80 + - !!python/tuple + - 88 + - 88 + - !!python/tuple + - 96 + - 96 + length_ratio_mean: 1.0 + length_ratio_stats_per_bucket: + - &id001 !!python/tuple + - null + - null + - !!python/tuple + - 1.0 + - 0.0 + - !!python/tuple + - 1.0 + - 0.0 + - !!python/tuple + - 1.0 + - 0.0 + - *id001 + - *id001 + - *id001 + - *id001 + - *id001 + - *id001 + - *id001 + - *id001 + length_ratio_std: 0.0 + max_observed_len_source: 31 + max_observed_len_target: 31 + num_discarded: 0 + num_sents: 1000 + num_sents_per_bucket: + - 0 + - 266 + - 380 + - 354 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + num_tokens_source: 21324 + num_tokens_target: 21324 + num_unks_source: 0 + num_unks_target: 0 + size_vocab_source: 15 + size_vocab_target: 15 + max_seq_len_source: 96 + max_seq_len_target: 96 + num_source_factors: 1 +config_decoder: !TransformerConfig + act_type: relu + attention_heads: 2 + depth_key_value: 16 + dropout_act: 0.1 + dropout_attention: 0.1 + dropout_prepost: 0.1 + feed_forward_num_hidden: 16 + lhuc: false + max_seq_len_source: 96 + max_seq_len_target: 96 + model_size: 16 + num_layers: 1 + positional_embedding_type: fixed + postprocess_sequence: dr + preprocess_sequence: n + use_lhuc: false 
+config_embed_source: !EmbeddingConfig + allow_sparse_grad: true + dropout: 0.0 + factor_configs: null + num_embed: 16 + num_factors: 1 + vocab_size: 15 +config_embed_target: !EmbeddingConfig + allow_sparse_grad: true + dropout: 0.0 + factor_configs: null + num_embed: 16 + num_factors: 1 + vocab_size: 15 +config_encoder: !TransformerConfig + act_type: relu + attention_heads: 2 + depth_key_value: 0 + dropout_act: 0.1 + dropout_attention: 0.1 + dropout_prepost: 0.1 + feed_forward_num_hidden: 16 + lhuc: false + max_seq_len_source: 96 + max_seq_len_target: 96 + model_size: 16 + num_layers: 1 + positional_embedding_type: fixed + postprocess_sequence: dr + preprocess_sequence: n + use_lhuc: false +config_length_task: null +dtype: float32 +intgemm_custom_lib: /Volumes/CaseSensitive/Projects/CoreMT/sockeye-github/sockeye/libintgemm.so +lhuc: false +vocab_source_size: 15 +vocab_target_size: 15 +weight_tying_type: src_trg_softmax diff --git a/test/data/model_2.1.x/model_input b/test/data/model_2.1.x/model_input new file mode 100644 index 000000000..8e94f7366 --- /dev/null +++ b/test/data/model_2.1.x/model_input @@ -0,0 +1,10 @@ +6 3 0 9 3 2 0 5 3 8 0 1 0 4 1 6 2 8 9 10 3 7 0 4 9 7 5 2 7 7 +2 1 7 7 5 0 5 7 1 7 10 4 0 9 10 5 0 5 +6 8 4 6 1 8 1 3 2 4 0 1 6 4 6 1 0 6 5 4 7 0 5 +8 1 7 6 9 10 10 3 4 7 8 1 9 6 9 5 2 3 1 1 +0 6 4 2 0 6 8 1 0 8 3 7 4 0 8 0 1 2 0 0 8 9 4 1 7 4 +10 1 8 2 3 4 2 3 7 6 6 +7 7 0 1 5 8 8 8 10 1 7 6 7 4 4 0 9 4 2 7 6 3 8 2 +5 7 3 5 3 7 5 5 9 9 7 5 5 5 8 0 10 8 8 5 3 10 5 6 2 9 8 3 7 7 +6 9 4 4 7 6 9 4 5 9 10 1 8 2 +7 0 1 6 0 6 9 7 2 4 3 diff --git a/test/data/model_2.1.x/params.best b/test/data/model_2.1.x/params.best new file mode 100644 index 000000000..13e33ec10 Binary files /dev/null and b/test/data/model_2.1.x/params.best differ diff --git a/test/data/model_2.1.x/version b/test/data/model_2.1.x/version new file mode 100644 index 000000000..91dbb1711 --- /dev/null +++ b/test/data/model_2.1.x/version @@ -0,0 +1 @@ +2.1.16 \ No newline at end of file diff 
--git a/test/data/model_2.1.x/vocab.src.0.json b/test/data/model_2.1.x/vocab.src.0.json new file mode 100644 index 000000000..b2e392270 --- /dev/null +++ b/test/data/model_2.1.x/vocab.src.0.json @@ -0,0 +1,17 @@ +{ + "": 0, + "": 1, + "": 2, + "": 3, + "5": 4, + "3": 5, + "9": 6, + "1": 7, + "10": 8, + "6": 9, + "7": 10, + "2": 11, + "4": 12, + "0": 13, + "8": 14 +} \ No newline at end of file diff --git a/test/data/model_2.1.x/vocab.trg.0.json b/test/data/model_2.1.x/vocab.trg.0.json new file mode 100644 index 000000000..b2e392270 --- /dev/null +++ b/test/data/model_2.1.x/vocab.trg.0.json @@ -0,0 +1,17 @@ +{ + "": 0, + "": 1, + "": 2, + "": 3, + "5": 4, + "3": 5, + "9": 6, + "1": 7, + "10": 8, + "6": 9, + "7": 10, + "2": 11, + "4": 12, + "0": 13, + "8": 14 +} \ No newline at end of file diff --git a/test/integration/test_backwards_compatibility.py b/test/integration/test_backwards_compatibility.py new file mode 100644 index 000000000..7dd15b997 --- /dev/null +++ b/test/integration/test_backwards_compatibility.py @@ -0,0 +1,55 @@ +# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You may not +# use this file except in compliance with the License. A copy of the License +# is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. 
import logging
import os
import sys
from tempfile import TemporaryDirectory
from unittest.mock import patch

import sockeye.translate

logger = logging.getLogger(__name__)

# Location of the checked-in model (its `version` file says 2.1.16). The path is resolved relative to this
# test file (test/integration/ -> test/data/model_2.1.x) so the test does not depend on the working directory.
_MODEL_DIR = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, "data", "model_2.1.x"))
_MODEL_INPUT = os.path.join(_MODEL_DIR, "model_input")

# The model was trained on the sequence-copy tutorial data (see the README next to the model), so the
# expected translation of `model_input` is the input itself.
EXPECTED_OUTPUT = """6 3 0 9 3 2 0 5 3 8 0 1 0 4 1 6 2 8 9 10 3 7 0 4 9 7 5 2 7 7
2 1 7 7 5 0 5 7 1 7 10 4 0 9 10 5 0 5
6 8 4 6 1 8 1 3 2 4 0 1 6 4 6 1 0 6 5 4 7 0 5
8 1 7 6 9 10 10 3 4 7 8 1 9 6 9 5 2 3 1 1
0 6 4 2 0 6 8 1 0 8 3 7 4 0 8 0 1 2 0 0 8 9 4 1 7 4
10 1 8 2 3 4 2 3 7 6 6
7 7 0 1 5 8 8 8 10 1 7 6 7 4 4 0 9 4 2 7 6 3 8 2
5 7 3 5 3 7 5 5 9 9 7 5 5 5 8 0 10 8 8 5 3 10 5 6 2 9 8 3 7 7
6 9 4 4 7 6 9 4 5 9 10 1 8 2
7 0 1 6 0 6 9 7 2 4 3
"""


def test_backwards_compatibility():
    """
    Check that the current code can still translate with a model trained by an earlier release of the
    same major version.

    Runs `sockeye.translate.main()` on the checked-in model with a patched `sys.argv`, writes the
    translations to a temporary file and compares them with `EXPECTED_OUTPUT`.
    """
    with TemporaryDirectory() as work_dir:
        output_file = os.path.join(work_dir, "out")
        # Build argv as a list instead of splitting a formatted string: splitting on whitespace would
        # break the command line if the sockeye install path or the temp directory contains spaces.
        argv = [sockeye.translate.__file__,
                "--use-cpu",
                "--models", _MODEL_DIR,
                "--input", _MODEL_INPUT,
                "--output", output_file]
        logger.info("Translating with params %s", " ".join(argv))
        with patch.object(sys, "argv", argv):
            sockeye.translate.main()

        # Read the result while the temporary directory still exists.
        with open(output_file, encoding="utf-8") as model_out:
            assert model_out.read() == EXPECTED_OUTPUT