diff --git a/CHANGELOG.md b/CHANGELOG.md
index d0e694f93..f47f1c94d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,12 @@ Note that Sockeye has checks in place to not translate with an old model that wa
 Each version section may have have subsections for: _Added_, _Changed_, _Removed_, _Deprecated_, and _Fixed_.
 
 
+## [2.1.26]
+
+### Fixed
+
+- Fixes a backwards incompatibility introduced in 2.1.17, which prevented models trained with prior versions from being used for inference.
+
 ## [2.1.25]
 
 ### Changed
diff --git a/MANIFEST.in b/MANIFEST.in
index f8ba0012b..e307a5fa7 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -7,6 +7,12 @@ include pylintrc
 include .flake8
 include typechecked-files
 include test/data/config_with_missing_attributes.yaml
+include test/data/model_2.1.x/config
+include test/data/model_2.1.x/params.best
+include test/data/model_2.1.x/model_input
+include test/data/model_2.1.x/vocab*
+include test/data/model_2.1.x/version
+include test/data/model_2.1.x/README.md
 include sockeye/git_version.py
 include *.bib
 recursive-include .github *
diff --git a/sockeye/__init__.py b/sockeye/__init__.py
index e2782f453..34f6d9f04 100644
--- a/sockeye/__init__.py
+++ b/sockeye/__init__.py
@@ -11,4 +11,4 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.
 
-__version__ = '2.1.25'
+__version__ = '2.1.26'
diff --git a/sockeye/transformer.py b/sockeye/transformer.py
index d764b0ec5..fdc5fc799 100644
--- a/sockeye/transformer.py
+++ b/sockeye/transformer.py
@@ -158,6 +158,15 @@ def __init__(self,
                                                                dropout=config.dropout_prepost,
                                                                prefix=self.autoregr_layer.prefix + "post_",
                                                                num_hidden=config.model_size)
+            # TODO (tdomhan): Remove with next major version bump.
+            # For backwards compatibility with versions prior to 2.1.17 we also store the layers under the previous
+            # attribute name. This way parameters can be loaded as either decoder.layers.0.autoregr_layer.ff_out.weight
+            # or decoder.layers.0.self_attention.ff_out.weight. Parameter deduplication makes sure parameters are stored
+            # and loaded once only.
+            if self.decoder_type == C.TRANSFORMER_TYPE:
+                self.self_attention = self.autoregr_layer
+                self.pre_self_attention = self.pre_autoregr_layer
+                self.post_self_attention = self.post_autoregr_layer
 
             self.pre_enc_attention = TransformerProcessBlock(sequence=config.preprocess_sequence,
                                                              dropout=config.dropout_prepost,
diff --git a/test/data/model_2.1.x/README.md b/test/data/model_2.1.x/README.md
new file mode 100644
index 000000000..8b78b6b06
--- /dev/null
+++ b/test/data/model_2.1.x/README.md
@@ -0,0 +1,22 @@
+The model was generated with Sockeye 2.1.16 (see the `version` file) by running `sockeye.train` with the following arguments:
+```
+-s docs/tutorials/seqcopy/data/dev.source
+-t docs/tutorials/seqcopy/data/dev.target
+-vs docs/tutorials/seqcopy/data/dev.source
+-vt docs/tutorials/seqcopy/data/dev.target
+--transformer-model-size 16
+--num-layers 1:1
+--transformer-attention-heads 2
+--transformer-feed-forward-num-hidden 16
+--overwrite-output
+--use-cpu
+--batch-type sentence
+--batch-size 32
+--decode-and-evaluate 400
+--checkpoint-interval 500
+--initial-learning-rate 0.01
+--max-num-checkpoint-not-improved 4
+-o model
+```
+
+The `model_input` file is the output of `head docs/tutorials/seqcopy/data/dev.source`, i.e. its first 10 lines.
diff --git a/test/data/model_2.1.x/config b/test/data/model_2.1.x/config
new file mode 100644
index 000000000..2d644b413
--- /dev/null
+++ b/test/data/model_2.1.x/config
@@ -0,0 +1,157 @@
+!ModelConfig
+config_data: !DataConfig
+  data_statistics: !DataStatistics
+    average_len_target_per_bucket:
+    - null
+    - 13.44736842105263
+    - 20.568421052631578
+    - 28.053672316384176
+    - null
+    - null
+    - null
+    - null
+    - null
+    - null
+    - null
+    - null
+    buckets:
+    - !!python/tuple
+      - 8
+      - 8
+    - !!python/tuple
+      - 16
+      - 16
+    - !!python/tuple
+      - 24
+      - 24
+    - !!python/tuple
+      - 32
+      - 32
+    - !!python/tuple
+      - 40
+      - 40
+    - !!python/tuple
+      - 48
+      - 48
+    - !!python/tuple
+      - 56
+      - 56
+    - !!python/tuple
+      - 64
+      - 64
+    - !!python/tuple
+      - 72
+      - 72
+    - !!python/tuple
+      - 80
+      - 80
+    - !!python/tuple
+      - 88
+      - 88
+    - !!python/tuple
+      - 96
+      - 96
+    length_ratio_mean: 1.0
+    length_ratio_stats_per_bucket:
+    - &id001 !!python/tuple
+      - null
+      - null
+    - !!python/tuple
+      - 1.0
+      - 0.0
+    - !!python/tuple
+      - 1.0
+      - 0.0
+    - !!python/tuple
+      - 1.0
+      - 0.0
+    - *id001
+    - *id001
+    - *id001
+    - *id001
+    - *id001
+    - *id001
+    - *id001
+    - *id001
+    length_ratio_std: 0.0
+    max_observed_len_source: 31
+    max_observed_len_target: 31
+    num_discarded: 0
+    num_sents: 1000
+    num_sents_per_bucket:
+    - 0
+    - 266
+    - 380
+    - 354
+    - 0
+    - 0
+    - 0
+    - 0
+    - 0
+    - 0
+    - 0
+    - 0
+    num_tokens_source: 21324
+    num_tokens_target: 21324
+    num_unks_source: 0
+    num_unks_target: 0
+    size_vocab_source: 15
+    size_vocab_target: 15
+  max_seq_len_source: 96
+  max_seq_len_target: 96
+  num_source_factors: 1
+config_decoder: !TransformerConfig
+  act_type: relu
+  attention_heads: 2
+  depth_key_value: 16
+  dropout_act: 0.1
+  dropout_attention: 0.1
+  dropout_prepost: 0.1
+  feed_forward_num_hidden: 16
+  lhuc: false
+  max_seq_len_source: 96
+  max_seq_len_target: 96
+  model_size: 16
+  num_layers: 1
+  positional_embedding_type: fixed
+  postprocess_sequence: dr
+  preprocess_sequence: n
+  use_lhuc: false
+config_embed_source: !EmbeddingConfig
+  allow_sparse_grad: true
+  dropout: 0.0
+  factor_configs: null
+  num_embed: 16
+  num_factors: 1
+  vocab_size: 15
+config_embed_target: !EmbeddingConfig
+  allow_sparse_grad: true
+  dropout: 0.0
+  factor_configs: null
+  num_embed: 16
+  num_factors: 1
+  vocab_size: 15
+config_encoder: !TransformerConfig
+  act_type: relu
+  attention_heads: 2
+  depth_key_value: 0
+  dropout_act: 0.1
+  dropout_attention: 0.1
+  dropout_prepost: 0.1
+  feed_forward_num_hidden: 16
+  lhuc: false
+  max_seq_len_source: 96
+  max_seq_len_target: 96
+  model_size: 16
+  num_layers: 1
+  positional_embedding_type: fixed
+  postprocess_sequence: dr
+  preprocess_sequence: n
+  use_lhuc: false
+config_length_task: null
+dtype: float32
+intgemm_custom_lib: /Volumes/CaseSensitive/Projects/CoreMT/sockeye-github/sockeye/libintgemm.so
+lhuc: false
+vocab_source_size: 15
+vocab_target_size: 15
+weight_tying_type: src_trg_softmax
diff --git a/test/data/model_2.1.x/model_input b/test/data/model_2.1.x/model_input
new file mode 100644
index 000000000..8e94f7366
--- /dev/null
+++ b/test/data/model_2.1.x/model_input
@@ -0,0 +1,10 @@
+6 3 0 9 3 2 0 5 3 8 0 1 0 4 1 6 2 8 9 10 3 7 0 4 9 7 5 2 7 7
+2 1 7 7 5 0 5 7 1 7 10 4 0 9 10 5 0 5
+6 8 4 6 1 8 1 3 2 4 0 1 6 4 6 1 0 6 5 4 7 0 5
+8 1 7 6 9 10 10 3 4 7 8 1 9 6 9 5 2 3 1 1
+0 6 4 2 0 6 8 1 0 8 3 7 4 0 8 0 1 2 0 0 8 9 4 1 7 4
+10 1 8 2 3 4 2 3 7 6 6
+7 7 0 1 5 8 8 8 10 1 7 6 7 4 4 0 9 4 2 7 6 3 8 2
+5 7 3 5 3 7 5 5 9 9 7 5 5 5 8 0 10 8 8 5 3 10 5 6 2 9 8 3 7 7
+6 9 4 4 7 6 9 4 5 9 10 1 8 2
+7 0 1 6 0 6 9 7 2 4 3
diff --git a/test/data/model_2.1.x/params.best b/test/data/model_2.1.x/params.best
new file mode 100644
index 000000000..13e33ec10
Binary files /dev/null and b/test/data/model_2.1.x/params.best differ
diff --git a/test/data/model_2.1.x/version b/test/data/model_2.1.x/version
new file mode 100644
index 000000000..91dbb1711
--- /dev/null
+++ b/test/data/model_2.1.x/version
@@ -0,0 +1 @@
+2.1.16
\ No newline at end of file
diff --git a/test/data/model_2.1.x/vocab.src.0.json b/test/data/model_2.1.x/vocab.src.0.json
new file mode 100644
index 000000000..b2e392270
--- /dev/null
+++ b/test/data/model_2.1.x/vocab.src.0.json
@@ -0,0 +1,17 @@
+{
+    "<pad>": 0,
+    "<unk>": 1,
+    "<s>": 2,
+    "</s>": 3,
+    "5": 4,
+    "3": 5,
+    "9": 6,
+    "1": 7,
+    "10": 8,
+    "6": 9,
+    "7": 10,
+    "2": 11,
+    "4": 12,
+    "0": 13,
+    "8": 14
+}
\ No newline at end of file
diff --git a/test/data/model_2.1.x/vocab.trg.0.json b/test/data/model_2.1.x/vocab.trg.0.json
new file mode 100644
index 000000000..b2e392270
--- /dev/null
+++ b/test/data/model_2.1.x/vocab.trg.0.json
@@ -0,0 +1,17 @@
+{
+    "<pad>": 0,
+    "<unk>": 1,
+    "<s>": 2,
+    "</s>": 3,
+    "5": 4,
+    "3": 5,
+    "9": 6,
+    "1": 7,
+    "10": 8,
+    "6": 9,
+    "7": 10,
+    "2": 11,
+    "4": 12,
+    "0": 13,
+    "8": 14
+}
\ No newline at end of file
diff --git a/test/integration/test_backwards_compatibility.py b/test/integration/test_backwards_compatibility.py
new file mode 100644
index 000000000..7dd15b997
--- /dev/null
+++ b/test/integration/test_backwards_compatibility.py
@@ -0,0 +1,55 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not
+# use this file except in compliance with the License. A copy of the License
+# is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is distributed on
+# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+# express or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+
+import logging
+import os
+import sys
+from tempfile import TemporaryDirectory
+
+import sockeye.translate
+from unittest.mock import patch
+
+logger = logging.getLogger(__name__)
+
+EXPECTED_OUTPUT = """6 3 0 9 3 2 0 5 3 8 0 1 0 4 1 6 2 8 9 10 3 7 0 4 9 7 5 2 7 7
+2 1 7 7 5 0 5 7 1 7 10 4 0 9 10 5 0 5
+6 8 4 6 1 8 1 3 2 4 0 1 6 4 6 1 0 6 5 4 7 0 5
+8 1 7 6 9 10 10 3 4 7 8 1 9 6 9 5 2 3 1 1
+0 6 4 2 0 6 8 1 0 8 3 7 4 0 8 0 1 2 0 0 8 9 4 1 7 4
+10 1 8 2 3 4 2 3 7 6 6
+7 7 0 1 5 8 8 8 10 1 7 6 7 4 4 0 9 4 2 7 6 3 8 2
+5 7 3 5 3 7 5 5 9 9 7 5 5 5 8 0 10 8 8 5 3 10 5 6 2 9 8 3 7 7
+6 9 4 4 7 6 9 4 5 9 10 1 8 2
+7 0 1 6 0 6 9 7 2 4 3
+"""
+
+
+def test_backwards_compatibility():
+    """
+    Checks that the current code can still translate with a model trained with an older release of the same major
+    version: runs sockeye.translate on test/data/model_2.1.x and compares the output file to EXPECTED_OUTPUT.
+    """
+    with TemporaryDirectory() as work_dir:
+        output_file = os.path.join(work_dir, "out")
+        params = """{sockeye} --use-cpu --models {model} --input {input} --output {output} """.format(
+            sockeye=sockeye.translate.__file__,
+            model="test/data/model_2.1.x",  # relative path: resolved against the current working directory
+            input="test/data/model_2.1.x/model_input",
+            output=output_file
+        )
+        logger.info("Translating with params %s", params)
+        with patch.object(sys, "argv", params.split()):  # argv[0] is the script path, the rest are the CLI arguments
+            sockeye.translate.main()
+
+        with open(output_file) as model_out:
+            assert model_out.read() == EXPECTED_OUTPUT  # exact match, including the trailing newline