From 516f8d28ac0695f6a22f01ff28ee2ecb5c3f960f Mon Sep 17 00:00:00 2001
From: Hung-chih Yang <370110+hcyang@users.noreply.github.com>
Date: Mon, 22 Feb 2021 13:13:58 -0800
Subject: [PATCH] EN entity models and revert 20210211 to 20210205 (#6214)

* EN entity models and revert 20210211 to 20210205

* defaults update
---
 Orchestrator/docs/NLRModels.md      | 22 +++++++++++++++-------
 Orchestrator/v0.2/nlr_versions.json | 26 +++++++++++++++++++-------
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/Orchestrator/docs/NLRModels.md b/Orchestrator/docs/NLRModels.md
index bb8658c77..560e391ac 100644
--- a/Orchestrator/docs/NLRModels.md
+++ b/Orchestrator/docs/NLRModels.md
@@ -16,7 +16,7 @@ It is a 6-layer pretrained [Transformer][7] model optimized for conversation.
 Its architecture is pretrained for example-based use ([KNN][3]),
 thus it can be used out of box. This is the default model used if none explicitly specified.
 
-### pretrained.20210211.microsoft.dte.00.06.unicoder_multilingual.onnx
+### pretrained.20210205.microsoft.dte.00.06.unicoder_multilingual.onnx
 This is a high quality multilingual base model for intent detection. It's smaller and faster than its 12-layer alternative.
 It is a 6-layer pretrained pretrained [Transformer][7] model optimized for conversation.
 Its architecture is pretrained for example-based use ([KNN][3]), thus it can be used out of box. The model supports in total 100 languages (full list can be found at [XLMR Supported Languages][8]). 8 languages (EN, ES, DE, FR, IT, JA, PT, and ZH) are fine-tuned with additional data (performance can be found [here](#multilingual-intent-detection-models-evaluation)). 
@@ -40,8 +40,6 @@ This is a high quality multilingual base model for intent detection.
 It is a 12-layer pretrained pretrained [Transformer][7] model optimized for conversation.
 Its architecture is pretrained for example-based use ([KNN][3]), thus it can be used out of box. The model supports in total 100 languages (full list can be found at [XLMR Supported Languages][8]). 8 languages (EN, ES, DE, FR, IT, JA, PT, and ZH) are fine-tuned with additional data (performance can be found [here](#multilingual-intent-detection-models-evaluation)). 
 
-
-
 ## Experimental Models
 
 ### pretrained.20210205.microsoft.dte.00.12.bert_example_ner.en.onnx (experimental)
@@ -49,6 +47,11 @@ This is a high quality EN-only base model for entity extraction.
 It is a 12-layer pretrained pretrained [Transformer][7] model optimized for conversation.
 Its architecture is pretrained for example-based use ([KNN][3]), thus it can be used out of box.
 
+### pretrained.20210218.microsoft.dte.00.12.bert_example_ner.en.onnx (experimental)
+This is yet another high quality EN-only base model for entity extraction.
+It is a 12-layer pretrained [Transformer][7] model optimized for conversation.
+Its architecture is pretrained for example-based use ([KNN][3]), thus it can be used out of box.
+
 ### pretrained.20210105.microsoft.dte.00.12.bert_example_ner_multilingual.onnx (experimental)
 This is a high quality multilingual base model for entity extraction.
 It is a 12-layer pretrained pretrained [Transformer][7] model optimized for conversation.
@@ -64,7 +67,12 @@ This is a high quality EN-only base model for entity extraction. It's smaller an
 It is a 6-layer pretrained pretrained [Transformer][7] model optimized for conversation.
 Its architecture is pretrained for example-based use ([KNN][3]), thus it can be used out of box.
 
-### pretrained.20210211.microsoft.dte.00.06.bert_example_ner_multilingual.onnx (experimental)
+### pretrained.20210218.microsoft.dte.00.06.bert_example_ner.en.onnx (experimental)
+This is a high quality EN-only base model for entity extraction. It's smaller and faster than its 12-layer alternative.
+It is a 6-layer pretrained [Transformer][7] model optimized for conversation.
+Its architecture is pretrained for example-based use ([KNN][3]), thus it can be used out of box.
+
+### pretrained.20210205.microsoft.dte.00.06.bert_example_ner_multilingual.onnx (experimental)
 This is a high quality multilingual base model for entity extraction. It's smaller and faster than its 12-layer alternative.
 It is a 6-layer pretrained pretrained [Transformer][7] model optimized for conversation.
 Its architecture is pretrained for example-based use ([KNN][3]), thus it can be used out of box.
@@ -103,21 +111,21 @@ For a more quantitative comparison analysis of the different models see the foll
 
 | Model                                                        | Base Model | Layers | Encoding time per query | Disk Allocation |
 | ------------------------------------------------------------ | ---------- | ------ | ----------------------- | --------------- |
-| pretrained.20210211.microsoft.dte.00.06.unicoder_multilingual.onnx | Unicoder   | 6      | ~ 16 ms                 | 896M            |
+| pretrained.20210205.microsoft.dte.00.06.unicoder_multilingual.onnx | Unicoder   | 6      | ~ 16 ms                 | 896M            |
 | pretrained.20201210.microsoft.dte.00.12.unicoder_multilingual.onnx | Unicoder   | 12     | ~ 30 ms                 | 1.08G           |
 
 - The following table shows how accurate is each model by training and testing on the same language, evaluated by **micro-average-accuracy** on an internal dataset.
 
 | Model                                                        | de-de | en-us | es-es | es-mx | fr-ca | fr-fr | it-it | ja-jp | pt-br | zh-cn |
 | ------------------------------------------------------------ | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- |
-| pretrained.20210211.microsoft.dte.00.06.unicoder_multilingual.onnx | 0.638 | 0.785 | 0.662 | 0.760 | 0.723 | 0.661 | 0.701 | 0.786 | 0.735 | 0.805 |
+| pretrained.20210205.microsoft.dte.00.06.unicoder_multilingual.onnx | 0.638 | 0.785 | 0.662 | 0.760 | 0.723 | 0.661 | 0.701 | 0.786 | 0.735 | 0.805 |
 | pretrained.20201210.microsoft.dte.00.12.unicoder_multilingual.onnx | 0.642 | 0.764 | 0.646 | 0.754 | 0.722 | 0.636 | 0.689 | 0.789 | 0.725 | 0.809 |
 
 - The following table shows how accurate is each model by training on **en-us** and testing on the different languages, evaluated by **micro-average-accuracy** on an internal dataset.
 
 | Model                                                        | de-de | en-us | es-es | es-mx | fr-ca | fr-fr | it-it | ja-jp | pt-br | zh-cn |
 | ------------------------------------------------------------ | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- |
-| pretrained.20210211.microsoft.dte.00.06.unicoder_multilingual.onnx | 0.495 | 0.785 | 0.530 | 0.621 | 0.560 | 0.518 | 0.546 | 0.663 | 0.568 | 0.687 |
+| pretrained.20210205.microsoft.dte.00.06.unicoder_multilingual.onnx | 0.495 | 0.785 | 0.530 | 0.621 | 0.560 | 0.518 | 0.546 | 0.663 | 0.568 | 0.687 |
 | pretrained.20201210.microsoft.dte.00.12.unicoder_multilingual.onnx | 0.499 | 0.764 | 0.529 | 0.604 | 0.562 | 0.515 | 0.547 | 0.646 | 0.555 | 0.681 |
 
 ### English Entity Extraction Models Evaluation
diff --git a/Orchestrator/v0.2/nlr_versions.json b/Orchestrator/v0.2/nlr_versions.json
index 2d34425af..7df25f7df 100644
--- a/Orchestrator/v0.2/nlr_versions.json
+++ b/Orchestrator/v0.2/nlr_versions.json
@@ -2,7 +2,7 @@
   "version": "0.2",
   "defaults": {
     "en_intent": "pretrained.20200924.microsoft.dte.00.06.en.onnx",
-    "multilingual_intent": "pretrained.20210211.microsoft.dte.00.06.unicoder_multilingual.onnx"
+    "multilingual_intent": "pretrained.20210205.microsoft.dte.00.06.unicoder_multilingual.onnx"
   },
   "models": {
     "pretrained.20200924.microsoft.dte.00.03.en.onnx": {
@@ -29,6 +29,12 @@
       "description": "(experimental) Bot Framework SDK release 4.10 - English ONNX V1.4 12-layer per-token entity base model",
       "minSDKVersion": "4.10.0"
     },
+    "pretrained.20210218.microsoft.dte.00.12.bert_example_ner.en.onnx": {
+      "releaseDate": "02/18/2021",
+      "modelUri": "https://models.botframework.com/models/dte/onnx/pretrained.20210218.microsoft.dte.00.12.bert_example_ner.en.onnx.zip",
+      "description": "(experimental) Bot Framework SDK release 4.10 - English ONNX V1.4 12-layer per-token entity base model",
+      "minSDKVersion": "4.10.0"
+    },
     "pretrained.20201210.microsoft.dte.00.12.unicoder_multilingual.onnx": {
       "releaseDate": "12/10/2020",
       "modelUri": "https://models.botframework.com/models/dte/onnx/pretrained.20201210.microsoft.dte.00.12.unicoder_multilingual.onnx.zip",
@@ -53,15 +59,21 @@
       "description": "(experimental) Bot Framework SDK release 4.10 - English ONNX V1.4 6-layer per-token entity base model",
       "minSDKVersion": "4.10.0"
     },
-    "pretrained.20210211.microsoft.dte.00.06.unicoder_multilingual.onnx": {
-      "releaseDate": "02/11/2021",
-      "modelUri": "https://models.botframework.com/models/dte/onnx/pretrained.20210211.microsoft.dte.00.06.unicoder_multilingual.onnx.zip",
+    "pretrained.20210218.microsoft.dte.00.06.bert_example_ner.en.onnx": {
+      "releaseDate": "02/18/2021",
+      "modelUri": "https://models.botframework.com/models/dte/onnx/pretrained.20210218.microsoft.dte.00.06.bert_example_ner.en.onnx.zip",
+      "description": "(experimental) Bot Framework SDK release 4.10 - English ONNX V1.4 6-layer per-token entity base model",
+      "minSDKVersion": "4.10.0"
+    },
+    "pretrained.20210205.microsoft.dte.00.06.unicoder_multilingual.onnx": {
+      "releaseDate": "02/05/2021",
+      "modelUri": "https://models.botframework.com/models/dte/onnx/pretrained.20210205.microsoft.dte.00.06.unicoder_multilingual.onnx.zip",
       "description": "Bot Framework SDK release 4.10 - Multilingual ONNX V1.4 6-layer per-token intent base model",
       "minSDKVersion": "4.10.0"
     },
-    "pretrained.20210211.microsoft.dte.00.06.bert_example_ner_multilingual.onnx": {
-      "releaseDate": "02/11/2021",
-      "modelUri": "https://models.botframework.com/models/dte/onnx/pretrained.20210211.microsoft.dte.00.06.bert_example_ner_multilingual.onnx.zip",
+    "pretrained.20210205.microsoft.dte.00.06.bert_example_ner_multilingual.onnx": {
+      "releaseDate": "02/05/2021",
+      "modelUri": "https://models.botframework.com/models/dte/onnx/pretrained.20210205.microsoft.dte.00.06.bert_example_ner_multilingual.onnx.zip",
       "description": "(experimental) Bot Framework SDK release 4.10 - Multilingual ONNX V1.4 6-layer per-token entity base model",
       "minSDKVersion": "4.10.0"
     },