diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 233067735eae43..5d414ab4909ecb 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -160,7 +160,7 @@ For a listing of all platforms and configurations used for testing, refer to the **Disclaimers** * Intel® Distribution of OpenVINO™ toolkit performance results are based on release - 2025.0, as of February 05, 2025. + 2025.0 as of January 28, 2025. * OpenVINO Model Server performance results are based on release 2024.5, as of November 20, 2024. diff --git a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst index cc702d5eddec73..1163fced6220a5 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/model-accuracy-int8-fp32.rst @@ -66,6 +66,7 @@ the table for more information. - -0.09% - -0.02% - -0.04% + .. list-table:: Model Accuracy for BF16, FP32 and FP16 (FP16: Arc only. BF16: Xeon® 6972P only) :header-rows: 1 @@ -125,7 +126,8 @@ the table for more information. - 0.01% - - -0.03% -.. list-table:: Model Accuracy for AMX-FP16, AMX-INT4, Arc-FP16 and Arc-INT4 (Arc™ A-series) + +.. list-table:: Model Accuracy for AMX-FP16, AMX-INT4, Arc-FP16 and Arc-INT4 (Arc™ B-series) :header-rows: 1 * - OpenVINO™ Model name @@ -135,6 +137,13 @@ the table for more information. - B, AMX-INT4 - C, Arc-FP16 - D, Arc-INT4 + * - GLM4-9B-Chat + - Data Default WWB + - Similarity + - 6.9% + - 3.8% + - 6.3% + - 15.1% * - Qwen-2.5-7B-instruct - Data Default WWB - Similarity @@ -209,6 +218,7 @@ the table for more information. Notes: For all accuracy metrics a "-", (minus sign), indicates an accuracy drop. The Similarity metric is the distance from "perfect" and as such always positive. Similarity is cosine similarity - the dot product of two vectors divided by the product of their lengths. + .. raw:: html diff --git a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst index 9d5f92dbadbad2..00815e9f1ffecd 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/performance-benchmarks-faq.rst @@ -63,15 +63,15 @@ Performance Information F.A.Q. - Meta AI - Auto regressive language - 8K - * - `Llama-3.2-3B `__ + * - `Llama-3.2-3B-Instruct `__ - Meta AI - Auto regressive language - 128K - * - `Mistral-7b-V0.1 `__ + * - `Mistral-7b-Instruct-V0.2 `__ - Mistral AI - Auto regressive language - - 4096 - * - `Phi3-4k-mini `__ + - 32K + * - `Phi3-4k-mini-Instruct `__ - Huggingface - Auto regressive language - 4096 @@ -79,20 +79,43 @@ Performance Information F.A.Q. - Huggingface - Auto regressive language - 128K + * - `Qwen-2.5-7B-Instruct `__ + - Huggingface + - Auto regressive language + - 128K * - `Stable-Diffusion-V1-5 `__ - Hugginface - Latent Diffusion Model - 77 + * - `FLUX.1-schnell `__ + - Hugginface + - Latent Adversarial Diffusion Distillation Model + - 256 * - `bert-base-cased `__ - BERT - question / answer - 128 + * - `mask_rcnn_resnet50_atrous_coco `__ + - Mask R-CNN ResNet 50 Atrous + - object instance segmentation + - 800x1365 + * - `mobilenet-v2 `__ + - Mobilenet V2 PyTorch + - classification + - 224x224 + * - `resnet-50 `__ + - ResNet-50_v1_ILSVRC-2012 + - classification + - 224x224 + * - `ssd-resnet34-1200-onnx `__ + - ssd-resnet34 onnx model + - object detection + - 1200x1200 * - `yolov8n `__ - Yolov8nano - object detection - 608x608 - .. dropdown:: Where can I purchase the specific hardware used in the benchmarking? Intel partners with vendors all over the world. For a list of Hardware Manufacturers, see the diff --git a/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ov.json b/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ov.json index 9b765d15b1a8b3..55919d39e6cacb 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ov.json +++ b/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ov.json @@ -10,7 +10,7 @@ "Precisions": [ { "int4": "", - "int8": 0.04, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -220,7 +220,7 @@ "Precisions": [ { "int4": "", - "int8": 0.04, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -233,7 +233,7 @@ "Precisions": [ { "int4": "", - "int8": 20456.96, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -501,7 +501,7 @@ { "int4": "", "int8": "", - "fp16": 2.83, + "fp16": "", "fp32": "", "bf16": "" } @@ -799,7 +799,7 @@ "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -955,7 +955,7 @@ "Precisions": [ { "int4": "", - "int8": 1.78, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -968,13 +968,13 @@ "Precisions": [ { "int4": "", - "int8": 561.11, + "int8": 16.0, "fp16": "", "fp32": "", "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -1429,42 +1429,7 @@ "bf16": "" } ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, - { - "Platform": "Intel® Arc™ A-Series Graphics dGPU", - "Model": "stable-diffusion-v1-5", - "featured_SKU": true, - "whats_new_model": false, - "PlatformType": "Accelerator Platforms", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": "", - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "Tokens per sec", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 2.18, - "fp16": 2.19, - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -1865,7 +1830,7 @@ "Precisions": [ { "int4": "", - "int8": 0.14, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -1878,7 +1843,7 @@ "Precisions": [ { "int4": "", - "int8": 6835.33, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -2075,7 +2040,7 @@ "Precisions": [ { "int4": "", - "int8": 0.06, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -2088,7 +2053,7 @@ "Precisions": [ { "int4": "", - "int8": 16196.18, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -2285,9 +2250,9 @@ "Precisions": [ { "int4": "", - "int8": 0.18, + "int8": "", "fp16": "", - "fp32": 0.35, + "fp32": "", "bf16": "" } ], @@ -2530,9 +2495,9 @@ "Precisions": [ { "int4": "", - "int8": 0.17, + "int8": "", "fp16": "", - "fp32": 0.04, + "fp32": "", "bf16": "" } ], @@ -2543,7 +2508,7 @@ "Precisions": [ { "int4": "", - "int8": 5759.21, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -2952,7 +2917,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 0.87, + "fp32": "", "bf16": "" } ], @@ -3197,7 +3162,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 0.33, + "fp32": "", "bf16": "" } ], @@ -4094,41 +4059,6 @@ } } }, - { - "Platform": "Intel® Core™ Ultra 7 processor 155H iGPU-only", - "Model": "stable-diffusion-v1-5", - "featured_SKU": true, - "whats_new_model": false, - "PlatformType": "Intel® Core™, iGPU-only", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": "", - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "Tokens per sec", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 11.12, - "fp16": 10.73, - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, { "Platform": "Intel® Core™ Ultra 7 processor 155H iGPU-only", "Model": "stable-diffusion-v1-5", @@ -4159,7 +4089,7 @@ "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -4282,7 +4212,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 0.54, + "fp32": "", "bf16": "" } ], @@ -4527,7 +4457,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 0.23, + "fp32": "", "bf16": "" } ], @@ -5191,7 +5121,7 @@ { "int4": "", "int8": "", - "fp16": 0.75, + "fp16": "", "fp32": "", "bf16": "" } @@ -5459,41 +5389,6 @@ } } }, - { - "Platform": "Intel® Core™ Ultra 9 processor 288V iGPU-only", - "Model": "stable-diffusion-v1-5", - "featured_SKU": true, - "whats_new_model": false, - "PlatformType": "Intel® Core™, iGPU-only", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": "", - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "Tokens per sec", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 5.46, - "fp16": 5.61, - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, { "Platform": "Intel® Core™ Ultra 9 processor 288V iGPU-only", "Model": "stable-diffusion-v1-5", @@ -5524,7 +5419,7 @@ "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -5612,7 +5507,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 0.15, + "fp32": "", "bf16": "" } ], @@ -5855,7 +5750,7 @@ "Precisions": [ { "int4": "", - "int8": 0.43, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -5868,7 +5763,7 @@ "Precisions": [ { "int4": "", - "int8": 2514.0, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -6277,7 +6172,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 0.22, + "fp32": "", "bf16": "" } ], @@ -6520,7 +6415,7 @@ "Precisions": [ { "int4": "", - "int8": 0.57, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -6533,7 +6428,7 @@ "Precisions": [ { "int4": "", - "int8": 2127.48, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -6940,9 +6835,9 @@ "Precisions": [ { "int4": "", - "int8": 1.68, + "int8": "", "fp16": "", - "fp32": 0.49, + "fp32": "", "bf16": "" } ], @@ -6953,7 +6848,7 @@ "Precisions": [ { "int4": "", - "int8": 734.76, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -7185,9 +7080,9 @@ "Precisions": [ { "int4": "", - "int8": 0.59, + "int8": "", "fp16": "", - "fp32": 0.39, + "fp32": "", "bf16": "" } ], @@ -7430,9 +7325,9 @@ "Precisions": [ { "int4": "", - "int8": 0.71, + "int8": "", "fp16": "", - "fp32": 0.19, + "fp32": "", "bf16": "" } ], @@ -7443,7 +7338,7 @@ "Precisions": [ { "int4": "", - "int8": 1352.54, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -7852,7 +7747,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 0.14, + "fp32": "", "bf16": "" } ], @@ -8095,9 +7990,9 @@ "Precisions": [ { "int4": "", - "int8": 0.5, + "int8": "", "fp16": "", - "fp32": 0.13, + "fp32": "", "bf16": "" } ], @@ -8108,7 +8003,7 @@ "Precisions": [ { "int4": "", - "int8": 1834.27, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -8515,7 +8410,7 @@ "Precisions": [ { "int4": "", - "int8": 1.14, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -8760,9 +8655,9 @@ "Precisions": [ { "int4": "", - "int8": 1.35, + "int8": "", "fp16": "", - "fp32": 0.35, + "fp32": "", "bf16": "" } ], @@ -8773,7 +8668,7 @@ "Precisions": [ { "int4": "", - "int8": 895.22, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -9145,9 +9040,9 @@ "Precisions": [ { "int4": "", - "int8": 0.54, + "int8": "", "fp16": "", - "fp32": 0.48, + "fp32": "", "bf16": "" } ], @@ -9392,7 +9287,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 0.18, + "fp32": "", "bf16": "" } ], @@ -9810,7 +9705,7 @@ "Precisions": [ { "int4": "", - "int8": 1.3, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -9823,13 +9718,13 @@ "Precisions": [ { "int4": "", - "int8": 765.77, + "int8": 36.8, "fp16": "", "fp32": "", "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -10254,41 +10149,6 @@ } } }, - { - "Platform": "Intel® Data Center GPU Flex 170 dGPU", - "Model": "stable-diffusion-v1-5", - "featured_SKU": false, - "whats_new_model": false, - "PlatformType": "Accelerator Platforms", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": "", - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "Tokens per sec", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 1.91, - "fp16": 1.92, - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, { "Platform": "Intel® Data Center GPU Flex 170 dGPU", "Model": "stable-diffusion-v1-5", @@ -10319,7 +10179,7 @@ "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -10545,7 +10405,7 @@ "Precisions": [ { "int4": "", - "int8": 0.16, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -10558,7 +10418,7 @@ "Precisions": [ { "int4": "", - "int8": 6247.36, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -10768,7 +10628,7 @@ "Precisions": [ { "int4": "", - "int8": 21420.36, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -10920,7 +10780,7 @@ } }, { - "Platform": "Intel® Xeon® GNR29 CPU-only", + "Platform": "Intel® Xeon® Gold 5218T CPU-only", "Model": "bert-base-cased", "featured_SKU": false, "whats_new_model": false, @@ -10930,9 +10790,9 @@ "Precisions": [ { "int4": "", - "int8": "", + "int8": 216.65, "fp16": "", - "fp32": "", + "fp32": 80.03, "bf16": "" } ], @@ -10943,7 +10803,7 @@ "Precisions": [ { "int4": "", - "int8": 4.47, + "int8": 14.43, "fp16": "", "fp32": "", "bf16": "" @@ -10955,8 +10815,8 @@ } }, { - "Platform": "Intel® Xeon® GNR29 CPU-only", - "Model": "mobilenet-v2", + "Platform": "Intel® Xeon® Gold 5218T CPU-only", + "Model": "mask_rcnn_resnet50_atrous_coco", "featured_SKU": false, "whats_new_model": false, "PlatformType": "Intel® Xeon®, CPU-only", @@ -10978,7 +10838,7 @@ "Precisions": [ { "int4": "", - "int8": 1.93, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -10990,8 +10850,8 @@ } }, { - "Platform": "Intel® Xeon® GNR29 CPU-only", - "Model": "resnet-50", + "Platform": "Intel® Xeon® Gold 5218T CPU-only", + "Model": "mobilenet-v2", "featured_SKU": false, "whats_new_model": false, "PlatformType": "Intel® Xeon®, CPU-only", @@ -11000,9 +10860,9 @@ "Precisions": [ { "int4": "", - "int8": "", + "int8": 5447.03, "fp16": "", - "fp32": "", + "fp32": 1910.7, "bf16": "" } ], @@ -11013,7 +10873,7 @@ "Precisions": [ { "int4": "", - "int8": 1.88, + "int8": 1.38, "fp16": "", "fp32": "", "bf16": "" @@ -11025,8 +10885,8 @@ } }, { - "Platform": "Intel® Xeon® GNR29 CPU-only", - "Model": "ssd-resnet34-1200", + "Platform": "Intel® Xeon® Gold 5218T CPU-only", + "Model": "resnet-50", "featured_SKU": false, "whats_new_model": false, "PlatformType": "Intel® Xeon®, CPU-only", @@ -11035,9 +10895,9 @@ "Precisions": [ { "int4": "", - "int8": "", + "int8": 958.08, "fp16": "", - "fp32": "", + "fp32": 269.68, "bf16": "" } ], @@ -11048,7 +10908,7 @@ "Precisions": [ { "int4": "", - "int8": 9.35, + "int8": 3.08, "fp16": "", "fp32": "", "bf16": "" @@ -11060,8 +10920,8 @@ } }, { - "Platform": "Intel® Xeon® GNR29 CPU-only", - "Model": "yolo_v8n", + "Platform": "Intel® Xeon® Gold 5218T CPU-only", + "Model": "ssd-resnet34-1200", "featured_SKU": false, "whats_new_model": false, "PlatformType": "Intel® Xeon®, CPU-only", @@ -11070,9 +10930,9 @@ "Precisions": [ { "int4": "", - "int8": "", + "int8": 17.6, "fp16": "", - "fp32": "", + "fp32": 4.58, "bf16": "" } ], @@ -11083,7 +10943,7 @@ "Precisions": [ { "int4": "", - "int8": 3.9, + "int8": 116.38, "fp16": "", "fp32": "", "bf16": "" @@ -11096,18 +10956,18 @@ }, { "Platform": "Intel® Xeon® Gold 5218T CPU-only", - "Model": "bert-base-cased", + "Model": "yolo11", "featured_SKU": false, - "whats_new_model": false, + "whats_new_model": true, "PlatformType": "Intel® Xeon®, CPU-only", "Parameters": { "throughput": { "Precisions": [ { "int4": "", - "int8": 216.65, + "int8": "", "fp16": "", - "fp32": 80.03, + "fp32": 206.34, "bf16": "" } ], @@ -11118,7 +10978,7 @@ "Precisions": [ { "int4": "", - "int8": 14.43, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -11131,7 +10991,7 @@ }, { "Platform": "Intel® Xeon® Gold 5218T CPU-only", - "Model": "mask_rcnn_resnet50_atrous_coco", + "Model": "yolo_v8n", "featured_SKU": false, "whats_new_model": false, "PlatformType": "Intel® Xeon®, CPU-only", @@ -11140,9 +11000,9 @@ "Precisions": [ { "int4": "", - "int8": 3.15, + "int8": 451.74, "fp16": "", - "fp32": 0.91, + "fp32": 174.3, "bf16": "" } ], @@ -11153,7 +11013,7 @@ "Precisions": [ { "int4": "", - "int8": "", + "int8": 5.96, "fp16": "", "fp32": "", "bf16": "" @@ -11165,8 +11025,8 @@ } }, { - "Platform": "Intel® Xeon® Gold 5218T CPU-only", - "Model": "mobilenet-v2", + "Platform": "Intel® Xeon® Gold 6338N CPU-only", + "Model": "bert-base-cased", "featured_SKU": false, "whats_new_model": false, "PlatformType": "Intel® Xeon®, CPU-only", @@ -11175,9 +11035,9 @@ "Precisions": [ { "int4": "", - "int8": 5447.03, + "int8": 622.61, "fp16": "", - "fp32": 1910.7, + "fp32": 241.02, "bf16": "" } ], @@ -11188,182 +11048,7 @@ "Precisions": [ { "int4": "", - "int8": 1.38, - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, - { - "Platform": "Intel® Xeon® Gold 5218T CPU-only", - "Model": "resnet-50", - "featured_SKU": false, - "whats_new_model": false, - "PlatformType": "Intel® Xeon®, CPU-only", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": 958.08, - "fp16": "", - "fp32": 269.68, - "bf16": "" - } - ], - "Unit": "FPS", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 3.08, - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, - { - "Platform": "Intel® Xeon® Gold 5218T CPU-only", - "Model": "ssd-resnet34-1200", - "featured_SKU": false, - "whats_new_model": false, - "PlatformType": "Intel® Xeon®, CPU-only", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": 17.6, - "fp16": "", - "fp32": 4.58, - "bf16": "" - } - ], - "Unit": "FPS", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 116.38, - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, - { - "Platform": "Intel® Xeon® Gold 5218T CPU-only", - "Model": "yolo11", - "featured_SKU": false, - "whats_new_model": true, - "PlatformType": "Intel® Xeon®, CPU-only", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": "", - "fp16": "", - "fp32": 206.34, - "bf16": "" - } - ], - "Unit": "FPS", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": "", - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, - { - "Platform": "Intel® Xeon® Gold 5218T CPU-only", - "Model": "yolo_v8n", - "featured_SKU": false, - "whats_new_model": false, - "PlatformType": "Intel® Xeon®, CPU-only", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": 451.74, - "fp16": "", - "fp32": 174.3, - "bf16": "" - } - ], - "Unit": "FPS", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 5.96, - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, - { - "Platform": "Intel® Xeon® Gold 6338N CPU-only", - "Model": "bert-base-cased", - "featured_SKU": false, - "whats_new_model": false, - "PlatformType": "Intel® Xeon®, CPU-only", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": 622.61, - "fp16": "", - "fp32": 241.02, - "bf16": "" - } - ], - "Unit": "FPS", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 6.35, + "int8": 6.35, "fp16": "", "fp32": "", "bf16": "" @@ -11387,7 +11072,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 2.45, + "fp32": "", "bf16": "" } ], @@ -11632,7 +11317,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 2.25, + "fp32": "", "bf16": "" } ], @@ -11875,7 +11560,7 @@ "Precisions": [ { "int4": "", - "int8": 0.18, + "int8": "", "fp16": "", "fp32": "", "bf16": "" @@ -11888,13 +11573,13 @@ "Precisions": [ { "int4": "", - "int8": 5505.78, + "int8": 113.3, "fp16": "", "fp32": "", "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -12087,7 +11772,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 3.44, + "fp32": "", "bf16": "" } ], @@ -12384,42 +12069,7 @@ "bf16": "" } ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, - { - "Platform": "Intel® Xeon® Platinum 8380 CPU-only", - "Model": "stable-diffusion-v1-5", - "featured_SKU": false, - "whats_new_model": false, - "PlatformType": "Intel® Xeon®, CPU-only", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": "", - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "Tokens per sec", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 12.65, - "fp16": 12.92, - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -12540,8 +12190,8 @@ "Precisions": [ { "int4": "", - "int8": 0.73, - "fp16": 0.64, + "int8": "", + "fp16": "", "fp32": "", "bf16": "" } @@ -12553,13 +12203,13 @@ "Precisions": [ { "int4": "", - "int8": 1357.53, - "fp16": 1544.72, + "int8": 27.6, + "fp16": "", "fp32": "", "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -12752,7 +12402,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 5.18, + "fp32": "", "bf16": "" } ], @@ -13019,41 +12669,6 @@ } } }, - { - "Platform": "Intel® Xeon® Platinum 8480+ CPU-only", - "Model": "stable-diffusion-v1-5", - "featured_SKU": true, - "whats_new_model": false, - "PlatformType": "Intel® Xeon®, CPU-only", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": "", - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "Tokens per sec", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 3.36, - "fp16": 3.31, - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, { "Platform": "Intel® Xeon® Platinum 8480+ CPU-only", "Model": "stable-diffusion-v1-5", @@ -13084,7 +12699,7 @@ "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -13205,8 +12820,8 @@ "Precisions": [ { "int4": "", - "int8": 0.77, - "fp16": 0.68, + "int8": "", + "fp16": "", "fp32": "", "bf16": "" } @@ -13218,13 +12833,13 @@ "Precisions": [ { "int4": "", - "int8": 1290.6, - "fp16": 1468.28, + "int8": 26.2, + "fp16": "", "fp32": "", "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -13417,7 +13032,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 6.41, + "fp32": "", "bf16": "" } ], @@ -13684,41 +13299,6 @@ } } }, - { - "Platform": "Intel® Xeon® Platinum 8580 CPU-only", - "Model": "stable-diffusion-v1-5", - "featured_SKU": true, - "whats_new_model": false, - "PlatformType": "Intel® Xeon®, CPU-only", - "Parameters": { - "throughput": { - "Precisions": [ - { - "int4": "", - "int8": "", - "fp16": "", - "fp32": "", - "bf16": "" - } - ], - "Unit": "Tokens per sec", - "UnitDesc": "higher is better" - }, - "latency": { - "Precisions": [ - { - "int4": "", - "int8": 3.03, - "fp16": 2.94, - "fp32": "", - "bf16": "" - } - ], - "Unit": "ms", - "UnitDesc": "lower is better" - } - } - }, { "Platform": "Intel® Xeon® Platinum 8580 CPU-only", "Model": "stable-diffusion-v1-5", @@ -13749,7 +13329,7 @@ "bf16": "" } ], - "Unit": "ms", + "Unit": "sec", "UnitDesc": "lower is better" } } @@ -13874,7 +13454,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 0.22, + "fp32": "", "bf16": "" } ], @@ -14080,7 +13660,7 @@ "Platform": "Intel® Core™ Ultra 7 Processor 265H", "Model": "flux.1-schnell", "featured_SKU": true, - "whats_new_model": false, + "whats_new_model": true, "PlatformType": "Intel® Core™, iGPU-only", "IE_type": "iGPU", "Parameters": { @@ -14123,7 +13703,7 @@ "throughput": { "Precisions": [ { - "int4": 45.3, + "int4": "", "int8": "", "fp16": "", "fp32": "", @@ -14136,7 +13716,7 @@ "latency": { "Precisions": [ { - "int4": 22.05, + "int4": "", "int8": "", "fp16": "", "fp32": "", @@ -14159,7 +13739,7 @@ "throughput": { "Precisions": [ { - "int4": 64.5, + "int4": 12.6, "int8": "", "fp16": "", "fp32": "", @@ -14172,7 +13752,7 @@ "latency": { "Precisions": [ { - "int4": 15.5, + "int4": 79.5, "int8": "", "fp16": "", "fp32": "", @@ -14195,7 +13775,7 @@ "throughput": { "Precisions": [ { - "int4": 69.7, + "int4": 13.1, "int8": "", "fp16": "", "fp32": "", @@ -14208,7 +13788,7 @@ "latency": { "Precisions": [ { - "int4": 14.3, + "int4": 76.1, "int8": "", "fp16": "", "fp32": "", @@ -14231,7 +13811,7 @@ "throughput": { "Precisions": [ { - "int4": 72.1, + "int4": 12.5, "int8": "", "fp16": "", "fp32": "", @@ -14244,7 +13824,7 @@ "latency": { "Precisions": [ { - "int4": 13.9, + "int4": 79.8, "int8": "", "fp16": "", "fp32": "", @@ -14267,7 +13847,7 @@ "throughput": { "Precisions": [ { - "int4": 121.4, + "int4": 27.2, "int8": "", "fp16": "", "fp32": "", @@ -14280,7 +13860,7 @@ "latency": { "Precisions": [ { - "int4": 8.2, + "int4": 36.7, "int8": "", "fp16": "", "fp32": "", @@ -14305,7 +13885,7 @@ { "int4": "", "int8": "", - "fp16": 1.96, + "fp16": "", "fp32": "", "bf16": "" } @@ -14339,7 +13919,7 @@ "throughput": { "Precisions": [ { - "int4": 70.3, + "int4": 14.0, "int8": "", "fp16": "", "fp32": "", @@ -14352,7 +13932,7 @@ "latency": { "Precisions": [ { - "int4": 14.2, + "int4": 71.6, "int8": "", "fp16": "", "fp32": "", @@ -14411,7 +13991,7 @@ "throughput": { "Precisions": [ { - "int4": 108.5, + "int4": 19.9, "int8": "", "fp16": "", "fp32": "", @@ -14424,7 +14004,7 @@ "latency": { "Precisions": [ { - "int4": 9.2, + "int4": 50.2, "int8": "", "fp16": "", "fp32": "", @@ -14447,7 +14027,7 @@ "throughput": { "Precisions": [ { - "int4": 78.4, + "int4": 13.3, "int8": "", "fp16": "", "fp32": "", @@ -14460,7 +14040,7 @@ "latency": { "Precisions": [ { - "int4": 12.8, + "int4": 75.0, "int8": "", "fp16": "", "fp32": "", @@ -14483,7 +14063,7 @@ "throughput": { "Precisions": [ { - "int4": 78.4, + "int4": 13.3, "int8": "", "fp16": "", "fp32": "", @@ -14496,7 +14076,7 @@ "latency": { "Precisions": [ { - "int4": 12.8, + "int4": 75.2, "int8": "", "fp16": "", "fp32": "", @@ -14605,8 +14185,8 @@ "Precisions": [ { "int4": "", - "int8": 5.25, - "fp16": 5.1, + "int8": 5.4, + "fp16": 5.4, "fp32": "", "bf16": "" } @@ -14688,6 +14268,42 @@ } } }, + { + "Platform": "Intel® Core™ Ultra 7 Processor 265H", + "Model": "llama-2-7b-chat", + "featured_SKU": true, + "whats_new_model": true, + "PlatformType": "Intel® Core™, NPU-only", + "IE_type": "NPU", + "Parameters": { + "throughput": { + "Precisions": [ + { + "int4": 10.4, + "int8": "", + "fp16": "", + "fp32": "", + "bf16": "" + } + ], + "Unit": "Tokens per sec", + "UnitDesc": "higher is better" + }, + "latency": { + "Precisions": [ + { + "int4": 95.8, + "int8": "", + "fp16": "", + "fp32": "", + "bf16": "" + } + ], + "Unit": "ms", + "UnitDesc": "lower is better" + } + } + }, { "Platform": "Intel® Core™ Ultra 7 Processor 265H", "Model": "mask_rcnn_resnet50_atrous_coco", @@ -14724,6 +14340,78 @@ } } }, + { + "Platform": "Intel® Core™ Ultra 7 Processor 265H", + "Model": "mistral-7B-instruct-V0.2", + "featured_SKU": true, + "whats_new_model": true, + "PlatformType": "Intel® Core™, NPU-only", + "IE_type": "NPU", + "Parameters": { + "throughput": { + "Precisions": [ + { + "int4": 8.0, + "int8": "", + "fp16": "", + "fp32": "", + "bf16": "" + } + ], + "Unit": "Tokens per sec", + "UnitDesc": "higher is better" + }, + "latency": { + "Precisions": [ + { + "int4": 125.6, + "int8": "", + "fp16": "", + "fp32": "", + "bf16": "" + } + ], + "Unit": "ms", + "UnitDesc": "lower is better" + } + } + }, + { + "Platform": "Intel® Core™ Ultra 7 Processor 265H", + "Model": "phi-3-mini-4k-instruct", + "featured_SKU": true, + "whats_new_model": true, + "PlatformType": "Intel® Core™, NPU-only", + "IE_type": "NPU", + "Parameters": { + "throughput": { + "Precisions": [ + { + "int4": 11.1, + "int8": "", + "fp16": "", + "fp32": "", + "bf16": "" + } + ], + "Unit": "Tokens per sec", + "UnitDesc": "higher is better" + }, + "latency": { + "Precisions": [ + { + "int4": 90.1, + "int8": "", + "fp16": "", + "fp32": "", + "bf16": "" + } + ], + "Unit": "ms", + "UnitDesc": "lower is better" + } + } + }, { "Platform": "Intel® Core™ Ultra 7 Processor 265H", "Model": "mobilenet-v2", @@ -14918,7 +14606,7 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 0.24, + "fp32": "", "bf16": "" } ], @@ -15314,8 +15002,8 @@ "int4": "", "int8": "", "fp16": "", - "fp32": 11.92, - "bf16": 83.42 + "fp32": "", + "bf16": "" } ], "Unit": "FPS",