diff --git a/README.md b/README.md index 5c03d97..10f9c69 100644 --- a/README.md +++ b/README.md @@ -112,9 +112,33 @@ meson compile benchmark-report ``` -### Benchmark Results +### OPL3 Benchmark Results -**TODO** +Preliminary benchmarks were run on a few very different CPUs: + +| System | OS | CPU | SIMD | Notes | +|:-|:-|:-|:-|:-| +| PC | Windows 10 | i7 6700k | x86 SSE4.1 + AVX2 | Home PC | +| BeagleBone Black | Debian 11 | ARM Cortex-A8 | ARMv7 NEON | Headless | +| Raspberry Pi 5 | Debian 12 | ARM Cortex-A76 | ARMv7 NEON | Headless + Heatsink Fan | + +All the systems were updated to their latest software and OS releases. +The compiler was *GCC* for all these machines. +All the scores were played via `aymo_ymf262_play --benchmark --loops 3`, except on the *BeagleBone Black* (*BBB*), which did not loop (too slow!). + +All the systems were also run with `--cpu-ext dummy`, which mimics the overhead of the test harness itself (mostly the score decoder), so that it can be subtracted from the actual benchmarks. +The reference implementation is *NukedOPL3*, run as `--cpu-ext none`.
+ +Here's a summary of the results: + +| CPU | SIMD | Ratio | DevSt | Speedup | +|:-|:-|-:|-:|-:| +| i7 6700k | x86 SSE4.1 | 0.590 | 0.026 | 1.695 | +| i7 6700k | x86 AVX2 | 0.302 | 0.013 | 3.315 | +| ARM Cortex-A8 | ARMv7 NEON | 0.575 | 0.035 | 1.740 | +| ARM Cortex-A76 | ARMv7 NEON | 0.374 | 0.010 | 2.671 | + +![Benchmark Results](./doc/benchmarks/benchmark-results.png) ## Integration diff --git a/doc/benchmarks/BBB_ARM-A8.csv b/doc/benchmarks/BBB_ARM-A8.csv new file mode 100644 index 0000000..22951fc --- /dev/null +++ b/doc/benchmarks/BBB_ARM-A8.csv @@ -0,0 +1,11 @@ +SCORE,dummy,none,arm_neon +"BeyondSN.vgm",0.235000,23.964000,12.382000 +"bmf1_1.ref",0.448000,40.540000,23.893000 +"bmf1_2.ref",0.439000,39.252000,23.368000 +"crusader.raw",0.352000,30.499000,18.735000 +"doofus.dro",0.625000,59.484000,37.468000 +"dro_v2.dro",0.820000,79.142000,45.182000 +"dystopia.ref",1.190000,111.587000,60.619000 +"GALWAY.ref",0.300000,29.206000,15.615000 +"inc.raw",0.120000,10.472000,6.296000 +"loudness.ref",0.023000,1.978000,1.179000 diff --git a/doc/benchmarks/PC_i7-6700k.csv b/doc/benchmarks/PC_i7-6700k.csv new file mode 100644 index 0000000..0c40828 --- /dev/null +++ b/doc/benchmarks/PC_i7-6700k.csv @@ -0,0 +1,11 @@ +SCORE,dummy,none,x86_avx2,x86_sse41 +"BeyondSN.vgm",0.046000,5.109000,1.453000,2.859000 +"bmf1_1.ref",0.093000,8.922000,2.812000,5.343000 +"bmf1_2.ref",0.078000,8.563000,2.688000,5.265000 +"crusader.raw",0.078000,6.859000,2.203000,4.218000 +"doofus.dro",0.125000,13.797000,4.375000,8.078000 +"dro_v2.dro",0.172000,18.312000,5.438000,10.453000 +"dystopia.ref",0.234000,24.437000,7.125000,13.656000 +"GALWAY.ref",0.047000,6.031000,1.860000,3.640000 +"inc.raw",0.031000,2.390000,0.734000,1.406000 +"loudness.ref",0.000000,0.437000,0.141000,0.281000 diff --git a/doc/benchmarks/RPi5_ARM-A76.csv b/doc/benchmarks/RPi5_ARM-A76.csv new file mode 100644 index 0000000..507469a --- /dev/null +++ b/doc/benchmarks/RPi5_ARM-A76.csv @@ -0,0 +1,11 @@ +SCORE,dummy,none,arm_neon 
+"BeyondSN.vgm",0.076017,7.737659,2.811513 +"bmf1_1.ref",0.146062,14.089469,5.268482 +"bmf1_2.ref",0.142800,13.359588,5.147929 +"crusader.raw",0.114323,10.722416,4.119310 +"doofus.dro",0.210133,21.032906,8.351923 +"dro_v2.dro",0.282348,27.369149,10.219190 +"dystopia.ref",0.396055,36.095204,13.373861 +"GALWAY.ref",0.096829,9.003890,3.521635 +"inc.raw",0.038078,3.576043,1.386682 +"loudness.ref",0.007435,0.682634,0.261767 diff --git a/doc/benchmarks/benchmark-results.png b/doc/benchmarks/benchmark-results.png new file mode 100644 index 0000000..0e47b87 Binary files /dev/null and b/doc/benchmarks/benchmark-results.png differ diff --git a/doc/benchmarks/benchmark-results.xlsx b/doc/benchmarks/benchmark-results.xlsx new file mode 100644 index 0000000..740ef50 Binary files /dev/null and b/doc/benchmarks/benchmark-results.xlsx differ