From 282dac196795963dee93567a5f143455a9bb9614 Mon Sep 17 00:00:00 2001 From: Stefan Glienke Date: Tue, 28 May 2024 16:59:34 +0200 Subject: [PATCH 1/3] fixed thread pinning - on intel hybrid CPUs it landed on e-core --- Spring.Benchmark.pas | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Spring.Benchmark.pas b/Spring.Benchmark.pas index 89aac67..0753df7 100644 --- a/Spring.Benchmark.pas +++ b/Spring.Benchmark.pas @@ -2967,7 +2967,7 @@ procedure Benchmark_Main(pinThread0: Boolean); {$IFDEF MSWINDOWS} if pinThread0 then begin - SetThreadAffinityMask(GetCurrentThread, 1 shl (CPUCount - 1)); + SetThreadAffinityMask(GetCurrentThread, 1); SetThreadPriority(GetCurrentThread, THREAD_PRIORITY_HIGHEST); end; {$ENDIF} From 51bdc24f90e3aa49219111ad4f4e3d34b4ceb0ae Mon Sep 17 00:00:00 2001 From: Stefan Glienke Date: Tue, 6 Aug 2024 07:21:45 +0200 Subject: [PATCH 2/3] fixed time measurement on Windows - using different winapi functions that return more reliable results on latest intel CPUs --- Spring.Benchmark.pas | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/Spring.Benchmark.pas b/Spring.Benchmark.pas index 0753df7..8cd149a 100644 --- a/Spring.Benchmark.pas +++ b/Spring.Benchmark.pas @@ -782,6 +782,7 @@ TCacheInfo = record public class var numCpus: Integer; cyclesPerSecond: Double; + cycleDuration: Double; caches: TArray; scaling: TScaling; loadAvg: TArray; @@ -986,6 +987,17 @@ type TIterationResults = record kMaxIterations = 1000000000; +{$IFDEF MSWINDOWS} + +function QueryProcessCycleTime(ProcessHandle: THandle; var CycleTime: UInt64): BOOL; stdcall; + external kernel32 name 'QueryProcessCycleTime'; + +function QueryThreadCycleTime(ThreadHandle: THandle; var CycleTime: UInt64): BOOL; stdcall; + external kernel32 name 'QueryThreadCycleTime'; + +{$ENDIF} + + {$REGION 'Freepascal Support'} {$IFDEF FPC} @@ -1683,11 +1695,11 @@ function ProcessCPUUsage: Double; {$IFDEF MSWINDOWS} var proc: THandle; - creationTime, exitTime, kernelTime, userTime: TFileTime; + cycleTime: UInt64; begin proc := GetCurrentProcess; - if GetProcessTimes(proc, creationTime, exitTime, kernelTime, userTime) then - Exit(MakeTime(kernelTime, userTime)); + if QueryProcessCycleTime(proc, cycleTime) then + Exit(cycleTime * TCPUInfo.cycleDuration); DiagnoseAndExit('GetProccessTimes() failed'); Result := 0; end; @@ -1706,11 +1718,11 @@ function ThreadCPUUsage: Double; {$IFDEF MSWINDOWS} var thisThread: THandle; - creationTime, exitTime, kernelTime, userTime: TFileTime; + cycleTime: UInt64; begin thisThread := GetCurrentThread; - if GetThreadTimes(thisThread, creationTime, exitTime, kernelTime, userTime) then - Exit(MakeTime(kernelTime, userTime)); + if QueryThreadCycleTime(thisThread, cycleTime) then + Exit(cycleTime * TCPUInfo.cycleDuration); DiagnoseAndExit('GetThreadTimes() failed'); Result := 0; end; @@ -3819,6 +3831,7 @@ function TBenchmark.Threads(const t: Integer): TBenchmark; begin numCpus := GetNumCPUs; cyclesPerSecond := GetCPUCyclesPerSecond; + cycleDuration := 1 / cyclesPerSecond; caches := GetCacheSizes; scaling := Unknown; loadAvg := nil; @@ -4004,12 +4017,12 @@ class function TBenchmarkFamilies.FindBenchmarks(spec: string; Inc(i); end; - if not IsZero(family.fMinTime) then - instance.name.minTime := Format('minTime:%0.3f', [family.fMinTime]); - if family.fIterations <> 0 then - instance.name.iterations := Format('iterations:%u', [family.fIterations]); - if family.fRepetitions <> 0 then - instance.name.pepetitions := Format('repeats:%d', [family.fRepetitions]); + if not IsZero(family.fMinTime) then + instance.name.minTime := Format('minTime:%0.3f', [family.fMinTime]); + if family.fIterations <> 0 then + instance.name.iterations := Format('iterations:%u', [family.fIterations]); + if family.fRepetitions <> 0 then + instance.name.pepetitions := Format('repeats:%d', [family.fRepetitions]); if family.fMeasureProcessCpuTime then instance.name.timeType := 'processTime'; From 66ab629c91bdff9cae1e55a8c9b660b28510a3f0 Mon Sep 17 00:00:00 2001 From: Stefan Glienke Date: Wed, 7 Aug 2024 13:49:20 +0200 Subject: [PATCH 3/3] added option to control arg formatting --- Spring.Benchmark.pas | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Spring.Benchmark.pas b/Spring.Benchmark.pas index 8cd149a..1f8c0c4 100644 --- a/Spring.Benchmark.pas +++ b/Spring.Benchmark.pas @@ -709,6 +709,10 @@ function Counter(const value: Double; flags: TCounter.TFlags = []; k: TCounter.T // Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false. benchmark_counters_tabular: Boolean = False; + // Whether to add formatted args to the output. + // Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to true. + benchmark_format_args: Boolean = True; + // The level of verbose logging to output log_level: Integer = 0; @@ -1350,6 +1354,7 @@ procedure PrintUsageAndExit; ' [--benchmark_out_format=]' + sLineBreak + ' [--benchmark_color={auto|true|false}]' + sLineBreak + ' [--benchmark_counters_tabular={true|false}]' + sLineBreak + + ' [--benchmark_format_args={true|false}]' + sLineBreak + ' [--log_level=]'); Halt(0); end; @@ -1383,6 +1388,8 @@ procedure ParseCommandLineFlags; ParseStringFlag(arg, 'benchmark_color', benchmark_color) or ParseBoolFlag(arg, 'benchmark_counters_tabular', benchmark_counters_tabular) or + ParseBoolFlag(arg, 'benchmark_format_args', + benchmark_format_args) or ParseInt32Flag(arg, 'log_level', log_level)) then if IsFlag(arg, 'help') then PrintUsageAndExit @@ -4001,6 +4008,7 @@ class function TBenchmarkFamilies.FindBenchmarks(spec: string; // Add arguments to instance name i := 0; + if benchmark_format_args then for arg in args do begin if instance.name.args <> '' then @@ -4017,12 +4025,15 @@ class function TBenchmarkFamilies.FindBenchmarks(spec: string; Inc(i); end; + if benchmark_format_args then + begin if not IsZero(family.fMinTime) then instance.name.minTime := Format('minTime:%0.3f', [family.fMinTime]); if family.fIterations <> 0 then instance.name.iterations := Format('iterations:%u', [family.fIterations]); if family.fRepetitions <> 0 then instance.name.pepetitions := Format('repeats:%d', [family.fRepetitions]); + end; if family.fMeasureProcessCpuTime then instance.name.timeType := 'processTime';