From cb086a7466895dddfe11b2dcf133fdd3dc80c7ec Mon Sep 17 00:00:00 2001 From: Gold John King Date: Fri, 3 Jan 2025 01:16:41 +0800 Subject: [PATCH] Fix performance drawbacks since v2.1.2, remove redundant variants --- ide/v217/mimalloc.sln | 9 -- ide/v217/mimalloc.vcxproj | 279 -------------------------------------- src/cma/cma_api.cpp | 8 +- src/cma/cma_utils.cpp | 67 +-------- src/cma/cma_utils.h | 4 - src/init.c | 8 +- 6 files changed, 7 insertions(+), 368 deletions(-) diff --git a/ide/v217/mimalloc.sln b/ide/v217/mimalloc.sln index bf3a7afe3..fd27c5f72 100644 --- a/ide/v217/mimalloc.sln +++ b/ide/v217/mimalloc.sln @@ -8,19 +8,10 @@ EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Release|x64 = Release|x64 - Rel-LockPages|x64 = Rel-LockPages|x64 - Rel-NoCollect|x64 = Rel-NoCollect|x64 - Rel-ScheduledCollect|x64 = Rel-ScheduledCollect|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {DFDFDF91-D72E-314A-BD77-88B833A76415}.Release|x64.ActiveCfg = Release|x64 {DFDFDF91-D72E-314A-BD77-88B833A76415}.Release|x64.Build.0 = Release|x64 - {DFDFDF91-D72E-314A-BD77-88B833A76415}.Rel-LockPages|x64.ActiveCfg = Rel-LockPages|x64 - {DFDFDF91-D72E-314A-BD77-88B833A76415}.Rel-LockPages|x64.Build.0 = Rel-LockPages|x64 - {DFDFDF91-D72E-314A-BD77-88B833A76415}.Rel-NoCollect|x64.ActiveCfg = Rel-NoCollect|x64 - {DFDFDF91-D72E-314A-BD77-88B833A76415}.Rel-NoCollect|x64.Build.0 = Rel-NoCollect|x64 - {DFDFDF91-D72E-314A-BD77-88B833A76415}.Rel-ScheduledCollect|x64.ActiveCfg = Rel-ScheduledCollect|x64 - {DFDFDF91-D72E-314A-BD77-88B833A76415}.Rel-ScheduledCollect|x64.Build.0 = Rel-ScheduledCollect|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/ide/v217/mimalloc.vcxproj b/ide/v217/mimalloc.vcxproj index 066464893..399da5ed9 100644 --- a/ide/v217/mimalloc.vcxproj +++ b/ide/v217/mimalloc.vcxproj @@ -4,18 +4,6 @@ x64 - - Rel-LockPages - x64 - - - Rel-NoCollect - x64 - - - 
Rel-ScheduledCollect - x64 - Release x64 @@ -36,24 +24,6 @@ Intel C++ Compiler 2025 true - - DynamicLibrary - MultiByte - Intel C++ Compiler 2025 - true - - - DynamicLibrary - MultiByte - Intel C++ Compiler 2025 - true - - - DynamicLibrary - MultiByte - Intel C++ Compiler 2025 - true - @@ -64,29 +34,11 @@ <_ProjectFileVersion>10.0.20506.1 $(SolutionDir)Release\ - $(SolutionDir)Release\ - $(SolutionDir)Release\ - $(SolutionDir)Release\ mimalloc.dir\Release\ - mimalloc.dir\Release\ - mimalloc.dir\Release\ - mimalloc.dir\Release\ mimalloc_v217 - mimalloc_v217_scheduled_collect - mimalloc_v217_no_collect - mimalloc_v217_lock_pages .dll - .dll - .dll - .dll false - false - false - false true - true - true - true @@ -147,183 +99,6 @@ false - - - ..\..\include;%(AdditionalIncludeDirectories) - %(AdditionalOptions) /Zc:__cplusplus - $(IntDir) - Sync - AnySuitable - stdcpp20 - MaxSpeedHighLevel - NotUsing - MultiThreaded - false - false - Level3 - %(PreprocessorDefinitions);WIN32;_WINDOWS;NDEBUG;MI_SKIP_COLLECT_ON_EXIT=1;CMAKE_INTDIR="Release";CMA_SCHEDULED_COLLECT - $(IntDir) - - - stdc17 - true - Speed - Fast - false - true - MultiFile - ALDERLAKE - CompileAsCpp - - - ..\..\include;%(AdditionalIncludeDirectories) - $(ProjectDir)/$(IntDir) - %(Filename).h - %(Filename).tlb - %(Filename)_i.c - %(Filename)_p.c - - - - - - - - - psapi.lib;shell32.lib;user32.lib;advapi32.lib;bcrypt.lib;..\..\bin\mimalloc-redirect.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;comdlg32.lib;advapi32.lib - %(AdditionalLibraryDirectories) - %(AdditionalOptions) /machine:x64 - false - %(IgnoreSpecificDefaultLibraries) - $(OutputPath)mimalloc.lib - $(OutputPath)mimalloc.pdb - Console - true - true - true - - - false - - - - - ..\..\include;%(AdditionalIncludeDirectories) - %(AdditionalOptions) /Zc:__cplusplus - $(IntDir) - Sync - AnySuitable - stdcpp20 - MaxSpeedHighLevel - NotUsing - MultiThreaded - false - false - Level3 - 
%(PreprocessorDefinitions);WIN32;_WINDOWS;NDEBUG;MI_SKIP_COLLECT_ON_EXIT=1;CMAKE_INTDIR="Release";CMA_NO_COLLECT - $(IntDir) - - - stdc17 - true - Speed - Fast - false - true - MultiFile - ALDERLAKE - CompileAsCpp - - - ..\..\include;%(AdditionalIncludeDirectories) - $(ProjectDir)/$(IntDir) - %(Filename).h - %(Filename).tlb - %(Filename)_i.c - %(Filename)_p.c - - - - - - - - - psapi.lib;shell32.lib;user32.lib;advapi32.lib;bcrypt.lib;..\..\bin\mimalloc-redirect.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;comdlg32.lib;advapi32.lib - %(AdditionalLibraryDirectories) - %(AdditionalOptions) /machine:x64 - false - %(IgnoreSpecificDefaultLibraries) - $(OutputPath)mimalloc.lib - $(OutputPath)mimalloc.pdb - Console - true - true - true - - - false - - - - - ..\..\include;%(AdditionalIncludeDirectories) - %(AdditionalOptions) /Zc:__cplusplus - $(IntDir) - Sync - AnySuitable - stdcpp20 - MaxSpeedHighLevel - NotUsing - MultiThreaded - false - false - Level3 - %(PreprocessorDefinitions);WIN32;_WINDOWS;NDEBUG;MI_SKIP_COLLECT_ON_EXIT=1;CMAKE_INTDIR="Release";CMA_LOCK_PAGES - $(IntDir) - - - stdc17 - true - Speed - Fast - false - true - MultiFile - ALDERLAKE - CompileAsCpp - - - ..\..\include;%(AdditionalIncludeDirectories) - $(ProjectDir)/$(IntDir) - %(Filename).h - %(Filename).tlb - %(Filename)_i.c - %(Filename)_p.c - - - - - - - - - psapi.lib;shell32.lib;user32.lib;advapi32.lib;bcrypt.lib;..\..\bin\mimalloc-redirect.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;comdlg32.lib;advapi32.lib - %(AdditionalLibraryDirectories) - %(AdditionalOptions) /machine:x64 - false - %(IgnoreSpecificDefaultLibraries) - $(OutputPath)mimalloc.lib - $(OutputPath)mimalloc.pdb - Console - true - true - true - - - false - - $(IntDir)/src/cma/cma_api.cpp.obj @@ -331,9 +106,6 @@ $(IntDir)/src/cma/cma_api.h.obj CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp $(IntDir)/src/cma/cma_utils.cpp.obj @@ 
-341,105 +113,54 @@ $(IntDir)/src/cma/cma_utils.h.obj CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp CompileAsCpp - CompileAsCpp - CompileAsCpp - CompileAsCpp diff --git a/src/cma/cma_api.cpp b/src/cma/cma_api.cpp index d7ea48468..9a7c896ed 100644 --- a/src/cma/cma_api.cpp +++ b/src/cma/cma_api.cpp @@ -1,7 +1,5 @@ #include "cma_api.h" -#include "cma_utils.h" - #include "mimalloc.h" size_t __stdcall MemTotalCommitted(void) @@ -21,9 +19,7 @@ size_t __stdcall MemFlushCache(size_t size) void __stdcall MemFlushCacheAll(void) { -#ifndef CMA_NO_COLLECT - mi_collect(false); -#endif + // This function does not affect actual behavior, see: https://community.bistudio.com/wiki/Arma_3:_Custom_Memory_Allocator } size_t __stdcall MemSize(void* mem) @@ -58,5 +54,5 @@ void __stdcall MemFreeA(void* mem) void __stdcall EnableHugePages(void) { - CmaSetMemoryAllocatorRuntimeOptions(); + // Huge pages and other runtime options are set once the dll is loaded, so no need to do it here } diff --git a/src/cma/cma_utils.cpp b/src/cma/cma_utils.cpp index 5c44148c9..58214cf08 100644 --- a/src/cma/cma_utils.cpp +++ 
b/src/cma/cma_utils.cpp @@ -1,73 +1,14 @@ #include "cma_utils.h" #include "mimalloc.h" -#include "mimalloc/types.h" - -#define WIN32_LEAN_AND_MEAN -#include - -int CmaGetReservedHugePagesCount(void) -{ - MEMORYSTATUSEX mem_status{}; - mem_status.dwLength = sizeof(mem_status); - - GlobalMemoryStatusEx(&mem_status); - - if (mem_status.ullAvailPhys > 17179869184) // Avail > 16G - return 8; - - if (mem_status.ullAvailPhys > 12884901888) // Avail > 12G - return 6; - - if (mem_status.ullAvailPhys > 8589934592) // Avail > 8G - return 4; - - return 0; -} - -size_t CmaGetReservedOsMemorySize(void) -{ - MEMORYSTATUSEX mem_status{}; - mem_status.dwLength = sizeof(mem_status); - - GlobalMemoryStatusEx(&mem_status); - - if (mem_status.ullAvailPhys > 8589934592) // Avail > 8G - return min((floor(mem_status.ullAvailPhys / (MI_GiB * MI_SIZE_BITS)) - 2) * MI_GiB, 8 * MI_GiB); // (Avail - 2G) but no more than 8G - - return 0; -} void CmaSetMemoryAllocatorRuntimeOptions(void) { - mi_option_enable(mi_option_large_os_pages); // Enable large pages by default - -#ifdef CMA_LOCK_PAGES // Enable reserved large pages on CMA_LOCK_PAGES build variant - if (mi_option_is_enabled(mi_option_large_os_pages)) - mi_option_set(mi_option_reserve_huge_os_pages, CmaGetReservedHugePagesCount()); -#endif -} - -DWORD WINAPI ScheduledMemoryCollectorThread(LPVOID /*lpParam*/) -{ - while (true) // Collect memory every 5 minutes - { - Sleep(300000); - - mi_collect(false); - } -} + mi_option_set(mi_option_arena_eager_commit, 1); // Always eager commit arenas, which has a significant performance improvement on Arma 3 -HANDLE scheduled_memory_collector_thread; + mi_option_set(mi_option_allow_large_os_pages, 1); // Always use large pages when available -void CmaCreateScheduledMemoryCollectorThread(void) -{ - scheduled_memory_collector_thread = CreateThread(NULL, 0, ScheduledMemoryCollectorThread, NULL, 0, NULL); - SetThreadPriority(scheduled_memory_collector_thread, THREAD_MODE_BACKGROUND_BEGIN); -} + 
mi_option_set(mi_option_eager_commit_delay, 0); // Always eager commit segments -void CmaTerminateScheduledMemoryCollectorThread(void) -{ - TerminateThread(scheduled_memory_collector_thread, 0); - CloseHandle(scheduled_memory_collector_thread); + mi_option_set(mi_option_purge_delay, -1); // Do not purge memory back to the OS } diff --git a/src/cma/cma_utils.h b/src/cma/cma_utils.h index 388e282e7..b66de50c7 100644 --- a/src/cma/cma_utils.h +++ b/src/cma/cma_utils.h @@ -1,5 +1 @@ void CmaSetMemoryAllocatorRuntimeOptions(void); - -void CmaCreateScheduledMemoryCollectorThread(void); - -void CmaTerminateScheduledMemoryCollectorThread(void); diff --git a/src/init.c b/src/init.c index f1068019b..bcd1812bb 100644 --- a/src/init.c +++ b/src/init.c @@ -580,7 +580,7 @@ void mi_process_init(void) mi_attr_noexcept { _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); mi_process_setup_auto_thread_done(); - CmaSetMemoryAllocatorRuntimeOptions(); // Arma 3 CMA: Set mimalloc's runtime options based on user's actual system specifications + CmaSetMemoryAllocatorRuntimeOptions(); // Arma 3 CMA: Fine-tune mimalloc's runtime options for better performance; this must be done before _mi_os_init() mi_detect_cpu_features(); _mi_os_init(); mi_heap_main_init(); @@ -619,9 +619,6 @@ void mi_process_init(void) mi_attr_noexcept { mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? 
*/); } } -#ifdef CMA_SCHEDULED_COLLECT - CmaCreateScheduledMemoryCollectorThread(); // Arma 3 CMA: Create a thread to execute mi_collect, in order to collect usused memory every once in a while -#endif } // Called when the process is done (through `at_exit`) @@ -632,9 +629,6 @@ static void mi_cdecl mi_process_done(void) { static bool process_done = false; if (process_done) return; process_done = true; -#ifdef CMA_SCHEDULED_COLLECT - CmaTerminateScheduledMemoryCollectorThread(); // Arma 3 CMA: Terminate the usused memory collector thread -#endif // release any thread specific resources and ensure _mi_thread_done is called on all but the main thread _mi_prim_thread_done_auto_done();