From 95e88f4fe4c83ae6539e9189f24179594e061b98 Mon Sep 17 00:00:00 2001 From: Steve Lorimer Date: Tue, 3 Dec 2024 15:30:11 +0000 Subject: [PATCH 1/2] llvm: ability to specify target cpu and features --- lleaves/lleaves.py | 8 +++++++- lleaves/llvm_binding.py | 25 +++++++++++++++---------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/lleaves/lleaves.py b/lleaves/lleaves.py index f0ee5a4..51c06e2 100644 --- a/lleaves/lleaves.py +++ b/lleaves/lleaves.py @@ -92,6 +92,8 @@ def compile( finline=True, froot_func_name="forest_root", use_fp64=True, + target_cpu=None, + target_cpu_features=None, ): """ Generate the LLVM IR for this model and compile it to ASM. @@ -117,6 +119,10 @@ def compile( :param froot_func_name: Name of entry point function in the compiled binary. This is the function to link when writing a C function wrapper. Defaults to "forest_root". :param use_fp64: If true, compile the model to use fp64 (double) precision, else use fp32 (float). + :param target_cpu: An optional string specifying the target CPU name to specialize for (defaults to the host's + cpu name). + :param target_cpu_features: An optional string specifying the target CPU features to enable (defaults to the + host's CPU features). """ assert fblocksize > 0 assert fcodemodel in ("small", "large") @@ -137,7 +143,7 @@ def compile( # keep a reference to the engine to protect it from being garbage-collected self._execution_engine = compile_module_to_asm( - module, cache, fcodemodel=fcodemodel + module, cache, fcodemodel=fcodemodel, target_cpu=target_cpu, target_cpu_features=target_cpu_features, ) # Drops GIL during call, re-acquires it after diff --git a/lleaves/llvm_binding.py b/lleaves/llvm_binding.py index f81e007..81b1040 100644 --- a/lleaves/llvm_binding.py +++ b/lleaves/llvm_binding.py @@ -13,30 +13,35 @@ def _initialize_llvm(): llvm.initialize_native_asmprinter() -def _get_target_machine(fcodemodel="large"): +def _get_target_machine(fcodemodel="large", target_cpu=None, target_cpu_features=None): target = llvm.Target.from_triple(llvm.get_process_triple()) - try: - # LLVM raises if features cannot be detected - features = llvm.get_host_cpu_features().flatten() - except RuntimeError: - features = "" + + if target_cpu is None: + target_cpu = llvm.get_host_cpu_name() + + if target_cpu_features is None: + try: + # LLVM raises if features cannot be detected + target_cpu_features = llvm.get_host_cpu_features().flatten() + except RuntimeError: + target_cpu_features = "" # large codemodel is necessary for large, ~1000 tree models. # for smaller models "default" codemodel would be faster. target_machine = target.create_target_machine( - cpu=llvm.get_host_cpu_name(), - features=features, + cpu=target_cpu, + features=target_cpu_features, reloc="pic", codemodel=fcodemodel, ) return target_machine -def compile_module_to_asm(module, cache_path=None, fcodemodel="large"): +def compile_module_to_asm(module, cache_path=None, fcodemodel="large", target_cpu=None, target_cpu_features=None): _initialize_llvm() # Create a target machine representing the host - target_machine = _get_target_machine(fcodemodel) + target_machine = _get_target_machine(fcodemodel, target_cpu, target_cpu_features) # Create execution engine for our module execution_engine = llvm.create_mcjit_compiler(module, target_machine) From d67ca884aade8605fa14f8e995c4f5304afb7cbe Mon Sep 17 00:00:00 2001 From: Steve Lorimer Date: Tue, 3 Dec 2024 17:47:03 +0100 Subject: [PATCH 2/2] linter fixes --- lleaves/lleaves.py | 6 +++++- lleaves/llvm_binding.py | 8 +++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lleaves/lleaves.py b/lleaves/lleaves.py index 51c06e2..e1f60db 100644 --- a/lleaves/lleaves.py +++ b/lleaves/lleaves.py @@ -143,7 +143,11 @@ def compile( # keep a reference to the engine to protect it from being garbage-collected self._execution_engine = compile_module_to_asm( - module, cache, fcodemodel=fcodemodel, target_cpu=target_cpu, target_cpu_features=target_cpu_features, + module, + cache, + fcodemodel=fcodemodel, + target_cpu=target_cpu, + target_cpu_features=target_cpu_features, ) # Drops GIL during call, re-acquires it after diff --git a/lleaves/llvm_binding.py b/lleaves/llvm_binding.py index 81b1040..6e98482 100644 --- a/lleaves/llvm_binding.py +++ b/lleaves/llvm_binding.py @@ -37,7 +37,13 @@ def _get_target_machine(fcodemodel="large", target_cpu=None, target_cpu_features return target_machine -def compile_module_to_asm(module, cache_path=None, fcodemodel="large", target_cpu=None, target_cpu_features=None): +def compile_module_to_asm( + module, + cache_path=None, + fcodemodel="large", + target_cpu=None, + target_cpu_features=None, +): _initialize_llvm() # Create a target machine representing the host