diff --git a/lleaves/lleaves.py b/lleaves/lleaves.py index f0ee5a4..e1f60db 100644 --- a/lleaves/lleaves.py +++ b/lleaves/lleaves.py @@ -92,6 +92,8 @@ def compile( finline=True, froot_func_name="forest_root", use_fp64=True, + target_cpu=None, + target_cpu_features=None, ): """ Generate the LLVM IR for this model and compile it to ASM. @@ -117,6 +119,10 @@ def compile( :param froot_func_name: Name of entry point function in the compiled binary. This is the function to link when writing a C function wrapper. Defaults to "forest_root". :param use_fp64: If true, compile the model to use fp64 (double) precision, else use fp32 (float). + :param target_cpu: An optional string specifying the target CPU name to specialize for (defaults to the host's + cpu name). + :param target_cpu_features: An optional string specifying the target CPU features to enable (defaults to the + host's CPU features). """ assert fblocksize > 0 assert fcodemodel in ("small", "large") @@ -137,7 +143,11 @@ def compile( # keep a reference to the engine to protect it from being garbage-collected self._execution_engine = compile_module_to_asm( - module, cache, fcodemodel=fcodemodel + module, + cache, + fcodemodel=fcodemodel, + target_cpu=target_cpu, + target_cpu_features=target_cpu_features, ) # Drops GIL during call, re-acquires it after diff --git a/lleaves/llvm_binding.py b/lleaves/llvm_binding.py index f81e007..6e98482 100644 --- a/lleaves/llvm_binding.py +++ b/lleaves/llvm_binding.py @@ -13,30 +13,41 @@ def _initialize_llvm(): llvm.initialize_native_asmprinter() -def _get_target_machine(fcodemodel="large"): +def _get_target_machine(fcodemodel="large", target_cpu=None, target_cpu_features=None): target = llvm.Target.from_triple(llvm.get_process_triple()) - try: - # LLVM raises if features cannot be detected - features = llvm.get_host_cpu_features().flatten() - except RuntimeError: - features = "" + + if target_cpu is None: + target_cpu = llvm.get_host_cpu_name() + + if target_cpu_features is None: + try: + # LLVM raises if features cannot be detected + target_cpu_features = llvm.get_host_cpu_features().flatten() + except RuntimeError: + target_cpu_features = "" # large codemodel is necessary for large, ~1000 tree models. # for smaller models "default" codemodel would be faster. target_machine = target.create_target_machine( - cpu=llvm.get_host_cpu_name(), - features=features, + cpu=target_cpu, + features=target_cpu_features, reloc="pic", codemodel=fcodemodel, ) return target_machine -def compile_module_to_asm(module, cache_path=None, fcodemodel="large"): +def compile_module_to_asm( + module, + cache_path=None, + fcodemodel="large", + target_cpu=None, + target_cpu_features=None, +): _initialize_llvm() # Create a target machine representing the host - target_machine = _get_target_machine(fcodemodel) + target_machine = _get_target_machine(fcodemodel, target_cpu, target_cpu_features) # Create execution engine for our module execution_engine = llvm.create_mcjit_compiler(module, target_machine)