From 95e88f4fe4c83ae6539e9189f24179594e061b98 Mon Sep 17 00:00:00 2001
From: Steve Lorimer <steve@numeus.xyz>
Date: Tue, 3 Dec 2024 15:30:11 +0000
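Subject: [PATCH 1/2] llvm: ability to specify target cpu and features

Add optional `target_cpu` and `target_cpu_features` arguments to
`compile()` and pass them through `compile_module_to_asm()` to
`_get_target_machine()`. When left as None, the host CPU name and the
host's detected CPU features are used, as before.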
---
 lleaves/lleaves.py      |  8 +++++++-
 lleaves/llvm_binding.py | 25 +++++++++++++++----------
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/lleaves/lleaves.py b/lleaves/lleaves.py
index f0ee5a4..51c06e2 100644
--- a/lleaves/lleaves.py
+++ b/lleaves/lleaves.py
@@ -92,6 +92,8 @@ def compile(
         finline=True,
         froot_func_name="forest_root",
         use_fp64=True,
+        target_cpu=None,
+        target_cpu_features=None,
     ):
         """
         Generate the LLVM IR for this model and compile it to ASM.
@@ -117,6 +119,10 @@ def compile(
         :param froot_func_name: Name of entry point function in the compiled binary. This is the function to link when
             writing a C function wrapper. Defaults to "forest_root".
         :param use_fp64: If true, compile the model to use fp64 (double) precision, else use fp32 (float).
+        :param target_cpu: An optional string specifying the target CPU name to specialize for (defaults to the
+            host's CPU name).
+        :param target_cpu_features: An optional string specifying the target CPU features to enable (defaults to the
+            host's CPU features).
         """
         assert fblocksize > 0
         assert fcodemodel in ("small", "large")
@@ -137,7 +143,7 @@ def compile(
 
         # keep a reference to the engine to protect it from being garbage-collected
         self._execution_engine = compile_module_to_asm(
-            module, cache, fcodemodel=fcodemodel
+            module, cache, fcodemodel=fcodemodel, target_cpu=target_cpu, target_cpu_features=target_cpu_features,
         )
 
         # Drops GIL during call, re-acquires it after
diff --git a/lleaves/llvm_binding.py b/lleaves/llvm_binding.py
index f81e007..81b1040 100644
--- a/lleaves/llvm_binding.py
+++ b/lleaves/llvm_binding.py
@@ -13,30 +13,35 @@ def _initialize_llvm():
     llvm.initialize_native_asmprinter()
 
 
-def _get_target_machine(fcodemodel="large"):
+def _get_target_machine(fcodemodel="large", target_cpu=None, target_cpu_features=None):
     target = llvm.Target.from_triple(llvm.get_process_triple())
-    try:
-        # LLVM raises if features cannot be detected
-        features = llvm.get_host_cpu_features().flatten()
-    except RuntimeError:
-        features = ""
+
+    if target_cpu is None:
+        target_cpu = llvm.get_host_cpu_name()
+
+    if target_cpu_features is None:
+        try:
+            # LLVM raises if features cannot be detected
+            target_cpu_features = llvm.get_host_cpu_features().flatten()
+        except RuntimeError:
+            target_cpu_features = ""
 
     # large codemodel is necessary for large, ~1000 tree models.
     # for smaller models "default" codemodel would be faster.
     target_machine = target.create_target_machine(
-        cpu=llvm.get_host_cpu_name(),
-        features=features,
+        cpu=target_cpu,
+        features=target_cpu_features,
         reloc="pic",
         codemodel=fcodemodel,
     )
     return target_machine
 
 
-def compile_module_to_asm(module, cache_path=None, fcodemodel="large"):
+def compile_module_to_asm(module, cache_path=None, fcodemodel="large", target_cpu=None, target_cpu_features=None):
     _initialize_llvm()
 
     # Create a target machine representing the host
-    target_machine = _get_target_machine(fcodemodel)
+    target_machine = _get_target_machine(fcodemodel, target_cpu, target_cpu_features)
 
     # Create execution engine for our module
     execution_engine = llvm.create_mcjit_compiler(module, target_machine)

From d67ca884aade8605fa14f8e995c4f5304afb7cbe Mon Sep 17 00:00:00 2001
From: Steve Lorimer <steve@numeus.xyz>
Date: Tue, 3 Dec 2024 17:47:03 +0100
Subject: [PATCH 2/2] linter fixes

---
 lleaves/lleaves.py      | 6 +++++-
 lleaves/llvm_binding.py | 8 +++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/lleaves/lleaves.py b/lleaves/lleaves.py
index 51c06e2..e1f60db 100644
--- a/lleaves/lleaves.py
+++ b/lleaves/lleaves.py
@@ -143,7 +143,11 @@ def compile(
 
         # keep a reference to the engine to protect it from being garbage-collected
         self._execution_engine = compile_module_to_asm(
-            module, cache, fcodemodel=fcodemodel, target_cpu=target_cpu, target_cpu_features=target_cpu_features,
+            module,
+            cache,
+            fcodemodel=fcodemodel,
+            target_cpu=target_cpu,
+            target_cpu_features=target_cpu_features,
         )
 
         # Drops GIL during call, re-acquires it after
diff --git a/lleaves/llvm_binding.py b/lleaves/llvm_binding.py
index 81b1040..6e98482 100644
--- a/lleaves/llvm_binding.py
+++ b/lleaves/llvm_binding.py
@@ -37,7 +37,13 @@ def _get_target_machine(fcodemodel="large", target_cpu=None, target_cpu_features
     return target_machine
 
 
-def compile_module_to_asm(module, cache_path=None, fcodemodel="large", target_cpu=None, target_cpu_features=None):
+def compile_module_to_asm(
+    module,
+    cache_path=None,
+    fcodemodel="large",
+    target_cpu=None,
+    target_cpu_features=None,
+):
     _initialize_llvm()
 
     # Create a target machine representing the host