Skip to content

Commit

Permalink
Merge pull request #90 from steve-numeus/master
Browse files Browse the repository at this point in the history
llvm: ability to specify target cpu and features
  • Loading branch information
siboehm authored Dec 3, 2024
2 parents ced9b70 + d67ca88 commit 4251480
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 11 deletions.
12 changes: 11 additions & 1 deletion lleaves/lleaves.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ def compile(
finline=True,
froot_func_name="forest_root",
use_fp64=True,
target_cpu=None,
target_cpu_features=None,
):
"""
Generate the LLVM IR for this model and compile it to ASM.
Expand All @@ -117,6 +119,10 @@ def compile(
:param froot_func_name: Name of entry point function in the compiled binary. This is the function to link when
writing a C function wrapper. Defaults to "forest_root".
:param use_fp64: If true, compile the model to use fp64 (double) precision, else use fp32 (float).
:param target_cpu: An optional string specifying the target CPU name to specialize for (defaults to the host's
CPU name).
:param target_cpu_features: An optional string specifying the target CPU features to enable (defaults to the
host's CPU features).
"""
assert fblocksize > 0
assert fcodemodel in ("small", "large")
Expand All @@ -137,7 +143,11 @@ def compile(

# keep a reference to the engine to protect it from being garbage-collected
self._execution_engine = compile_module_to_asm(
module, cache, fcodemodel=fcodemodel
module,
cache,
fcodemodel=fcodemodel,
target_cpu=target_cpu,
target_cpu_features=target_cpu_features,
)

# Drops GIL during call, re-acquires it after
Expand Down
31 changes: 21 additions & 10 deletions lleaves/llvm_binding.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,41 @@ def _initialize_llvm():
llvm.initialize_native_asmprinter()


def _get_target_machine(fcodemodel="large"):
def _get_target_machine(fcodemodel="large", target_cpu=None, target_cpu_features=None):
target = llvm.Target.from_triple(llvm.get_process_triple())
try:
# LLVM raises if features cannot be detected
features = llvm.get_host_cpu_features().flatten()
except RuntimeError:
features = ""

if target_cpu is None:
target_cpu = llvm.get_host_cpu_name()

if target_cpu_features is None:
try:
# LLVM raises if features cannot be detected
target_cpu_features = llvm.get_host_cpu_features().flatten()
except RuntimeError:
target_cpu_features = ""

# The large code model is necessary for large (~1000-tree) models.
# For smaller models, the "default" code model would be faster.
target_machine = target.create_target_machine(
cpu=llvm.get_host_cpu_name(),
features=features,
cpu=target_cpu,
features=target_cpu_features,
reloc="pic",
codemodel=fcodemodel,
)
return target_machine


def compile_module_to_asm(module, cache_path=None, fcodemodel="large"):
def compile_module_to_asm(
module,
cache_path=None,
fcodemodel="large",
target_cpu=None,
target_cpu_features=None,
):
_initialize_llvm()

# Create a target machine for the requested CPU and features (defaults to the host's)
target_machine = _get_target_machine(fcodemodel)
target_machine = _get_target_machine(fcodemodel, target_cpu, target_cpu_features)

# Create execution engine for our module
execution_engine = llvm.create_mcjit_compiler(module, target_machine)
Expand Down

0 comments on commit 4251480

Please sign in to comment.