diff --git a/lleaves/compiler/codegen/codegen.py b/lleaves/compiler/codegen/codegen.py
index 4d07487..2c55970 100644
--- a/lleaves/compiler/codegen/codegen.py
+++ b/lleaves/compiler/codegen/codegen.py
@@ -9,6 +9,7 @@
 FLOAT = ir.FloatType()
 INT_CAT = ir.IntType(bits=32)
 INT = ir.IntType(bits=32)
+LONG = ir.IntType(bits=64)
 ZERO_V = ir.Constant(BOOL, 0)
 FLOAT_POINTER = ir.PointerType(FLOAT)
 DOUBLE_PTR = ir.PointerType(DOUBLE)
@@ -18,6 +19,10 @@ def iconst(value):
     return ir.Constant(INT, value)
 
 
+def lconst(value):
+    return ir.Constant(LONG, value)
+
+
 def fconst(value):
     return ir.Constant(FLOAT, value)
 
@@ -168,7 +173,9 @@ def _populate_instruction_block(
 
     # -- SETUP BLOCK
     builder = ir.IRBuilder(setup_block)
-    loop_iter = builder.alloca(INT, 1, "loop-idx")
+    start_index = builder.zext(start_index, LONG)
+    end_index = builder.zext(end_index, LONG)
+    loop_iter = builder.alloca(LONG, 1, "loop-idx")
     builder.store(start_index, loop_iter)
     condition_block = root_func.append_basic_block("loop-condition")
     builder.branch(condition_block)
@@ -187,9 +194,9 @@ def _populate_instruction_block(
 
     args = []
     loop_iter_reg = builder.load(loop_iter)
-    n_args = ir.Constant(INT, forest.n_args)
+    n_args = ir.Constant(LONG, forest.n_args)
     iter_mul_nargs = builder.mul(loop_iter_reg, n_args)
-    idx = (builder.add(iter_mul_nargs, iconst(i)) for i in range(forest.n_args))
+    idx = (builder.add(iter_mul_nargs, lconst(i)) for i in range(forest.n_args))
     raw_ptrs = [builder.gep(root_func.args[0], (c,)) for c in idx]
     # cast the categorical inputs to integer
     for feature, ptr in zip(forest.features, raw_ptrs):
@@ -203,9 +210,9 @@ def _populate_instruction_block(
     for func in tree_funcs:
         tree_res = builder.call(func.llvm_function, args)
         results[func.class_id] = builder.fadd(tree_res, results[func.class_id])
-    res_idx = builder.mul(iconst(forest.n_classes), loop_iter_reg)
+    res_idx = builder.mul(lconst(forest.n_classes), loop_iter_reg)
     results_ptr = [
-        builder.gep(out_arr, (builder.add(res_idx, iconst(class_idx)),))
+        builder.gep(out_arr, (builder.add(res_idx, lconst(class_idx)),))
         for class_idx in range(forest.n_classes)
     ]
 
@@ -224,8 +231,7 @@ def _populate_instruction_block(
     for result, result_ptr in zip(results, results_ptr):
         builder.store(result, result_ptr)
 
-    tmpp1 = builder.add(loop_iter_reg, iconst(1))
-    builder.store(tmpp1, loop_iter)
+    builder.store(builder.add(loop_iter_reg, lconst(1)), loop_iter)
     builder.branch(condition_block)
 
     # -- END CORE LOOP BLOCK
diff --git a/lleaves/lleaves.py b/lleaves/lleaves.py
index ae4a7e3..5a2956c 100644
--- a/lleaves/lleaves.py
+++ b/lleaves/lleaves.py
@@ -1,7 +1,7 @@
 import concurrent.futures
 import math
 import os
-from ctypes import CFUNCTYPE, POINTER, c_double, c_int
+from ctypes import CFUNCTYPE, POINTER, c_double, c_int32
 from pathlib import Path
 
 import llvmlite.binding
@@ -20,8 +20,8 @@
     None,  # return void
     POINTER(c_double),  # pointer to data array
     POINTER(c_double),  # pointer to results array
-    c_int,  # start index
-    c_int,  # end index
+    c_int32,  # start index
+    c_int32,  # end index
 )
 
 
@@ -89,12 +89,10 @@ def compile(
         """
         Generate the LLVM IR for this model and compile it to ASM.
 
-        For most users tweaking the compilation flags (fcodemodel, fblocksize) will be unnecessary as the default
-        configuration is already very fast.
+        For most users tweaking the compilation flags (fcodemodel, fblocksize, finline) will be unnecessary
+        as the default configuration is already very fast.
         Modifying the flags is useful only if you're trying to squeeze out the last few percent of performance.
 
-        The compile() method is generally not thread-safe.
-
        :param cache: Path to a cache file. If this path doesn't exist, binary will be dumped at path after compilation.
            If path exists, binary will be loaded and compilation skipped.
            No effort is made to check staleness / consistency.
@@ -160,6 +158,12 @@ def predict(self, data, n_jobs=os.cpu_count()):
             raise ValueError(
                 f"Data must be of dimension (N, {self.num_feature()}), is {data.shape}."
             )
+        # protect against `ctypes.c_int32` silently overflowing and causing SIGSEGV
+        if n_predictions >= 2 ** 31 - 1:
+            raise ValueError(
+                "Prediction is not supported for datasets with >=2^31-1 rows. "
+                "Split the dataset into smaller chunks first."
+            )
 
         # setup input data and predictions array
         ptr_data = ndarray_to_ptr(data)