diff --git a/.github/ci.sh b/.github/ci.sh
index 8298ee1..8192bd6 100755
--- a/.github/ci.sh
+++ b/.github/ci.sh
@@ -8,7 +8,7 @@ python -m pip install --no-use-pep517 --no-deps --disable-pip-version-check -e .
 pytest -v tests
 
 # Check documentation build only in one job, also do releases
-if [ "${PYTHON_VERSION}" = "3.6" ]; then
+if [ "${PYTHON_VERSION}" = "3.7" ]; then
   pushd docs
   make html
   popd
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d051325..ea4f8f2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -34,14 +34,11 @@ jobs:
         with:
           path: ./.hypothesis
           key: hypothesisDB ${{ matrix.PYTHON_VERSION }}
-      - if: matrix.PYTHON_VERSION == '3.6'
-        shell: bash -x -l {0}
-        run: pip install dataclasses
       - name: Run the unittests
         shell: bash -x -l {0}
         run: ./.github/ci.sh ${{ matrix.PYTHON_VERSION }}
       - name: Publish a Python distribution to PyPI
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') && matrix.PYTHON_VERSION == '3.6'
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') && matrix.PYTHON_VERSION == '3.7'
         uses: pypa/gh-action-pypi-publish@v1.4.2
         with:
           user: __token__
diff --git a/environment.yml b/environment.yml
index cbc5848..277f60e 100644
--- a/environment.yml
+++ b/environment.yml
@@ -4,7 +4,7 @@ channels:
   - nodefaults
 dependencies:
   # runtime deps
-  - python>=3.6
+  - python>=3.7
   - llvmlite>=0.36
   - numpy
   # testing
diff --git a/lleaves/compiler/codegen/codegen.py b/lleaves/compiler/codegen/codegen.py
index 4d07487..2c55970 100644
--- a/lleaves/compiler/codegen/codegen.py
+++ b/lleaves/compiler/codegen/codegen.py
@@ -9,6 +9,7 @@
 FLOAT = ir.FloatType()
 INT_CAT = ir.IntType(bits=32)
 INT = ir.IntType(bits=32)
+LONG = ir.IntType(bits=64)
 ZERO_V = ir.Constant(BOOL, 0)
 FLOAT_POINTER = ir.PointerType(FLOAT)
 DOUBLE_PTR = ir.PointerType(DOUBLE)
@@ -18,6 +19,10 @@ def iconst(value):
     return ir.Constant(INT, value)
 
 
+def lconst(value):
+    return ir.Constant(LONG, value)
+
+
 def fconst(value):
     return ir.Constant(FLOAT, value)
 
@@ -168,7 +173,9 @@ def _populate_instruction_block(
 
     # -- SETUP BLOCK
     builder = ir.IRBuilder(setup_block)
-    loop_iter = builder.alloca(INT, 1, "loop-idx")
+    start_index = builder.zext(start_index, LONG)
+    end_index = builder.zext(end_index, LONG)
+    loop_iter = builder.alloca(LONG, 1, "loop-idx")
     builder.store(start_index, loop_iter)
     condition_block = root_func.append_basic_block("loop-condition")
     builder.branch(condition_block)
@@ -187,9 +194,9 @@ def _populate_instruction_block(
     args = []
     loop_iter_reg = builder.load(loop_iter)
 
-    n_args = ir.Constant(INT, forest.n_args)
+    n_args = ir.Constant(LONG, forest.n_args)
     iter_mul_nargs = builder.mul(loop_iter_reg, n_args)
-    idx = (builder.add(iter_mul_nargs, iconst(i)) for i in range(forest.n_args))
+    idx = (builder.add(iter_mul_nargs, lconst(i)) for i in range(forest.n_args))
     raw_ptrs = [builder.gep(root_func.args[0], (c,)) for c in idx]
     # cast the categorical inputs to integer
     for feature, ptr in zip(forest.features, raw_ptrs):
@@ -203,9 +210,9 @@ def _populate_instruction_block(
     for func in tree_funcs:
         tree_res = builder.call(func.llvm_function, args)
         results[func.class_id] = builder.fadd(tree_res, results[func.class_id])
-    res_idx = builder.mul(iconst(forest.n_classes), loop_iter_reg)
+    res_idx = builder.mul(lconst(forest.n_classes), loop_iter_reg)
     results_ptr = [
-        builder.gep(out_arr, (builder.add(res_idx, iconst(class_idx)),))
+        builder.gep(out_arr, (builder.add(res_idx, lconst(class_idx)),))
         for class_idx in range(forest.n_classes)
     ]
 
@@ -224,8 +231,7 @@ def _populate_instruction_block(
     for result, result_ptr in zip(results, results_ptr):
         builder.store(result, result_ptr)
 
-    tmpp1 = builder.add(loop_iter_reg, iconst(1))
-    builder.store(tmpp1, loop_iter)
+    builder.store(builder.add(loop_iter_reg, lconst(1)), loop_iter)
     builder.branch(condition_block)
     # -- END CORE LOOP BLOCK
 
diff --git a/lleaves/data_processing.py b/lleaves/data_processing.py
index f17cff1..30fb6e7 100644
--- a/lleaves/data_processing.py
+++ b/lleaves/data_processing.py
@@ -15,7 +15,7 @@ class pd_DataFrame:
         pass
 
 
-def _dataframe_to_ndarray(data, pd_traintime_categories: List[List]):
+def _dataframe_to_ndarray(data: pd_DataFrame, pd_traintime_categories: List[List]):
     """
     Converts the given dataframe into a 2D numpy array and converts categorical columns to float.
 
@@ -94,7 +94,7 @@ def data_to_ndarray(data, pd_traintime_categories: Optional[List[List]] = None):
     return data
 
 
-def ndarray_to_ptr(data):
+def ndarray_to_ptr(data: np.ndarray):
     """
     Takes a 2D numpy array, converts to float64 if necessary and returns a pointer
 
diff --git a/lleaves/lleaves.py b/lleaves/lleaves.py
index ae4a7e3..5a2956c 100644
--- a/lleaves/lleaves.py
+++ b/lleaves/lleaves.py
@@ -1,7 +1,7 @@
 import concurrent.futures
 import math
 import os
-from ctypes import CFUNCTYPE, POINTER, c_double, c_int
+from ctypes import CFUNCTYPE, POINTER, c_double, c_int32
 from pathlib import Path
 
 import llvmlite.binding
@@ -20,8 +20,8 @@
     None,  # return void
     POINTER(c_double),  # pointer to data array
     POINTER(c_double),  # pointer to results array
-    c_int,  # start index
-    c_int,  # end index
+    c_int32,  # start index
+    c_int32,  # end index
 )
 
 
@@ -89,12 +89,10 @@ def compile(
         """
         Generate the LLVM IR for this model and compile it to ASM.
 
-        For most users tweaking the compilation flags (fcodemodel, fblocksize) will be unnecessary as the default
-        configuration is already very fast.
+        For most users tweaking the compilation flags (fcodemodel, fblocksize, finline) will be unnecessary
+        as the default configuration is already very fast.
         Modifying the flags is useful only if you're trying to squeeze out the last few percent of performance.
 
-        The compile() method is generally not thread-safe.
-
         :param cache: Path to a cache file. If this path doesn't exist, binary will be dumped at path after compilation.
             If path exists, binary will be loaded and compilation skipped.
             No effort is made to check staleness / consistency.
@@ -160,6 +158,12 @@ def predict(self, data, n_jobs=os.cpu_count()):
             raise ValueError(
                 f"Data must be of dimension (N, {self.num_feature()}), is {data.shape}."
             )
+        # protect against `ctypes.c_int32` silently overflowing and causing SIGSEGV
+        if n_predictions >= 2 ** 31 - 1:
+            raise ValueError(
+                "Prediction is not supported for datasets with >=2^31-1 rows. "
+                "Split the dataset into smaller chunks first."
+            )
 
         # setup input data and predictions array
         ptr_data = ndarray_to_ptr(data)
diff --git a/setup.py b/setup.py
index e380123..f49caae 100644
--- a/setup.py
+++ b/setup.py
@@ -24,6 +24,6 @@
     description="LLVM-based compiler for LightGBM models",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    python_requires=">=3.6",
-    install_requires=["llvmlite>=0.36", "numpy", "dataclasses; python_version < '3.7'"],
+    python_requires=">=3.7",
+    install_requires=["llvmlite>=0.36", "numpy"],
 )
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..97e9f36
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,29 @@
+import pytest
+from lightgbm import Booster
+
+from lleaves import Model
+
+
+def _compiled(model_file):
+    """Load ``model_file`` into a lleaves Model, compile it and return it."""
+    model = Model(model_file=model_file)
+    model.compile()
+    return model
+
+
+@pytest.fixture(scope="session")
+def NYC_llvm():
+    """Session-scoped compiled lleaves Model for the NYC_taxi model file."""
+    return _compiled("tests/models/NYC_taxi/model.txt")
+
+
+@pytest.fixture(scope="session")
+def NYC_lgbm():
+    """Session-scoped LightGBM Booster loaded from the NYC_taxi model file."""
+    return Booster(model_file="tests/models/NYC_taxi/model.txt")
+
+
+@pytest.fixture(scope="session")
+def mtpl2_llvm():
+    """Session-scoped compiled lleaves Model for the mtpl2 model file."""
+    return _compiled("tests/models/mtpl2/model.txt")
diff --git a/tests/test_dataprocessing.py b/tests/test_dataprocessing.py
index 4c31ced..b8d891b 100644
--- a/tests/test_dataprocessing.py
+++ b/tests/test_dataprocessing.py
@@ -3,7 +3,9 @@
 import numpy as np
 import pandas as pd
 import pytest
+from lightgbm import Booster
 
+from lleaves import Model
 from lleaves.data_processing import (
     data_to_ndarray,
     extract_model_global_features,
@@ -87,3 +89,19 @@ def test_no_data_modification():
     pred = pd.DataFrame(data).astype("category")
     ndarray_to_ptr(data_to_ndarray(pred, data))
     pd.testing.assert_frame_equal(pred, orig)
+
+
+def test_sliced_arrays():
+    # predictions must match LightGBM when the input is a non-contiguous slice
+    llvm_model = Model(model_file="tests/models/single_tree/model.txt")
+    llvm_model.compile()
+    lgbm_model = Booster(model_file="tests/models/single_tree/model.txt")
+
+    n_feature = lgbm_model.num_feature()
+    data = np.arange(-5 * n_feature, 5 * n_feature, dtype=np.float64)
+    data = data.reshape((5, 2 * n_feature))
+    sliced = data[:, ::2]  # every second column -> strided view, not a copy
+    assert not sliced.flags.c_contiguous
+    np.testing.assert_almost_equal(
+        llvm_model.predict(sliced, n_jobs=4), lgbm_model.predict(sliced), decimal=13
+    )
diff --git a/tests/test_parallel.py b/tests/test_parallel.py
index f30648e..6c68fb9 100644
--- a/tests/test_parallel.py
+++ b/tests/test_parallel.py
@@ -1,30 +1,37 @@
 from ctypes import POINTER, c_double
 
 import numpy as np
-from lightgbm import Booster
 
-from lleaves import Model
 
+def test_parallel_edgecases(NYC_llvm, NYC_lgbm):
+    # single row, multiple threads
+    data = np.array(1 * [NYC_lgbm.num_feature() * [1.0]], dtype=np.float64)
+    np.testing.assert_almost_equal(
+        NYC_llvm.predict(data, n_jobs=4), NYC_lgbm.predict(data), decimal=14
+    )
+
+    # last thread has only one prediction (batchsize is ceil(19/7)=3)
+    data = np.array(19 * [NYC_lgbm.num_feature() * [1.0]], dtype=np.float64)
+    np.testing.assert_almost_equal(
+        NYC_llvm.predict(data, n_jobs=7), NYC_lgbm.predict(data), decimal=14
+    )
 
-def test_parallel_iteration():
-    llvm_model = Model(model_file="tests/models/NYC_taxi/model.txt")
-    lgbm_model = Booster(model_file="tests/models/NYC_taxi/model.txt")
-    llvm_model.compile()
 
-    data = np.array(4 * [5 * [1.0]], dtype=np.float64)
+def test_parallel_iteration(NYC_llvm, NYC_lgbm):
+    data = np.array(4 * [NYC_lgbm.num_feature() * [1.0]], dtype=np.float64)
     data_flat = np.array(data.reshape(data.size), dtype=np.float64)
     np.testing.assert_almost_equal(
-        llvm_model.predict(data, n_jobs=4), lgbm_model.predict(data), decimal=14
+        NYC_llvm.predict(data, n_jobs=4), NYC_lgbm.predict(data), decimal=14
     )
 
     ptr_data = data_flat.ctypes.data_as(POINTER(c_double))
     preds = np.zeros(4, dtype=np.float64)
     ptr_preds = preds.ctypes.data_as(POINTER(c_double))
 
-    llvm_model._c_entry_func(ptr_data, ptr_preds, 2, 4)
+    NYC_llvm._c_entry_func(ptr_data, ptr_preds, 2, 4)
     preds_l = list(preds)
     assert preds_l[0] == 0.0 and preds_l[1] == 0.0
     assert preds_l[2] != 0.0 and preds_l[3] != 0.0
-    llvm_model._c_entry_func(ptr_data, ptr_preds, 0, 2)
+    NYC_llvm._c_entry_func(ptr_data, ptr_preds, 0, 2)
     preds_l = list(preds)
     assert preds_l[0] != 0.0 and preds_l[1] != 0.0