Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(build): Implement Python wrapper build using setuptools. #28

Merged
merged 2 commits into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,28 @@
*.ptx
*.cubin
*.fatbin

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,26 @@ cd TileFusion && git submodule update --init --recursive

TileFusion requires a C++20 host compiler, CUDA 12.0 or later, and GCC version 10.0 or higher to support C++20 features.

### Build from Source

#### Using Makefile
To build the project using the provided `Makefile`, simply run:
```bash
make
```

#### Building the Python Wrapper

1. Build the wheel:
```bash
python3 setup.py build bdist_wheel
```

2. Clean the build:
```bash
python3 setup.py clean
```

### Unit Test

- **Run a single unit test**: `make unit_test UNIT_TEST=test_scatter_nd.py`
Expand Down
8 changes: 7 additions & 1 deletion cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set(CMAKE_BUILD_TYPE Release)

set(CMAKE_CXX_STANDARD
20
CACHE STRING "The C++ standard whoese features are requested." FORCE)
CACHE STRING "The C++ standard whose features are requested." FORCE)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(CMAKE_CUDA_STANDARD
Expand Down Expand Up @@ -48,6 +48,12 @@ set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -std=c++20)
set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG} -std=c++20 -O0)
set(CUDA_NVCC_FLAGS_RELEASE ${CUDA_NVCC_FLAGS_RELEASE} -std=c++20 -O3)

# Define ENABLE_BF16 when the detected CUDA major version is 11 or newer.
# The expansion is quoted so that an empty or unset CUDA_VERSION_MAJOR makes
# the condition false instead of producing a malformed if() expression.
if("${CUDA_VERSION_MAJOR}" VERSION_GREATER_EQUAL "11")
  add_definitions("-DENABLE_BF16")
  message(STATUS "CUDA_VERSION ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} "
                 "is greater or equal than 11.0, enable -DENABLE_BF16 flag.")
endif()

message(STATUS "tilefusion: CUDA detected: " ${CUDA_VERSION})
message(STATUS "tilefusion: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
message(STATUS "tilefusion: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
Expand Down
15 changes: 15 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,26 @@ classifiers = [
"Operating System :: OS Independent",
"Topic :: Software Development :: Libraries",
]
# NOTE: setuptools's `install_requires` can be overridden by the
# `dependencies` field in `pyproject.toml`.
# Make sure to keep this field in sync with what is in `requirements.txt`.
dependencies = [
"torch",
]

[project.urls]
Homepage = "https://github.com/microsoft/TileFusion"
Issues = "https://github.com/microsoft/TileFusion/issues"

[build-system]
requires = [
"cmake",
"packaging",
"setuptools>=49.4.0",
"wheel",
]
build-backend = "setuptools.build_meta"

[tool.ruff]
line-length = 80
exclude = [
Expand Down
2 changes: 0 additions & 2 deletions pytilefusion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

import torch

torch.ops.load_library("build/src/libtilefusion.so")


def scatter_nd(scatter_data, scatter_indices, scatter_updates):
torch.ops.tilefusion.scatter_nd(
Expand Down
1 change: 1 addition & 0 deletions pytilefusion/__version__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = '0.0.0'
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
cmake
packaging
setuptools>=49.4.0
torch
wheel
172 changes: 172 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

import os
import subprocess
from pathlib import Path

from setuptools import Command, Extension, find_packages, setup
from setuptools.command.build_ext import build_ext

cur_path = Path(__file__).parent


def get_requirements():
    """Get Python package dependencies from requirements.txt.

    Reads the `requirements.txt` file found under `cur_path` and returns one
    entry per requirement line. Blank lines, `#` comment lines and lines
    containing "https" are skipped, so an empty file yields an empty list.

    Returns:
        list[str]: Requirement specifiers with surrounding whitespace removed.
    """
    with open(cur_path / "requirements.txt") as f:
        lines = [line.strip() for line in f]
    return [
        req for req in lines
        if req and not req.startswith("#") and "https" not in req
    ]


class CMakeExtension(Extension):
    """Extension that carries the location of a CMake project.

    Args:
        name: Extension name, passed through to `Extension`.
        cmake_lists_dir: Root folder of the CMake project; stored as an
            absolute path in `self.cmake_lists_dir`.
        **kwargs: Forwarded unchanged to `Extension`.
    """

    def __init__(self, name, cmake_lists_dir=".", **kwargs):
        # The extension is registered with an empty source list.
        super().__init__(name, sources=[], **kwargs)
        self.cmake_lists_dir = os.path.abspath(cmake_lists_dir)


class CMakeBuildExt(build_ext):
    """`build_ext` command that configures and builds extensions with CMake."""

    def _copy_extension(self, build_py, ext) -> None:
        """Copy the shared library built for `ext` into `pytilefusion`.

        Args:
            build_py: The finalized `build_py` command; its `build_lib` is the
                root of the destination path.
            ext: The extension whose `lib<name>.so` is copied.
        """
        source_path = os.path.join(self.build_lib, "lib" + ext.name + ".so")
        # NOTE(review): `_get_inplace_equivalent` is a private setuptools
        # helper — confirm it exists in the oldest supported setuptools.
        inplace_file, _ = self._get_inplace_equivalent(build_py, ext)

        target_path = os.path.join(
            build_py.build_lib, "pytilefusion", inplace_file
        )

        # Always copy, even if source is older than destination, to ensure
        # that the right extensions for the current Python/platform are
        # used.
        if os.path.exists(source_path) or not ext.optional:
            self.copy_file(source_path, target_path, level=self.verbose)

    def copy_extensions_to_source(self) -> None:
        """Copy the built library of every extension into `pytilefusion`."""
        build_py = self.get_finalized_command("build_py")
        for ext in self.extensions:
            self._copy_extension(build_py, ext)

    def build_extension(self, ext: CMakeExtension) -> None:
        """Configure and build a single extension with CMake.

        Only `ext` is built; the caller is expected to invoke this method
        once per extension.

        Environment variables read:
            DEBUG: Selects the "Debug" configuration when `self.debug` is
                None and the value is a non-zero integer.
            CMAKE_BUILD_PARALLEL_LEVEL: Parallel build level; when unset,
                `os.cpu_count()` is passed to the build via `-j`.
            CMAKE_ARGS: Extra space-separated arguments for the configure
                step.

        Raises:
            RuntimeError: If the `cmake` executable cannot be launched.
            subprocess.CalledProcessError: If configuring or building fails.
        """
        # Ensure that CMake is present and working
        try:
            subprocess.check_output(["cmake", "--version"])
        except OSError:
            raise RuntimeError("Cannot find CMake executable") from None

        if self.debug is None:
            debug = int(os.environ.get("DEBUG", 0))
        else:
            debug = self.debug
        cfg = "Debug" if debug else "Release"

        parallel_level = os.environ.get("CMAKE_BUILD_PARALLEL_LEVEL", None)
        if parallel_level is not None:
            self.parallel = int(parallel_level)
        else:
            self.parallel = os.cpu_count()

        extdir = os.path.abspath(
            os.path.dirname(self.get_ext_fullpath(ext.name))
        )

        cmake_args = [
            "-DCMAKE_BUILD_TYPE=%s" % cfg,
            "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(
                cfg.upper(), extdir
            ),
            "-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{}={}".format(
                cfg.upper(), self.build_temp
            ),
        ]

        # Adding CMake arguments set as environment variable
        if "CMAKE_ARGS" in os.environ:
            cmake_args += [
                item for item in os.environ["CMAKE_ARGS"].split(" ") if item
            ]

        os.makedirs(self.build_temp, exist_ok=True)

        build_args = ["--config", cfg]
        # When CMAKE_BUILD_PARALLEL_LEVEL is set, no `-j` flag is added and
        # the variable is left in the environment for the build step.
        if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ and self.parallel:
            build_args += [f"-j{self.parallel}"]

        # Config
        subprocess.check_call(
            ["cmake", ext.cmake_lists_dir] + cmake_args, cwd=self.build_temp
        )

        # Build
        subprocess.check_call(
            ["cmake", "--build", "."] + build_args, cwd=self.build_temp
        )

        print()
        # Copy only the library that was just built; the libraries of other
        # extensions may not exist yet.
        self._copy_extension(self.get_finalized_command("build_py"), ext)


class Clean(Command):
    """`clean` command that deletes files matched by `.gitignore` patterns.

    Patterns are read from the `.gitignore` in the current working directory
    and expanded with `glob`. Reading stops at a line starting with
    `# BEGIN NOT-CLEAN-FILES `, so patterns below that marker are never
    cleaned.
    """

    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    @staticmethod
    def _to_relative_pattern(pattern):
        """Turn a `.gitignore` pattern into a glob relative to the cwd.

        Only leading "./" and "/" prefixes are removed, so dot-files such as
        ".eggs/" keep their name. Returns "" for patterns that must not be
        expanded: an empty pattern, "." (the working directory itself), or
        any pattern with a ".." component.
        """
        while pattern.startswith(("./", "/")):
            pattern = pattern[2:] if pattern.startswith("./") else pattern[1:]
        if pattern in ("", ".") or ".." in pattern.split("/"):
            return ""
        return pattern

    def run(self):
        """Delete every file or directory matched by the cleanable patterns."""
        import glob
        import re
        import shutil

        with open(".gitignore") as f:
            ignores = f.read()
        pat = re.compile(r"^#( BEGIN NOT-CLEAN-FILES )?")
        for wildcard in filter(None, ignores.split("\n")):
            match = pat.match(wildcard)
            if match:
                if match.group(1):
                    # Marker is found and stop reading .gitignore.
                    break
                # Ignore lines which begin with '#'.
                continue

            # Don't remove absolute paths from the system
            wildcard = self._to_relative_pattern(wildcard)
            if not wildcard:
                continue

            for filename in glob.glob(wildcard):
                print(f"cleaning '{filename}'")
                try:
                    os.remove(filename)
                except OSError:
                    # `os.remove` fails on directories; remove the tree.
                    shutil.rmtree(filename, ignore_errors=True)


# One-line summary passed to `setup()` as the package description.
description = "PyTileFusion: A Python wrapper for tilefusion C++ library."

# Executing `__version__.py` defines `__version__` in this module's namespace.
version_file = os.path.join("pytilefusion", "__version__.py")
with open(version_file) as f:
    exec(f.read())

setup(
    name="tilefusion",
    version=__version__,  # noqa F821
    description=description,
    author="Ying Cao, Chengxiang Qi",
    python_requires=">=3.10",
    packages=find_packages(exclude=[""]),
    install_requires=get_requirements(),
    ext_modules=[CMakeExtension("tilefusion")],
    # Custom commands: the CMake-driven extension build and the
    # `.gitignore`-based clean.
    cmdclass=dict(build_ext=CMakeBuildExt, clean=Clean),
)
12 changes: 10 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,15 @@ set_target_properties(
CUDA_SEPARABLE_COMPILATION ON)

target_compile_options(
${TARGET} PUBLIC $<$<COMPILE_LANGUAGE:CUDA>: -Werror,-Wall -rdc=true
-std=c++20 -fconcepts -fpermissive>)
${TARGET}
PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:
-Werror,-Wall
-rdc=true
-std=c++20
-fconcepts
-fpermissive
--use_fast_math
--generate-line-info
>)
target_compile_features(${TARGET} PUBLIC cxx_std_20 cuda_std_20)
target_link_libraries(${TARGET} "${TORCH_LIBRARIES}")
11 changes: 0 additions & 11 deletions tests/python/context.py

This file was deleted.

1 change: 0 additions & 1 deletion tests/python/test_flash_attn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import unittest

import context # noqa: F401
import torch

from pytilefusion import TiledFlashAttention
Expand Down
1 change: 0 additions & 1 deletion tests/python/test_scatter_nd.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import random
import unittest

import context # noqa: F401
import torch

from pytilefusion import scatter_nd
Expand Down
Loading