diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 8ddf764..f65e8cb 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -91,8 +91,12 @@ jobs: - name: Free up disk space if: ${{ runner.os == 'Linux' }} + # https://github.com/easimon/maximize-build-space/blob/master/action.yml + # https://github.com/easimon/maximize-build-space/tree/test-report run: | sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL - name: Install CUDA ${{ matrix.cuda-version }} if: ${{ matrix.cuda-version != 'cpu' }} diff --git a/csrc/cutlass b/csrc/cutlass index c4f6b8c..6f47420 160000 --- a/csrc/cutlass +++ b/csrc/cutlass @@ -1 +1 @@ -Subproject commit c4f6b8c6bc94ff69048492fb34df0dfaf1983933 +Subproject commit 6f47420213f757831fae65c686aa471749fa8d60 diff --git a/flash_attn/__init__.py b/flash_attn/__init__.py index b179694..b4265f7 100644 --- a/flash_attn/__init__.py +++ b/flash_attn/__init__.py @@ -1,4 +1,4 @@ -__version__ = "2.0.6.post3" +__version__ = "2.0.6.post4" from flash_attn.flash_attn_interface import flash_attn_func from flash_attn.flash_attn_interface import flash_attn_kvpacked_func diff --git a/setup.py b/setup.py index 43481da..d087813 100644 --- a/setup.py +++ b/setup.py @@ -91,9 +91,9 @@ def raise_if_cuda_home_none(global_option: str) -> None: def append_nvcc_threads(nvcc_extra_args): - _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME) - if bare_metal_version >= Version("11.2"): - return nvcc_extra_args + ["--threads", "4"] + # _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME) + # if bare_metal_version >= Version("11.2"): + # return nvcc_extra_args + ["--threads", "4"] return nvcc_extra_args