From e77d0f809af220b25b88138ed4b7058b290ed343 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 17 Jun 2024 09:55:37 -0700 Subject: [PATCH 1/3] Remove obsolete Python tests CI job (#38) Summary: This PR removes the previous testing job that are now obsolete. The Python tests are now being run in the "Build wheel / install-and-test" jobs. This is more robust as it simulate a user-installed torchcodec rather than a torchcodec built from source. Pull Request resolved: https://github.com/pytorch-labs/torchcodec/pull/38 Reviewed By: ahmadsharif1 Differential Revision: D58673544 Pulled By: NicolasHug fbshipit-source-id: 73dfcf5ca9101f5b8eb2ea1976ef9c6759670b91 --- .../{unit_test.yaml => cpp_tests.yaml} | 46 +------------------ 1 file changed, 2 insertions(+), 44 deletions(-) rename .github/workflows/{unit_test.yaml => cpp_tests.yaml} (64%) diff --git a/.github/workflows/unit_test.yaml b/.github/workflows/cpp_tests.yaml similarity index 64% rename from .github/workflows/unit_test.yaml rename to .github/workflows/cpp_tests.yaml index f6bd55f2..6947afe8 100644 --- a/.github/workflows/unit_test.yaml +++ b/.github/workflows/cpp_tests.yaml @@ -1,4 +1,4 @@ -name: Unit Test +name: CPP tests on: push: @@ -14,49 +14,7 @@ defaults: shell: bash -l -eo pipefail {0} jobs: - python: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ['3.8', '3.12'] - ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1'] - steps: - - name: Check out repo - uses: actions/checkout@v3 - - name: Setup conda env - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - miniconda-version: "latest" - activate-environment: test - python-version: ${{ matrix.python-version }} - - name: Update pip - run: python -m pip install --upgrade pip - - name: Install dependencies - run: | - python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu - - name: Build and install torchcodec - run: | - .github/scripts/assert_ffmpeg_not_installed.sh - # TODO: should we pass -DCMAKE_BUILD_TYPE=Debug here? That's what we - # do for the C++ tests. - BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 python -m pip install -e ".[dev]" --no-build-isolation -vvv - - # list the built so files, for debugging. - find src | grep ".so" - - name: Install ffmpeg, post build - run: | - conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" -c conda-forge - ffmpeg -version - - name: Smoke test - run: | - python test/decoders/manual_smoke_test.py - # TODO: diff the output frame with its expeceted value - - name: Run Python tests - run: | - pytest test - Cpp: + Cpp-tests: runs-on: ubuntu-latest strategy: fail-fast: false From 58350e151259331bac0d5917797bdb6d55e42e14 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 17 Jun 2024 09:58:52 -0700 Subject: [PATCH 2/3] Add black to pre-commit and Add lint CI job (#39) Summary: It works: https://github.com/pytorch-labs/torchcodec/actions/runs/9551252976/job/26325153079?pr=39 Pull Request resolved: https://github.com/pytorch-labs/torchcodec/pull/39 Reviewed By: ahmadsharif1 Differential Revision: D58676975 Pulled By: NicolasHug fbshipit-source-id: b0a0b6ccabff1ebaf7a9e6f3fa5cc1beed9fdbb2 --- .github/workflows/lint.yaml | 40 +++++++++++++++++++++++++++++++++++++ .pre-commit-config.yaml | 14 ++++++------- 2 files changed, 47 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/lint.yaml diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..df0c2cdf --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,40 @@ +name: Lint + +on: + push: + branches: [ main ] + pull_request: + +concurrency: + group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash -l -eo pipefail {0} + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.12'] + steps: + - name: Check out repo + uses: actions/checkout@v3 + - name: Setup conda env + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + miniconda-version: "latest" + activate-environment: test + python-version: ${{ matrix.python-version }} + - name: Update pip + run: python -m pip install --upgrade pip + - name: Install pre-commit + run: | + python -m pip install pre-commit + - name: Run pre-commit checks + run: | + pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c34186bc..fe280afc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,13 +16,13 @@ repos: - id: check-added-large-files args: ['--maxkb=1000'] - # - repo: https://github.com/omnilib/ufmt - # rev: v2.3.0 - # hooks: - # - id: ufmt - # additional_dependencies: - # - black == 22.12.0 - # - usort == 1.0.5 + - repo: https://github.com/omnilib/ufmt + rev: v2.6.0 + hooks: + - id: ufmt + additional_dependencies: + - black == 24.4.2 + - usort == 1.0.5 - repo: https://github.com/PyCQA/flake8 rev: 7.1.0 From 3816bc1cb7d9171d239d44c7a6d85368dbaf9109 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 17 Jun 2024 09:59:18 -0700 Subject: [PATCH 3/3] Simplify build by creating a fake extension (#34) Summary: This PR simplifies our build setup from `setup.py`'s side: instead of creating one `Extension` object per `.so` file that we build, we declare only *one* fake Extension object and let CMake do all the work. This PR addresses https://www.internalfb.com/diff/D58585136?dst_version_fbid=386542291075917&transaction_fbid=3369755276656440. Pros: - We don't have to keep the .so files name in sync between CMake and setup.py. In D58585136 that proved to be challenging and brittle. - The CMake buid is called only once. This is simpler to debug, less surprising, and slightly faster - We can unblock D58585136 Cons: - We have to handle the "move extensions from their temp build dir back into where they're expected" logic ourselves - see comments for clarifications. We had a non-standard setup before, now we have another non-standard setup. Overall, I think we gained a little bit of simplicity with that setup, even though that made me write a lot of comments. And that will help us unblock D58585136. Pull Request resolved: https://github.com/pytorch-labs/torchcodec/pull/34 Reviewed By: ahmadsharif1 Differential Revision: D58646830 Pulled By: NicolasHug fbshipit-source-id: fac238ad78931ee923ba3af7d5f6be1d1930bd36 --- .github/workflows/wheel.yaml | 2 +- setup.py | 99 ++++++++++++-------- src/torchcodec/decoders/_core/CMakeLists.txt | 2 - 3 files changed, 61 insertions(+), 42 deletions(-) diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml index 49c31cf3..568702eb 100644 --- a/.github/workflows/wheel.yaml +++ b/.github/workflows/wheel.yaml @@ -96,7 +96,7 @@ jobs: run: | wheel_path=`find dist -type f -name "*.whl"` echo Installing $wheel_path - python -m pip install $wheel_path + python -m pip install $wheel_path -vvv - name: Check out repo uses: actions/checkout@v3 diff --git a/setup.py b/setup.py index 2d7ceebe..73321c1a 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,11 @@ class CMakeBuild(build_ext): + + def __init__(self, *args, **kwargs): + self._install_prefix = None + super().__init__(*args, **kwargs) + def run(self): try: subprocess.check_output(["cmake", "--version"]) @@ -57,13 +62,51 @@ def run(self): super().run() def build_extension(self, ext): - install_prefix = Path(self.get_ext_fullpath(ext.name)).parent.absolute() + """Call our CMake build system to build libtorchcodec?.so""" + # Setuptools was designed to build one extension (.so file) at a time, + # calling this method for each Extension object. We're using a + # CMake-based build where all our extensions are built together at once. + # If we were to declare one Extension object per .so file as in a + # standard setup, a) we'd have to keep the Extensions names in sync with + # the CMake targets, and b) we would be calling into CMake for every + # single extension: that's overkill and inefficient, since CMake builds + # all the extensions at once. To avoid all that we create a *single* + # fake Extension which triggers the CMake build only once. + assert ext.name == "FAKE_NAME", f"Unexpected extension name: {ext.name}" + # The price to pay for our non-standard setup is that we have to tell + # setuptools *where* those extensions are expected to be within the + # source tree (for sdists or editable installs) or within the wheel. + # Normally, setuptools relies on the extension's name to figure that + # out, e.g. an extension named `torchcodec.libtorchcodec.so` would be + # placed in `torchcodec/` and importable from `torchcodec.`. From that, + # setuptools knows how to move the extensions from their temp build + # directories back into the proper dir. + # Our fake extension's name is just a placeholder, so we have to handle + # that relocation logic ourselves. + # _install_prefix is the temp directory where the built extension(s) + # will be "installed" by CMake. Once they're copied to install_prefix, + # the built .so files still need to be copied back into: + # - the source tree (for editable installs) - this is handled in + # copy_extensions_to_source() + # - the (temp) wheel directory (when building a wheel). I cannot tell + # exactly *where* this is handled, but for this to work we must + # prepend the "/torchcodec" folder to _install_prefix: this tells + # setuptools to eventually move those .so files into `torchcodec/`. + # It may seem overkill to 'cmake install' the extensions in a temp + # directory and move them back to another dir, but this is what + # setuptools would do and expect even in a standard build setup. + self._install_prefix = ( + Path(self.get_ext_fullpath(ext.name)).parent.absolute() / "torchcodec" + ) + self._build_all_extensions_with_cmake() + + def _build_all_extensions_with_cmake(self): # Note that self.debug is True when you invoke setup.py like this: # python setup.py build_ext --debug install build_type = "Debug" if self.debug else "Release" torch_dir = Path(torch.utils.cmake_prefix_path) / "Torch" cmake_args = [ - f"-DCMAKE_INSTALL_PREFIX={install_prefix}", + f"-DCMAKE_INSTALL_PREFIX={self._install_prefix}", f"-DTorch_DIR={torch_dir}", "-DCMAKE_VERBOSE_MAKEFILE=ON", f"-DCMAKE_BUILD_TYPE={build_type}", @@ -77,17 +120,18 @@ def build_extension(self, ext): subprocess.check_call(["cmake", "--build", "."], cwd=self.build_temp) subprocess.check_call(["cmake", "--install", "."], cwd=self.build_temp) - def get_ext_filename(self, fullname): - # When copying the .so files from the build tmp dir to the actual - # package dir, this tells setuptools to look for a .so file without the - # Python ABI suffix, i.e. "libtorchcodec4.so" instead of e.g. - # "libtorchcodec4.cpython-38-x86_64-linux-gnu.so", which is what - # setuptools looks for by default. - ext_filename = super().get_ext_filename(fullname) - ext_filename_parts = ext_filename.split(".") - without_abi = ext_filename_parts[:-2] + ext_filename_parts[-1:] - ext_filename = ".".join(without_abi) - return ext_filename + def copy_extensions_to_source(self): + """Copy built extensions from temporary folder back into source tree. + + This is called by setuptools at the end of .run() during editable installs. + """ + self.get_finalized_command("build_py") + + for so_file in self._install_prefix.glob("*.so"): + assert "libtorchcodec" in so_file.name + destination = Path("src/torchcodec/") / so_file.name + print(f"Copying {so_file} to {destination}") + self.copy_file(so_file, destination, level=self.verbose) NOT_A_LICENSE_VIOLATION_VAR = "I_CONFIRM_THIS_IS_NOT_A_LICENSE_VIOLATION" @@ -105,29 +149,6 @@ def get_ext_filename(self, fullname): f"If you have a good reason *not* to, then set {NOT_A_LICENSE_VIOLATION_VAR}." ) -# If BUILD_AGAINST_ALL_FFMPEG_FROM_S3 is set then we want to build against all -# ffmpeg major version that are available on our S3 bucket. -# If BUILD_AGAINST_ALL_FFMPEG_FROM_S3 is not set, we only build against the -# installed FFmpeg. We don't know what FFmpeg version that is, so we build -# `libtorchcodec.so` without any version suffix. We could probably figure out -# the version number by invoking `pkg-config --modversion`. -FFMPEG_MAJOR_VERSIONS = (4, 5, 6, 7) if build_against_all_ffmpeg_from_s3 else ("",) -extensions = [ - Extension( - # The names here must be kept in sync with the target names in the - # CMakeLists file. Grep for [ LIBTORCHCODEC_KEEP_IN_SYNC ]. The name - # parameter specifies not just the name but mainly *where* the .so file - # should be copied to. By setting the name this way, we're telling - # `libtorchcodec{ffmpeg_major_version}.so` to be copied at the root of - # the torchcodec package (next to the __init__.py file). This is where - # it's expected to be when we call torch.ops.load_library(). - name=f"torchcodec.libtorchcodec{ffmpeg_major_version}", - # sources is a mandatory parameter so we have to pass it, but we leave - # it empty because we'll be building the extensions with our custom - # CMakeBuild class, and the sources are specified within the - # CMakeLists.txt file. - sources=[], - ) - for ffmpeg_major_version in FFMPEG_MAJOR_VERSIONS -] -setup(ext_modules=extensions, cmdclass={"build_ext": CMakeBuild}) +# See `CMakeBuild.build_extension()`. +fake_extension = Extension(name="FAKE_NAME", sources=[]) +setup(ext_modules=[fake_extension], cmdclass={"build_ext": CMakeBuild}) diff --git a/src/torchcodec/decoders/_core/CMakeLists.txt b/src/torchcodec/decoders/_core/CMakeLists.txt index 786dd3e5..39281b47 100644 --- a/src/torchcodec/decoders/_core/CMakeLists.txt +++ b/src/torchcodec/decoders/_core/CMakeLists.txt @@ -62,8 +62,6 @@ if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3}) ${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake ) - # The libtorchcodec names here must be kept in sync with the library names - # in setup.py. Grep for [ LIBTORCHCODEC_KEEP_IN_SYNC ] make_torchcodec_library(libtorchcodec4 ffmpeg4) make_torchcodec_library(libtorchcodec5 ffmpeg5) make_torchcodec_library(libtorchcodec6 ffmpeg6)