diff --git a/.docker/Dockerfile b/.docker/Dockerfile index b58c381d1..a5b0f4707 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -1,7 +1,7 @@ -# SPDX-FileCopyrightText: 2022 James R. Barlow +# SPDX-FileCopyrightText: 2024 James R. Barlow # SPDX-License-Identifier: MPL-2.0 -FROM ubuntu:22.04 as base +FROM ubuntu:24.04 AS base ENV LANG=C.UTF-8 ENV TZ=UTC @@ -9,19 +9,15 @@ RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selectio RUN apt-get update && apt-get install -y --no-install-recommends \ python3 \ - libqpdf-dev \ - zlib1g \ - liblept5 + python-is-python3 -FROM base as builder +FROM base AS builder # Note we need leptonica here to build jbig2 RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential autoconf automake libtool \ libleptonica-dev \ zlib1g-dev \ - python3-dev \ - python3-distutils \ libffi-dev \ ca-certificates \ curl \ @@ -29,15 +25,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libcairo2-dev \ pkg-config -# Get the latest pip (Ubuntu version doesn't support manylinux2010) -RUN \ - curl https://bootstrap.pypa.io/get-pip.py | python3 - # Compile and install jbig2 # Needs libleptonica-dev, zlib1g-dev RUN \ mkdir jbig2 \ - && curl -L https://github.com/agl/jbig2enc/archive/ea6a40a.tar.gz | \ + && curl -L https://github.com/agl/jbig2enc/archive/c0141bf.tar.gz | \ tar xz -C jbig2 --strip-components=1 \ && cd jbig2 \ && ./autogen.sh && ./configure && make && make install \ @@ -48,23 +40,24 @@ COPY . /app WORKDIR /app -RUN pip3 install --no-cache-dir .[test,webservice,watcher] +RUN curl -LsSf https://astral.sh/uv/0.4.27/install.sh | sh + +ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy + +# Instead of restarting the shell, use uv directly from its installed location. +RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher FROM base -# For Tesseract 5 -RUN apt-get update && apt-get install -y --no-install-recommends \ - software-properties-common gpg-agent +RUN apt-get update && apt-get install -y software-properties-common + RUN add-apt-repository -y ppa:alex-p/tesseract-ocr-devel RUN apt-get update && apt-get install -y --no-install-recommends \ ghostscript \ fonts-droid-fallback \ jbig2dec \ - img2pdf \ - libsm6 libxext6 libxrender-dev \ pngquant \ - python-is-python3 \ tesseract-ocr \ tesseract-ocr-chi-sim \ tesseract-ocr-deu \ @@ -80,11 +73,9 @@ WORKDIR /app COPY --from=builder /usr/local/lib/ /usr/local/lib/ COPY --from=builder /usr/local/bin/ /usr/local/bin/ -COPY --from=builder /app/misc/webservice.py /app/ -COPY --from=builder /app/misc/watcher.py /app/ +COPY --from=builder --chown=app:app /app /app + +ENV PATH="/app/.venv/bin:${PATH}" -# Copy minimal project files to get the test suite. -COPY --from=builder /app/pyproject.toml /app/README.md /app/ -COPY --from=builder /app/tests /app/tests +ENTRYPOINT ["/app/.venv/bin/ocrmypdf"] -ENTRYPOINT ["/usr/local/bin/ocrmypdf"] diff --git a/.docker/Dockerfile.alpine b/.docker/Dockerfile.alpine index 979a41054..cdecf87ac 100644 --- a/.docker/Dockerfile.alpine +++ b/.docker/Dockerfile.alpine @@ -8,7 +8,7 @@ # Details # https://gitlab.alpinelinux.org/alpine/aports/-/issues/16143 # https://github.com/ocrmypdf/OCRmyPDF/issues/1395 -FROM alpine:3.19 as base +FROM alpine:3.19 AS base ENV LANG=C.UTF-8 ENV TZ=UTC @@ -17,40 +17,24 @@ RUN apk add --no-cache \ python3 \ zlib -FROM base as builder +FROM base AS builder RUN apk add --no-cache \ ca-certificates \ git \ python3-dev \ - py3-pip - -# On arm64, we need to build cffi from source. -ARG TARGETPLATFORM - -RUN if [ "${TARGETPLATFORM}" == "linux/arm64" ]; then \ - apk add --no-cache \ - build-base \ - autoconf \ - automake \ - libtool \ - zlib-dev \ - libffi-dev \ - cairo-dev \ - pkgconfig \ - ; \ - fi + curl COPY . /app WORKDIR /app -RUN python3 -m venv .venv +RUN curl -LsSf https://astral.sh/uv/0.4.27/install.sh | sh -RUN source .venv/bin/activate \ - && python3 -m pip install --no-cache-dir --upgrade pip \ - && python3 -m pip install --no-cache-dir wheel \ - && python3 -m pip install --no-cache-dir .[test,webservice,watcher] +ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy + +# Instead of restarting the shell, use uv directly from its installed location. +RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher FROM base @@ -73,17 +57,7 @@ RUN apk add --no-cache \ WORKDIR /app -COPY --from=builder /usr/local/lib/ /usr/local/lib/ -COPY --from=builder /usr/local/bin/ /usr/local/bin/ - -COPY --from=builder /app/.venv/ /app/.venv/ - -COPY --from=builder /app/misc/webservice.py /app/ -COPY --from=builder /app/misc/watcher.py /app/ - -# Copy minimal project files to get the test suite. -COPY --from=builder /app/pyproject.toml /app/README.md /app/ -COPY --from=builder /app/tests /app/tests +COPY --from=builder --chown=app:app /app /app ENV PATH="/app/.venv/bin:${PATH}" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d88a87bf7..13816263f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,26 +19,16 @@ jobs: test_linux: name: Test ${{ matrix.os }} with Python ${{ matrix.python }} runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.experimental }} strategy: matrix: - experimental: [false] + os: [ubuntu-22.04, ubuntu-24.04] + python: ["3.10", "3.11", "3.12", "3.13"] include: - os: ubuntu-22.04 + tesseract_ppa: "ppa" python: "3.10" - - os: ubuntu-22.04 - python: "3.11" - - os: ubuntu-22.04 - python: "3.10" - tesseract5: true - - os: ubuntu-latest - python: "3.12" - tesseract5: true - - os: ubuntu-latest + - os: ubuntu-24.04 python: "pypy3.10" - - os: ubuntu-22.04 - python: "3.13" - experimental: true env: OS: ${{ matrix.os }} @@ -49,15 +39,18 @@ jobs: with: fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags - - uses: actions/setup-python@v5 - name: Setup Python + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "0.4.27" + + - name: "Set up Python" + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - cache: "pip" - allow-prereleases: ${{ matrix.experimental }} - - name: Install Tesseract 5 - if: matrix.tesseract5 + - name: Install Tesseract from PPA + if: matrix.tesseract_ppa run: | sudo add-apt-repository -y ppa:alex-p/tesseract-ocr-devel @@ -91,8 +84,7 @@ jobs: - name: Install Python packages run: | - python -m pip install --upgrade pip wheel - python -m pip install --prefer-binary .[test] + uv sync --extra test - name: Report versions run: | @@ -100,11 +92,11 @@ jobs: gs --version pngquant --version unpaper --version - img2pdf --version + uv run img2pdf --version - name: Test run: | - python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ + uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -121,7 +113,7 @@ jobs: strategy: matrix: os: [macos-latest, macos-13] # macos-latest is arm64, macos-13 is x86_64 - python: ["3.10", "3.11", "3.12"] + python: ["3.10", "3.11", "3.12", "3.13"] env: OS: ${{ matrix.os }} @@ -144,27 +136,30 @@ jobs: pngquant \ tesseract - - uses: actions/setup-python@v5 - name: Setup Python + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "0.4.27" + + - name: "Set up Python" + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - cache: "pip" - name: Install Python packages run: | - python -m pip install --upgrade pip wheel - python -m pip install --prefer-binary .[test] + uv sync --extra test - name: Report versions run: | tesseract --version gs --version pngquant --version - img2pdf --version + uv run img2pdf --version - name: Test run: | - python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ + uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -180,7 +175,7 @@ jobs: strategy: matrix: os: [windows-latest] - python: ["3.10", "3.11", "3.12"] + python: ["3.10", "3.11", "3.12", "3.13"] env: OS: ${{ matrix.os }} @@ -191,11 +186,15 @@ jobs: with: fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags - - uses: actions/setup-python@v5 - name: Setup Python + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "0.4.27" + + - name: "Set up Python" + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - cache: "pip" - name: Install system packages run: | @@ -204,12 +203,11 @@ jobs: - name: Install Python packages run: | - python -m pip install --upgrade pip wheel - python -m pip install --prefer-binary .[test] + uv sync --extra test - name: Test run: | - python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ + uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -227,16 +225,14 @@ jobs: with: fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags - - uses: actions/setup-python@v5 - name: Setup Python + - name: Install uv + uses: astral-sh/setup-uv@v3 with: - python-version: "3.10" - cache: "pip" + version: "0.4.27" - name: Make wheels and sdist run: | - python -m pip install --upgrade pip wheel build - python -m build --sdist --wheel + uv build --sdist --wheel - uses: actions/upload-artifact@v4 with: diff --git a/docs/docker.rst b/docs/docker.rst index 419a3a623..16bc351db 100644 --- a/docs/docker.rst +++ b/docs/docker.rst @@ -65,7 +65,13 @@ The ``ocrmypdf`` image is also available, but is deprecated and will be removed in the future. OCRmyPDF will use all available CPU cores. See the Docker documentation for -`adjusting memory and CPU on other platforms `__. +`adjusting memory and CPU on other platforms `__ +if you are using Docker on macOS or Windows, where you may need to manually assign +more resources. On Linux, all resources will be available automatically. + +The underlying operating system and other details in Docker images are subject +to change at minor releases. If you are modifying the image, you should pin +the version you intend to use. Using the Docker image on the command line ========================================== diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 38e31e976..f551fa455 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -30,6 +30,18 @@ OCRmyPDF typically supports the three most recent Python versions. .. |OCRmyPDF PyPI| image:: https://img.shields.io/pypi/v/ocrmypdf.svg +v16.6.0 +======= + +- Fixed Alpine Docker image by enforcing Alpine 3.19. Alpine 3.20 includes a + defective version of Tesseract OCR and so is not usable. +- Upgraded Ubuntu Docker image to use Ubuntu 24.04. +- Build and test scripts/actions switched to uv. +- When running in a container, we now remind the user that temporary folders + are inside the container and may not be accessible. +- Fixed an error detecting the version of unpaper 7.0.0. +- Fixed Linux test coverage matrix, which was missing some key versions. + v16.5.0 ======= diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py index 9c9c21abc..a5a92f4c8 100644 --- a/src/ocrmypdf/_exec/unpaper.py +++ b/src/ocrmypdf/_exec/unpaper.py @@ -48,7 +48,7 @@ def __init__( def version() -> Version: - return Version(get_version('unpaper', regex=r'(?m).*(\d+(\.\d+)(\.\d+)?)')) + return Version(get_version('unpaper', regex=r'(?m).*?(\d+(\.\d+)(\.\d+)?)')) @contextmanager diff --git a/src/ocrmypdf/subprocess/__init__.py b/src/ocrmypdf/subprocess/__init__.py index 0b7a1c53b..c9a0700f6 100644 --- a/src/ocrmypdf/subprocess/__init__.py +++ b/src/ocrmypdf/subprocess/__init__.py @@ -215,8 +215,10 @@ def get_version( OLD_VERSION_REQUIRED_FOR = ''' OCRmyPDF requires '{program}' {need_version} or higher when run with the -{required_for} arguments. If you omit these arguments, OCRmyPDF may be able to -proceed. For best results, install the program. +{required_for} arguments. {program} {found_version} is installed. + +If you omit these arguments, OCRmyPDF may be able to +proceed. For best results, update the program. ''' OSX_INSTALL_ADVICE = '''