From f50620c2446f418e6e981064b8fcdc5d20cdf906 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 13:26:41 -0700 Subject: [PATCH 01/13] Convert to uv build --- .github/workflows/build.yml | 84 +++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d88a87bf7..a413e5f6e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -38,7 +38,6 @@ jobs: python: "pypy3.10" - os: ubuntu-22.04 python: "3.13" - experimental: true env: OS: ${{ matrix.os }} @@ -49,12 +48,15 @@ jobs: with: fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags - - uses: actions/setup-python@v5 - name: Setup Python + - name: Install uv + uses: astral-sh/setup-uv@v3 with: - python-version: ${{ matrix.python }} - cache: "pip" - allow-prereleases: ${{ matrix.experimental }} + version: "0.4.27" + + - name: "Set up Python" + uses: actions/setup-python@v5 + with: + python-version-file: ".python-version" - name: Install Tesseract 5 if: matrix.tesseract5 @@ -89,11 +91,6 @@ jobs: libxslt1-dev \ pypy3-dev - - name: Install Python packages - run: | - python -m pip install --upgrade pip wheel - python -m pip install --prefer-binary .[test] - - name: Report versions run: | tesseract --version @@ -102,9 +99,13 @@ jobs: unpaper --version img2pdf --version + - name: Install Python packages + run: | + uv sync --extra test + - name: Test run: | - python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ + uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -144,17 +145,6 @@ jobs: pngquant \ tesseract - - uses: actions/setup-python@v5 - name: Setup Python - with: - python-version: ${{ matrix.python }} - cache: "pip" - - - name: Install Python packages - run: | - python -m pip install --upgrade pip wheel - python -m pip install --prefer-binary .[test] - - name: Report versions run: | tesseract --version @@ -162,9 +152,23 @@ jobs: pngquant --version img2pdf --version + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "0.4.27" + + - name: "Set up Python" + uses: actions/setup-python@v5 + with: + python-version-file: ".python-version" + + - name: Install Python packages + run: | + uv sync --extra test + - name: Test run: | - python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ + uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -191,11 +195,15 @@ jobs: with: fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags - - uses: actions/setup-python@v5 - name: Setup Python + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "0.4.27" + + - name: "Set up Python" + uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python }} - cache: "pip" + python-version-file: ".python-version" - name: Install system packages run: | @@ -204,12 +212,11 @@ jobs: - name: Install Python packages run: | - python -m pip install --upgrade pip wheel - python -m pip install --prefer-binary .[test] + uv sync --extra test - name: Test run: | - python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ + uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -227,16 +234,19 @@ jobs: with: fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags - - uses: actions/setup-python@v5 - name: Setup Python + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "0.4.27" + + - name: "Set up Python" + uses: actions/setup-python@v5 with: - python-version: "3.10" - cache: "pip" + python-version-file: ".python-version" - name: Make wheels and sdist run: | - python -m pip install --upgrade pip wheel build - python -m build --sdist --wheel + uv build --sdist --wheel - uses: actions/upload-artifact@v4 with: From e44bf46d7751a43b2fa8cd6e10d97b640bd06204 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 13:32:06 -0700 Subject: [PATCH 02/13] Fix img2pdf and python version --- .github/workflows/build.yml | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a413e5f6e..f18ab77e8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -56,7 +56,7 @@ jobs: - name: "Set up Python" uses: actions/setup-python@v5 with: - python-version-file: ".python-version" + python-version: ${{ matrix.python }} - name: Install Tesseract 5 if: matrix.tesseract5 @@ -91,17 +91,17 @@ jobs: libxslt1-dev \ pypy3-dev + - name: Install Python packages + run: | + uv sync --extra test + - name: Report versions run: | tesseract --version gs --version pngquant --version unpaper --version - img2pdf --version - - - name: Install Python packages - run: | - uv sync --extra test + uv run img2pdf --version - name: Test run: | @@ -145,13 +145,6 @@ jobs: pngquant \ tesseract - - name: Report versions - run: | - tesseract --version - gs --version - pngquant --version - img2pdf --version - - name: Install uv uses: astral-sh/setup-uv@v3 with: @@ -160,12 +153,19 @@ jobs: - name: "Set up Python" uses: actions/setup-python@v5 with: - python-version-file: ".python-version" + python-version: ${{ matrix.python }} - name: Install Python packages run: | uv sync --extra test + - name: Report versions + run: | + tesseract --version + gs --version + pngquant --version + uv run img2pdf --version + - name: Test run: | uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/ @@ -203,7 +203,7 @@ jobs: - name: "Set up Python" uses: actions/setup-python@v5 with: - python-version-file: ".python-version" + python-version: ${{ matrix.python }} - name: Install system packages run: | @@ -239,11 +239,6 @@ jobs: with: version: "0.4.27" - - name: "Set up Python" - uses: actions/setup-python@v5 - with: - python-version-file: ".python-version" - - name: Make wheels and sdist run: | uv build --sdist --wheel From 23bc247b9c9424c9a73f870fa2ae42b913eb25b3 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 13:47:55 -0700 Subject: [PATCH 03/13] Improve Linux coverage matrix --- .github/workflows/build.yml | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f18ab77e8..19f3a05d0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,25 +19,16 @@ jobs: test_linux: name: Test ${{ matrix.os }} with Python ${{ matrix.python }} runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.experimental }} strategy: matrix: - experimental: [false] + os: [ubuntu-22.04, ubuntu-24.04] + python: ["3.10", "3.11", "3.12", "3.13"] include: - os: ubuntu-22.04 + tesseract_ppa: true python: "3.10" - - os: ubuntu-22.04 - python: "3.11" - - os: ubuntu-22.04 - python: "3.10" - tesseract5: true - - os: ubuntu-latest - python: "3.12" - tesseract5: true - - os: ubuntu-latest + - os: ubuntu-24.04 python: "pypy3.10" - - os: ubuntu-22.04 - python: "3.13" env: OS: ${{ matrix.os }} @@ -58,8 +49,8 @@ jobs: with: python-version: ${{ matrix.python }} - - name: Install Tesseract 5 - if: matrix.tesseract5 + - name: Install Tesseract from PPA + if: matrix.tesseract_ppa run: | sudo add-apt-repository -y ppa:alex-p/tesseract-ocr-devel From 9d042767cc52abacf3e1a196ee469d699a8d839e Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 14:04:45 -0700 Subject: [PATCH 04/13] Attempt to fix matrix and unpaper version error --- .github/workflows/build.yml | 6 +++--- src/ocrmypdf/subprocess/__init__.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 19f3a05d0..13816263f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,7 +25,7 @@ jobs: python: ["3.10", "3.11", "3.12", "3.13"] include: - os: ubuntu-22.04 - tesseract_ppa: true + tesseract_ppa: "ppa" python: "3.10" - os: ubuntu-24.04 python: "pypy3.10" @@ -113,7 +113,7 @@ jobs: strategy: matrix: os: [macos-latest, macos-13] # macos-latest is arm64, macos-13 is x86_64 - python: ["3.10", "3.11", "3.12"] + python: ["3.10", "3.11", "3.12", "3.13"] env: OS: ${{ matrix.os }} @@ -175,7 +175,7 @@ jobs: strategy: matrix: os: [windows-latest] - python: ["3.10", "3.11", "3.12"] + python: ["3.10", "3.11", "3.12", "3.13"] env: OS: ${{ matrix.os }} diff --git a/src/ocrmypdf/subprocess/__init__.py b/src/ocrmypdf/subprocess/__init__.py index 0b7a1c53b..c9a0700f6 100644 --- a/src/ocrmypdf/subprocess/__init__.py +++ b/src/ocrmypdf/subprocess/__init__.py @@ -215,8 +215,10 @@ def get_version( OLD_VERSION_REQUIRED_FOR = ''' OCRmyPDF requires '{program}' {need_version} or higher when run with the -{required_for} arguments. If you omit these arguments, OCRmyPDF may be able to -proceed. For best results, install the program. +{required_for} arguments. {program} {found_version} is installed. + +If you omit these arguments, OCRmyPDF may be able to +proceed. For best results, update the program. ''' OSX_INSTALL_ADVICE = ''' From fe4725658edddda428ea49aa2e9c85b3bf7c2a0c Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 14:57:21 -0700 Subject: [PATCH 05/13] unpaper: fix regex --- src/ocrmypdf/_exec/unpaper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py index 9c9c21abc..a5a92f4c8 100644 --- a/src/ocrmypdf/_exec/unpaper.py +++ b/src/ocrmypdf/_exec/unpaper.py @@ -48,7 +48,7 @@ def __init__( def version() -> Version: - return Version(get_version('unpaper', regex=r'(?m).*(\d+(\.\d+)(\.\d+)?)')) + return Version(get_version('unpaper', regex=r'(?m).*?(\d+(\.\d+)(\.\d+)?)')) @contextmanager From 1e87930bbb852377f5755e0060060ff0da7b679c Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 15:32:46 -0700 Subject: [PATCH 06/13] Use uv to construct Alpine Docker image --- .docker/Dockerfile.alpine | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/.docker/Dockerfile.alpine b/.docker/Dockerfile.alpine index 979a41054..0befb9f9b 100644 --- a/.docker/Dockerfile.alpine +++ b/.docker/Dockerfile.alpine @@ -8,7 +8,7 @@ # Details # https://gitlab.alpinelinux.org/alpine/aports/-/issues/16143 # https://github.com/ocrmypdf/OCRmyPDF/issues/1395 -FROM alpine:3.19 as base +FROM alpine:3.19 AS base ENV LANG=C.UTF-8 ENV TZ=UTC @@ -17,13 +17,14 @@ RUN apk add --no-cache \ python3 \ zlib -FROM base as builder +FROM base AS builder RUN apk add --no-cache \ ca-certificates \ git \ python3-dev \ - py3-pip + py3-pip \ + curl # On arm64, we need to build cffi from source. ARG TARGETPLATFORM @@ -45,12 +46,12 @@ COPY . /app WORKDIR /app -RUN python3 -m venv .venv +RUN curl -LsSf https://astral.sh/uv/0.4.27/install.sh | sh -RUN source .venv/bin/activate \ - && python3 -m pip install --no-cache-dir --upgrade pip \ - && python3 -m pip install --no-cache-dir wheel \ - && python3 -m pip install --no-cache-dir .[test,webservice,watcher] +ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy + +# Instead of restarting the shell, use uv directly from its installed location. +RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher FROM base @@ -73,17 +74,7 @@ RUN apk add --no-cache \ WORKDIR /app -COPY --from=builder /usr/local/lib/ /usr/local/lib/ -COPY --from=builder /usr/local/bin/ /usr/local/bin/ - -COPY --from=builder /app/.venv/ /app/.venv/ - -COPY --from=builder /app/misc/webservice.py /app/ -COPY --from=builder /app/misc/watcher.py /app/ - -# Copy minimal project files to get the test suite. -COPY --from=builder /app/pyproject.toml /app/README.md /app/ -COPY --from=builder /app/tests /app/tests +COPY --from=builder --chown=app:app /app /app ENV PATH="/app/.venv/bin:${PATH}" From f9b3e9a97b61e395111fe188f006f839093ade22 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 26 May 2024 14:51:20 -0700 Subject: [PATCH 07/13] Add Dockerfile for Ubuntu 24.04 LTS --- .docker/Dockerfile.ubuntu24 | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 .docker/Dockerfile.ubuntu24 diff --git a/.docker/Dockerfile.ubuntu24 b/.docker/Dockerfile.ubuntu24 new file mode 100644 index 000000000..4a6c41c4a --- /dev/null +++ b/.docker/Dockerfile.ubuntu24 @@ -0,0 +1,64 @@ +# SPDX-FileCopyrightText: 2024 James R. Barlow +# SPDX-License-Identifier: MPL-2.0 + +FROM ubuntu:24.04 as base + +ENV LANG=C.UTF-8 +ENV TZ=UTC +RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python-is-python3 + +FROM base as builder + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3-pip \ + python3-venv \ + git + +COPY . /app + +WORKDIR /app + +RUN python -m venv .venv + +RUN . .venv/bin/activate \ + && python -m pip install --no-cache-dir .[test,webservice,watcher] + +FROM base + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ghostscript \ + fonts-droid-fallback \ + jbig2dec \ + pngquant \ + tesseract-ocr \ + tesseract-ocr-chi-sim \ + tesseract-ocr-deu \ + tesseract-ocr-eng \ + tesseract-ocr-fra \ + tesseract-ocr-por \ + tesseract-ocr-spa \ + unpaper \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY --from=builder /usr/local/lib/ /usr/local/lib/ +COPY --from=builder /usr/local/bin/ /usr/local/bin/ + +COPY --from=builder /app/.venv/ /app/.venv/ + +COPY --from=builder /app/misc/webservice.py /app/ +COPY --from=builder /app/misc/watcher.py /app/ + +# Copy minimal project files to get the test suite. +COPY --from=builder /app/pyproject.toml /app/README.md /app/ +COPY --from=builder /app/tests /app/tests + +ENV PATH="/app/.venv/bin:${PATH}" + +ENTRYPOINT ["/app/.venv/bin/ocrmypdf"] + From 120ca72393c81c4aa8b0d2e1d3e92178683c3ca4 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 16:12:42 -0700 Subject: [PATCH 08/13] Update Ubuntu Dockerfile --- .docker/Dockerfile.ubuntu24 | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/.docker/Dockerfile.ubuntu24 b/.docker/Dockerfile.ubuntu24 index 4a6c41c4a..6df10e3ce 100644 --- a/.docker/Dockerfile.ubuntu24 +++ b/.docker/Dockerfile.ubuntu24 @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2024 James R. Barlow # SPDX-License-Identifier: MPL-2.0 -FROM ubuntu:24.04 as base +FROM ubuntu:24.04 AS base ENV LANG=C.UTF-8 ENV TZ=UTC @@ -11,21 +11,24 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3 \ python-is-python3 -FROM base as builder +FROM base AS builder RUN apt-get update && apt-get install -y --no-install-recommends \ python3-pip \ python3-venv \ - git + git \ + curl COPY . /app WORKDIR /app -RUN python -m venv .venv +RUN curl -LsSf https://astral.sh/uv/0.4.27/install.sh | sh -RUN . .venv/bin/activate \ - && python -m pip install --no-cache-dir .[test,webservice,watcher] +ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy + +# Instead of restarting the shell, use uv directly from its installed location. +RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher FROM base @@ -46,17 +49,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /app -COPY --from=builder /usr/local/lib/ /usr/local/lib/ -COPY --from=builder /usr/local/bin/ /usr/local/bin/ - -COPY --from=builder /app/.venv/ /app/.venv/ - -COPY --from=builder /app/misc/webservice.py /app/ -COPY --from=builder /app/misc/watcher.py /app/ - -# Copy minimal project files to get the test suite. -COPY --from=builder /app/pyproject.toml /app/README.md /app/ -COPY --from=builder /app/tests /app/tests +COPY --from=builder --chown=app:app /app /app ENV PATH="/app/.venv/bin:${PATH}" From 8138664287d9bd018745c5b2fa31d97a801d1809 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 16:14:08 -0700 Subject: [PATCH 09/13] Remove Ubuntu 22.04 container --- .docker/Dockerfile | 65 +++++++++---------------------------- .docker/Dockerfile.ubuntu24 | 57 -------------------------------- 2 files changed, 16 insertions(+), 106 deletions(-) delete mode 100644 .docker/Dockerfile.ubuntu24 diff --git a/.docker/Dockerfile b/.docker/Dockerfile index b58c381d1..6df10e3ce 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -1,7 +1,7 @@ -# SPDX-FileCopyrightText: 2022 James R. Barlow +# SPDX-FileCopyrightText: 2024 James R. Barlow # SPDX-License-Identifier: MPL-2.0 -FROM ubuntu:22.04 as base +FROM ubuntu:24.04 AS base ENV LANG=C.UTF-8 ENV TZ=UTC @@ -9,62 +9,34 @@ RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selectio RUN apt-get update && apt-get install -y --no-install-recommends \ python3 \ - libqpdf-dev \ - zlib1g \ - liblept5 + python-is-python3 -FROM base as builder +FROM base AS builder -# Note we need leptonica here to build jbig2 RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential autoconf automake libtool \ - libleptonica-dev \ - zlib1g-dev \ - python3-dev \ - python3-distutils \ - libffi-dev \ - ca-certificates \ - curl \ + python3-pip \ + python3-venv \ git \ - libcairo2-dev \ - pkg-config - -# Get the latest pip (Ubuntu version doesn't support manylinux2010) -RUN \ - curl https://bootstrap.pypa.io/get-pip.py | python3 - -# Compile and install jbig2 -# Needs libleptonica-dev, zlib1g-dev -RUN \ - mkdir jbig2 \ - && curl -L https://github.com/agl/jbig2enc/archive/ea6a40a.tar.gz | \ - tar xz -C jbig2 --strip-components=1 \ - && cd jbig2 \ - && ./autogen.sh && ./configure && make && make install \ - && cd .. \ - && rm -rf jbig2 + curl COPY . /app WORKDIR /app -RUN pip3 install --no-cache-dir .[test,webservice,watcher] +RUN curl -LsSf https://astral.sh/uv/0.4.27/install.sh | sh -FROM base +ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy -# For Tesseract 5 -RUN apt-get update && apt-get install -y --no-install-recommends \ - software-properties-common gpg-agent -RUN add-apt-repository -y ppa:alex-p/tesseract-ocr-devel +# Instead of restarting the shell, use uv directly from its installed location. +RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher + +FROM base RUN apt-get update && apt-get install -y --no-install-recommends \ ghostscript \ fonts-droid-fallback \ jbig2dec \ - img2pdf \ - libsm6 libxext6 libxrender-dev \ pngquant \ - python-is-python3 \ tesseract-ocr \ tesseract-ocr-chi-sim \ tesseract-ocr-deu \ @@ -77,14 +49,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /app -COPY --from=builder /usr/local/lib/ /usr/local/lib/ -COPY --from=builder /usr/local/bin/ /usr/local/bin/ +COPY --from=builder --chown=app:app /app /app -COPY --from=builder /app/misc/webservice.py /app/ -COPY --from=builder /app/misc/watcher.py /app/ +ENV PATH="/app/.venv/bin:${PATH}" -# Copy minimal project files to get the test suite. -COPY --from=builder /app/pyproject.toml /app/README.md /app/ -COPY --from=builder /app/tests /app/tests +ENTRYPOINT ["/app/.venv/bin/ocrmypdf"] -ENTRYPOINT ["/usr/local/bin/ocrmypdf"] diff --git a/.docker/Dockerfile.ubuntu24 b/.docker/Dockerfile.ubuntu24 deleted file mode 100644 index 6df10e3ce..000000000 --- a/.docker/Dockerfile.ubuntu24 +++ /dev/null @@ -1,57 +0,0 @@ -# SPDX-FileCopyrightText: 2024 James R. Barlow -# SPDX-License-Identifier: MPL-2.0 - -FROM ubuntu:24.04 AS base - -ENV LANG=C.UTF-8 -ENV TZ=UTC -RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections - -RUN apt-get update && apt-get install -y --no-install-recommends \ - python3 \ - python-is-python3 - -FROM base AS builder - -RUN apt-get update && apt-get install -y --no-install-recommends \ - python3-pip \ - python3-venv \ - git \ - curl - -COPY . /app - -WORKDIR /app - -RUN curl -LsSf https://astral.sh/uv/0.4.27/install.sh | sh - -ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy - -# Instead of restarting the shell, use uv directly from its installed location. -RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher - -FROM base - -RUN apt-get update && apt-get install -y --no-install-recommends \ - ghostscript \ - fonts-droid-fallback \ - jbig2dec \ - pngquant \ - tesseract-ocr \ - tesseract-ocr-chi-sim \ - tesseract-ocr-deu \ - tesseract-ocr-eng \ - tesseract-ocr-fra \ - tesseract-ocr-por \ - tesseract-ocr-spa \ - unpaper \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /app - -COPY --from=builder --chown=app:app /app /app - -ENV PATH="/app/.venv/bin:${PATH}" - -ENTRYPOINT ["/app/.venv/bin/ocrmypdf"] - From 07f7c6b812e98db0e46f7d5e4ec0f6db8c54f24b Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 16:33:37 -0700 Subject: [PATCH 10/13] Reinstate jbig2 building --- .docker/Dockerfile | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/.docker/Dockerfile b/.docker/Dockerfile index 6df10e3ce..a5b0f4707 100644 --- a/.docker/Dockerfile +++ b/.docker/Dockerfile @@ -13,11 +13,28 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ FROM base AS builder +# Note we need leptonica here to build jbig2 RUN apt-get update && apt-get install -y --no-install-recommends \ - python3-pip \ - python3-venv \ + build-essential autoconf automake libtool \ + libleptonica-dev \ + zlib1g-dev \ + libffi-dev \ + ca-certificates \ + curl \ git \ - curl + libcairo2-dev \ + pkg-config + +# Compile and install jbig2 +# Needs libleptonica-dev, zlib1g-dev +RUN \ + mkdir jbig2 \ + && curl -L https://github.com/agl/jbig2enc/archive/c0141bf.tar.gz | \ + tar xz -C jbig2 --strip-components=1 \ + && cd jbig2 \ + && ./autogen.sh && ./configure && make && make install \ + && cd .. \ + && rm -rf jbig2 COPY . /app @@ -32,6 +49,10 @@ RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher FROM base +RUN apt-get update && apt-get install -y software-properties-common + +RUN add-apt-repository -y ppa:alex-p/tesseract-ocr-devel + RUN apt-get update && apt-get install -y --no-install-recommends \ ghostscript \ fonts-droid-fallback \ @@ -49,6 +70,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /app +COPY --from=builder /usr/local/lib/ /usr/local/lib/ +COPY --from=builder /usr/local/bin/ /usr/local/bin/ + COPY --from=builder --chown=app:app /app /app ENV PATH="/app/.venv/bin:${PATH}" From 2463b91051cdf2c03ee2aebf724df4cedcc0eb73 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 16:46:57 -0700 Subject: [PATCH 11/13] Remove unneeded build steps from Alpine --- .docker/Dockerfile.alpine | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/.docker/Dockerfile.alpine b/.docker/Dockerfile.alpine index 0befb9f9b..cdecf87ac 100644 --- a/.docker/Dockerfile.alpine +++ b/.docker/Dockerfile.alpine @@ -23,25 +23,8 @@ RUN apk add --no-cache \ ca-certificates \ git \ python3-dev \ - py3-pip \ curl -# On arm64, we need to build cffi from source. -ARG TARGETPLATFORM - -RUN if [ "${TARGETPLATFORM}" == "linux/arm64" ]; then \ - apk add --no-cache \ - build-base \ - autoconf \ - automake \ - libtool \ - zlib-dev \ - libffi-dev \ - cairo-dev \ - pkgconfig \ - ; \ - fi - COPY . /app WORKDIR /app From 86a102f8e63d44b301b88b862debcb76ae9e301b Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 16:49:54 -0700 Subject: [PATCH 12/13] Update docker docs --- docs/docker.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/docker.rst b/docs/docker.rst index 419a3a623..16bc351db 100644 --- a/docs/docker.rst +++ b/docs/docker.rst @@ -65,7 +65,13 @@ The ``ocrmypdf`` image is also available, but is deprecated and will be removed in the future. OCRmyPDF will use all available CPU cores. See the Docker documentation for -`adjusting memory and CPU on other platforms `__. +`adjusting memory and CPU on other platforms `__ +if you are using Docker on macOS or Windows, where you may need to manually assign +more resources. On Linux, all resources will be available automatically. + +The underlying operating system and other details in Docker images are subject +to change at minor releases. If you are modifying the image, you should pin +the version you intend to use. Using the Docker image on the command line ========================================== From 92c5200ad21c9e8a6105c10f4c740375bfd1a075 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Sun, 27 Oct 2024 16:56:41 -0700 Subject: [PATCH 13/13] Update release notes for 16.6.0 --- docs/release_notes.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 38e31e976..f551fa455 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -30,6 +30,18 @@ OCRmyPDF typically supports the three most recent Python versions. .. |OCRmyPDF PyPI| image:: https://img.shields.io/pypi/v/ocrmypdf.svg +v16.6.0 +======= + +- Fixed Alpine Docker image by enforcing Alpine 3.19. Alpine 3.20 includes a + defective version of Tesseract OCR and so is not usable. +- Upgraded Ubuntu Docker image to use Ubuntu 24.04. +- Build and test scripts/actions switched to uv. +- When running in a container, we now remind the user that temporary folders + are inside the container and may not be accessible. +- Fixed an error detecting the version of unpaper 7.0.0. +- Fixed Linux test coverage matrix, which was missing some key versions. + v16.5.0 =======