diff --git a/.docker/Dockerfile b/.docker/Dockerfile
index b58c381d1..a5b0f4707 100644
--- a/.docker/Dockerfile
+++ b/.docker/Dockerfile
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
+# SPDX-FileCopyrightText: 2024 James R. Barlow
# SPDX-License-Identifier: MPL-2.0
-FROM ubuntu:22.04 as base
+FROM ubuntu:24.04 AS base
ENV LANG=C.UTF-8
ENV TZ=UTC
@@ -9,19 +9,15 @@ RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selectio
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 \
- libqpdf-dev \
- zlib1g \
- liblept5
+ python-is-python3
-FROM base as builder
+FROM base AS builder
# Note we need leptonica here to build jbig2
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential autoconf automake libtool \
libleptonica-dev \
zlib1g-dev \
- python3-dev \
- python3-distutils \
libffi-dev \
ca-certificates \
curl \
@@ -29,15 +25,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libcairo2-dev \
pkg-config
-# Get the latest pip (Ubuntu version doesn't support manylinux2010)
-RUN \
- curl https://bootstrap.pypa.io/get-pip.py | python3
-
# Compile and install jbig2
# Needs libleptonica-dev, zlib1g-dev
RUN \
mkdir jbig2 \
- && curl -L https://github.com/agl/jbig2enc/archive/ea6a40a.tar.gz | \
+ && curl -L https://github.com/agl/jbig2enc/archive/c0141bf.tar.gz | \
tar xz -C jbig2 --strip-components=1 \
&& cd jbig2 \
&& ./autogen.sh && ./configure && make && make install \
@@ -48,23 +40,24 @@ COPY . /app
WORKDIR /app
-RUN pip3 install --no-cache-dir .[test,webservice,watcher]
+RUN curl -LsSf https://astral.sh/uv/0.4.27/install.sh | sh
+
+ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
+
+# Instead of restarting the shell, use uv directly from its installed location.
+RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher
FROM base
-# For Tesseract 5
-RUN apt-get update && apt-get install -y --no-install-recommends \
- software-properties-common gpg-agent
+RUN apt-get update && apt-get install -y software-properties-common
+
RUN add-apt-repository -y ppa:alex-p/tesseract-ocr-devel
RUN apt-get update && apt-get install -y --no-install-recommends \
ghostscript \
fonts-droid-fallback \
jbig2dec \
- img2pdf \
- libsm6 libxext6 libxrender-dev \
pngquant \
- python-is-python3 \
tesseract-ocr \
tesseract-ocr-chi-sim \
tesseract-ocr-deu \
@@ -80,11 +73,11 @@ WORKDIR /app
COPY --from=builder /usr/local/lib/ /usr/local/lib/
COPY --from=builder /usr/local/bin/ /usr/local/bin/
-COPY --from=builder /app/misc/webservice.py /app/
-COPY --from=builder /app/misc/watcher.py /app/
+# Copy the whole project tree from the builder stage, including the uv-managed .venv.
+# NOTE(review): no --chown=app:app; no "app" user/group appears to be created in this image - re-add it together with a useradd + USER step if non-root is wanted.
+COPY --from=builder /app /app
+
+ENV PATH="/app/.venv/bin:${PATH}"
-# Copy minimal project files to get the test suite.
-COPY --from=builder /app/pyproject.toml /app/README.md /app/
-COPY --from=builder /app/tests /app/tests
+ENTRYPOINT ["/app/.venv/bin/ocrmypdf"]
-ENTRYPOINT ["/usr/local/bin/ocrmypdf"]
diff --git a/.docker/Dockerfile.alpine b/.docker/Dockerfile.alpine
index 979a41054..cdecf87ac 100644
--- a/.docker/Dockerfile.alpine
+++ b/.docker/Dockerfile.alpine
@@ -8,7 +8,7 @@
# Details
# https://gitlab.alpinelinux.org/alpine/aports/-/issues/16143
# https://github.com/ocrmypdf/OCRmyPDF/issues/1395
-FROM alpine:3.19 as base
+FROM alpine:3.19 AS base
ENV LANG=C.UTF-8
ENV TZ=UTC
@@ -17,40 +17,24 @@ RUN apk add --no-cache \
python3 \
zlib
-FROM base as builder
+FROM base AS builder
RUN apk add --no-cache \
ca-certificates \
git \
python3-dev \
- py3-pip
-
-# On arm64, we need to build cffi from source.
-ARG TARGETPLATFORM
-
-RUN if [ "${TARGETPLATFORM}" == "linux/arm64" ]; then \
- apk add --no-cache \
- build-base \
- autoconf \
- automake \
- libtool \
- zlib-dev \
- libffi-dev \
- cairo-dev \
- pkgconfig \
- ; \
- fi
+ curl
COPY . /app
WORKDIR /app
-RUN python3 -m venv .venv
+RUN curl -LsSf https://astral.sh/uv/0.4.27/install.sh | sh
-RUN source .venv/bin/activate \
- && python3 -m pip install --no-cache-dir --upgrade pip \
- && python3 -m pip install --no-cache-dir wheel \
- && python3 -m pip install --no-cache-dir .[test,webservice,watcher]
+ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
+
+# Instead of restarting the shell, use uv directly from its installed location.
+RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher
FROM base
@@ -73,17 +57,9 @@ RUN apk add --no-cache \
WORKDIR /app
-COPY --from=builder /usr/local/lib/ /usr/local/lib/
-COPY --from=builder /usr/local/bin/ /usr/local/bin/
-
-COPY --from=builder /app/.venv/ /app/.venv/
-
-COPY --from=builder /app/misc/webservice.py /app/
-COPY --from=builder /app/misc/watcher.py /app/
-
-# Copy minimal project files to get the test suite.
-COPY --from=builder /app/pyproject.toml /app/README.md /app/
-COPY --from=builder /app/tests /app/tests
+# Copy the whole project tree from the builder stage, including the uv-managed .venv.
+# NOTE(review): no --chown=app:app; no "app" user/group appears to be created in this image - re-add it together with an adduser + USER step if non-root is wanted.
+COPY --from=builder /app /app
ENV PATH="/app/.venv/bin:${PATH}"
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d88a87bf7..13816263f 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -19,26 +19,16 @@ jobs:
test_linux:
name: Test ${{ matrix.os }} with Python ${{ matrix.python }}
runs-on: ${{ matrix.os }}
- continue-on-error: ${{ matrix.experimental }}
strategy:
matrix:
- experimental: [false]
+ os: [ubuntu-22.04, ubuntu-24.04]
+ python: ["3.10", "3.11", "3.12", "3.13"]
include:
- os: ubuntu-22.04
+ tesseract_ppa: "ppa"
python: "3.10"
- - os: ubuntu-22.04
- python: "3.11"
- - os: ubuntu-22.04
- python: "3.10"
- tesseract5: true
- - os: ubuntu-latest
- python: "3.12"
- tesseract5: true
- - os: ubuntu-latest
+ - os: ubuntu-24.04
python: "pypy3.10"
- - os: ubuntu-22.04
- python: "3.13"
- experimental: true
env:
OS: ${{ matrix.os }}
@@ -49,15 +39,18 @@ jobs:
with:
fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags
- - uses: actions/setup-python@v5
- name: Setup Python
+ - name: Install uv
+ uses: astral-sh/setup-uv@v3
+ with:
+ version: "0.4.27"
+
+ - name: "Set up Python"
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
- cache: "pip"
- allow-prereleases: ${{ matrix.experimental }}
- - name: Install Tesseract 5
- if: matrix.tesseract5
+ - name: Install Tesseract from PPA
+ if: matrix.tesseract_ppa
run: |
sudo add-apt-repository -y ppa:alex-p/tesseract-ocr-devel
@@ -91,8 +84,7 @@ jobs:
- name: Install Python packages
run: |
- python -m pip install --upgrade pip wheel
- python -m pip install --prefer-binary .[test]
+ uv sync --extra test
- name: Report versions
run: |
@@ -100,11 +92,11 @@ jobs:
gs --version
pngquant --version
unpaper --version
- img2pdf --version
+ uv run img2pdf --version
- name: Test
run: |
- python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
+ uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
@@ -121,7 +113,7 @@ jobs:
strategy:
matrix:
os: [macos-latest, macos-13] # macos-latest is arm64, macos-13 is x86_64
- python: ["3.10", "3.11", "3.12"]
+ python: ["3.10", "3.11", "3.12", "3.13"]
env:
OS: ${{ matrix.os }}
@@ -144,27 +136,30 @@ jobs:
pngquant \
tesseract
- - uses: actions/setup-python@v5
- name: Setup Python
+ - name: Install uv
+ uses: astral-sh/setup-uv@v3
+ with:
+ version: "0.4.27"
+
+ - name: "Set up Python"
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
- cache: "pip"
- name: Install Python packages
run: |
- python -m pip install --upgrade pip wheel
- python -m pip install --prefer-binary .[test]
+ uv sync --extra test
- name: Report versions
run: |
tesseract --version
gs --version
pngquant --version
- img2pdf --version
+ uv run img2pdf --version
- name: Test
run: |
- python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
+ uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
@@ -180,7 +175,7 @@ jobs:
strategy:
matrix:
os: [windows-latest]
- python: ["3.10", "3.11", "3.12"]
+ python: ["3.10", "3.11", "3.12", "3.13"]
env:
OS: ${{ matrix.os }}
@@ -191,11 +186,15 @@ jobs:
with:
fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags
- - uses: actions/setup-python@v5
- name: Setup Python
+ - name: Install uv
+ uses: astral-sh/setup-uv@v3
+ with:
+ version: "0.4.27"
+
+ - name: "Set up Python"
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
- cache: "pip"
- name: Install system packages
run: |
@@ -204,12 +203,11 @@ jobs:
- name: Install Python packages
run: |
- python -m pip install --upgrade pip wheel
- python -m pip install --prefer-binary .[test]
+ uv sync --extra test
- name: Test
run: |
- python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
+ uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
@@ -227,16 +225,14 @@ jobs:
with:
fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags
- - uses: actions/setup-python@v5
- name: Setup Python
+ - name: Install uv
+ uses: astral-sh/setup-uv@v3
with:
- python-version: "3.10"
- cache: "pip"
+ version: "0.4.27"
- name: Make wheels and sdist
run: |
- python -m pip install --upgrade pip wheel build
- python -m build --sdist --wheel
+ uv build --sdist --wheel
- uses: actions/upload-artifact@v4
with:
diff --git a/docs/docker.rst b/docs/docker.rst
index 419a3a623..16bc351db 100644
--- a/docs/docker.rst
+++ b/docs/docker.rst
@@ -65,7 +65,13 @@ The ``ocrmypdf`` image is also available, but is deprecated and will be removed
in the future.
OCRmyPDF will use all available CPU cores. See the Docker documentation for
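-`adjusting memory and CPU on other platforms <https://docs.docker.com/docker-for-mac/#advanced>`__.
+`adjusting memory and CPU on other platforms <https://docs.docker.com/docker-for-mac/#advanced>`__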
+if you are using Docker on macOS or Windows, where you may need to manually assign
+more resources. On Linux, all resources will be available automatically.
+
+The underlying operating system and other details in Docker images are subject
+to change at minor releases. If you are modifying the image, you should pin
+the version you intend to use.
Using the Docker image on the command line
==========================================
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 38e31e976..f551fa455 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -30,6 +30,18 @@ OCRmyPDF typically supports the three most recent Python versions.
.. |OCRmyPDF PyPI| image:: https://img.shields.io/pypi/v/ocrmypdf.svg
+v16.6.0
+=======
+
+- Fixed Alpine Docker image by enforcing Alpine 3.19. Alpine 3.20 includes a
+ defective version of Tesseract OCR and so is not usable.
+- Upgraded Ubuntu Docker image to use Ubuntu 24.04.
+- Build and test scripts/actions switched to uv.
+- When running in a container, we now remind the user that temporary folders
+ are inside the container and may not be accessible.
+- Fixed an error detecting the version of unpaper 7.0.0.
+- Fixed Linux test coverage matrix, which was missing some key versions.
+
v16.5.0
=======
diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py
index 9c9c21abc..a5a92f4c8 100644
--- a/src/ocrmypdf/_exec/unpaper.py
+++ b/src/ocrmypdf/_exec/unpaper.py
@@ -48,7 +48,7 @@ def __init__(
def version() -> Version:
- return Version(get_version('unpaper', regex=r'(?m).*(\d+(\.\d+)(\.\d+)?)'))
+ return Version(get_version('unpaper', regex=r'(?m).*?(\d+(\.\d+)(\.\d+)?)'))
@contextmanager
diff --git a/src/ocrmypdf/subprocess/__init__.py b/src/ocrmypdf/subprocess/__init__.py
index 0b7a1c53b..c9a0700f6 100644
--- a/src/ocrmypdf/subprocess/__init__.py
+++ b/src/ocrmypdf/subprocess/__init__.py
@@ -215,8 +215,10 @@ def get_version(
OLD_VERSION_REQUIRED_FOR = '''
OCRmyPDF requires '{program}' {need_version} or higher when run with the
-{required_for} arguments. If you omit these arguments, OCRmyPDF may be able to
-proceed. For best results, install the program.
+{required_for} arguments. {program} {found_version} is installed.
+
+If you omit these arguments, OCRmyPDF may be able to
+proceed. For best results, update the program.
'''
OSX_INSTALL_ADVICE = '''