Skip to content

Commit

Permalink
Merge branch 'feature/docker-ubuntu-24'
Browse files Browse the repository at this point in the history
  • Loading branch information
jbarlow83 committed Oct 28, 2024
2 parents 6c6cbfd + 92c5200 commit bf62562
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 110 deletions.
43 changes: 17 additions & 26 deletions .docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,43 +1,35 @@
# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-FileCopyrightText: 2024 James R. Barlow
# SPDX-License-Identifier: MPL-2.0

FROM ubuntu:22.04 as base
FROM ubuntu:24.04 AS base

ENV LANG=C.UTF-8
ENV TZ=UTC
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

RUN apt-get update && apt-get install -y --no-install-recommends \
python3 \
libqpdf-dev \
zlib1g \
liblept5
python-is-python3

FROM base as builder
FROM base AS builder

# Note we need leptonica here to build jbig2
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential autoconf automake libtool \
libleptonica-dev \
zlib1g-dev \
python3-dev \
python3-distutils \
libffi-dev \
ca-certificates \
curl \
git \
libcairo2-dev \
pkg-config

# Get the latest pip (Ubuntu version doesn't support manylinux2010)
RUN \
curl https://bootstrap.pypa.io/get-pip.py | python3

# Compile and install jbig2
# Needs libleptonica-dev, zlib1g-dev
RUN \
mkdir jbig2 \
&& curl -L https://github.com/agl/jbig2enc/archive/ea6a40a.tar.gz | \
&& curl -L https://github.com/agl/jbig2enc/archive/c0141bf.tar.gz | \
tar xz -C jbig2 --strip-components=1 \
&& cd jbig2 \
&& ./autogen.sh && ./configure && make && make install \
Expand All @@ -48,23 +40,24 @@ COPY . /app

WORKDIR /app

RUN pip3 install --no-cache-dir .[test,webservice,watcher]
RUN curl -LsSf https://astral.sh/uv/0.4.27/install.sh | sh

ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy

# Instead of restarting the shell, use uv directly from its installed location.
RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher

FROM base

# For Tesseract 5
RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common gpg-agent
RUN apt-get update && apt-get install -y software-properties-common

RUN add-apt-repository -y ppa:alex-p/tesseract-ocr-devel

RUN apt-get update && apt-get install -y --no-install-recommends \
ghostscript \
fonts-droid-fallback \
jbig2dec \
img2pdf \
libsm6 libxext6 libxrender-dev \
pngquant \
python-is-python3 \
tesseract-ocr \
tesseract-ocr-chi-sim \
tesseract-ocr-deu \
Expand All @@ -80,11 +73,9 @@ WORKDIR /app
COPY --from=builder /usr/local/lib/ /usr/local/lib/
COPY --from=builder /usr/local/bin/ /usr/local/bin/

COPY --from=builder /app/misc/webservice.py /app/
COPY --from=builder /app/misc/watcher.py /app/
COPY --from=builder --chown=app:app /app /app

ENV PATH="/app/.venv/bin:${PATH}"

# Copy minimal project files to get the test suite.
COPY --from=builder /app/pyproject.toml /app/README.md /app/
COPY --from=builder /app/tests /app/tests
ENTRYPOINT ["/app/.venv/bin/ocrmypdf"]

ENTRYPOINT ["/usr/local/bin/ocrmypdf"]
44 changes: 9 additions & 35 deletions .docker/Dockerfile.alpine
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# Details
# https://gitlab.alpinelinux.org/alpine/aports/-/issues/16143
# https://github.com/ocrmypdf/OCRmyPDF/issues/1395
FROM alpine:3.19 as base
FROM alpine:3.19 AS base

ENV LANG=C.UTF-8
ENV TZ=UTC
Expand All @@ -17,40 +17,24 @@ RUN apk add --no-cache \
python3 \
zlib

FROM base as builder
FROM base AS builder

RUN apk add --no-cache \
ca-certificates \
git \
python3-dev \
py3-pip

# On arm64, we need to build cffi from source.
ARG TARGETPLATFORM

RUN if [ "${TARGETPLATFORM}" == "linux/arm64" ]; then \
apk add --no-cache \
build-base \
autoconf \
automake \
libtool \
zlib-dev \
libffi-dev \
cairo-dev \
pkgconfig \
; \
fi
curl

COPY . /app

WORKDIR /app

RUN python3 -m venv .venv
RUN curl -LsSf https://astral.sh/uv/0.4.27/install.sh | sh

RUN source .venv/bin/activate \
&& python3 -m pip install --no-cache-dir --upgrade pip \
&& python3 -m pip install --no-cache-dir wheel \
&& python3 -m pip install --no-cache-dir .[test,webservice,watcher]
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy

# Instead of restarting the shell, use uv directly from its installed location.
RUN /root/.cargo/bin/uv sync --extra test --extra webservice --extra watcher

FROM base

Expand All @@ -73,17 +57,7 @@ RUN apk add --no-cache \

WORKDIR /app

COPY --from=builder /usr/local/lib/ /usr/local/lib/
COPY --from=builder /usr/local/bin/ /usr/local/bin/

COPY --from=builder /app/.venv/ /app/.venv/

COPY --from=builder /app/misc/webservice.py /app/
COPY --from=builder /app/misc/watcher.py /app/

# Copy minimal project files to get the test suite.
COPY --from=builder /app/pyproject.toml /app/README.md /app/
COPY --from=builder /app/tests /app/tests
COPY --from=builder --chown=app:app /app /app

ENV PATH="/app/.venv/bin:${PATH}"

Expand Down
86 changes: 41 additions & 45 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,16 @@ jobs:
test_linux:
name: Test ${{ matrix.os }} with Python ${{ matrix.python }}
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.experimental }}
strategy:
matrix:
experimental: [false]
os: [ubuntu-22.04, ubuntu-24.04]
python: ["3.10", "3.11", "3.12", "3.13"]
include:
- os: ubuntu-22.04
tesseract_ppa: "ppa"
python: "3.10"
- os: ubuntu-22.04
python: "3.11"
- os: ubuntu-22.04
python: "3.10"
tesseract5: true
- os: ubuntu-latest
python: "3.12"
tesseract5: true
- os: ubuntu-latest
- os: ubuntu-24.04
python: "pypy3.10"
- os: ubuntu-22.04
python: "3.13"
experimental: true

env:
OS: ${{ matrix.os }}
Expand All @@ -49,15 +39,18 @@ jobs:
with:
fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags

- uses: actions/setup-python@v5
name: Setup Python
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "0.4.27"

- name: "Set up Python"
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
cache: "pip"
allow-prereleases: ${{ matrix.experimental }}

- name: Install Tesseract 5
if: matrix.tesseract5
- name: Install Tesseract from PPA
if: matrix.tesseract_ppa
run: |
sudo add-apt-repository -y ppa:alex-p/tesseract-ocr-devel
Expand Down Expand Up @@ -91,20 +84,19 @@ jobs:
- name: Install Python packages
run: |
python -m pip install --upgrade pip wheel
python -m pip install --prefer-binary .[test]
uv sync --extra test
- name: Report versions
run: |
tesseract --version
gs --version
pngquant --version
unpaper --version
img2pdf --version
uv run img2pdf --version
- name: Test
run: |
python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
Expand All @@ -121,7 +113,7 @@ jobs:
strategy:
matrix:
os: [macos-latest, macos-13] # macos-latest is arm64, macos-13 is x86_64
python: ["3.10", "3.11", "3.12"]
python: ["3.10", "3.11", "3.12", "3.13"]

env:
OS: ${{ matrix.os }}
Expand All @@ -144,27 +136,30 @@ jobs:
pngquant \
tesseract
- uses: actions/setup-python@v5
name: Setup Python
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "0.4.27"

- name: "Set up Python"
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
cache: "pip"

- name: Install Python packages
run: |
python -m pip install --upgrade pip wheel
python -m pip install --prefer-binary .[test]
uv sync --extra test
- name: Report versions
run: |
tesseract --version
gs --version
pngquant --version
img2pdf --version
uv run img2pdf --version
- name: Test
run: |
python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
Expand All @@ -180,7 +175,7 @@ jobs:
strategy:
matrix:
os: [windows-latest]
python: ["3.10", "3.11", "3.12"]
python: ["3.10", "3.11", "3.12", "3.13"]

env:
OS: ${{ matrix.os }}
Expand All @@ -191,11 +186,15 @@ jobs:
with:
fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags

- uses: actions/setup-python@v5
name: Setup Python
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "0.4.27"

- name: "Set up Python"
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
cache: "pip"

- name: Install system packages
run: |
Expand All @@ -204,12 +203,11 @@ jobs:
- name: Install Python packages
run: |
python -m pip install --upgrade pip wheel
python -m pip install --prefer-binary .[test]
uv sync --extra test
- name: Test
run: |
python -m pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
uv run pytest --cov-report xml --cov=ocrmypdf --cov=tests/ -n0 tests/
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
Expand All @@ -227,16 +225,14 @@ jobs:
with:
fetch-depth: "0" # 0=all, needed for setuptools-scm to resolve version tags

- uses: actions/setup-python@v5
name: Setup Python
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
python-version: "3.10"
cache: "pip"
version: "0.4.27"

- name: Make wheels and sdist
run: |
python -m pip install --upgrade pip wheel build
python -m build --sdist --wheel
uv build --sdist --wheel
- uses: actions/upload-artifact@v4
with:
Expand Down
8 changes: 7 additions & 1 deletion docs/docker.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,13 @@ The ``ocrmypdf`` image is also available, but is deprecated and will be removed
in the future.

OCRmyPDF will use all available CPU cores. See the Docker documentation for
`adjusting memory and CPU on other platforms <https://docs.docker.com/config/containers/resource_constraints/>`__.
`adjusting memory and CPU on other platforms <https://docs.docker.com/config/containers/resource_constraints/>`__
if you are using Docker on macOS or Windows, where you may need to manually assign
more resources. On Linux, all resources will be available automatically.

The underlying operating system and other details of the Docker images are subject
to change in minor releases. If you are modifying the image, you should pin
the version you intend to use.

Using the Docker image on the command line
==========================================
Expand Down
12 changes: 12 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ OCRmyPDF typically supports the three most recent Python versions.

.. |OCRmyPDF PyPI| image:: https://img.shields.io/pypi/v/ocrmypdf.svg

v16.6.0
=======

- Fixed Alpine Docker image by pinning the base image to Alpine 3.19. Alpine 3.20
includes a defective version of Tesseract OCR and so is not usable.
- Upgraded Ubuntu Docker image to use Ubuntu 24.04.
- Build and test scripts/actions switched to uv.
- When running in a container, we now remind the user that temporary folders
are inside the container and may not be accessible.
- Fixed an error detecting the version of unpaper 7.0.0.
- Fixed Linux test coverage matrix, which was missing some key versions.

v16.5.0
=======

Expand Down
Loading

0 comments on commit bf62562

Please sign in to comment.