Skip to content

Commit

Permalink
Use PyMuPDF wheels for non-ARM architectures.
Browse files Browse the repository at this point in the history
This removes the need to build PyMuPDF ourselves, but only on non-ARM
architectures, since prebuilt wheels for ARM are not provided yet.

Changes the `Dockerfile` and the `build-image.py` script, introducing a new
`ARCH` build argument that controls whether the wheels are built from source.
  • Loading branch information
almet committed Sep 10, 2024
1 parent 2bd09e9 commit e4af44c
Show file tree
Hide file tree
Showing 4 changed files with 292 additions and 254 deletions.
16 changes: 14 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,23 @@
# Build PyMuPDF

# Stage that installs PyMuPDF, compiling it when no pre-built wheel is available.
# NOTE(review): the ":latest" tag is unpinned while paths in this file hard-code
# python3.12 — confirm the base image keeps shipping that Python version.
FROM alpine:latest AS pymupdf-build

# Architecture name supplied with --build-arg; the arch-specific RUN step in this
# stage uses it to decide whether the PyMuPDF build toolchain is installed.
ARG ARCH
# Path of the hash-checked requirements file inside the build context.
ARG REQUIREMENTS_TXT

RUN mkdir -p /usr/lib/python3.12/site-packages/PyMuPDFb.libs
# Install PyMuPDF via hash-checked requirements file
# The REQUIREMENTS_TXT build argument names the file in the build context; it is
# staged at /tmp/requirements.txt for the pip install step.
COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt
RUN apk --no-cache add linux-headers g++ linux-headers gcc make python3-dev py3-pip clang-dev

# PyMuPDF provides non-arm musl wheels only.
# Only install the build dependencies when the wheel actually has to be built.
# On arm64 the PyMuPDFb.libs directory is also created by hand: it is copied
# later on, but only the pre-built wheels create it.
# NOTE(review): only the literal "arm64" selects the build path; Linux ARM hosts
# commonly report "aarch64" — confirm whether that value should match as well.
RUN case "$ARCH" in \
    "arm64") \
        mkdir -p /usr/lib/python3.12/site-packages/PyMuPDFb.libs/ \
        && apk --no-cache add \
            clang-dev \
            g++ \
            gcc \
            linux-headers \
            make \
            py3-pip \
            python3-dev ;; \
    *) \
        apk --no-cache add py3-pip ;; \
    esac
# Install the requirements staged at /tmp/requirements.txt into the system Python.
# --require-hashes makes pip refuse any package whose hash is not listed in the file.
RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt


Expand Down Expand Up @@ -63,6 +74,7 @@ RUN apk --no-cache -U upgrade && \

# Pull the PyMuPDF packages (fitz, pymupdf) and the PyMuPDFb.libs directory out of
# the pymupdf-build stage.
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/PyMuPDFb.libs/ /usr/lib/python3.12/site-packages/PyMuPDFb.libs
# Pull /usr/share/tessdata and /libreoffice_ext from the tessdata-dl and
# h2orestart-dl stages respectively.
COPY --from=tessdata-dl /usr/share/tessdata/ /usr/share/tessdata
COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext

Expand Down
13 changes: 12 additions & 1 deletion install/common/build-image.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
elif platform.system() == "Linux":
CONTAINER_RUNTIME = "podman"

ARCH = platform.machine()


def main():
parser = argparse.ArgumentParser()
Expand All @@ -37,6 +39,8 @@ def main():
)
args = parser.parse_args()

print(f"Building for architecture '{ARCH}'")

print("Exporting container pip dependencies")
with ContainerPipDependencies():
print("Pulling base image")
Expand All @@ -57,6 +61,8 @@ def main():
BUILD_CONTEXT,
"--build-arg",
f"REQUIREMENTS_TXT={REQUIREMENTS_TXT}",
"--build-arg",
f"ARCH={ARCH}",
"-f",
"Dockerfile",
"--tag",
Expand Down Expand Up @@ -121,7 +127,12 @@ def __enter__(self):
# XXX Export container dependencies and exclude pymupdfb since it is not needed in container
req_txt_pymupdfb_stripped = container_requirements_txt.split("pymupdfb")[0]
with open(Path(BUILD_CONTEXT) / REQUIREMENTS_TXT, "w") as f:
f.write(req_txt_pymupdfb_stripped)
if ARCH == "arm64":
# PyMuPDF needs to be built on ARM64 machines
# But is already provided as a prebuilt-wheel on other architectures
f.write(req_txt_pymupdfb_stripped)
else:
f.write(container_requirements_txt)

def __exit__(self, exc_type, exc_value, exc_tb):
print("Leaving the context...")
Expand Down
Loading

0 comments on commit e4af44c

Please sign in to comment.