-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
4gac
committed
Dec 10, 2024
0 parents
commit 6845a6a
Showing
16 changed files
with
806 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Lint workflow: runs the Ruff formatter check and the Ruff linter
# on every push and on every pull request.
name: Code style

on:
  - push
  - pull_request

jobs:
  code-style:
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      # Formatter in check mode: reports files that are not formatted
      # instead of rewriting them.
      - name: Check formatting
        uses: chartboost/ruff-action@v1
        with:
          args: "format --check"

      # Linter restricted to the rule prefixes listed after --select.
      - name: Check coding style
        uses: chartboost/ruff-action@v1
        with:
          args: "check --select F,E,W,I,PLW"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
# Builds the Docker image from ./Dockerfile and pushes it to Docker Hub.
# Triggered by pushes to master, by version tags (vX.Y.Z) and manually.
# The image tag is "latest" for master / manual runs and the git tag name
# for tag pushes.
name: Build and Publish Docker Image

on:
  push:
    branches:
      - master
    tags:
      - 'v*.*.*' # Matches version tags like v1.0.0
  workflow_dispatch:

env:
  DOCKER_HUB_NAMESPACE: pdfix
  DOCKER_HUB_REPOSITORY: table-summary-openai

jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    runs-on: ubuntu-latest
    # NOTE(review): attestations/id-token look like they are only needed by the
    # commented-out attestation step below, and packages: write is not used by a
    # Docker Hub push - consider trimming to contents: read once confirmed.
    permissions:
      packages: write
      contents: read
      attestations: write
      id-token: write
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Writes `tag` to $GITHUB_ENV so later steps can read it both as
      # ${{ env.tag }} (in `with:` inputs) and as the shell variable $tag.
      - name: Set Docker tag
        id: vars
        run: |
          if [[ "$GITHUB_EVENT_NAME" == 'workflow_dispatch' || "$GITHUB_REF" == refs/heads/master ]]; then
            echo "tag=latest" >> "$GITHUB_ENV"
          else
            echo "tag=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
          fi

      # The tag is passed as a quoted shell variable rather than being expanded
      # with ${{ }} inside the script: the value derives from the pushed ref name,
      # and template expansion into a `run:` script would let a crafted tag name
      # inject shell commands.
      - name: Update config.json version
        run: chmod +x update_version.sh && ./update_version.sh "$tag"

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_HUB_REPOSITORY }}
          tags: ${{ env.tag }}

      - name: Build and push Docker image
        id: push
        uses: docker/build-push-action@v6
        with:
          platforms: linux/amd64 #,linux/arm64
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

      # - name: Generate artifact attestation
      #   uses: actions/attest-build-provenance@v1
      #   with:
      #     subject-name: index.docker.io/${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_HUB_REPOSITORY }}:${{ env.tag }}
      #     subject-digest: ${{ steps.push.outputs.digest }}
      #     push-to-registry: true

      - name: Docker Hub Description
        uses: peter-evans/dockerhub-description@v4
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
          repository: ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_HUB_REPOSITORY }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
.vscode | ||
|
||
build/ | ||
dist/ | ||
env/ | ||
venv/ | ||
.env/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Use the official Debian slim image as a base
FROM debian:bookworm-slim

# Install Python and the venv tooling.
# --no-install-recommends keeps compilers and other recommended extras out of
# the image; ca-certificates is listed explicitly so TLS trust roots are present
# regardless of which packages would otherwise have pulled them in.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        python3 \
        python3-pip \
        python3-venv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /usr/tab-sum-openai/

# Absolute path on purpose: a relative value would put a relative "venv/bin"
# entry on PATH, which stops resolving as soon as the container is started with
# a different working directory (e.g. `docker run -w /data ...`).
ENV VIRTUAL_ENV=/usr/tab-sum-openai/venv

# Create the virtual environment and put its bin/ first on PATH so that `pip`
# and `python3` below refer to the venv.
RUN python3 -m venv "$VIRTUAL_ENV"
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Install dependencies before copying the application files so this layer is
# reused from cache when only the source or config.json changes.
COPY requirements.txt /usr/tab-sum-openai/
RUN pip install --no-cache-dir -r requirements.txt

# Copy the configuration and the source code into the container
COPY config.json /usr/tab-sum-openai/
COPY src/ /usr/tab-sum-openai/src/

ENTRYPOINT ["/usr/tab-sum-openai/venv/bin/python3", "/usr/tab-sum-openai/src/main.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# Table Summary Generator OpenAI | ||
|
||
A Docker image that automatically generates and applies table summaries to PDF files using PDFix SDK and OpenAI. | ||
|
||
## Table of Contents | ||
|
||
- [Table Summary Generator OpenAI](#table-summary-generator-openai) | ||
- [Table of Contents](#table-of-contents) | ||
- [Getting Started](#getting-started) | ||
- [Run using Command Line Interface](#run-using-command-line-interface) | ||
- [Run Description Generation using REST API](#run-description-generation-using-rest-api) | ||
- [Exporting Configuration for Integration](#exporting-configuration-for-integration) | ||
- [License \& libraries used](#license--libraries-used) | ||
- [Help \& Support](#help--support) | ||
|
||
|
||
## Getting Started | ||
|
||
To use this Docker application, you'll need to have Docker installed on your system. If Docker is not installed, please follow the instructions on the [official Docker website](https://docs.docker.com/get-docker/) to install it. | ||
|
||
|
||
## Run using Command Line Interface | ||
|
||
To run the Docker container as a CLI, share the folder containing the PDF to process using the `-v` parameter. In this example, the current folder is used. | ||
|
||
```bash | ||
docker run -v $(pwd):/data/ -w /data pdfix/table-summary-openai:latest detect -i document.pdf -o out.pdf --tags "Table" --openai <api_key> --lang English --overwrite true | ||
``` | ||
|
||
With an account-based PDFix License add these arguments. | ||
```bash | ||
--name ${LICENSE_NAME} --key ${LICENSE_KEY} | ||
``` | ||
Contact support for more information. | ||
|
||
First run will pull the docker image, which may take some time. Make your own image for more advanced use. | ||
|
||
For more detailed information about the available command-line arguments, you can run the following command: | ||
|
||
```bash | ||
docker run --rm pdfix/table-summary-openai:latest --help | ||
``` | ||
|
||
## Run Description Generation using REST API | ||
Coming soon. Please contact us. | ||
|
||
### Exporting Configuration for Integration | ||
To export the configuration JSON file, use the following command: | ||
```bash | ||
docker run -v $(pwd):/data -w /data --rm pdfix/table-summary-openai:latest config -o config.json | ||
``` | ||
|
||
## License & libraries used | ||
- PDFix SDK - https://pdfix.net/terms | ||
- OpenAI API - https://openai.com/policies/ | ||
|
||
Trial version of the PDFix SDK may apply a watermark on the page and redact random parts of the PDF including the scanned image in background. Contact us to get an evaluation or production license. | ||
|
||
## Help & Support | ||
To obtain a PDFix SDK license or report an issue please contact us at support@pdfix.net. | ||
For more information visit https://pdfix.net | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
{ | ||
"actions": [ | ||
{ | ||
"name": "Table Summary Generator OpenAI", | ||
"desc": "A Docker image that automatically generates and applies table summary to PDF files using PDFix SDK and OpenAI", | ||
"version": { | ||
"major": 1, | ||
"minor": 0 | ||
}, | ||
"icon": "add_table_summary", | ||
"category": "Tags", | ||
"configurations": [ | ||
{ | ||
"program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/table-summary-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --tags \"${tag_name}\" --openai \"${openai_key}\" --lang \"${lang}\" --overwrite \"${overwrite}\"", | ||
"name": "Generate Table Summary for all specified Tags", | ||
"desc": "Generate Table Summary for all specified Tags" | ||
} | ||
], | ||
"args": [ | ||
{ | ||
"name": "input_pdf", | ||
"desc": "Input PDF file", | ||
"flags": 2, | ||
"type": "file_path", | ||
"ext": "pdf", | ||
"value": "" | ||
}, | ||
{ | ||
"name": "output_pdf", | ||
"desc": "Output PDF file", | ||
"flags": 4, | ||
"type": "file_path", | ||
"ext": "pdf", | ||
"value": "" | ||
}, | ||
{ | ||
"title": "OpenAI API Key", | ||
"name": "openai_key", | ||
"desc": "OpenAI API Key", | ||
"type": "string", | ||
"value": "" | ||
}, | ||
{ | ||
"title": "Tag Name", | ||
"name": "tag_name", | ||
"desc": "Tag name defined by a regular expression", | ||
"type": "string", | ||
"value": "Table" | ||
}, | ||
{ | ||
"title": "Language", | ||
"name": "lang", | ||
"desc": "Table summary language", | ||
"type": "string", | ||
"value": "English", | ||
"set": [ | ||
{ | ||
"value": "English" | ||
}, | ||
{ | ||
"value": "Deutsch" | ||
}, | ||
{ | ||
"value": "Español" | ||
}, | ||
{ | ||
"value": "Français" | ||
}, | ||
{ | ||
"value": "Italiano" | ||
}, | ||
{ | ||
"value": "Português" | ||
}, | ||
{ | ||
"value": "Nederlands" | ||
}, | ||
{ | ||
"value": "Polski" | ||
}, | ||
{ | ||
"value": "Русский" | ||
}, | ||
{ | ||
"value": "中文" | ||
}, | ||
{ | ||
"value": "日本語" | ||
}, | ||
{ | ||
"value": "한국어" | ||
}, | ||
{ | ||
"value": "Türkçe" | ||
}, | ||
{ | ||
"value": "Svenska" | ||
}, | ||
{ | ||
"value": "Norsk" | ||
}, | ||
{ | ||
"value": "Suomi" | ||
}, | ||
{ | ||
"value": "Čeština" | ||
}, | ||
{ | ||
"value": "Magyar" | ||
}, | ||
{ | ||
"value": "Slovenčina" | ||
}, | ||
{ | ||
"value": "Українська" | ||
}, | ||
{ | ||
"value": "Ελληνικά" | ||
}, | ||
{ | ||
"value": "Română" | ||
} | ||
] | ||
}, | ||
{ | ||
"title": "Overwrite", | ||
"desc": "Replace the alternate text if it exists", | ||
"name": "overwrite", | ||
"type": "bool", | ||
"value": false | ||
} | ||
] | ||
} | ||
] | ||
} |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Oops, something went wrong.