Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
4gac committed Dec 10, 2024
0 parents commit 6845a6a
Show file tree
Hide file tree
Showing 16 changed files with 806 additions and 0 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/code-style.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
# Lint workflow: runs Ruff twice on every push and pull request, first to
# verify formatting and then to verify the selected lint rule families.
name: Code style

on:
  - push
  - pull_request

jobs:
  code-style:
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      # `format --check` only reports files that would be reformatted;
      # it does not modify anything.
      - name: Check formatting
        uses: chartboost/ruff-action@v1
        with:
          args: "format --check"

      # Rule families passed to Ruff via --select: F, E, W, I and PLW.
      - name: Check coding style
        uses: chartboost/ruff-action@v1
        with:
          args: "check --select F,E,W,I,PLW"
81 changes: 81 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
---
# Builds the Docker image and pushes it to Docker Hub.
#
# Triggers: pushes to master, pushes of version tags (v*.*.*), and manual runs.
# Pushes to master and manual runs publish the `latest` tag; a version-tag
# push publishes an image tagged with the git tag name.
name: Build and Publish Docker Image

on:
  push:
    branches:
      - master
    tags:
      - 'v*.*.*'  # Matches version tags like v1.0.0
  workflow_dispatch:

env:
  DOCKER_HUB_NAMESPACE: pdfix
  DOCKER_HUB_REPOSITORY: table-summary-openai

jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    runs-on: ubuntu-latest
    permissions:
      packages: write
      contents: read
      attestations: write
      id-token: write
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Exports `tag` through GITHUB_ENV so later steps can read it both as
      # `${{ env.tag }}` and as the shell variable `$tag`.
      - name: Set Docker tag
        id: vars
        run: |
          if [[ "$GITHUB_EVENT_NAME" == 'workflow_dispatch' || "$GITHUB_REF" == refs/heads/master ]]; then
            echo "tag=latest" >> "$GITHUB_ENV"
          else
            echo "tag=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV"
          fi

      # The tag comes from a git ref name, which may contain shell
      # metacharacters. Read it from the environment and quote it instead of
      # interpolating `${{ env.tag }}` into the script text.
      - name: Update config.json version
        run: chmod +x update_version.sh && ./update_version.sh "$tag"

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_HUB_REPOSITORY }}
          tags: ${{ env.tag }}

      - name: Build and push Docker image
        id: push
        uses: docker/build-push-action@v6
        with:
          platforms: linux/amd64  # ,linux/arm64
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

      # - name: Generate artifact attestation
      #   uses: actions/attest-build-provenance@v1
      #   with:
      #     subject-name: index.docker.io/${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_HUB_REPOSITORY }}:${{ env.tag }}
      #     subject-digest: ${{ steps.push.outputs.digest }}
      #     push-to-registry: true

      - name: Docker Hub Description
        uses: peter-evans/dockerhub-description@v4
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
          repository: ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_HUB_REPOSITORY }}
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Byte-compiled / optimized / DLL files
__pycache__/
# Editor settings (presumably the VS Code workspace folder)
.vscode

# Build and packaging output directories (presumably; confirm against the build tooling)
build/
dist/
# Virtual-environment directories (presumably; the Dockerfile creates one named venv)
env/
venv/
# NOTE(review): the trailing slash matches directories only, so a plain `.env`
# file would NOT be ignored by this pattern - confirm that is intended.
.env/
32 changes: 32 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Use the official Debian slim image as a base
FROM debian:bookworm-slim

# Install Python together with pip and venv support, then remove the apt
# caches in the same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get install -y \
        python3 \
        python3-pip \
        python3-venv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /usr/tab-sum-openai/

# Use an absolute path for the virtual environment so the PATH entry below
# resolves regardless of the current working directory (the container is
# typically started with `-w /data`).
ENV VIRTUAL_ENV=/usr/tab-sum-openai/venv

# Create the virtual environment and put its executables first on PATH
RUN python3 -m venv "$VIRTUAL_ENV"
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Install the Python dependencies before copying the sources so that this
# layer stays cached when only the source code changes.
COPY requirements.txt /usr/tab-sum-openai/
RUN pip install --no-cache-dir -r requirements.txt

# Copy the source code and configuration into the container
COPY src/ /usr/tab-sum-openai/src/
COPY config.json /usr/tab-sum-openai/

# Run main.py with the virtual environment's interpreter
ENTRYPOINT ["/usr/tab-sum-openai/venv/bin/python3", "/usr/tab-sum-openai/src/main.py"]
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Table Summary Generator OpenAI

A Docker image that automatically generates and applies table summaries to PDF files using PDFix SDK and OpenAI.

## Table of Contents

- [Table Summary Generator OpenAI](#table-summary-generator-openai)
- [Table of Contents](#table-of-contents)
- [Getting Started](#getting-started)
- [Run using Command Line Interface](#run-using-command-line-interface)
- [Run Description Generation using REST API](#run-description-generation-using-rest-api)
- [Exporting Configuration for Integration](#exporting-configuration-for-integration)
- [License \& libraries used](#license--libraries-used)
- [Help \& Support](#help--support)


## Getting Started

To use this Docker application, you'll need to have Docker installed on your system. If Docker is not installed, please follow the instructions on the [official Docker website](https://docs.docker.com/get-docker/) to install it.


## Run using Command Line Interface

To run the Docker container as a CLI, share the folder containing the PDF to process using the `-v` parameter and pass the file name with `-i`. In this example, the current folder is used.

```bash
docker run -v $(pwd):/data/ -w /data pdfix/table-summary-openai:latest detect -i document.pdf -o out.pdf --tags "Table" --openai <api_key> --lang English --overwrite true
```

With an account-based PDFix License add these arguments.
```bash
--name ${LICENSE_NAME} --key ${LICENSE_KEY}
```
Contact support for more information.

The first run will pull the Docker image, which may take some time. Make your own image for more advanced use.

For more detailed information about the available command-line arguments, you can run the following command:

```bash
docker run --rm pdfix/table-summary-openai:latest --help
```

## Run Description Generation using REST API
Coming soon. Please contact us.

### Exporting Configuration for Integration
To export the configuration JSON file, use the following command:
```bash
docker run -v $(pwd):/data -w /data --rm pdfix/table-summary-openai:latest config -o config.json
```

## License & libraries used
- PDFix SDK - https://pdfix.net/terms
- OpenAI API - https://openai.com/policies/

The trial version of the PDFix SDK may apply a watermark on the page and redact random parts of the PDF, including the scanned image in the background. Contact us to get an evaluation or production license.

## Help & Support
To obtain a PDFix SDK license or report an issue please contact us at support@pdfix.net.
For more information visit https://pdfix.net


135 changes: 135 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
{
"actions": [
{
"name": "Table Summary Generator OpenAI",
"desc": "A Docker image that automatically generates and applies table summary to PDF files using PDFix SDK and OpenAI",
"version": {
"major": 1,
"minor": 0
},
"icon": "add_table_summary",
"category": "Tags",
"configurations": [
{
"program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/table-summary-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --tags \"${tag_name}\" --openai \"${openai_key}\" --lang \"${lang}\" --overwrite \"${overwrite}\"",
"name": "Generate Table Summary for all specified Tags",
"desc": "Generate Table Summary for all specified Tags"
}
],
"args": [
{
"name": "input_pdf",
"desc": "Input PDF file",
"flags": 2,
"type": "file_path",
"ext": "pdf",
"value": ""
},
{
"name": "output_pdf",
"desc": "Output PDF file",
"flags": 4,
"type": "file_path",
"ext": "pdf",
"value": ""
},
{
"title": "OpenAI API Key",
"name": "openai_key",
"desc": "OpenAI API Key",
"type": "string",
"value": ""
},
{
"title": "Tag Name",
"name": "tag_name",
"desc": "Tag name defined by a regular expression",
"type": "string",
"value": "Table"
},
{
"title": "Language",
"name": "lang",
"desc": "Table summary language",
"type": "string",
"value": "English",
"set": [
{
"value": "English"
},
{
"value": "Deutsch"
},
{
"value": "Español"
},
{
"value": "Français"
},
{
"value": "Italiano"
},
{
"value": "Português"
},
{
"value": "Nederlands"
},
{
"value": "Polski"
},
{
"value": "Русский"
},
{
"value": "中文"
},
{
"value": "日本語"
},
{
"value": "한국어"
},
{
"value": "Türkçe"
},
{
"value": "Svenska"
},
{
"value": "Norsk"
},
{
"value": "Suomi"
},
{
"value": "Čeština"
},
{
"value": "Magyar"
},
{
"value": "Slovenčina"
},
{
"value": "Українська"
},
{
"value": "Ελληνικά"
},
{
"value": "Română"
}
]
},
{
"title": "Overwrite",
"desc": "Replace the table summary if it exists",
"name": "overwrite",
"type": "bool",
"value": false
}
]
}
]
}
Binary file added example/changement_climatique.pdf
Binary file not shown.
Binary file added example/climate_change.pdf
Binary file not shown.
Binary file added example/letak_jasna.pdf
Binary file not shown.
Binary file added example/misc_tagged.pdf
Binary file not shown.
Binary file added example/out_tagged.pdf
Binary file not shown.
Loading

0 comments on commit 6845a6a

Please sign in to comment.