
Commit af832fb
ui updates
cleanup
piizei committed Jun 21, 2024
1 parent e5c02c1 commit af832fb
Showing 10 changed files with 247 additions and 154 deletions.
1 change: 1 addition & 0 deletions .dockerignore
@@ -0,0 +1 @@
.env
44 changes: 44 additions & 0 deletions .github/workflows/docker-image-ui.yml
@@ -0,0 +1,44 @@
name: Publish Docker image

on:
  release:
    types: [published]
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}-ui

jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Build and push Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
          context: .
          file: ./DockerfileUi
          push: true
          platforms: linux/amd64,linux/arm64
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
3 changes: 1 addition & 2 deletions README.md
@@ -38,8 +38,7 @@ To run it locally, install
```bash
poetry install --with ui
```
an run `./ui.sh` in the root folder. (env is picked from .env file in the root folder)

then run `./ui.sh` in the root folder. (env is picked from .env file in the root folder)

## Develop

2 changes: 1 addition & 1 deletion ai_ocr/azure/doc_intelligence.py
@@ -5,7 +5,7 @@
from ai_ocr.azure.config import get_config

config = get_config()
kwargs = {"api_version": "2023-10-31-preview"}
kwargs = {"api_version": "2024-02-29-preview"}
client = document_analysis_client = DocumentIntelligenceClient(endpoint=config["doc_intelligence_endpoint"],
                                                                credential=AzureKeyCredential(config["doc_intelligence_key"]),
                                                                **kwargs)
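For context, the pinned preview `api_version` is passed straight into the client constructor. A minimal usage sketch, not taken from this repo: the endpoint and key are placeholders, and the keyword for the document body in the analyze call is an assumption, since it has changed between preview releases of azure-ai-documentintelligence.

```python
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.core.credentials import AzureKeyCredential

# Hypothetical endpoint and key; in this repo they come from get_config().
client = DocumentIntelligenceClient(
    endpoint="https://<your-resource>.cognitiveservices.azure.com/",
    credential=AzureKeyCredential("<your-key>"),
    api_version="2024-02-29-preview",
)

# Assumed call shape for the preview SDKs; the keyword for the document body
# (here analyze_request) differs across releases, so check the installed version.
with open("sample.pdf", "rb") as f:
    poller = client.begin_analyze_document(
        "prebuilt-layout", analyze_request=f, content_type="application/octet-stream"
    )
print(poller.result().content)
```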
4 changes: 2 additions & 2 deletions ai_ocr/chains.py
@@ -21,11 +21,11 @@ def get_structured_data(pages: str, prompt: str, json_schema: str, images=[]) ->
Here is the output schema:
```""" + json_schema + "```"

    prompt = ChatPromptTemplate.from_messages(messages)
    if len(images) > 0:
        prompt.append(HumanMessage("There are also images available that you can use to verify the ocr information."))
        prompt.append(HumanMessage("Use these images to verify the ocr information."))
        for img in images:
            print("adding image")
            prompt.append(
                HumanMessage(content=[{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img}"}}]))
    model = get_llm()
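The images appended above are base64 strings wrapped in data URLs. A minimal sketch of what a helper like `load_image` is assumed to do (the real implementation in ai_ocr may resize or re-encode):

```python
import base64

def load_image(path: str) -> str:
    # Read an image file and return its bytes as a base64 string,
    # suitable for a data:image/jpeg;base64,... URL as used above.
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")
```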
27 changes: 25 additions & 2 deletions ai_ocr/main.py
@@ -32,7 +32,8 @@ class ExtractionOutput(BaseModel):
async def extract(
        pdf: Annotated[UploadFile, File(...)],
        prompt: Annotated[UploadFile, File(...)],
        json_schema: Annotated[UploadFile, File(...)],
        json_schema: Annotated[UploadFile, File(...)]

):
    pdf = await pdf.read()
    prompt = await prompt.read()
@@ -41,12 +42,34 @@ async def extract(
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(pdf)
        tmp_file_name = tmp.name
    response = process_pdf(file_to_ocr=tmp_file_name,
    try:
        response = process_pdf(file_to_ocr=tmp_file_name,
                               prompt=prompt.decode(),
                               json_schema=json_schema.decode())
    except Exception as e:
        response = {"error": str(e)}
    os.remove(tmp_file_name)
    return Response(content=json.dumps(response), media_type='application/json')

@app.post("/extract/{type}")
async def extract_type(
        pdf: Annotated[UploadFile, File(...)],
        prompt: Annotated[UploadFile, File(...)],
        json_schema: Annotated[UploadFile, File(...)],
        type: str
):
    file = await pdf.read()
    prompt = await prompt.read()
    json_schema = await json_schema.read()
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{type}") as tmp:
        tmp.write(pdf)
        tmp_file_name = tmp.name
    response = process_pdf(file_to_ocr=tmp_file_name,
                           prompt=prompt.decode(),
                           json_schema=json_schema.decode(),
                           type=type)
    os.remove(tmp_file_name)
    return Response(content=json.dumps(response), media_type='application/json')

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
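A quick way to exercise the `/extract` endpoint locally, as a hedged sketch: the file names and port are placeholders, and the multipart field names follow the handler's parameter names.

```python
import requests

# Hypothetical local smoke test for the endpoint above; file names and the
# port are assumptions. /extract expects a PDF; /extract/{type} also accepts
# image types such as png or jpg.
with open("document.pdf", "rb") as pdf, \
        open("prompt.txt", "rb") as prompt, \
        open("schema.json", "rb") as schema:
    files = {"pdf": pdf, "prompt": prompt, "json_schema": schema}
    resp = requests.post("http://localhost:8000/extract", files=files)

print(resp.json())
```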
32 changes: 19 additions & 13 deletions ai_ocr/process.py
@@ -11,22 +11,28 @@
from ai_ocr.model import Config


def process_pdf(file_to_ocr: str, prompt: str, json_schema: str, config: Config = Config()) -> any:
def process_pdf(file_to_ocr: str, prompt: str, json_schema: str, config: Config = Config(), type: str = "pdf") -> any:
    ocr_result = get_ocr_results(file_to_ocr)
    pdf_path, _ = os.path.split(file_to_ocr)
    if config.vision_capable:
    imgs = []
    reduced_imgs = []
    if config.vision_capable and type == "pdf":
        pdf_to_pages(file_to_ocr)
        imgs = glob.glob(os.path.join(pdf_path, "page*.png"))
        imgs = glob.glob(os.path.join(pdf_path, "page*.png"))
    if type == "png" or type == "jpg" or type == "jpeg":
        imgs = [file_to_ocr]
        # limit imgs by default
        imgs = imgs[:config.max_images]
        imgs = list(map(lambda x: load_image(x), imgs))
        # Check if images total size over 20MB
        total_size = get_size_of_base64_images(imgs)
        # Reduce image sizes if total size is over 20MB
        max_size = config.gpt_vision_limit_mb * 1024 * 1024 # 20MB
        reduced_imgs = imgs
        while get_size_of_base64_images(reduced_imgs) > max_size:
            # Reduce the size of the images by removing the last one
            reduced_imgs = reduced_imgs[:-1]
    if imgs:
        imgs = imgs[:config.max_images]
        imgs = list(map(lambda x: load_image(x), imgs))
        # Check if images total size over 20MB
        total_size = get_size_of_base64_images(imgs)
        # Reduce image sizes if total size is over 20MB
        max_size = config.gpt_vision_limit_mb * 1024 * 1024 # 20MB
        reduced_imgs = imgs
        while get_size_of_base64_images(reduced_imgs) > max_size:
            # Reduce the size of the images by removing the last one
            reduced_imgs = reduced_imgs[:-1]
    structured = get_structured_data(ocr_result.content, prompt, json_schema, reduced_imgs)
    print(structured.content)
    return parse_json_markdown(structured.content)
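The trimming loop relies on `get_size_of_base64_images`. A sketch of what that helper is assumed to compute; the real one may measure the encoded rather than decoded bytes.

```python
import base64

def get_size_of_base64_images(images: list[str]) -> int:
    # Total decoded size, in bytes, of a list of base64-encoded images.
    return sum(len(base64.b64decode(img)) for img in images)
```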