
Commit af832fb
ui updates
cleanup
piizei committed Jun 21, 2024
1 parent e5c02c1 commit af832fb
Showing 10 changed files with 247 additions and 154 deletions.
1 change: 1 addition & 0 deletions .dockerignore
@@ -0,0 +1 @@
.env
44 changes: 44 additions & 0 deletions .github/workflows/docker-image-ui.yml
@@ -0,0 +1,44 @@
name: Publish Docker image

on:
  release:
    types: [published]
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}-ui

jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Build and push Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
          context: .
          file: ./DockerfileUi
          push: true
          platforms: linux/amd64,linux/arm64
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
3 changes: 1 addition & 2 deletions README.md
@@ -38,8 +38,7 @@ To run it locally, install
```bash
poetry install --with ui
```
an run `./ui.sh` in the root folder. (env is picked from .env file in the root folder)

then run `./ui.sh` in the root folder. (env is picked from .env file in the root folder)

## Develop

2 changes: 1 addition & 1 deletion ai_ocr/azure/doc_intelligence.py
@@ -5,7 +5,7 @@
from ai_ocr.azure.config import get_config

config = get_config()
kwargs = {"api_version": "2023-10-31-preview"}
kwargs = {"api_version": "2024-02-29-preview"}
client = document_analysis_client = DocumentIntelligenceClient(endpoint=config["doc_intelligence_endpoint"],
                                                                credential=AzureKeyCredential(config["doc_intelligence_key"]),
                                                                **kwargs)
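For context, the pinned preview `api_version` is passed straight into the client constructor. A minimal usage sketch, not taken from this repo: the endpoint and key are placeholders, and the keyword for the document body in the analyze call is an assumption, since it has changed between preview releases of azure-ai-documentintelligence.

```python
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.core.credentials import AzureKeyCredential

# Hypothetical endpoint and key; in this repo they come from get_config().
client = DocumentIntelligenceClient(
    endpoint="https://<your-resource>.cognitiveservices.azure.com/",
    credential=AzureKeyCredential("<your-key>"),
    api_version="2024-02-29-preview",
)

# Assumed call shape for the preview SDKs; the keyword for the document body
# (here analyze_request) differs across releases, so check the installed version.
with open("sample.pdf", "rb") as f:
    poller = client.begin_analyze_document(
        "prebuilt-layout", analyze_request=f, content_type="application/octet-stream"
    )
print(poller.result().content)
```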
4 changes: 2 additions & 2 deletions ai_ocr/chains.py
@@ -21,11 +21,11 @@ def get_structured_data(pages: str, prompt: str, json_schema: str, images=[]) ->
Here is the output schema:
```""" + json_schema + "```"

    prompt = ChatPromptTemplate.from_messages(messages)
    if len(images) > 0:
        prompt.append(HumanMessage("There are also images available that you can use to verify the ocr information."))
        prompt.append(HumanMessage("Use these images to verify the ocr information."))
        for img in images:
            print("adding image")
            prompt.append(
                HumanMessage(content=[{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img}"}}]))
    model = get_llm()
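The images appended above are base64 strings wrapped in data URLs. A minimal sketch of what a helper like `load_image` is assumed to do (the real implementation in ai_ocr may resize or re-encode):

```python
import base64

def load_image(path: str) -> str:
    # Read an image file and return its bytes as a base64 string,
    # suitable for a data:image/jpeg;base64,... URL as used above.
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")
```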
27 changes: 25 additions & 2 deletions ai_ocr/main.py
@@ -32,7 +32,8 @@ class ExtractionOutput(BaseModel):
async def extract(
        pdf: Annotated[UploadFile, File(...)],
        prompt: Annotated[UploadFile, File(...)],
        json_schema: Annotated[UploadFile, File(...)],
        json_schema: Annotated[UploadFile, File(...)]

):
    pdf = await pdf.read()
    prompt = await prompt.read()
@@ -41,12 +42,34 @@ async def extract(
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(pdf)
        tmp_file_name = tmp.name
    response = process_pdf(file_to_ocr=tmp_file_name,
    try:
        response = process_pdf(file_to_ocr=tmp_file_name,
                               prompt=prompt.decode(),
                               json_schema=json_schema.decode())
    except Exception as e:
        response = {"error": str(e)}
    os.remove(tmp_file_name)
    return Response(content=json.dumps(response), media_type='application/json')

@app.post("/extract/{type}")
async def extract_type(
        pdf: Annotated[UploadFile, File(...)],
        prompt: Annotated[UploadFile, File(...)],
        json_schema: Annotated[UploadFile, File(...)],
        type: str
):
    file = await pdf.read()
    prompt = await prompt.read()
    json_schema = await json_schema.read()
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{type}") as tmp:
        tmp.write(pdf)
        tmp_file_name = tmp.name
    response = process_pdf(file_to_ocr=tmp_file_name,
                           prompt=prompt.decode(),
                           json_schema=json_schema.decode(),
                           type=type)
    os.remove(tmp_file_name)
    return Response(content=json.dumps(response), media_type='application/json')

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
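A quick way to exercise the `/extract` endpoint locally, as a hedged sketch: the file names and port are placeholders, and the multipart field names follow the handler's parameter names.

```python
import requests

# Hypothetical local smoke test for the endpoint above; file names and the
# port are assumptions. /extract expects a PDF; /extract/{type} also accepts
# image types such as png or jpg.
with open("document.pdf", "rb") as pdf, \
        open("prompt.txt", "rb") as prompt, \
        open("schema.json", "rb") as schema:
    files = {"pdf": pdf, "prompt": prompt, "json_schema": schema}
    resp = requests.post("http://localhost:8000/extract", files=files)

print(resp.json())
```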
32 changes: 19 additions & 13 deletions ai_ocr/process.py
@@ -11,22 +11,28 @@
from ai_ocr.model import Config


def process_pdf(file_to_ocr: str, prompt: str, json_schema: str, config: Config = Config()) -> any:
def process_pdf(file_to_ocr: str, prompt: str, json_schema: str, config: Config = Config(), type: str = "pdf") -> any:
    ocr_result = get_ocr_results(file_to_ocr)
    pdf_path, _ = os.path.split(file_to_ocr)
    if config.vision_capable:
    imgs = []
    reduced_imgs = []
    if config.vision_capable and type == "pdf":
        pdf_to_pages(file_to_ocr)
        imgs = glob.glob(os.path.join(pdf_path, "page*.png"))
        imgs = glob.glob(os.path.join(pdf_path, "page*.png"))
    if type == "png" or type == "jpg" or type == "jpeg":
        imgs = [file_to_ocr]
        # limit imgs by default
        imgs = imgs[:config.max_images]
        imgs = list(map(lambda x: load_image(x), imgs))
        # Check if images total size over 20MB
        total_size = get_size_of_base64_images(imgs)
        # Reduce image sizes if total size is over 20MB
        max_size = config.gpt_vision_limit_mb * 1024 * 1024 # 20MB
        reduced_imgs = imgs
        while get_size_of_base64_images(reduced_imgs) > max_size:
            # Reduce the size of the images by removing the last one
            reduced_imgs = reduced_imgs[:-1]
    if imgs:
        imgs = imgs[:config.max_images]
        imgs = list(map(lambda x: load_image(x), imgs))
        # Check if images total size over 20MB
        total_size = get_size_of_base64_images(imgs)
        # Reduce image sizes if total size is over 20MB
        max_size = config.gpt_vision_limit_mb * 1024 * 1024 # 20MB
        reduced_imgs = imgs
        while get_size_of_base64_images(reduced_imgs) > max_size:
            # Reduce the size of the images by removing the last one
            reduced_imgs = reduced_imgs[:-1]
    structured = get_structured_data(ocr_result.content, prompt, json_schema, reduced_imgs)
    print(structured.content)
    return parse_json_markdown(structured.content)
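The trimming loop relies on `get_size_of_base64_images`. A sketch of what that helper is assumed to compute; the real one may measure the encoded rather than decoded bytes.

```python
import base64

def get_size_of_base64_images(images: list[str]) -> int:
    # Total decoded size, in bytes, of a list of base64-encoded images.
    return sum(len(base64.b64decode(img)) for img in images)
```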