-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhandler.py
65 lines (49 loc) · 1.52 KB
/
handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
from pdf_to_text import PdfToText
from pdf_images_extractor import PdfImagesExtractor
from aws import s3_client, s3_bucket
import json
def pdf_to_text(event, context):
    """Extract the text of a PDF stored in S3 and return it as a JSON response.

    The S3 object key is obtained from ``event`` through ``file_key``. When
    no key is supplied, the response built by ``missing_key_response`` is
    returned and nothing is downloaded.

    Args:
        event: Request event dict (expected to carry the query parameters).
        context: Invocation context; not used by this handler.

    Returns:
        A dict with ``statusCode``, ``body`` (a JSON string of the form
        ``{"text": ...}``) and ``headers``.
    """
    key = file_key(event)
    # Identity comparison is the correct way to test for None (PEP 8).
    if key is None:
        return missing_key_response()

    path = download_file(key)
    text = PdfToText(path).convert()

    return {
        "statusCode": 200,
        # ensure_ascii=False keeps non-ASCII characters unescaped in the body,
        # which is why the Content-Type below declares a UTF-8 charset.
        "body": json.dumps({"text": text}, ensure_ascii=False),
        "headers": {
            "Content-Type": "application/json; charset=utf-8"
        }
    }
def pdf_images(event, context):
    """Extract the images of a PDF stored in S3 and return the result as JSON.

    The S3 object key is obtained from ``event`` through ``file_key``. When
    no key is supplied, the response built by ``missing_key_response`` is
    returned and nothing is downloaded. The optional ``s3_images_folder``
    query parameter is forwarded to ``PdfImagesExtractor``.

    Args:
        event: Request event dict (expected to carry the query parameters).
        context: Invocation context; not used by this handler.

    Returns:
        A dict with ``statusCode``, ``body`` (a JSON string of the form
        ``{"images": ...}``, where the value is whatever
        ``extract_and_upload_images`` returns) and ``headers``.
    """
    key = file_key(event)
    # Identity comparison is the correct way to test for None (PEP 8).
    if key is None:
        return missing_key_response()

    path = download_file(key)

    # ``or {}`` guards against the parameter map being present but None.
    query = event.get('queryStringParameters') or {}
    s3_folder = query.get('s3_images_folder')

    extractor = PdfImagesExtractor(pdf_path=path, s3_images_folder=s3_folder)
    images = extractor.extract_and_upload_images()

    return {
        "statusCode": 200,
        "body": json.dumps({"images": images}),
        "headers": {
            "Content-Type": "application/json"
        }
    }
def file_key(event):
    """Return the ``s3_pdf_key`` query parameter from ``event``, or None.

    ``queryStringParameters`` may be missing from the event or present with
    a ``None`` value (as happens when a request has no query string); both
    cases are treated as "no parameters" instead of raising
    ``AttributeError``.

    Args:
        event: Request event dict.

    Returns:
        The value of ``s3_pdf_key`` if present, otherwise ``None``.
    """
    # dict.get's default only applies when the key is absent, not when its
    # value is None, so fall back explicitly with ``or``.
    params = event.get('queryStringParameters') or {}
    return params.get('s3_pdf_key')
def missing_key_response():
    """Build the error response returned when ``s3_pdf_key`` is not supplied.

    Returns:
        A dict with ``statusCode`` 400 (the request is malformed, so it must
        not be reported as a success), a JSON ``body`` of the form
        ``{"error": ...}`` naming the missing parameter, and JSON ``headers``.
    """
    # The message names the parameter the handlers actually read
    # ('s3_pdf_key'), so the caller knows what to send.
    message = "Missing 's3_pdf_key' query parameter to fetch from S3"
    return {
        "statusCode": 400,
        "body": json.dumps({"error": message}),
        "headers": {
            "Content-Type": "application/json"
        }
    }
def download_file(key):
    """Download the S3 object ``key`` to a fixed local path and return it.

    The object is fetched from the bucket given by ``s3_bucket()`` using the
    client from ``s3_client()`` and written to ``/tmp/file.pdf``; an existing
    file at that path is the target of every call.

    Args:
        key: S3 object key of the PDF to fetch.

    Returns:
        The local path the file was written to.
    """
    local_pdf = '/tmp/file.pdf'
    client = s3_client()
    client.download_file(s3_bucket(), key, local_pdf)
    return local_pdf