-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess.py
94 lines (73 loc) · 2.66 KB
/
process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Importing required libraries
import csv
from io import StringIO
# Converts one table returned by Form Recognizer into a 2D list of cell strings, plus the index of its last header row
def makeTableCSV(tablesData):
    """Lay the cells of a single recognized table out on a 2D grid.

    Args:
        tablesData: Mapping with "rowCount", "columnCount" and "cells".
            Each cell carries "rowIndex", "columnIndex" and "content",
            and may carry "kind".

    Returns:
        A ``(data2d, headerLast)`` tuple. ``data2d`` is a list of
        ``rowCount`` rows, each holding ``columnCount`` strings, where
        positions without a cell stay ``""``. ``headerLast`` is the
        largest row index among cells whose kind is "columnHeader", or
        0 when there is no such cell.

    Raises:
        KeyError: If a required field is missing.
        IndexError: If a cell index lies outside the declared grid.
    """
    data2d = [
        ["" for _ in range(tablesData["columnCount"])]
        for _ in range(tablesData["rowCount"])
    ]
    headerLast = 0
    for cell in tablesData["cells"]:
        rowIndex = cell["rowIndex"]
        data2d[rowIndex][cell["columnIndex"]] = cell["content"]
        if cell.get("kind") == "columnHeader":
            # Track the maximum so the result does not depend on the order
            # in which the cells happen to be listed.
            headerLast = max(headerLast, rowIndex)
    return data2d, headerLast
# Converts the key-value pairs returned by Form Recognizer into CSV rows under a "Key"/"Value" header
def makeKeyValueCSV(data):
    """Turn recognized key-value pairs into two-column CSV rows.

    Args:
        data: Iterable of mappings, each with a "key" mapping holding
            "content" and, optionally, a "value" mapping holding "content".

    Returns:
        A list of ``[key, value]`` rows preceded by the header row
        ``["Key", "Value"]``. One trailing ":" is stripped from each key.
        A pair with no "value" (or no value content) yields ``""``.

    Raises:
        KeyError: If a pair has no "key" or the key has no "content".
    """
    pairs = [["Key", "Value"]]
    for pair in data:
        key = pair["key"]["content"]
        # endswith() is safe on an empty key, unlike key[-1].
        if key.endswith(":"):
            key = key[:-1]
        # A key can be detected without any accompanying value.
        value = pair.get("value") or {}
        pairs.append([key, value.get("content", "")])
    return pairs
# Converts the paragraphs data returned by Form recognizer into CSV data
def makeParagraphsCSV(data):
    """Build single-column CSV rows from recognized paragraphs.

    Args:
        data: Iterable of mappings, each with a "content" entry.

    Returns:
        A list whose first row is ``["Paragraphs"]``, followed by one
        one-element row per paragraph, in input order.
    """
    rows = [["Paragraphs"]]
    rows.extend([paragraph["content"]] for paragraph in data)
    return rows
# Merges all the 2d CSV data tables in the provided list into a single 2d CSV data table
def mergeCSV(datas, gapRows=3):
    """Stack several 2D tables vertically into one rectangular table.

    Args:
        datas: List of tables, each a list of rows (lists of cell values).
        gapRows: Number of blank rows inserted between consecutive
            tables. Defaults to 3.

    Returns:
        A new list of rows. Every row is padded with ``""`` to the width
        of the widest row found in any table. Tables with no rows are
        skipped and do not produce a gap. An empty ``datas`` gives ``[]``.
    """
    # Measure every row, not just the first one of each table, so that a
    # ragged table cannot overflow the merged grid.
    colmax = max((len(row) for data in datas for row in data), default=0)
    merged = []
    for data in datas:
        if not data:
            continue
        if merged:
            # Separate this table from the previous one with blank rows.
            merged.extend([""] * colmax for _ in range(gapRows))
        for row in data:
            merged.append(list(row) + [""] * (colmax - len(row)))
    return merged
# Extracts all the table, key-value, and paragraph data from Form recognizer and writes it to a CSV string
def processDataToCSV(data):
    """Render the key-value pairs, tables and paragraphs of a result as CSV.

    The sections are written in that order, merged into one table by
    ``mergeCSV``, and serialized with ``csv.writer`` using its default
    dialect.

    Args:
        data: Mapping that may hold "keyValuePairs", "tables" and
            "paragraphs". A section that is missing or None is treated
            as empty.

    Returns:
        The CSV document as a single string.
    """
    csvData = [makeKeyValueCSV(data.get("keyValuePairs") or [])]
    for table in data.get("tables") or []:
        # Only the grid is needed here; the header row index is ignored.
        tableData, _ = makeTableCSV(table)
        csvData.append(tableData)
    csvData.append(makeParagraphsCSV(data.get("paragraphs") or []))
    buffer = StringIO()
    csv.writer(buffer).writerows(mergeCSV(csvData))
    return buffer.getvalue()
# Extracts all the table, key-value, and paragraph data from Form recognizer and returns it as a dictionary
def processDataToObj(data):
    """Collect the key-value pairs, tables and paragraphs into a dictionary.

    Args:
        data: Mapping that may hold "keyValuePairs", "tables" and
            "paragraphs". A section that is missing or None is treated
            as empty.

    Returns:
        A dict with three entries:
        "tables" is a list of ``[tableData, headerLast]`` pairs as
        produced by ``makeTableCSV``; "keyValuePairs" is the row list
        from ``makeKeyValueCSV``; "paragraphs" is the row list from
        ``makeParagraphsCSV``.
    """
    processedData = {"tables": []}
    processedData["keyValuePairs"] = makeKeyValueCSV(
        data.get("keyValuePairs") or []
    )
    for table in data.get("tables") or []:
        tableData, headerLast = makeTableCSV(table)
        processedData["tables"].append([tableData, headerLast])
    processedData["paragraphs"] = makeParagraphsCSV(data.get("paragraphs") or [])
    return processedData