-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest.py
59 lines (56 loc) · 1.72 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python
import re, csv
# Input PostScript statement stream and the CSV summary produced from it.
INPUT_PATH = "test4 statement file.ps"
OUTPUT_PATH = "out.csv"

# A line that begins with a parenthesised PostScript string: "(TEXT) ...".
# Greedy on purpose: the match runs to the LAST ")" on the line.
_PS_STRING_RE = re.compile(r'^\(.*\)')
# A string consisting only of 20-65 A/D/T/F characters closes the address
# block (presumably the mailing barcode printed with the address -- confirm).
_ADDRESS_END_RE = re.compile(r'\(\s*[ADTF]{20,65}\)')


def parse_statement_pages(lines):
    """Group the pages of a PostScript statement stream by account.

    Args:
        lines: iterable of text lines (an open text file works).

    Returns:
        A dict mapping each account id to a four-item list
        ``[total_pages, page_numbers, address_block, check_pages]``:
        the number of pages attributed to the account, their 1-based
        positions in the stream as strings, the address lines joined by
        single spaces, and how many of the pages carried a "CK ... NUM:"
        line.

    Raises:
        ValueError: if a page ends before any "PRIMARY ACCOUNT" line has
            been seen, because the page cannot be attributed to anyone.
    """
    pages = {}
    page_no = 0
    account = None
    checks_on_page = False
    in_address_block = False
    address_lines = []

    for line in lines:
        if '%%Page:' in line:
            page_no += 1
        elif '%%PageTrailer' in line:
            if account is None:
                raise ValueError(
                    "page %d ended before any PRIMARY ACCOUNT line was seen"
                    % page_no)
            # The account id carries over from earlier pages, so pages
            # without their own PRIMARY ACCOUNT line join the current one.
            entry = pages.setdefault(account, [0, [], '', 0])
            entry[0] += 1
            entry[1].append(str(page_no))
            if checks_on_page:
                entry[3] += 1
                checks_on_page = False
            if not entry[2]:
                # Only the first address block seen for an account is kept.
                entry[2] = ' '.join(address_lines)
            # Reset on every page so lines collected here can never leak
            # into the address block of a later account.
            address_lines = []

        found = _PS_STRING_RE.match(line)
        if found is None:
            continue
        text = found.group(0)
        if _ADDRESS_END_RE.match(text):
            in_address_block = False
            continue

        inner = text[1:-1]  # drop the enclosing parentheses
        words = inner.upper().split()
        if in_address_block:
            address_lines.append(inner.strip())

        # Slice comparisons are safe on short lines, so no exception
        # handling is needed for strings with fewer than four words.
        if words[:2] == ['PRIMARY', 'ACCOUNT']:
            if len(words) > 2:
                account = words[2]
        elif 'CK' in words and 'NUM:' in words:
            checks_on_page = True
        elif words[2:4] == ['PAGE', '1']:
            # Collection starts with the NEXT string; this marker line
            # itself is not part of the address.
            in_address_block = True

    return pages


def write_page_report(pages, out_file):
    """Write the per-account summary from parse_statement_pages as CSV.

    Args:
        pages: mapping of account id to
            ``[total_pages, page_numbers, address_block, check_pages]``.
        out_file: writable file object suitable for ``csv.writer``.
    """
    writer = csv.writer(out_file, dialect="excel")
    # Five headers for five data columns; the original header row omitted
    # the name of the last column.
    writer.writerow(["Statement ID", "Page Count", "Page No",
                     "Address Block", "Check Pages"])
    for account, (total, page_numbers, address, check_pages) in pages.items():
        writer.writerow(
            [account, total, ':'.join(page_numbers), address, check_pages])


def _open_text(path, mode):
    """Open *path* for str-based parsing and csv use on Python 2 and 3."""
    import sys
    if sys.version_info[0] < 3:
        # Python 2: str is bytes and the csv module wants binary files.
        return open(path, mode + 'b')
    # Python 3: latin-1 maps every byte to one character, so arbitrary
    # PostScript bytes survive; newline='' is what the csv module requires.
    return open(path, mode, newline='', encoding='latin-1')


def main():
    """Parse INPUT_PATH and write the per-account summary to OUTPUT_PATH."""
    with _open_text(INPUT_PATH, 'r') as statement:
        pages = parse_statement_pages(statement)
    # after parsing the postscript file, write our data to a csv file
    with _open_text(OUTPUT_PATH, 'w') as report:
        write_page_report(pages, report)


if __name__ == "__main__":
    main()