Skip to content

Commit 1e1e916

Browse files
author
jsennett
committed
fixed decoding issues
1 parent 5ce6260 commit 1e1e916

File tree

1 file changed: 6 additions and 5 deletions.

1 file changed: 6 additions and 5 deletions.

json_to_csv.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,8 @@
55
import csv
66
import io
77

8-
8+
ENCODING = "Latin-1"
9+
910
def convertToCSV(jsonData, keys):
1011
returnData = {}
1112
global counter
@@ -46,7 +47,7 @@ def convertToCSV(jsonData, keys):
4647
return returnData
4748

4849
def writeCSV(a, filename):
49-
write = csv.writer(io.open(filename, 'w', newline='', encoding='Latin-1'))
50+
write = csv.writer(io.open(filename, 'w', newline='', encoding=ENCODING))
5051
groups = ["cited_patents","inventors","application_citations",
5152
"applications", "assignees","citedby_patents","coinventors",
5253
"cpc_subgroups", "cpc_subsections", "cpcs", "IPCs",
@@ -80,7 +81,7 @@ def writeCSV(a, filename):
8081
flag = True
8182
if (flag):
8283
try:
83-
row = [str(s).encode("Latin-1", "replace").decode('cp1252') for s in row]
84+
row = [str(s).encode(ENCODING, "replace").decode(ENCODING, errors='ignore') for s in row]
8485
except:
8586
pass
8687
write.writerow(row)
@@ -89,10 +90,10 @@ def writeCSV(a, filename):
8990
def merge_csv(fd,q,requests):
9091
diri = [d for d in os.listdir(fd) if re.search(q+'_\d+.csv',d)]
9192
csv_out = open(os.path.join(fd, q+'.csv'), 'w')
92-
for line in open(os.path.join(fd,q+'_0.csv')):
93+
for line in open(os.path.join(fd,q+'_0.csv'), 'rb').read().decode(ENCODING, errors='ignore'):
9394
csv_out.write(line)
9495
for i in range(requests):
95-
f = open(os.path.join(fd, q+'_'+str(i)+'.csv'), 'r+', encoding='Latin-1')
96+
f = open(os.path.join(fd, q+'_'+str(i)+'.csv'), 'r+', encoding=ENCODING)
9697
if sys.version_info >= (3,):
9798
next(f)
9899
else:

0 commit comments

Comments
 (0)