-
Notifications
You must be signed in to change notification settings - Fork 0
/
Hashmapper.py
93 lines (67 loc) · 2.25 KB
/
Hashmapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 4 00:56:39 2017
@author: allan
"""
import os
import csv
import json
import pickle
# Candidate input directories holding the CSV files to index. Only
# master_csv_dir is referenced later in this file (through csvdir); the two
# test paths are kept as alternatives.
# NOTE(review): presumably smaller 105-file and 10-file samples - confirm.
test_csv_105 = '/home/allan/Tuberculosis/Test105CSV'
test_csv_10 = '/home/allan/Tuberculosis/Test10CSV'
master_csv_dir = '/home/allan/Tuberculosis/CSVs'
# Change the working directory so that the relative output files opened
# further down ('Hash1', 'Hash2', 'Hash3') are created inside the Hash
# directory. This runs at import time and fails if the directory is missing.
os.chdir('/home/allan/Tuberculosis/Hash')
def pages(directory):
    """Yield the items of ``directory`` one at a time, in their given order.

    ``directory`` is any iterable; in this script it is the list of file
    names returned by ``os.listdir``.
    """
    for entry in directory:
        yield entry
# Directory whose files are indexed by every section below; point this at
# one of the test directories to run on a smaller input.
csvdir = master_csv_dir
# Column names handed to csv.DictReader as ``fieldnames``. Because they are
# supplied explicitly, the reader treats every row of a file - including
# the first - as data rather than as a header.
field_names = ['Type', 'Data']
# Hash1: sorted list of all gene names (by Rv number), taken from the CSV
# file names with their extension removed.
# Saved with pickle.
#
# os.path.splitext drops only the extension. The earlier i.strip('.csv')
# treated '.csv' as a set of characters and trimmed any of '.', 'c', 's',
# 'v' from BOTH ends of the name, so 'Rv0008c.csv' became 'Rv0008' and
# 'cysS.csv' became 'ysS'.
hashlist = [os.path.splitext(i)[0] for i in pages(os.listdir(csvdir))]
hashlist2 = sorted(hashlist)
# pickle emits bytes, so the target must be opened in binary mode; a
# text-mode handle raises TypeError on Python 3.
with open('Hash1', 'wb') as fb:
    pickle.dump(hashlist2, fb)
# Hash2 hashmap - Key = "GeneName" :
#       Value = (List of all 'Types' without null 'Data' value)
# Saved with json
Hash2 = {}
for i in pages(os.listdir(csvdir)):
    csvfile_ = '%s/%s' % (csvdir, i)
    with open(str(csvfile_), 'r') as csvfile:
        reader = csv.DictReader(csvfile, fieldnames=field_names)
        # Keep the 'Type' of every row whose 'Data' field is not the
        # literal string 'null'.
        templist = [row['Type'] for row in reader if row['Data'] != 'null']
    # Key on the file name minus its extension. The earlier
    # i.strip('.csv') removed any of the characters '.', 'c', 's', 'v' from
    # both ends, so names such as 'Rv0008c.csv' and 'Rv0008.csv' collapsed
    # to the same key and one entry silently overwrote the other.
    Hash2[os.path.splitext(i)[0]] = templist
with open('Hash2', 'w+') as fp:
    json.dump(Hash2, fp, sort_keys=True, indent=4)
# Hash3 hashmap - Key = "'GeneName'_'Type'" :
#       Value = "Data"
# Saved with json
Hash3 = {}
for i in pages(os.listdir(csvdir)):
    csvfile_ = '%s/%s' % (csvdir, i)
    # File name minus its extension, computed once per file. The earlier
    # i.strip('.csv') removed any of the characters '.', 'c', 's', 'v' from
    # both ends of the name (e.g. 'Rv0008c.csv' -> 'Rv0008'), which
    # corrupted keys and let entries from different genes overwrite each
    # other.
    gene_name = os.path.splitext(i)[0]
    with open(str(csvfile_), 'r') as csvfile:
        reader = csv.DictReader(csvfile, fieldnames=field_names)
        for row in reader:
            # A later row with the same 'Type' in the same file replaces
            # the earlier value.
            Hash3['{name}_{key}'.format(name=gene_name,
                                        key=row['Type'])] = row['Data']
with open('Hash3', 'w+') as fp:
    json.dump(Hash3, fp, sort_keys=True, indent=4)
# Leftover scratch code kept inside a bare string literal, so none of it is
# executed: it opens the single file metB.csv and wraps it in a
# csv.DictReader using the same two field names as the sections above.
'''
field_names = ['Type', 'Data']
metB = '/home/allan/Tuberculosis/Test10CSV/metB.csv'
csvfile = open(metB)
reader = csv.DictReader(csvfile, fieldnames = field_names)
'''