-
Notifications
You must be signed in to change notification settings - Fork 1
/
ciscat_xml2csv.py
executable file
·237 lines (195 loc) · 7.45 KB
/
ciscat_xml2csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
#!/usr/bin/python3
import re
import argparse
import xml.etree.ElementTree as etree
import html2text
__author__ = "Xavier Garceau-Aranda"
__license__ = "GPL v2"
__version__ = "0.2"
__maintainer__ = "Xavier Garceau-Aranda"
__email__ = "xavier.garceau-aranda@owasp.org"
__status__ = "Testing"
# Depth of the group (branch) whose title is used as the category of every
# control found beneath it. Default is 1 but in some cases we might want to
# go "deeper".
group_title_depth = 1

# Command-line interface: both the input XML and the output CSV are mandatory.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--xml', '-x',
    type=str,
    required=True,
    help='Input XML file generated by CISCAT',
)
parser.add_argument(
    '--csv', '-c',
    type=str,
    required=True,
    help='Output CSV file generated by this script',
)
args = parser.parse_args()

# Load the whole report into memory and keep a handle on its root element.
print('[+] Processing file {}'.format(args.xml))
tree = etree.parse(args.xml)
root = tree.getroot()
"""
First step is to build a dict with Rule ids and pass/fail results
XML structure:
<Benchmark xmlns="http://checklists.nist.gov/xccdf/1.2"
<TestResult end-time="2015-12-18T10:10:51.776+01:00">
<rule-result idref="xccdf_org.cisecurity.benchmarks_rule_1.1.1_L1_ ...
Set_Enforce_password_history_to_24_or_more_passwords"
"""
# Maps each rule id (the rule-result "idref" attribute) to the text of its
# result element, e.g. pass/fail/error/notselected.
result_dict = {}
for child in root:
    # Only TestResult elements carry the results.
    if 'TestResult' not in child.tag:
        continue
    for i in child:
        # Each rule-result element holds the outcome of a single rule.
        if 'rule-result' not in i.tag:
            continue
        idref = i.get('idref')
        for j in i:
            if 'result' in j.tag:
                result_dict[idref] = j.text
"""
Second step is to parse all Groups and their content, and cross-reference with
the result dict for pass/fail/error/notselected result.
XML structure:
<Benchmark xmlns="http://checklists.nist.gov/xccdf/1.2"
<Group id="xccdf_org.cisecurity.benchmarks_group_1_Account_Policies">
<Group id="xccdf_org.cisecurity.benchmarks_group_1.1_Password_Policy">
<Rule id="xccdf_org.cisecurity.benchmarks_rule_1.1.1_L1_Set_ ...
Enforce_password_history_to_24_or_more_passwords">
"""
class Entry(object):
    """Entry object, containing all the information for a control.

    Attributes:
        branch: the name of the top-level branch for the control.
        number: the node number.
        control: the name of the control.
        result: the pass/fail/error result of the control.
        description: the description of the control.
        remediation: the remediation for the control.
    """

    def __init__(self,
                 branch, number, control, result, description, remediation):
        self.branch = branch
        self.number = number
        self.control = control
        self.result = result
        self.description = description
        self.remediation = remediation

    @staticmethod
    def _csv_field(value):
        """Return *value* as a double-quoted CSV field.

        Embedded double quotes are doubled so that a quote inside the value
        cannot terminate the field early and corrupt the row.
        """
        return '"%s"' % str(value).replace('"', '""')

    def get_csv_string(self):
        """Return this entry as one CSV line terminated by a newline.

        The columns are branch, control, result and remediation. The
        description is deliberately left out (its extracted text is not
        reliable enough), and the remediation is blanked for controls that
        passed.
        """
        remediation_text = '' if self.result == 'pass' else self.remediation
        columns = (self.branch, self.control, self.result, remediation_text)
        return ','.join(self._csv_field(column) for column in columns) + '\n'
def description_node_to_text(node):
    """
    Takes an XML node containing nested strings and returns a single string.

    The node is serialised, passed through html2text (links ignored) and
    then stripped of serialisation artefacts. Double quotes are turned into
    single quotes.
    """
    h = html2text.HTML2Text()
    h.ignore_links = True
    html = etree.tostring(node)
    # etree.tostring() returns bytes by default, so str(html) is the
    # "b'...'" repr: newlines show up as a literal backslash followed by
    # "n" and quotes are backslash-escaped. The replace chain below removes
    # those artefacts; the order of the replacements matters.
    text = (h.handle(str(html))
            .replace('b\'\\n ', '')
            .replace('\\n \\n \'', '')
            .replace('\\n \\n ', '')
            .replace('\\n ', '')
            .replace('\\n \\n', '')
            .replace('\\n', '')
            .replace('"', '\'')
            .replace('\\\'', '\'')
            .strip())
    # Some texts still end with the closing quote of the bytes repr.
    # endswith() is used instead of text[-1] so an empty text does not raise.
    # NOTE(review): two characters are dropped here, exactly as before --
    # presumably the quote plus the character preceding it; confirm.
    if text.endswith('\''):
        text = text[:-2]
    return text
def remediation_node_to_text(node):
    """
    Takes an XML node containing nested strings and returns a single string.

    The node is serialised, passed through html2text (links ignored) and
    stripped of serialisation artefacts. Whitespace runs are collapsed to a
    single space, everything from the first "Impact" onwards is discarded,
    a final period is added when missing, and each "Computer Configuration"
    is moved onto its own line. An empty result is returned as an empty
    string.
    """
    h = html2text.HTML2Text()
    h.ignore_links = True
    html = etree.tostring(node)
    # etree.tostring() returns bytes by default, so str(html) is the
    # "b'...'" repr: newlines show up as a literal backslash followed by
    # "n" and quotes/backslashes are escaped. The replace chain below
    # removes those artefacts; the order of the replacements matters.
    text = (h.handle(str(html))
            .replace('b\'\\n ', '')
            .replace('\\n \\n \'', '')
            .replace('\\n \\n ', '')
            .replace('\\n ', '')
            .replace('\\n \\n', '')
            .replace('\\n', '')
            .replace('"', '\'')
            .replace('\\\'', '\'')
            .replace('\\\\\\', '\\')
            .strip())
    text = re.sub(r'\s+', ' ', text)
    # Keep only the part that precedes the first "Impact".
    text = text.split("Impact")[0]
    # Whitespace is already collapsed, so at most one space can remain on
    # either end; strip(' ') also copes with an empty string, which the
    # former text[0] / text[-1] indexing did not.
    text = text.strip(' ')
    if text and not text.endswith('.'):
        text += '.'
    text = text.replace('Computer Configuration', '\r\nComputer Configuration')
    return text
def recursive_iter_over_group(node, level):
    """
    As each group (branch) can contain either rules or sub-groups
    (sub-branches), we use a recursive function to iterate over each
    group/sub-group.

    node is the Group element to walk and level is its nesting depth (the
    caller passes 0 for the outermost groups). The title of the group found
    at depth group_title_depth is remembered in the global group_title and
    used as the branch of every rule met afterwards. One Entry is appended
    to the global entry_list for each rule whose result is not one of
    notselected, notchecked or unknown.
    """
    global entry_list
    global group_title
    global group_title_depth

    # A rule can be reached before any title at the configured depth has
    # been seen; make sure the global exists instead of raising NameError.
    if 'group_title' not in globals():
        group_title = ''

    ignored_results = ('notselected', 'notchecked', 'unknown')

    for child in node:
        if 'title' in child.tag:
            if level == group_title_depth:
                group_title = child.text
        elif 'description' in child.tag:
            # Group descriptions are not used.
            pass
        elif 'Rule' in child.tag:
            rule_id = child.get('id')
            # Reset the per-rule values so that a rule lacking one of the
            # elements never inherits the value of the previous rule.
            rule_title = ''
            rule_description = ''
            rule_remediation = '-'
            for i in child:
                if 'title' in i.tag:
                    rule_title = re.sub(r'\s+', ' ', i.text or '')
                elif 'description' in i.tag:
                    if len(i):
                        # Nested markup: flatten it to plain text.
                        rule_description = description_node_to_text(i)
                    else:
                        rule_description = i.text
                elif 'fixtext' in i.tag:
                    rule_remediation = remediation_node_to_text(i)
            # The rule number is the fourth underscore-separated part of
            # the rule id; left empty when the id has fewer parts.
            id_parts = (rule_id or '').split('_')
            rule_number = id_parts[3] if len(id_parts) > 3 else ''
            # A rule without a recorded result is treated as 'unknown' and
            # therefore left out, rather than aborting with a KeyError.
            rule_result = result_dict.get(rule_id, 'unknown')
            if rule_result not in ignored_results:
                new_entry = Entry(group_title,
                                  rule_number,
                                  rule_title,
                                  rule_result,
                                  rule_description,
                                  rule_remediation)
                entry_list.append(new_entry)
        elif 'Group' in child.tag:
            recursive_iter_over_group(child, level+1)
        else:  # unhandled case
            print('[-] Unhandled tag %s.' % child.tag)
# Collect one Entry per reported rule by walking every top-level Group
# (each Group is a branch).
entry_list = []
for child in root:
    if 'Group' in child.tag:
        recursive_iter_over_group(child, 0)

# Third step is to create a csv file with all the info.
print('[+] Generation file %s' % args.csv)
# The with-block guarantees the file is closed even if a write fails.
with open(args.csv, 'wb') as f:
    f.write(bytes('Category, Title, Result, Remediation\n', 'UTF-8'))
    for entry in entry_list:
        f.write(bytes(entry.get_csv_string(), 'UTF-8'))