-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
131 lines (102 loc) · 3.61 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
Title: UFW Log Parser
Author: Darius Strasel @dariusstrasel
Description: Parses an input log file and returns one Python dictionary per log line.
"""
import time
import re
import json
def _cleanse_line(line):
    """Strip bracket markup and surrounding whitespace from a log line.

    Square brackets are removed, together with any run of spaces directly
    following an opening bracket, so a bracketed value such as
    ``[    5.123]`` collapses to the single token ``5.123``. Newlines are
    dropped and leading/trailing whitespace is trimmed.

    Args:
        line: One raw line of text from the log file.

    Returns:
        The cleaned line as a string.
    """
    # The padding after "[" can be wider than a single space; remove all of
    # it so the bracketed value does not leave stray spaces that would shift
    # the position of every following token.
    new_line = re.sub(r"\[ +", "[", line)
    # Drop the brackets themselves.
    new_line = new_line.replace("[", "").replace("]", "")
    # Drop newlines, then trim whitespace at both ends.
    new_line = new_line.replace("\n", "")
    return new_line.strip()
def _tokenize_line(line):
    """Split one log line into a dictionary of named fields.

    The line is cleaned with ``_cleanse_line`` and split on whitespace. The
    first eight tokens are mapped by position to the keys ``month``, ``day``,
    ``time``, ``hostname``, ``block_type``, ``uptime`` and ``type``; each
    remaining token is converted with ``_convert_to_key_value_pair`` and
    merged into the same dictionary.

    Args:
        line: One raw line of text from the log file.

    Returns:
        A dictionary describing the line. It is also printed to stdout.

    Raises:
        IndexError: If the cleaned line has fewer than eight tokens.
    """
    # split() with no argument collapses runs of whitespace, so a padded
    # field (for example a single-digit day written as "Feb  4") does not
    # produce empty tokens that would shift every positional index below.
    split_line = _cleanse_line(line).split()

    # Indexes of the fixed, positional fields.
    MONTH = 0
    DAY = 1
    TIME = 2
    HOSTNAME = 3
    BLOCK_TYPE = 4
    UPTIME = 5
    TYPE = (6, 7)
    REMAINDER = split_line[8:]

    log_map = {
        'month': split_line[MONTH],
        'day': split_line[DAY],
        'time': split_line[TIME],
        'hostname': split_line[HOSTNAME],
        'block_type': split_line[BLOCK_TYPE][:-1],  # Remove trailing ':'
        'uptime': split_line[UPTIME],
        # The two type tokens are joined without a separator.
        'type': split_line[TYPE[0]] + split_line[TYPE[1]]
    }

    # Convert the remaining tokens to single-entry dicts and merge them in.
    for token in REMAINDER:
        _add_dict1_to_dict2(_convert_to_key_value_pair(token), log_map)

    print(log_map)
    return log_map
def _add_dict1_to_dict2(dict_one, dict_two):
    """Merge the entries of one dictionary into a second dictionary in place.

    Keys already present in ``dict_two`` are overwritten by the values from
    ``dict_one``.

    Args:
        dict_one: Source dictionary. Nothing happens when it is empty or None.
        dict_two: Destination dictionary, modified in place. Nothing happens
            when it is None.

    Returns:
        None.
    """
    # Test the destination against None rather than for truthiness: an empty
    # destination dict is a valid merge target and must still receive entries.
    if dict_one and dict_two is not None:
        dict_two.update(dict_one)
def _convert_to_key_value_pair(string_pair):
    """Convert a ``"key=value"`` string into a single-entry dictionary.

    Args:
        string_pair: Text of the form ``"key=value"``, or a bare ``"key"``.

    Returns:
        ``{key: value}``, where the value is everything after the first
        ``"="``. A string without any ``"="`` maps the whole string to None.
    """
    # Split on the first "=" only, so a value that itself contains "=" is
    # kept whole instead of being cut off at the second "=".
    key, separator, value = string_pair.partition("=")
    # No separator at all indicates a Null value.
    return {key: value if separator else None}
def _find_illegal_keys(line):
    """Find bare words that are directly followed by a key=value pair.

    A match is a whitespace character, then a word, one whitespace character
    and a ``word=word`` pair, for example ``SYN URGP=0``.

    Args:
        line: Text to search.

    Returns:
        A list of the matched substrings, without the leading whitespace.
    """
    pattern = re.compile(r"\s(\w+\s\w+=\w+)")
    return pattern.findall(line)
def _dump_to_JSON(dictionary_input, file_name='result.json'):
    """Save a Python object to a JSON file.

    Args:
        dictionary_input: The object to serialize with ``json.dump``.
        file_name: Path of the file to write. Defaults to ``'result.json'``.
            An existing file at that path is overwritten.

    Returns:
        None.
    """
    with open(file_name, 'w') as fp:
        json.dump(dictionary_input, fp)
def _get_dict_keys(parser_results):
    """Collect every distinct key used across a list of parsed dictionaries.

    Helps to define the data model of a logfile.

    Args:
        parser_results: An iterable of dictionaries.

    Returns:
        A list of the distinct keys, in order of first appearance. The list
        is also printed to stdout.
    """
    seen = set()  # Constant-time membership test; the list keeps the order.
    existing_keys = []
    for result in parser_results:
        for key in result:
            if key not in seen:
                seen.add(key)
                existing_keys.append(key)
    print(existing_keys)
    return existing_keys
def process_file(file_name):
    """Main function used to start the parser.

    Every non-blank line of the file is passed to ``_tokenize_line``. The
    file name, elapsed time and number of parsed events are printed to
    stdout.

    Args:
        file_name: Path of the log file to process.

    Returns:
        A list with one parsed dictionary per non-blank line.

    Raises:
        IOError: If the file cannot be opened or read. A message is printed
            before the exception is re-raised.
    """
    result = []
    try:
        # Open read-only: parsing never writes to the file, and a read/write
        # mode would fail on files the user is only allowed to read.
        with open(file_name, 'r') as logfile:
            print("Opening: %s" % file_name)
            execution_start_time = time.time()
            for line in logfile:
                # Blank lines carry no fields to parse.
                if not line.strip():
                    continue
                result.append(_tokenize_line(line))
            execution_end_time = time.time()
    except IOError as error:
        # Report the actual cause; the failure may be a missing file or a
        # permission problem as well as a lock.
        print("Unable to read %s: %s" % (file_name, error))
        raise
    print("Elapsed: %s Found: %s events" % ((execution_end_time - execution_start_time), len(result)))
    return result
if __name__ == "__main__":
    # Script entry point: parse the log file in the current directory.
    log_path = "./ufw.log"
    process_file(log_path)