forked from Dubniak/SafeDroid-v2.0
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathvectors.py
221 lines (167 loc) · 6.64 KB
/
vectors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
"""
Part of the SafeDroid v2.0 FrameWork.
Author : Arygriou Marios
Year : 2017
The framework is distributed under the GNU General Public License v3.0
"""
import pandas as pd
import numpy as np
import os
from sklearn import preprocessing
import cPickle as pickle
class API:
    """Wrap the API statistics CSV in a single pandas DataFrame.

    The file is read without a header row and its first column becomes the
    index. The methods of this class access columns named ``api``,
    ``malicious``, ``benign`` and ``ratio``, so ``column_names`` has to
    provide them.

    @arg filepath : csv file handed to pandas.read_csv
    @arg column_names : names for the non-index columns , list
    @arg app_df_size : indexable pair , [0] is used as the malicious
                       application count and [1] as the benign one
    """

    def __init__(self, filepath, column_names, app_df_size):
        self._matrix = self.readData(filepath)
        self.setColumnNames(column_names)
        # The ratio column must be filled before target_ratio is captured.
        self.setRatio(app_df_size)
        self.feature_names = ['api', 'ratio']
        self.target = self.setTarget()
        self.target_ratio = self.setTargetRatio()

    def setTarget(self):
        """@ret : numpy array holding the ``api`` column"""
        return np.array(self._matrix['api'])

    def setTargetRatio(self):
        """@ret : numpy array holding the ``ratio`` column"""
        return np.array(self._matrix['ratio'])

    def readData(self, filepath):
        """Read a header-less csv , first column is used as index.

        @ret : pandas DataFrame
        """
        return pd.read_csv(filepath, index_col=0, header=None)

    def getData(self):
        """@ret : the wrapped DataFrame (not a copy)"""
        return self._matrix

    def calulateRatioApi(self, x):
        """Share of malicious hits among all hits of one row.

        @arg x : row exposing ``malicious`` and ``benign``
        @ret : 0 when ``malicious`` is 0 , else malicious/(malicious+benign)
        """
        if x.malicious == 0:
            return 0
        return float(x.malicious) / (x.malicious + x.benign)

    def calulateRatio(self, x, mal_apps, ben_apps):
        """Compute malicious/mal_apps - benign/ben_apps for one row.

        @arg x : row exposing ``malicious`` and ``benign``
        @arg mal_apps : divisor applied to ``x.malicious``
        @arg ben_apps : divisor applied to ``x.benign``
        @raise TypeError : when an operand is not numeric ; the operands and
                           their types are printed before re-raising
        """
        try:
            return float(x.malicious) / mal_apps - float(x.benign) / ben_apps
        except TypeError:
            # Show every operand with its type so the offending value can be
            # located, then let the error propagate instead of terminating
            # the interpreter from inside library code.
            for operand in (x.malicious, mal_apps, x.benign, ben_apps):
                print("%s %s" % (str(operand), type(operand)))
            raise

    def trasformStringColumn(self, column=None):
        """Replace the ``api`` column with integer labels (LabelEncoder).

        NOTE(review): ``column`` is accepted but ignored ; the ``api`` column
        is always the one encoded.
        """
        encoder = preprocessing.LabelEncoder()
        self._matrix['api'] = encoder.fit_transform(self._matrix['api'])

    def setRatio(self, size):
        """Fill the ``ratio`` column by applying calulateRatio to every row.

        @arg size : indexable , size[0] -> mal_apps , size[1] -> ben_apps
        """
        # Item assignment creates the column when it is missing ; attribute
        # assignment would only set a plain Python attribute in that case.
        self._matrix['ratio'] = self._matrix.apply(
            self.calulateRatio, axis=1, args=(size[0], size[1]))

    def setColumnNames(self, names):
        """Rename the DataFrame columns ; an empty ``names`` is a no-op.

        A failure (e.g. wrong number of names) is reported on stdout and the
        columns are left as they were.
        """
        if len(names) == 0:
            return
        try:
            self._matrix.columns = names
        except Exception as err:
            # Best effort by design : report the actual error, do not raise.
            print(err)

    def getFiltered(self, threshold=None):
        """Return the rows whose ratio is >= threshold.

        @arg threshold : lower bound for ``ratio`` , 0.30 when None
        @ret : filtered DataFrame
        """
        limit = 0.30 if threshold is None else threshold
        return self._matrix[self._matrix['ratio'] >= limit]

    def splitSample(self, threshold=None):
        """Unfinished : only resolves the threshold (default 0.6).

        @ret : None
        """
        thr = threshold if threshold is not None else 0.6

    def getSubSet(self, indexes):
        """@ attr indexes : the indexes of the subset
        @ ret : the subset
        """
        return self._matrix.loc[indexes]

    def printSpecific(self, index):
        """Print the rows whose index label equals ``index``."""
        print(self._matrix[self._matrix.index == index])
class Applications:
    """Wrap the applications CSV in a pandas DataFrame.

    The file is read without a header row and its first column becomes the
    index. The methods of this class access a column named ``isMalicious``
    (1 -> malicious , 0 -> benign), so ``column_names`` has to provide it.

    @arg filepath : csv file handed to pandas.read_csv
    @arg column_names : names for the non-index columns , list
    """

    def __init__(self, filepath, column_names):
        self._matrix = self.readData(filepath)
        self.setColumnNames(column_names)
        self.feature_names = ['application', 'isMalicious']
        self.target_names = np.array(['benign', 'malicious'])
        self.target = self.setTarget()
        # Both subsets are filtered once, here ; they are not refreshed if
        # the wrapped DataFrame is modified afterwards.
        labels = self._matrix['isMalicious']
        self.malicious = self._matrix[labels == 1]
        self.benign = self._matrix[labels == 0]

    def readData(self, filepath):
        """Read a header-less csv , first column is used as index.

        @ret : pandas DataFrame
        """
        return pd.read_csv(filepath, index_col=0, header=None)

    def setTarget(self):
        """@ret : numpy array holding the ``isMalicious`` column"""
        return np.array(self._matrix['isMalicious'])

    def setColumnNames(self, column_names):
        """Rename the DataFrame columns ; an empty list is a no-op.

        A failure (e.g. wrong number of names) is reported on stdout and the
        columns are left as they were.
        """
        if len(column_names) == 0:
            return
        try:
            self._matrix.columns = column_names
        except Exception as err:
            # Best effort by design : report the actual error, do not raise.
            print(err)

    def getData(self):
        """@ret : the wrapped DataFrame (not a copy)"""
        return self._matrix

    def getMalicious(self):
        """@ret : rows that had isMalicious == 1 at construction time"""
        return self.malicious

    def getBenign(self):
        """@ret : rows that had isMalicious == 0 at construction time"""
        return self.benign

    def getMaliciousSize(self):
        """@ret : current number of rows with isMalicious == 1"""
        return len(self._matrix[self._matrix['isMalicious'] == 1])

    def getBenignSize(self):
        """@ret : current number of rows with isMalicious == 0"""
        return len(self._matrix[self._matrix['isMalicious'] == 0])

    def getOverallSize(self):
        """@ret : total number of rows"""
        return len(self._matrix)

    def getSubSet(self, indexes):
        """@ attr indexes : the indexes of the subset
        @ ret : the subset
        """
        return self._matrix.loc[indexes]

    def printSpecific(self, index):
        """Print the rows whose index label equals ``index``."""
        print(self._matrix[self._matrix.index == index])
class Permissions:
    """Wrap the permissions CSV in a pandas DataFrame and fill its ratio.

    The file is read without a header row and its first column becomes the
    index. The ratio computation reads columns named ``malicious`` and
    ``benign`` and writes a column named ``ratio``.

    @arg filepath : csv file handed to pandas.read_csv
    @arg column_names : names for the non-index columns , list
    """

    def __init__(self, filepath, column_names):
        self._matrix = self.readData(filepath)
        self.setColumnNames(column_names)
        self.setRatio()

    def readData(self, filepath):
        """Read a header-less csv , first column is used as index.

        @ret : pandas DataFrame
        """
        return pd.read_csv(filepath, index_col=0, header=None)

    def setColumnNames(self, column_names):
        """Rename the DataFrame columns ; an empty list is a no-op.

        A failure (e.g. wrong number of names) is reported on stdout and the
        columns are left as they were.
        """
        if len(column_names) == 0:
            return
        try:
            self._matrix.columns = column_names
        except Exception as err:
            # Best effort by design : report the actual error, do not raise.
            print(err)

    def setRatio(self):
        """Fill the ``ratio`` column by applying calulateRatio to every row."""
        # Item assignment creates the column when it is missing ; attribute
        # assignment would only set a plain Python attribute in that case.
        self._matrix['ratio'] = self._matrix.apply(self.calulateRatio, axis=1)

    def calulateRatio(self, x):
        """Share of malicious hits among all hits of one row.

        @arg x : row exposing ``malicious`` and ``benign``
        @ret : 0 when ``malicious`` is 0 , else malicious/(malicious+benign)
        """
        if x.malicious == 0:
            return 0
        return float(x.malicious) / (x.malicious + x.benign)
class AppToApi:
    """Wrap the application-to-API mapping CSV in a pandas DataFrame.

    The file is read without a header row and without an index column. The
    lookup methods access columns named ``appid`` and ``apiid``, so
    ``column_names`` has to provide them.

    @arg filepath : csv file handed to pandas.read_csv
    @arg column_names : names for the columns , list
    """

    def __init__(self, filepath, column_names):
        self._matrix = self.readData(filepath)
        self.setColumnNames(column_names)
        self.feature_names = ['application', 'api']

    def readData(self, filepath):
        """Read a header-less csv ; the default integer index is kept.

        @ret : pandas DataFrame
        """
        return pd.read_csv(filepath, header=None)

    def setData(self):
        """Unfinished placeholder ; it has no effect.

        @ret : None
        """
        return None

    def setColumnNames(self, column_names):
        """Rename the DataFrame columns ; an empty list is a no-op.

        A failure (e.g. wrong number of names) is reported on stdout and the
        columns are left as they were.
        """
        if len(column_names) == 0:
            return
        try:
            self._matrix.columns = column_names
        except Exception as err:
            # Best effort by design : report the actual error, do not raise.
            print(err)

    def getAppId(self, apiId):
        """@attr apiId to search
        Find out which Applications use the specified API
        @ret pandas.core.series.Series object holding appIds
        """
        uses_api = self._matrix['apiid'] == apiId
        return self._matrix[uses_api]['appid']

    def getApiId(self, appId):
        """@attr appId to search
        Find out which APIs are used by a specific Application
        @ret pandas.core.series.Series object holding apiIds
        """
        is_app = self._matrix['appid'] == appId
        return self._matrix[is_app]['apiid']