-
Notifications
You must be signed in to change notification settings - Fork 0
/
tree.lua
241 lines (211 loc) · 7.4 KB
/
tree.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
local tree = {}
-- http://dmg.org/pmml/v4-1/TreeModel.html
-- P Q AND OR XOR
-- True True True True False
-- True False False True True
-- True Unknown Unknown True Unknown
-- False True False True True
-- False False False False False
-- False Unknown False Unknown Unknown
-- Unknown True Unknown True Unknown
-- Unknown False False Unknown Unknown
-- Unknown Unknown Unknown Unknown Unknown
-- And performs a logical AND operation on the set of nullable boolean values
-- true true true
-- true false false
-- true nil nil
-- false true false
-- false false false
-- false nil false
-- nil true nil
-- nil false false
-- nil nil nil
function tree.And(arr)
local result = true
for i=1, arr.n do
local v = arr[i]
if Unknown(v) and result == true then
result = nil
elseif Unknown(result) and v == false then
result = false
else
result = result and v
end
end
return result
end
-- Or performs a logical OR operation on a set of nullable boolean values
-- true true true
-- true false true
-- true nil true
-- false true true
-- false false false
-- false nil nil
-- nil true true
-- nil false nil
-- nil nil nil
function tree.Or(arr)
local result = false
for i=1, arr.n do
local v = arr[i]
if Unknown(v) and result == false then
result = nil
elseif Unknown(result) and v == false then
result = nil
else
result = result or v
end
end
return result
end
-- Xor performs a logical XOR operation on a set of nullable boolean values
-- true true false
-- true false true
-- true nil nil
-- false true true
-- false false false
-- false nil nil
-- nil true nil
-- nil false nil
-- nil nil nil
function tree.Xor(arr)
local result = false
for i=1, arr.n do
local v = arr[i]
if Unknown(v) then
return nil
end
result = nand(nand(result, nand(result, v)), nand(v, nand(result, v)))
end
return result
end
-- The operator surrogate provides a special means to handle logical expressions with missing
-- values. It is applied to a sequence of predicates. The order of the predicates matters, the
-- first predicate is the primary, the next predicates are the surrogates. Evaluation order is
-- left-to-right. The cascaded predicates are applied when the primary predicate evaluates to
-- UNKNOWN.
function tree.Surrogate(arr)
local result = nil
for i=1, arr.n do
result = arr[i]
if not Unknown(result) then
return result
end
end
return result
end
-- Checks if the value is present in the set
function tree.IsIn(target, array)
for i, v in ipairs(array) do
if v == target then
return true
end
end
return false
end
-- Checks if the value is missing in the set
function tree.IsNotIn(target, array)
return not tree.IsIn(target, array)
end
-- NewNode creates a node structure
function tree.NewNode(id, run, ...)
n = {}
n.id = id
n.run = run
n.children = {}
-- Create a table with all of the children
for k, v in pairs({ ... }) do
n.children[v.id] = v
end
-- The evaluation function
n.eval = function(t, v)
local done = n.run(t, n, v); if done then
return
end
-- Evaluate siblings
for k, n in pairs(nodes) do
local done = n.eval(t, v); if done then
return
end
end
end
return n
end
-- NewTree creates a new decision tree
function tree.NewTree(strategy, node)
t = {}
t.onMiss = strategy
t.node = node
t.conf = {}
t.last = nil
-- Function which traverses the tree
t.eval = function(v)
t.node.eval(t, v)
return t.last
end
-- Function that records an unknown value
t.miss = function(score, count, confidence)
t.conf[score] = t.conf[score] or {}
table.insert(t.conf[score], {count, confidence})
end
return t
end
-- If a Node's predicate evaluates to UNKNOWN while traversing the tree, evaluation is stopped
-- and the current winner is returned as the final prediction.
function tree.LastPrediction(t, n, v)
return true -- stop
end
-- If a Node's predicate value evaluates to UNKNOWN while traversing the tree, abort the scoring
-- process and give no prediction.
function tree.NullPrediction(t, n, v)
t.last = nil
return true -- stop
end
-- Comparisons with missing values other than checks for missing values always evaluate to FALSE.
-- If no rule fires, then use the noTrueChildStrategy to decide on a result. This option requires
-- that missing values be handled after all rules at the Node have been evaluated.
-- Note: In contrast to lastPrediction, evaluation is carried on instead of stopping immediately
-- upon first discovery of a Node who's predicate value cannot be determined due to missing
-- values.
function tree.None(t, n, v)
return false -- continue
end
-- If a Node's predicate value evaluates to UNKNOWN while traversing the tree, evaluate the
-- attribute defaultChild which gives the child to continue traversing with. Requires the
-- presence of the attribute defaultChild in every non-leaf Node.
function tree.DefaultChild(t, n, v)
--return t.next(v, {n.children[n.def]})
return true
end
-- If a Node's predicate value evaluates to UNKNOWN while traversing the tree, the confidences for
-- each class is calculated from scoring it and each of its sibling Nodes in turn (excluding any
-- siblings whose predicates evaluate to FALSE). The confidences returned for each class from each
-- sibling Node that was scored are weighted by the proportion of the number of records in that Node,
-- then summed to produce a total confidence for each class. The winner is the class with the highest
-- confidence. Note that weightedConfidence should be applied recursively to deal with situations
-- where several predicates within the tree evaluate to UNKNOWN during the scoring of a case.
function tree.WeightedConfidence(t, n, v)
return true
end
-- If a Node's predicate value evaluates to UNKNOWN while traversing the tree, we consider evaluation
-- of the Node's predicate being TRUE and follow this Node. In addition, subsequent Nodes to the
-- initial Node are evaluated as well. This procedure is applied recursively for each Node being
-- evaluated until a leaf Node is reached. All leaf Nodes being reached by this procedure are
-- aggregated such that for each value attribute of such a leaf Node's ScoreDistribution element the
-- corresponding recordCount attribute values are accumulated. The value associated with the highest
-- recordCount accumulated through this procedure is predicted. The basic idea of aggregateNodes is
-- to aggregate all leaf Nodes which may be reached by a record with one or more missing values
-- considering all possible values. Strategy aggregateNodes calculates a virtual Node and predicts a
-- score according to this virtual Node. Requires the presence of attribute recordCount in all
-- ScoreDistribution elements.
function tree.AggregateNodes(t, n, v)
end
-- Checks if the value is missing
function Unknown(v)
return v == nil or v == ''
end
-- Inverted AND operator
function nand(a, b)
return not (a and b)
end
return tree