dataset.go
package execute

import (
	uuid "github.com/gofrs/uuid"

	"github.com/influxdata/flux"
	"github.com/influxdata/flux/codes"
	"github.com/influxdata/flux/internal/errors"
	"github.com/influxdata/flux/plan"
)
// Dataset represents the set of data produced by a transformation.
type Dataset interface {
	Node

	RetractTable(key flux.GroupKey) error
	UpdateProcessingTime(t Time) error
	UpdateWatermark(mark Time) error
	Finish(error)

	SetTriggerSpec(t plan.TriggerSpec)
}
// DataCache holds all working data for a transformation.
type DataCache interface {
	Table(flux.GroupKey) (flux.Table, error)

	ForEach(func(flux.GroupKey))
	ForEachWithContext(func(flux.GroupKey, Trigger, TableContext))

	DiscardTable(flux.GroupKey)
	ExpireTable(flux.GroupKey)

	SetTriggerSpec(t plan.TriggerSpec)
}
// AccumulationMode specifies how a dataset treats the data cached for a
// group key once that data has been processed.
type AccumulationMode int

const (
	// DiscardingMode will discard the data associated with a group key
	// after it has been processed.
	DiscardingMode AccumulationMode = iota

	// AccumulatingMode will retain the data associated with a group key
	// after it has been processed. If it has already sent a table with
	// that group key to a downstream transformation, it will signal
	// to that transformation that the previous table should be retracted.
	//
	// This is not implemented at the moment.
	AccumulatingMode
)
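// In practice the mode only matters when a trigger fires: triggerTable below
// forwards the cached table downstream and then, in DiscardingMode, drops it
// from the cache, while AccumulatingMode currently returns an Unimplemented
// error.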
// DatasetID uniquely identifies a dataset within the execution graph.
type DatasetID uuid.UUID

func (id DatasetID) String() string {
	return uuid.UUID(id).String()
}

// ZeroDatasetID is the zero value for a DatasetID.
var ZeroDatasetID DatasetID

// IsZero reports whether id is the zero dataset ID.
func (id DatasetID) IsZero() bool {
	return id == ZeroDatasetID
}

// DatasetIDFromNodeID deterministically derives a DatasetID from a plan
// node ID using a version 5 (name-based) UUID.
func DatasetIDFromNodeID(id plan.NodeID) DatasetID {
	return DatasetID(uuid.NewV5(uuid.UUID{}, string(id)))
}
type dataset struct {
	id DatasetID
	ts TransformationSet

	accMode AccumulationMode

	watermark      Time
	processingTime Time

	cache DataCache
}

// NewDataset constructs a dataset that stores its working data in cache and
// forwards triggered tables to downstream transformations.
func NewDataset(id DatasetID, accMode AccumulationMode, cache DataCache) *dataset {
	return &dataset{
		id:      id,
		accMode: accMode,
		cache:   cache,
	}
}
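// A minimal wiring sketch (cache, next, and spec are assumed to be supplied
// by the caller; the names are illustrative only):
//
//	d := NewDataset(DatasetIDFromNodeID("example"), DiscardingMode, cache)
//	d.AddTransformation(next) // triggered tables flow to next
//	d.SetTriggerSpec(spec)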
func (d *dataset) AddTransformation(t Transformation) {
	d.ts = append(d.ts, t)
}

func (d *dataset) SetTriggerSpec(spec plan.TriggerSpec) {
	d.cache.SetTriggerSpec(spec)
}

func (d *dataset) UpdateWatermark(mark Time) error {
	d.watermark = mark
	if err := d.evalTriggers(); err != nil {
		return err
	}
	return d.ts.UpdateWatermark(d.id, mark)
}

func (d *dataset) UpdateProcessingTime(time Time) error {
	d.processingTime = time
	if err := d.evalTriggers(); err != nil {
		return err
	}
	return d.ts.UpdateProcessingTime(d.id, time)
}
// evalTriggers checks the trigger for every cached group key, processing any
// table whose trigger has fired and expiring any whose trigger is finished.
// It stops doing work after the first error.
func (d *dataset) evalTriggers() (err error) {
	d.cache.ForEachWithContext(func(key flux.GroupKey, trigger Trigger, bc TableContext) {
		if err != nil {
			// Skip the rest once we have encountered an error.
			return
		}

		c := TriggerContext{
			Table:                 bc,
			Watermark:             d.watermark,
			CurrentProcessingTime: d.processingTime,
		}

		if trigger.Triggered(c) {
			err = d.triggerTable(key)
		}
		if trigger.Finished() {
			d.expireTable(key)
		}
	})
	return err
}
func (d *dataset) triggerTable(key flux.GroupKey) error {
	b, err := d.cache.Table(key)
	if err != nil {
		return err
	}
	switch d.accMode {
	case DiscardingMode:
		if err := d.ts.Process(d.id, b); err != nil {
			return err
		}
		d.cache.DiscardTable(key)
	case AccumulatingMode:
		return errors.New(codes.Unimplemented)
	}
	return nil
}
func (d *dataset) expireTable(key flux.GroupKey) {
	d.cache.ExpireTable(key)
}

func (d *dataset) RetractTable(key flux.GroupKey) error {
	d.cache.DiscardTable(key)
	return d.ts.RetractTable(d.id, key)
}
func (d *dataset) Finish(err error) {
	if err == nil {
		// Only trigger tables if we are not finishing because of an error.
		d.cache.ForEach(func(bk flux.GroupKey) {
			if err != nil {
				return
			}
			err = d.triggerTable(bk)
			d.cache.ExpireTable(bk)
		})
	}
	d.ts.Finish(d.id, err)
}
// PassthroughDataset is a Dataset that passes the processed data straight
// through to the next Transformation.
type PassthroughDataset struct {
	id DatasetID
	ts TransformationSet
}

// NewPassthroughDataset constructs a new PassthroughDataset.
func NewPassthroughDataset(id DatasetID) *PassthroughDataset {
	return &PassthroughDataset{id: id}
}

func (d *PassthroughDataset) AddTransformation(t Transformation) {
	d.ts = append(d.ts, t)
}

func (d *PassthroughDataset) Process(tbl flux.Table) error {
	return d.ts.Process(d.id, tbl)
}

func (d *PassthroughDataset) RetractTable(key flux.GroupKey) error {
	return d.ts.RetractTable(d.id, key)
}

func (d *PassthroughDataset) UpdateProcessingTime(t Time) error {
	return d.ts.UpdateProcessingTime(d.id, t)
}

func (d *PassthroughDataset) UpdateWatermark(mark Time) error {
	return d.ts.UpdateWatermark(d.id, mark)
}

func (d *PassthroughDataset) Finish(err error) {
	d.ts.Finish(d.id, err)
}

// SetTriggerSpec is a no-op: a passthrough dataset caches nothing, so there
// are no triggers to configure.
func (d *PassthroughDataset) SetTriggerSpec(t plan.TriggerSpec) {
}
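// A usage sketch for PassthroughDataset (next and tbl are assumed to be
// supplied by the caller; the node ID is illustrative only):
//
//	d := NewPassthroughDataset(DatasetIDFromNodeID("passthrough"))
//	d.AddTransformation(next)
//	if err := d.Process(tbl); err != nil {
//		// handle the downstream transformation's error
//	}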