This repository has been archived by the owner on Sep 16, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpipelines.go
228 lines (187 loc) · 5.57 KB
/
pipelines.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
package pipelines
import (
"context"
"fmt"
"log"
"sync"
)
// Processor is one stage of a pipeline: it exposes the channel it reads input
// from, a function that handles a single event, and an exit handler.
type Processor interface {
	// Source returns the channel the stage receives its input events on.
	Source() chan interface{}
	// Process handles one event and returns the value to pass on, or an error.
	Process(interface{}) (interface{}, error)
	// Exit is the stage's exit handler.
	Exit()
}
// Pipeline holds every registered Processor together with its graph node, plus
// the process groups that Run started and Shutdown stops.
type Pipeline struct {
	// processes maps each registered Processor to its graph metadata.
	processes map[Processor]*ProcessNode
	// groups holds the process groups in the order Run created them (one per
	// graph depth); Shutdown cancels and waits on them in that same order.
	groups []*processGroup
}
// New returns an empty Pipeline with its process registry allocated.
// Always build a Pipeline through New: the zero value has a nil registry map,
// and Process stores new nodes into that map.
func New() *Pipeline {
	return &Pipeline{
		processes: make(map[Processor]*ProcessNode),
	}
}
// Process returns the *ProcessNode registered for source. The first time a
// Processor is seen, a node bound to it and to this Pipeline is created,
// stored in the registry, and returned; later calls return that same node.
func (p *Pipeline) Process(source Processor) *ProcessNode {
	if existing, found := p.processes[source]; found {
		return existing
	}

	// First sighting of this Processor: register a fresh node for it.
	created := &ProcessNode{pipeline: p, proc: source}
	p.processes[source] = created
	return created
}
// Processes returns one ProcessNode per given Processor, in argument order.
// Each node is obtained through Process, so Processors that are not yet part
// of the Pipeline get a new node created for them.
func (p *Pipeline) Processes(sources ...Processor) ProcessNodes {
	nodes := make(ProcessNodes, len(sources))
	for i, src := range sources {
		nodes[i] = p.Process(src)
	}
	return nodes
}
// Graph walks the pipeline from every registered Processor and returns the
// Processor -> ProcessNode registry with node depths filled in.
// The graph representation is used to:
//  1. Maintain the relationship between a Processor and the Processors that
//     consume its output.
//  2. Shut every Processor down gracefully.
//
// The registry is returned alongside the error when a walk fails (a cycle was
// found); the walk stops at the first failure.
func (p *Pipeline) Graph() (map[Processor]*ProcessNode, error) {
	for root := range p.processes {
		// Every root starts at depth 0 with its own, empty visited set.
		if err := p.addGraphNode(root, 0, map[Processor]bool{}); err != nil {
			return p.processes, err
		}
	}
	return p.processes, nil
}
// Run starts every registered Processor, each in its own goroutine.
// Processors are bucketed into process groups by their depth in the pipeline
// graph; one group is created per depth and the groups are stored in depth
// order so Shutdown can stop them in that order.
//
// An error is returned when:
//   - the pipeline has already been started (groups from an earlier Run exist),
//   - the pipeline graph contains a cycle (reported by Graph), or
//   - no Processor has been registered.
func (p *Pipeline) Run() error {
	// Refuse a second start. It would launch a duplicate goroutine for every
	// Processor; on shutdown both copies run runProc's cancellation path, so
	// Exit is invoked twice and the second close of a shared Source channel
	// panics.
	if len(p.groups) > 0 {
		return fmt.Errorf("pipeline has already been started")
	}

	graph, err := p.Graph()
	if err != nil {
		return err
	}
	if len(graph) == 0 {
		return fmt.Errorf("no processes to run")
	}

	// Find the deepest node so there is exactly one bucket per depth.
	maxDepth := 0
	for _, node := range graph {
		if node.depth > maxDepth {
			maxDepth = node.depth
		}
	}

	// Bucket the nodes by depth; the bucket index is the node's depth.
	nodeGroups := make([]ProcessNodes, maxDepth+1)
	for _, node := range graph {
		nodeGroups[node.depth] = append(nodeGroups[node.depth], node)
	}

	for _, nodes := range nodeGroups {
		pg := newProcessGroup(context.Background())
		// Remember the group so Shutdown can cancel and wait on it.
		p.groups = append(p.groups, pg)

		// Register all goroutines with the WaitGroup before starting any of
		// them, so a wait cannot slip through between two launches.
		pg.WG.Add(len(nodes))
		for _, node := range nodes {
			go runProc(pg.Ctx, pg.WG, node.proc, node.consumers)
		}
	}
	return nil
}
// Shutdown gracefully stops a running pipeline one process group at a time,
// in the order the groups were started: root processes (depth 0) first, then
// their consumers, depth by depth. Each group is cancelled and fully waited
// on before the next one is touched.
//
// Calling Shutdown on a pipeline that is not running only logs a message.
// Once every group has stopped, the stored groups are dropped so the pipeline
// reports itself as not running again.
func (p *Pipeline) Shutdown() {
	if len(p.groups) == 0 {
		log.Println("Pipeline is not running")
		return
	}

	// Cancel and wait on each group in order.
	for i, pg := range p.groups {
		log.Printf("Shutting down depth %d processes", i)
		pg.Cancel()
		// NOTE(review): there is no timeout here; a process that never
		// returns blocks Shutdown indefinitely.
		pg.WG.Wait()
		log.Printf("All depth %d processes shutdown", i)
	}

	// Forget the finished groups. Keeping them would make a repeated Shutdown
	// walk already-cancelled groups instead of reporting "not running", and a
	// later Run would append onto stale groups.
	p.groups = nil
	log.Println("Shutdown pipeline")
}
// addGraphNode performs a depth-first walk starting at proc and records, on
// each reached ProcessNode, the greatest depth at which it was encountered.
//
// visited holds the Processors on the path from the walk's root down to (but
// not including) proc; finding proc in it means the path loops back on itself
// and a cycle error is returned. proc is added to visited, and every consumer
// is walked with its own copy of the set, so sibling branches do not see each
// other's nodes and a node reachable through several paths is not mistaken
// for a cycle.
func (p *Pipeline) addGraphNode(proc Processor, depth int, visited map[Processor]bool) error {
	if _, onPath := visited[proc]; onPath {
		return fmt.Errorf("Cycle found for %v: %v %v", proc, p.processes, visited)
	}
	visited[proc] = true

	// Keep only the deepest position seen for this node.
	node := p.Process(proc)
	if node.depth < depth {
		node.depth = depth
	}

	for _, consumer := range node.consumers {
		// Give each branch a private snapshot of the current path.
		branch := make(map[Processor]bool, len(visited))
		for seen, flag := range visited {
			branch[seen] = flag
		}
		if err := p.addGraphNode(consumer, depth+1, branch); err != nil {
			return err
		}
	}
	return nil
}
// runProc drives a single Processor until ctx is cancelled.
//
// While running, every event received on the Processor's Source is passed to
// Process. Errors are logged and the event is dropped; a nil result is
// silently filtered out; any other result is sent, in order, to the Source
// channel of every consumer (a blocking send).
//
// On cancellation the Processor's Exit handler is called and its Source is
// closed. A Source that was already closed by someone else is tolerated, both
// while running (the loop simply stops reading from it) and at shutdown.
//
// wg.Done is called when the function returns.
func runProc(ctx context.Context, wg *sync.WaitGroup, p Processor, consumers []Processor) {
	defer wg.Done()

	// Resolve all channels once, before entering the select loop.
	source := p.Source()
	consumerChans := make([]chan interface{}, len(consumers))
	for i, c := range consumers {
		consumerChans[i] = c.Source()
	}

	for {
		select {
		case e, ok := <-source:
			if !ok {
				// The source was closed elsewhere. A nil channel is never
				// ready, so from now on only cancellation wakes the loop.
				source = nil
				continue
			}

			out, err := p.Process(e)
			if err != nil {
				// Log and skip the event; one bad event does not stop the stage.
				log.Printf("Error processing %v: %v\n", e, err)
				continue
			}
			// Implicitly filter nil results.
			if out == nil {
				continue
			}
			for _, c := range consumerChans {
				c <- out
			}

		case <-ctx.Done():
			log.Println("Received cancellation signal")
			p.Exit()
			log.Println("Exited")
			if source != nil {
				func() {
					// The source may have been closed elsewhere without this
					// loop having observed it yet (select picks at random
					// when both cases are ready). Closing it again would
					// panic, so absorb exactly that panic.
					defer func() { _ = recover() }()
					close(source)
				}()
			}
			log.Println("Closed source channel")
			return
		}
	}
}