1
+ /* eslint-disable @typescript-eslint/no-non-null-assertion */
1
2
// Copyright (c) 2024 Cloudflare, Inc.
2
3
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
3
4
// https://opensource.org/licenses/Apache-2.0
@@ -57,26 +58,39 @@ enum Format {
57
58
JSON_STREAM = 'json_stream' , // jsonl
58
59
}
59
60
60
- export class PipelineTransformImpl extends entrypoints . WorkerEntrypoint {
61
+ type PipelineBatchMetadata = {
62
+ pipelineId : string ;
63
+ pipelineName : string ;
64
+ } ;
65
+
66
+ type PipelineRecord = Record < string , unknown > ;
67
+
68
+ export class PipelineTransformImpl <
69
+ I extends PipelineRecord ,
70
+ O extends PipelineRecord ,
71
+ > extends entrypoints . WorkerEntrypoint {
61
72
#batch?: Batch ;
62
73
#initalized: boolean = false ;
63
74
64
- // stub overridden on the sub class
75
+ // stub overridden on the subclass
65
76
// eslint-disable-next-line @typescript-eslint/require-await
66
- public async transformJson ( _data : object [ ] ) : Promise < object [ ] > {
77
+ public async run (
78
+ _records : I [ ] ,
79
+ _metadata : PipelineBatchMetadata
80
+ ) : Promise < O [ ] > {
67
81
throw new Error ( 'should be implemented by parent' ) ;
68
82
}
69
83
70
- // called by the dispatcher which then calls the subclass methods
84
+ // called by the dispatcher to validate that run is properly implemented by the subclass
71
85
// @ts -expect-error thinks ping is never used
72
86
private _ping ( ) : Promise < void > {
73
87
// making sure the function was overridden by an implementing subclass
74
- if ( this . transformJson !== PipelineTransformImpl . prototype . transformJson ) {
88
+ if ( this . run !== PipelineTransformImpl . prototype . run ) {
75
89
return Promise . resolve ( ) ;
76
90
} else {
77
91
return Promise . reject (
78
92
new Error (
79
- 'the transformJson method must be overridden by the PipelineTransform subclass'
93
+ 'the run method must be overridden by the PipelineTransformationEntrypoint subclass'
80
94
)
81
95
) ;
82
96
}
@@ -85,55 +99,59 @@ export class PipelineTransformImpl extends entrypoints.WorkerEntrypoint {
85
99
// called by the dispatcher which then calls the subclass methods
86
100
// the reason this is typescript private and not javascript private is that this must be
87
101
// able to be called by the dispatcher but should not be called by the class implementer
88
- // @ts -expect-error _transform is called by rpc
89
- private async _transform ( batch : Batch ) : Promise < JsonStream > {
102
+ // @ts -expect-error _run is called by rpc
103
+ private async _run (
104
+ batch : Batch ,
105
+ metadata : PipelineBatchMetadata
106
+ ) : Promise < JsonStream > {
90
107
if ( this . #initalized) {
91
108
throw new Error ( 'pipeline entrypoint has already been initialized' ) ;
92
109
}
93
110
94
111
this . #batch = batch ;
95
112
this . #initalized = true ;
96
113
97
- switch ( this . #batch . format ) {
98
- // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
99
- case Format . JSON_STREAM : {
100
- const data = await this . #readJsonStream ( ) ;
101
- const transformed = await this . transformJson ( data ) ;
102
- return this . #sendJson ( transformed ) ;
103
- }
104
- default :
105
- throw new Error ( 'unsupported batch format' ) ;
114
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
115
+ if ( this . #batch . format === Format . JSON_STREAM ) {
116
+ const records : I [ ] = await this . #readJsonStream ( ) ;
117
+ const transformed = await this . run ( records , metadata ) ;
118
+ return this . #sendJson ( transformed ) ;
119
+ } else {
120
+ throw new Error (
121
+ 'PipelineTransformationEntrypoint run supports only the JSON_STREAM batch format'
122
+ ) ;
106
123
}
107
124
}
108
125
109
- async #readJsonStream( ) : Promise < object [ ] > {
126
+ async #readJsonStream( ) : Promise < I [ ] > {
110
127
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
111
128
if ( this . #batch! . format !== Format . JSON_STREAM ) {
129
+ // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
112
130
throw new Error ( `expected JSON_STREAM not ${ this . #batch! . format } ` ) ;
113
131
}
114
132
115
133
const batch = this . #batch! . data as ReadableStream < Uint8Array > ;
116
134
const decoder = batch . pipeThrough ( new TextDecoderStream ( ) ) ;
117
135
118
- const data : object [ ] = [ ] ;
136
+ const data : I [ ] = [ ] ;
119
137
for await ( const line of readLines ( decoder ) ) {
120
- data . push ( JSON . parse ( line ) as object ) ;
138
+ data . push ( JSON . parse ( line ) as I ) ;
121
139
}
122
140
123
141
return data ;
124
142
}
125
143
126
- #sendJson( data : object [ ] ) : JsonStream {
127
- if ( ! ( data instanceof Array ) ) {
128
- throw new Error ( 'transformJson must return an array of objects ' ) ;
144
+ #sendJson( records : O [ ] ) : JsonStream {
145
+ if ( ! ( records instanceof Array ) ) {
146
+ throw new Error ( 'transformations must return an array of PipelineRecord ' ) ;
129
147
}
130
148
131
149
let written = 0 ;
132
150
const encoder = new TextEncoder ( ) ;
133
151
const readable = new ReadableStream < Uint8Array > ( {
134
152
start ( controller ) : void {
135
- for ( const obj of data ) {
136
- const encoded = encoder . encode ( `${ JSON . stringify ( obj ) } \n` ) ;
153
+ for ( const record of records ) {
154
+ const encoded = encoder . encode ( `${ JSON . stringify ( record ) } \n` ) ;
137
155
written += encoded . length ;
138
156
controller . enqueue ( encoded ) ;
139
157
}
@@ -149,7 +167,7 @@ export class PipelineTransformImpl extends entrypoints.WorkerEntrypoint {
149
167
format : Format . JSON_STREAM ,
150
168
size : {
151
169
bytes : written ,
152
- rows : data . length ,
170
+ rows : records . length ,
153
171
} ,
154
172
data : readable ,
155
173
} ;
0 commit comments