@@ -204,14 +204,18 @@ def __init__(self, key_ser, val_ser):
204204 self .key_ser = key_ser
205205 self .val_ser = val_ser
206206
207- def load_stream (self , stream ):
def prepare_keys_values(self, stream):
    """
    Read `stream` through both serializers and yield (keys, vals) batches.

    Each side is loaded with its serializer's
    _load_stream_without_unbatching(); the two resulting streams are
    walked in lockstep. An item coming from a serializer that is not a
    BatchedSerializer is wrapped in a one-element list, so callers can
    always iterate over both members of the yielded tuple.
    """
    key_batches = self.key_ser._load_stream_without_unbatching(stream)
    val_batches = self.val_ser._load_stream_without_unbatching(stream)
    wrap_keys = not isinstance(self.key_ser, BatchedSerializer)
    wrap_vals = not isinstance(self.val_ser, BatchedSerializer)
    for key_item, val_item in izip(key_batches, val_batches):
        # Unbatched serializers hand back single objects; normalize them
        # to lists so both sides have the same "batch" shape.
        if wrap_keys:
            key_item = [key_item]
        if wrap_vals:
            val_item = [val_item]
        yield (key_item, val_item)
216+
def load_stream(self, stream):
    """
    Yield every (key, value) combination for each pair of batches.

    For each (keys, vals) tuple produced by prepare_keys_values(), the
    Cartesian product of the two batches is emitted one pair at a time.
    """
    for batch in self.prepare_keys_values(stream):
        key_batch, val_batch = batch
        for combo in product(key_batch, val_batch):
            yield combo
217221
@@ -224,6 +228,29 @@ def __str__(self):
224228 (str (self .key_ser ), str (self .val_ser ))
225229
226230
class PairDeserializer(CartesianDeserializer):
    """
    Deserializes the JavaRDD zip() of two PythonRDDs.

    The stream is read through the inherited prepare_keys_values(), which
    yields one (keys, vals) tuple of batches at a time; the i-th key of a
    batch is paired with the i-th value of the matching batch.
    """

    def __init__(self, key_ser, val_ser):
        """
        :param key_ser: serializer used to read the key side of each pair
        :param val_ser: serializer used to read the value side of each pair
        """
        self.key_ser = key_ser
        self.val_ser = val_ser

    def load_stream(self, stream):
        """
        Yield (key, value) tuples read from `stream`.

        :raises ValueError: if a key batch and its matching value batch
            hold a different number of items.
        """
        for (keys, vals) in self.prepare_keys_values(stream):
            # A batch may be any iterable; materialize the ones that
            # cannot report a length so the two sides can be compared.
            keys = keys if hasattr(keys, '__len__') else list(keys)
            vals = vals if hasattr(vals, '__len__') else list(vals)
            # izip() stops at the shorter input, which would silently
            # drop the unmatched records; fail loudly instead.
            if len(keys) != len(vals):
                raise ValueError(
                    "Can not deserialize pairs with different number of "
                    "items in batches: (%d, %d)" % (len(keys), len(vals)))
            for pair in izip(keys, vals):
                yield pair

    def __eq__(self, other):
        """Equal when `other` is a PairDeserializer with equal serializers."""
        return isinstance(other, PairDeserializer) and \
            self.key_ser == other.key_ser and self.val_ser == other.val_ser

    def __str__(self):
        """Return "PairDeserializer<key_ser, val_ser>"."""
        return "PairDeserializer<%s, %s>" % \
            (str(self.key_ser), str(self.val_ser))
253+
227254class NoOpSerializer (FramedSerializer ):
228255
229256 def loads (self , obj ): return obj
0 commit comments