Hi,
I tried this in spark-shell 2.2.0 with Scala 2.11.8. The shell was started with two jars, spark-indexedrdd-0.3.jar and part_2.10-0.1.jar, on the classpath. I pasted your example into it and got the following error messages:
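For reference, the shell was launched with something along these lines (the exact flags and jar paths may have differed slightly):

spark-shell --jars spark-indexedrdd-0.3.jar,part_2.10-0.1.jar   # illustrative invocation; adjust jar paths as needed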
====================================================================
scala> import edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD
import edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD
scala> import edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD._
import edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD._
scala>
scala> // Create an RDD of key-value pairs with Long keys.
scala> val rdd = sc.parallelize((1 to 1000000).map(x => (x.toLong, 0)))
rdd: org.apache.spark.rdd.RDD[(Long, Int)] = ParallelCollectionRDD[0] at parallelize at <console>:28
scala> // Construct an IndexedRDD from the pairs, hash-partitioning and indexing
scala> // the entries.
scala> val indexed = IndexedRDD(rdd).cache()
indexed: edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD[Long,Int] = IndexedRDD[3] at RDD at IndexedRDD.scala:37
scala>
scala> // Perform a point update.
scala> val indexed2 = indexed.put(1234L, 10873).cache()
17/07/16 16:51:26 WARN util.ClosureCleaner: Expected a closure; got edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$MultiputZipper
indexed2: edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD[Long,Int] = IndexedRDD[7] at RDD at IndexedRDD.scala:37
scala> // Perform a point lookup. Note that the original IndexedRDD remains
scala> // unmodified.
scala> indexed2.get(1234L) // => Some(10873)
java.lang.NoSuchMethodError: org.apache.spark.SparkContext.runJob(Lorg/apache/spark/rdd/RDD;Lscala/Function2;Lscala/collection/Seq;ZLscala/reflect/ClassTag;)Ljava/lang/Object;
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.multiget(IndexedRDD.scala:83)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.get(IndexedRDD.scala:76)
... 50 elided
scala> indexed.get(1234L) // => Some(0)
java.lang.NoSuchMethodError: org.apache.spark.SparkContext.runJob(Lorg/apache/spark/rdd/RDD;Lscala/Function2;Lscala/collection/Seq;ZLscala/reflect/ClassTag;)Ljava/lang/Object;
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.multiget(IndexedRDD.scala:83)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.get(IndexedRDD.scala:76)
... 50 elided
scala>
scala> // Efficiently join derived IndexedRDD with original.
scala> val indexed3 = indexed.innerJoin(indexed2) { (id, a, b) => b }.filter(_._2 != 0)
17/07/16 16:51:28 WARN util.ClosureCleaner: Expected a closure; got edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$InnerJoinZipper
indexed3: edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD[Long,Int] = IndexedRDD[11] at RDD at IndexedRDD.scala:37
scala> indexed3.collect // => Array((1234L, 10873))
[Stage 1:> (0 + 0) / 2]17/07/16 16:51:29 WARN scheduler.TaskSetManager: Stage 0 contains a task of very large size (9770 KB). The maximum recommended task size is 100 KB.
[Stage 0:=========> (1 + 1) / 2][Stage 1:> (0 + 2) / 2]17/07/16 16:51:32 WARN storage.BlockManager: Putting block rdd_2_0 failed due to an exception
17/07/16 16:51:32 WARN storage.BlockManager: Putting block rdd_2_1 failed due to an exception
17/07/16 16:51:32 WARN storage.BlockManager: Block rdd_2_0 could not be removed as it was not found on disk or in memory
17/07/16 16:51:32 WARN storage.BlockManager: Block rdd_2_1 could not be removed as it was not found on disk or in memory
17/07/16 16:51:32 ERROR executor.Executor: Exception in task 1.0 in stage 2.0 (TID 5)
java.lang.NoClassDefFoundError: org/apache/spark/Logging
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:760)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.compute(IndexedRDD.scala:72)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.Logging
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 41 more
17/07/16 16:51:32 ERROR executor.Executor: Exception in task 0.0 in stage 2.0 (TID 4)
java.lang.NoClassDefFoundError: edu/berkeley/cs/amplab/spark/indexedrdd/impl/PARTPartition
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.compute(IndexedRDD.scala:72)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
17/07/16 16:51:32 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 2.0 (TID 4, localhost, executor driver): java.lang.NoClassDefFoundError: edu/berkeley/cs/amplab/spark/indexedrdd/impl/PARTPartition
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.compute(IndexedRDD.scala:72)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
17/07/16 16:51:32 ERROR scheduler.TaskSetManager: Task 0 in stage 2.0 failed 1 times; aborting job
17/07/16 16:51:32 WARN scheduler.TaskSetManager: Lost task 1.0 in stage 2.0 (TID 5, localhost, executor driver): java.lang.NoClassDefFoundError: org/apache/spark/Logging
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:760)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.compute(IndexedRDD.scala:72)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.Logging
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 41 more
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 1 times, most recent failure: Lost task 0.0 in stage 2.0 (TID 4, localhost, executor driver): java.lang.NoClassDefFoundError: edu/berkeley/cs/amplab/spark/indexedrdd/impl/PARTPartition
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.compute(IndexedRDD.scala:72)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2043)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2062)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2087)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.collect(RDD.scala:935)
... 50 elided
Caused by: java.lang.NoClassDefFoundError: edu/berkeley/cs/amplab/spark/indexedrdd/impl/PARTPartition
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$$anonfun$5.apply(IndexedRDD.scala:422)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.compute(IndexedRDD.scala:72)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
scala>
scala> // Perform insertions and deletions.
scala> val indexed4 = indexed2.put(-100L, 111).delete(Array(998L, 999L)).cache()
17/07/16 16:51:33 WARN util.ClosureCleaner: Expected a closure; got edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$MultiputZipper
17/07/16 16:51:33 WARN util.ClosureCleaner: Expected a closure; got edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD$DeleteZipper
indexed4: edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD[Long,Int] = IndexedRDD[19] at RDD at IndexedRDD.scala:37
scala> indexed2.get(-100L) // => None
java.lang.NoSuchMethodError: org.apache.spark.SparkContext.runJob(Lorg/apache/spark/rdd/RDD;Lscala/Function2;Lscala/collection/Seq;ZLscala/reflect/ClassTag;)Ljava/lang/Object;
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.multiget(IndexedRDD.scala:83)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.get(IndexedRDD.scala:76)
... 50 elided
scala> indexed4.get(-100L) // => Some(111)
java.lang.NoSuchMethodError: org.apache.spark.SparkContext.runJob(Lorg/apache/spark/rdd/RDD;Lscala/Function2;Lscala/collection/Seq;ZLscala/reflect/ClassTag;)Ljava/lang/Object;
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.multiget(IndexedRDD.scala:83)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.get(IndexedRDD.scala:76)
... 50 elided
scala> indexed2.get(999L) // => Some(0)
java.lang.NoSuchMethodError: org.apache.spark.SparkContext.runJob(Lorg/apache/spark/rdd/RDD;Lscala/Function2;Lscala/collection/Seq;ZLscala/reflect/ClassTag;)Ljava/lang/Object;
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.multiget(IndexedRDD.scala:83)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.get(IndexedRDD.scala:76)
... 50 elided
scala> indexed4.get(999L) // => None
java.lang.NoSuchMethodError: org.apache.spark.SparkContext.runJob(Lorg/apache/spark/rdd/RDD;Lscala/Function2;Lscala/collection/Seq;ZLscala/reflect/ClassTag;)Ljava/lang/Object;
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.multiget(IndexedRDD.scala:83)
at edu.berkeley.cs.amplab.spark.indexedrdd.IndexedRDD.get(IndexedRDD.scala:76)
... 50 elided
Should I use an earlier version, or do something else?
Thanks,
Henry