I have been running SKAT just fine with Hail version 0.2.20-dd6c996e7db5 on Databricks, but when I tried a certain set of weights (from EigenPC), it threw an error. The only difference between this run and several others that succeeded was the set of weights used. Thanks for your help; here are the command and the error:
skat_table = hl.skat(key_expr=mt.gene,
                     weight_expr=mt.eigenpc,
                     y=hl.float64(mt.phe),
                     x=mt.GT.n_alt_alleles(),
                     covariates=[1,
                                 mt.scores[0], mt.scores[1], mt.scores[3],
                                 mt.scores[4], mt.scores[5], mt.scores[6],
                                 mt.scores[7], mt.scores[8], mt.scores[9],
                                 hl.int32(mt.age), hl.int32(mt.sex)],
                     accuracy=1e-11,
                     iterations=1000000000)

skat_table.filter(skat_table.p_value < 0.0005).show(100)
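Before the error, here is the sanity check I am running on the new weights, since they are the only thing that changed (a minimal sketch against the same mt as above; the hunch that NaN, negative, or missing weights might make the eigendecomposition fail is only my guess):

# Hypothetical checks on the EigenPC weight field (same `mt` as in the skat call).
print(mt.aggregate_rows(hl.agg.stats(mt.eigenpc)))                       # mean/stdev/min/max
print(mt.aggregate_rows(hl.agg.count_where(hl.is_nan(mt.eigenpc))))      # NaN weights
print(mt.aggregate_rows(hl.agg.count_where(mt.eigenpc < 0)))             # negative weights
print(mt.aggregate_rows(hl.agg.count_where(hl.is_missing(mt.eigenpc))))  # missing weights

And here is the error: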
FatalError: SparkException: Job aborted due to stage failure: Task 228 in stage 2.0 failed 4 times, most recent failure: Lost task 228.3 in stage 2.0 (TID 2064, 10.25.97.164, executor 5): breeze.linalg.NotConvergedException:
FatalError Traceback (most recent call last)
/databricks/python/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
/databricks/python/lib/python3.6/site-packages/IPython/lib/pretty.py in pretty(self, obj)
398 if cls is not object
399 and callable(cls.__dict__.get('__repr__')):
--> 400 return _repr_pprint(obj, self, cycle)
401
402 return _default_pprint(obj, self, cycle)
/databricks/python/lib/python3.6/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
693 """A pprint that just redirects to the normal repr function."""
694 # Find newlines and replace them with p.break_()
--> 695 output = repr(obj)
696 for idx,output_line in enumerate(output.splitlines()):
697 if idx:
/databricks/spark/python/hail/table.py in __repr__(self)
1244
1245 def __repr__(self):
-> 1246 return self.__str__()
1247
1248 def data(self):
/databricks/spark/python/hail/table.py in __str__(self)
1241
1242 def __str__(self):
-> 1243 return self._ascii_str()
1244
1245 def __repr__(self):
/databricks/spark/python/hail/table.py in _ascii_str(self)
1268 return s
1269
-> 1270 rows, has_more, dtype = self.data()
1271 fields = list(dtype)
1272 trunc_fields = [trunc(f) for f in fields]
/databricks/spark/python/hail/table.py in data(self)
1251 row_dtype = t.row.dtype
1252 t = t.select(**{k: Table._hl_format(v, self.truncate) for (k, v) in t.row.items()})
-> 1253 rows, has_more = t._take_n(self.n)
1254 self._data = (rows, has_more, row_dtype)
1255 return self._data
/databricks/spark/python/hail/table.py in _take_n(self, n)
1372 has_more = False
1373 else:
-> 1374 rows = self.take(n + 1)
1375 has_more = len(rows) > n
1376 rows = rows[:n]
in take(self, n, _localize)
/databricks/spark/python/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
583 def wrapper(__original_func, *args, **kwargs):
584 args, kwargs = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585 return __original_func(*args, **kwargs)
586
587 return wrapper
/databricks/spark/python/hail/table.py in take(self, n, _localize)
2011 """
2012
-> 2013 return self.head(n).collect(_localize)
2014
2015 @typecheck_method(n=int)
in collect(self, _localize)
/databricks/spark/python/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
583 def wrapper(__original_func, *args, **kwargs):
584 args, kwargs = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585 return __original_func(*args, **kwargs)
586
587 return wrapper
/databricks/spark/python/hail/table.py in collect(self, _localize)
1825 e = construct_expr(ir, hl.tarray(self.row.dtype))
1826 if _localize:
-> 1827 return Env.backend().execute(e._ir)
1828 else:
1829 return e
/databricks/spark/python/hail/backend/backend.py in execute(self, ir, timed)
106
107 def execute(self, ir, timed=False):
--> 108 result = json.loads(Env.hc()._jhc.backend().executeJSON(self._to_java_ir(ir)))
109 value = ir.typ._from_json(result['value'])
110 timings = result['timings']
/databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/databricks/spark/python/hail/utils/java.py in deco(*args, **kwargs)
223 raise FatalError('%s\n\nJava stack trace:\n%s\n'
224 'Hail version: %s\n'
--> 225 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
226 except pyspark.sql.utils.CapturedException as e:
227 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: SparkException: Job aborted due to stage failure: Task 228 in stage 2.0 failed 4 times, most recent failure: Lost task 228.3 in stage 2.0 (TID 2064, 10.25.97.164, executor 5): breeze.linalg.NotConvergedException:
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 228 in stage 2.0 failed 4 times, most recent failure: Lost task 228.3 in stage 2.0 (TID 2064, 10.25.97.164, executor 5): breeze.linalg.NotConvergedException:
at is.hail.stats.eigSymD$.doeigSymD(eigSymD.scala:69)
at is.hail.stats.eigSymD$justEigenvalues$eigSymD_DM_Impl$.apply(eigSymD.scala:32)
at is.hail.stats.eigSymD$justEigenvalues$eigSymD_DM_Impl$.apply(eigSymD.scala:30)
at breeze.generic.UFunc$class.apply(UFunc.scala:48)
at is.hail.stats.eigSymD$justEigenvalues$.apply(eigSymD.scala:29)
at is.hail.methods.Skat$.computePval(Skat.scala:141)
at is.hail.methods.Skat$$anonfun$linearSkat$1$1.apply(Skat.scala:250)
at is.hail.methods.Skat$$anonfun$linearSkat$1$1.apply(Skat.scala:242)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at is.hail.rvd.RVDPartitionInfo$$anonfun$apply$1.apply(RVDPartitionInfo.scala:67)
at is.hail.rvd.RVDPartitionInfo$$anonfun$apply$1.apply(RVDPartitionInfo.scala:38)
at is.hail.utils.package$.using(package.scala:596)
at is.hail.rvd.RVDPartitionInfo$.apply(RVDPartitionInfo.scala:38)
at is.hail.rvd.RVD$$anonfun$29.apply(RVD.scala:1246)
at is.hail.rvd.RVD$$anonfun$29.apply(RVD.scala:1244)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitionsWithIndex$1$$anonfun$apply$32.apply(ContextRDD.scala:448)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitionsWithIndex$1$$anonfun$apply$32.apply(ContextRDD.scala:448)
at is.hail.sparkextras.ContextRDD$$anonfun$run$1$$anonfun$apply$8.apply(ContextRDD.scala:218)
at is.hail.sparkextras.ContextRDD$$anonfun$run$1$$anonfun$apply$8.apply(ContextRDD.scala:218)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at scala.collection.AbstractIterator.to(Iterator.scala:1334)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1334)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1334)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:961)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:961)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2284)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2284)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:139)
at org.apache.spark.scheduler.Task.run(Task.scala:112)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$13.apply(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1495)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:503)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:2355)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2343)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2342)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2342)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:1096)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:1096)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1096)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2574)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2522)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2510)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:893)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2243)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2265)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2284)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2309)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:961)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:379)
at org.apache.spark.rdd.RDD.collect(RDD.scala:960)
at is.hail.sparkextras.ContextRDD.collect(ContextRDD.scala:222)
at is.hail.rvd.RVD$.getKeyInfo(RVD.scala:1250)
at is.hail.rvd.RVD$.makeCoercer(RVD.scala:1314)
at is.hail.rvd.RVD$.coerce(RVD.scala:1272)
at is.hail.rvd.RVD$.coerce(RVD.scala:1258)
at is.hail.expr.ir.TableValue$.apply(TableValue.scala:41)
at is.hail.methods.Skat.execute(Skat.scala:318)
at is.hail.expr.ir.functions.WrappedMatrixToTableFunction.execute(RelationalFunctions.scala:52)
at is.hail.expr.ir.TableToTableApply.execute(TableIR.scala:1688)
at is.hail.expr.ir.TableFilter.execute(TableIR.scala:432)
at is.hail.expr.ir.TableOrderBy.execute(TableIR.scala:1563)
at is.hail.expr.ir.TableHead.execute(TableIR.scala:465)
at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:933)
at is.hail.expr.ir.Interpret$.apply(Interpret.scala:725)
at is.hail.expr.ir.Interpret$.apply(Interpret.scala:91)
at is.hail.expr.ir.Interpret$.apply(Interpret.scala:61)
at is.hail.expr.ir.InterpretNonCompilable$$anonfun$5.apply(InterpretNonCompilable.scala:16)
at is.hail.expr.ir.InterpretNonCompilable$$anonfun$5.apply(InterpretNonCompilable.scala:16)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
at is.hail.expr.ir.InterpretNonCompilable$.apply(InterpretNonCompilable.scala:16)
at is.hail.expr.ir.CompileAndEvaluate$$anonfun$2.apply(CompileAndEvaluate.scala:37)
at is.hail.expr.ir.CompileAndEvaluate$$anonfun$2.apply(CompileAndEvaluate.scala:37)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:24)
at is.hail.expr.ir.CompileAndEvaluate$.apply(CompileAndEvaluate.scala:37)
at is.hail.backend.Backend$$anonfun$execute$1.apply(Backend.scala:86)
at is.hail.backend.Backend$$anonfun$execute$1.apply(Backend.scala:86)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:8)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:7)
at is.hail.utils.package$.using(package.scala:596)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:7)
at is.hail.backend.Backend.execute(Backend.scala:86)
at is.hail.backend.Backend.executeJSON(Backend.scala:92)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:295)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.20-dd6c996e7db5
Error summary: SparkException: Job aborted due to stage failure: Task 228 in stage 2.0 failed 4 times, most recent failure: Lost task 228.3 in stage 2.0 (TID 2064, 10.25.97.164, executor 5): breeze.linalg.NotConvergedException: