Bug: Hail 0.1 export to Elasticsearch


#1

Launched:
/nethome/juarezespinosoh/hail/spark/spark-2.0.2-bin-hadoop2.7/bin/spark-submit --master local --jars /nethome/juarezespinosoh/hail/spark/hail_repo_point_one/hail/build/libs/hail-all-spark.jar --py-files /nethome/juarezespinosoh/hail/spark/hail_repo_point_one/hail/build/distributions/hail-python.zip test_hail/example3.py

Code:
# Reproduction script (test_hail/example3.py): import a VCF with Hail 0.1,
# run VEP annotation, then export the dataset to a local Elasticsearch node.
import hail
from pprint import pprint
from hail_scripts.utils.elasticsearch_client import ElasticsearchClient

# Client for the local Elasticsearch instance (default REST port).
# Fixed: the original paste used curly Unicode quotes around 'localhost',
# which is a SyntaxError in Python.
client = ElasticsearchClient(
host='localhost',
port=9200,
)
hc = hail.HailContext()
# /var/lib/spark/vep/vep.properties
vds = hc.import_vcf('/sample.vcf')
vds = vds.vep(config="/nethome/juarezespinosoh/pipelines_hail/hail-elasticsearch-pipelines/vep.properties", root='va.vep', block_size=1000)
# NOTE(review): this re-import replaces the VEP-annotated vds built just
# above, so the annotations are discarded before export. Presumably
# unintentional -- confirm whether the export should use the annotated vds.
vds = (
hc
.import_vcf('/sample.vcf')

)

pprint(vds.variant_schema)

vds.summarize().report()
pprint(vds.sample_schema)

# Export fails downstream with EsHadoopIllegalArgumentException (see trace).
client.export_vds_to_elasticsearch2(
vds,
is_split_vds=False,
verbose=True,
)

I got this error:
Error summary: EsHadoopIllegalArgumentException: Unsupported/Unknown Elasticsearch version 6.4.0
Empty
Config Map(es.batch.size.entries -> 5000, es.index.auto.create -> true, es.write.operation -> index, es.port -> 9200, es.nodes -> localhost)
Traceback (most recent call last):
File "/nethome/juarezespinosoh/pipelines_hail/hail-elasticsearch-pipelines/test_hail/example3.py", line 29, in
verbose=True,
File "/nethome/juarezespinosoh/pipelines_hail/hail-elasticsearch-pipelines/hail_scripts/utils/elasticsearch_client.py", line 181, in export_vds_to_elasticsearch2
verbose=verbose)
File "/nethome/juarezespinosoh/pipelines_hail/hail-elasticsearch-pipelines/hail_scripts/utils/elasticsearch_client.py", line 500, in export_kt_to_elasticsearch
kt.export_elasticsearch(self._host, int(self._port), index_name, index_type_name, block_size, config=elasticsearch_config)
File "", line 2, in export_elasticsearch
File "/nethome/juarezespinosoh/hail/spark/hail_repo_point_one/hail/build/distributions/hail-python.zip/hail/java.py", line 121, in handle_py4j
hail.java.FatalError: EsHadoopIllegalArgumentException: Unsupported/Unknown Elasticsearch version 6.4.0

Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 6.0 failed 1 times, most recent failure: Lost task 0.0 in stage 6.0 (TID 6, localhost): org.elasticsearch.hadoop.EsHadoopIllegalArgumentException: Cannot detect ES version - typically this happens if the network/Elasticsearch cluster is not accessible or when targeting a WAN/Cloud instance without the proper setting 'es.nodes.wan.only'
at org.elasticsearch.hadoop.rest.InitializationUtils.discoverEsVersion(InitializationUtils.java:247)
at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:545)
at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:58)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.elasticsearch.hadoop.EsHadoopIllegalArgumentException: Unsupported/Unknown Elasticsearch version 6.4.0
at org.elasticsearch.hadoop.util.EsMajorVersion.parse(EsMajorVersion.java:79)
at org.elasticsearch.hadoop.rest.RestClient.remoteEsVersion(RestClient.java:613)
at org.elasticsearch.hadoop.rest.InitializationUtils.discoverEsVersion(InitializationUtils.java:240)
… 10 more

Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1873)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1886)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1906)
at org.elasticsearch.spark.rdd.EsSpark$.doSaveToEs(EsSpark.scala:102)
at org.elasticsearch.spark.rdd.EsSpark$.saveToEs(EsSpark.scala:76)
at org.elasticsearch.spark.rdd.EsSpark$.saveToEs(EsSpark.scala:73)
at is.hail.io.ElasticsearchConnector$.export(ElasticsearchConnector.scala:33)
at is.hail.keytable.KeyTable.exportElasticsearch(KeyTable.scala:751)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)org.elasticsearch.hadoop.EsHadoopIllegalArgumentException: Cannot detect ES version - typically this happens if the network/Elasticsearch cluster is not accessible or when targeting a WAN/Cloud instance without the proper setting 'es.nodes.wan.only'
at org.elasticsearch.hadoop.rest.InitializationUtils.discoverEsVersion(InitializationUtils.java:247)
at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:545)
at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:58)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)org.elasticsearch.hadoop.EsHadoopIllegalArgumentException: Unsupported/Unknown Elasticsearch version 6.4.0
at org.elasticsearch.hadoop.util.EsMajorVersion.parse(EsMajorVersion.java:79)
at org.elasticsearch.hadoop.rest.RestClient.remoteEsVersion(RestClient.java:613)
at org.elasticsearch.hadoop.rest.InitializationUtils.discoverEsVersion(InitializationUtils.java:240)
at org.elasticsearch.hadoop.rest.RestService.createWriter(RestService.java:545)
at org.elasticsearch.spark.rdd.EsRDDWriter.write(EsRDDWriter.scala:58)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.elasticsearch.spark.rdd.EsSpark$$anonfun$doSaveToEs$1.apply(EsSpark.scala:102)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

Hail version: 0.1-74bf1eb
Error summary: EsHadoopIllegalArgumentException: Unsupported/Unknown Elasticsearch version 6.4.0


#2

I don't think this is a Hail problem -- I think it comes from the elasticsearch-hadoop connector bundled with that Hail build being too old to recognize Elasticsearch 6.4.0 (the version check in EsMajorVersion.parse is what throws).