I have the following code:
session = boto3.session.Session()
region = session.region_name
credentials = session.get_credentials()
#sts = session.client('sts')
#current_identity = sts.get_caller_identity()
#print(f"current AWS identity identifier:\t{current_identity.get('Arn')}")
#awsauth = AssumeRoleAWS4Auth(credentials, region, 'es')
#credentials = awsauth.get_credentials()
logger.info('******************************')
logger.info(f'credentials: {credentials}')
logger.info(credentials.access_key)
logger.info(credentials.secret_key)
logger.info(credentials.token)
logger.info('******************************')
sc = hl.spark_context()
sc._jsc.hadoopConfiguration().set("fs.s3.awsAccessKeyId", credentials.access_key)
sc._jsc.hadoopConfiguration().set("fs.s3.awsSecretAccessKey", credentials.secret_key)
sc._jsc.hadoopConfiguration().set("fs.s3.session.token", credentials.token)
sc._jsc.hadoopConfiguration().set("fs.s3.impl", "org.apache.hadoop.fs.s3.S3FileSystem")
return hl.import_vcf([vcf_file for vcf_file in self.source_paths],
reference_genome='GRCh' + self.genome_version, skip_invalid_loci=True, contig_recoding=recode,
force_bgz=True, min_partitions=500, array_elements_required=self.array_elements_required)
I verified that access_key, secret_key and token are printed out (I suppose they are correct since I am getting them from boto3). AWS uses roles also. The code above results in an error:
File "/home/hadoop/.local/lib/python3.7/site-packages/hail/backend/py4j_backend.py", line 31, in deco
raise fatal_error_from_java_error_triplet(deepest, full, error_id) from None
hail.utils.java.FatalError: S3ServiceException: S3 Error Message.
Java stack trace:
org.apache.hadoop.fs.s3.S3Exception: org.jets3t.service.S3ServiceException: S3 Error Message. -- ResponseCode: 403, ResponseStatus: Forbidden, XML Error Message: <?xml version="1.0" encoding="UTF-8"?><Error><Code>InvalidAccessKeyId</Code><Message>The AWS Access Key Id you provided does not exist in our records.</Message><AWSAccessKeyId>ASIAVEUCPANIVIXR3BSO</AWSAccessKeyId><RequestId>1ZWY15WJ0MP8KJDD</RequestId><HostId>LAgmEmT+7CuBCp+nBfLFQrL7ups+HkUPqoIcARpNY8u9PbR3Kt1bkmjAgzp5bkNDpbul1yYEYy8=</HostId></Error>
at org.apache.hadoop.fs.s3.Jets3tFileSystemStore.get(Jets3tFileSystemStore.java:175)
at org.apache.hadoop.fs.s3.Jets3tFileSystemStore.retrieveINode(Jets3tFileSystemStore.java:221)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)
at com.sun.proxy.$Proxy13.retrieveINode(Unknown Source)
at org.apache.hadoop.fs.s3.S3FileSystem.getFileStatus(S3FileSystem.java:340)
at org.apache.hadoop.fs.Globber.getFileStatus(Globber.java:65)
at org.apache.hadoop.fs.Globber.doGlob(Globber.java:281)
at org.apache.hadoop.fs.Globber.glob(Globber.java:149)
at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:2016)
at is.hail.io.fs.HadoopFS.glob(HadoopFS.scala:157)
at is.hail.io.fs.HadoopFS.$anonfun$globAll$1(HadoopFS.scala:136)
at is.hail.io.fs.HadoopFS.$anonfun$globAll$1$adapted(HadoopFS.scala:135)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
at scala.collection.TraversableOnce.to(TraversableOnce.scala:315)
at scala.collection.TraversableOnce.to$(TraversableOnce.scala:313)
at scala.collection.AbstractIterator.to(Iterator.scala:1429)
at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:307)
at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:307)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1429)
at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:294)
at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:288)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1429)
at is.hail.io.fs.HadoopFS.globAll(HadoopFS.scala:141)
at is.hail.io.vcf.MatrixVCFReader$.apply(LoadVCF.scala:1570)
at is.hail.io.vcf.MatrixVCFReader$.fromJValue(LoadVCF.scala:1666)
at is.hail.expr.ir.MatrixReader$.fromJson(MatrixIR.scala:89)
at is.hail.expr.ir.IRParser$.matrix_ir_1(Parser.scala:1720)
at is.hail.expr.ir.IRParser$.$anonfun$matrix_ir$1(Parser.scala:1646)
at is.hail.utils.StackSafe$More.advance(StackSafe.scala:64)
at is.hail.utils.StackSafe$.run(StackSafe.scala:16)
at is.hail.utils.StackSafe$StackFrame.run(StackSafe.scala:32)
at is.hail.expr.ir.IRParser$.$anonfun$parse_matrix_ir$1(Parser.scala:1986)
at is.hail.expr.ir.IRParser$.parse(Parser.scala:1973)
at is.hail.expr.ir.IRParser$.parse_matrix_ir(Parser.scala:1986)
at is.hail.backend.spark.SparkBackend.$anonfun$parse_matrix_ir$2(SparkBackend.scala:689)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:69)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:69)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:58)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:308)
at is.hail.backend.spark.SparkBackend.$anonfun$parse_matrix_ir$1(SparkBackend.scala:688)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.utils.ExecutionTimer$.logTime(ExecutionTimer.scala:59)
at is.hail.backend.spark.SparkBackend.parse_matrix_ir(SparkBackend.scala:687)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:750)
org.jets3t.service.S3ServiceException: S3 Error Message.
at org.jets3t.service.S3Service.getObject(S3Service.java:1379)
at org.apache.hadoop.fs.s3.Jets3tFileSystemStore.get(Jets3tFileSystemStore.java:163)
at org.apache.hadoop.fs.s3.Jets3tFileSystemStore.retrieveINode(Jets3tFileSystemStore.java:221)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)
at com.sun.proxy.$Proxy13.retrieveINode(Unknown Source)
at org.apache.hadoop.fs.s3.S3FileSystem.getFileStatus(S3FileSystem.java:340)
at org.apache.hadoop.fs.Globber.getFileStatus(Globber.java:65)
at org.apache.hadoop.fs.Globber.doGlob(Globber.java:281)
at org.apache.hadoop.fs.Globber.glob(Globber.java:149)
at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:2016)
at is.hail.io.fs.HadoopFS.glob(HadoopFS.scala:157)
at is.hail.io.fs.HadoopFS.$anonfun$globAll$1(HadoopFS.scala:136)
at is.hail.io.fs.HadoopFS.$anonfun$globAll$1$adapted(HadoopFS.scala:135)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:484)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:490)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
at scala.collection.TraversableOnce.to(TraversableOnce.scala:315)
at scala.collection.TraversableOnce.to$(TraversableOnce.scala:313)
at scala.collection.AbstractIterator.to(Iterator.scala:1429)
at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:307)
at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:307)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1429)
at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:294)
at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:288)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1429)
at is.hail.io.fs.HadoopFS.globAll(HadoopFS.scala:141)
at is.hail.io.vcf.MatrixVCFReader$.apply(LoadVCF.scala:1570)
at is.hail.io.vcf.MatrixVCFReader$.fromJValue(LoadVCF.scala:1666)
at is.hail.expr.ir.MatrixReader$.fromJson(MatrixIR.scala:89)
at is.hail.expr.ir.IRParser$.matrix_ir_1(Parser.scala:1720)
at is.hail.expr.ir.IRParser$.$anonfun$matrix_ir$1(Parser.scala:1646)
at is.hail.utils.StackSafe$More.advance(StackSafe.scala:64)
at is.hail.utils.StackSafe$.run(StackSafe.scala:16)
at is.hail.utils.StackSafe$StackFrame.run(StackSafe.scala:32)
at is.hail.expr.ir.IRParser$.$anonfun$parse_matrix_ir$1(Parser.scala:1986)
at is.hail.expr.ir.IRParser$.parse(Parser.scala:1973)
at is.hail.expr.ir.IRParser$.parse_matrix_ir(Parser.scala:1986)
at is.hail.backend.spark.SparkBackend.$anonfun$parse_matrix_ir$2(SparkBackend.scala:689)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:69)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:69)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:58)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:308)
at is.hail.backend.spark.SparkBackend.$anonfun$parse_matrix_ir$1(SparkBackend.scala:688)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.utils.ExecutionTimer$.logTime(ExecutionTimer.scala:59)
at is.hail.backend.spark.SparkBackend.parse_matrix_ir(SparkBackend.scala:687)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:750)
Hail version: 0.2.93-d77cdf0157c9
Error summary: S3ServiceException: S3 Error Message.
python 3.7.10
hail 0.2.93
spark 3.1.2
emr 6.5.0
Could you help with resolving it?