I am trying to build Hail against Spark 3.4, but I am getting the error below:
/home/hadoop/hail/hail/src/main/scala/is/hail/HailContext.scala:119:21: value implOpMulMatrix_DMD_DVD_eq_DVD is not a member of object breeze.linalg.DenseMatrix
I understand that only Spark 3.3.0 is tested. Are there any pointers on how I can resolve the above error for Spark 3.4.1?
The Scala version is 2.12.15.
breezeVersion is 1.1 (I tried other versions as well, but they did not work).
Thanks in advance.
Hi @Anubhav, it looks like Spark 3.4 included a major version upgrade of Breeze, which, as you’ve noticed, breaks the build. While we can’t guarantee that everything will work until GCP Dataproc upgrades to 3.4 (currently in preview), I’ve managed to get Hail to build with the following patch. Can you try this out and see if it works for you?
Best,
Daniel
diff --git a/hail/build.gradle b/hail/build.gradle
index 1b65904484..d1feb0e578 100644
--- a/hail/build.gradle
+++ b/hail/build.gradle
@@ -40,7 +40,7 @@ tasks.withType(JavaCompile) {
}
project.ext {
- breezeVersion = "1.1"
+ breezeVersion = "2.1.0"
sparkVersion = System.getProperty("spark.version", "3.3.0")
diff --git a/hail/src/main/scala/is/hail/HailContext.scala b/hail/src/main/scala/is/hail/HailContext.scala
index 4e4063378b..4d2f9056a5 100644
--- a/hail/src/main/scala/is/hail/HailContext.scala
+++ b/hail/src/main/scala/is/hail/HailContext.scala
@@ -113,10 +113,10 @@ object HailContext {
{
import breeze.linalg._
- import breeze.linalg.operators.{BinaryRegistry, OpMulMatrix}
+ import breeze.linalg.operators.{BinaryRegistry, HasOps, OpMulMatrix}
implicitly[BinaryRegistry[DenseMatrix[Double], Vector[Double], OpMulMatrix.type, DenseVector[Double]]].register(
- DenseMatrix.implOpMulMatrix_DMD_DVD_eq_DVD)
+ HasOps.impl_OpMulMatrix_DMD_DVD_eq_DVD)
}
theContext = new HailContext(backend, branchingFactor, optimizerIterations)
diff --git a/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala b/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala
index d1a7213246..7d73d55c57 100644
--- a/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala
+++ b/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala
@@ -5,7 +5,7 @@ import is.hail.backend.{ExecuteContext, HailStateManager}
import is.hail.expr.ir.Literal
import is.hail.types.virtual._
import is.hail.utils._
-import org.apache.commons.lang.builder.HashCodeBuilder
+import org.apache.commons.lang3.builder.HashCodeBuilder
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.Row
import org.apache.spark.{Partitioner, SparkContext}
diff --git a/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala b/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala
index 98a4d8a200..d018b8b6a9 100644
--- a/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala
+++ b/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala
@@ -464,7 +464,7 @@ case class ReferenceGenome(name: String, contigs: Array[String], lengths: Map[St
@transient lazy val broadcast: BroadcastValue[ReferenceGenome] = HailContext.backend.broadcast(this)
override def hashCode: Int = {
- import org.apache.commons.lang.builder.HashCodeBuilder
+ import org.apache.commons.lang3.builder.HashCodeBuilder
val b = new HashCodeBuilder()
.append(name)