Hail with Spark 3.4

I am trying to build Hail against Spark 3.4, and I am getting the following error:
/home/hadoop/hail/hail/src/main/scala/is/hail/HailContext.scala:119:21: value implOpMulMatrix_DMD_DVD_eq_DVD is not a member of object breeze.linalg.DenseMatrix

I understand that only Spark 3.3.0 is tested. Are there any pointers on how I can resolve the above error for Spark 3.4.1?
The Scala version is 2.12.15.
The breezeVersion is 1.1 (I tried other versions as well, but they did not work).

Thanks in advance.

Hi @Anubhav, it looks like Spark 3.4 included a major version upgrade of Breeze, which, as you’ve noticed, breaks the build. While we can’t guarantee that everything will work until GCP Dataproc upgrades to 3.4 (currently in preview), I’ve managed to get Hail to build with the following patch. Can you try this out and see if it works for you?

Best,
Daniel

diff --git a/hail/build.gradle b/hail/build.gradle
index 1b65904484..d1feb0e578 100644
--- a/hail/build.gradle
+++ b/hail/build.gradle
@@ -40,7 +40,7 @@ tasks.withType(JavaCompile) {
 }
 
 project.ext {
-    breezeVersion = "1.1"
+    breezeVersion = "2.1.0"
 
     sparkVersion = System.getProperty("spark.version", "3.3.0")
 
diff --git a/hail/src/main/scala/is/hail/HailContext.scala b/hail/src/main/scala/is/hail/HailContext.scala
index 4e4063378b..4d2f9056a5 100644
--- a/hail/src/main/scala/is/hail/HailContext.scala
+++ b/hail/src/main/scala/is/hail/HailContext.scala
@@ -113,10 +113,10 @@ object HailContext {
 
     {
       import breeze.linalg._
-      import breeze.linalg.operators.{BinaryRegistry, OpMulMatrix}
+      import breeze.linalg.operators.{BinaryRegistry, HasOps, OpMulMatrix}
 
       implicitly[BinaryRegistry[DenseMatrix[Double], Vector[Double], OpMulMatrix.type, DenseVector[Double]]].register(
-        DenseMatrix.implOpMulMatrix_DMD_DVD_eq_DVD)
+        HasOps.impl_OpMulMatrix_DMD_DVD_eq_DVD)
     }
 
     theContext = new HailContext(backend, branchingFactor, optimizerIterations)
diff --git a/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala b/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala
index d1a7213246..7d73d55c57 100644
--- a/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala
+++ b/hail/src/main/scala/is/hail/rvd/RVDPartitioner.scala
@@ -5,7 +5,7 @@ import is.hail.backend.{ExecuteContext, HailStateManager}
 import is.hail.expr.ir.Literal
 import is.hail.types.virtual._
 import is.hail.utils._
-import org.apache.commons.lang.builder.HashCodeBuilder
+import org.apache.commons.lang3.builder.HashCodeBuilder
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.sql.Row
 import org.apache.spark.{Partitioner, SparkContext}
diff --git a/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala b/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala
index 98a4d8a200..d018b8b6a9 100644
--- a/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala
+++ b/hail/src/main/scala/is/hail/variant/ReferenceGenome.scala
@@ -464,7 +464,7 @@ case class ReferenceGenome(name: String, contigs: Array[String], lengths: Map[St
   @transient lazy val broadcast: BroadcastValue[ReferenceGenome] = HailContext.backend.broadcast(this)
 
   override def hashCode: Int = {
-    import org.apache.commons.lang.builder.HashCodeBuilder
+    import org.apache.commons.lang3.builder.HashCodeBuilder
 
     val b = new HashCodeBuilder()
       .append(name)