Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adapter for Scala Grounders (rvacareanu/grounder) #887

Merged
merged 12 commits into from
Apr 30, 2024
1 change: 1 addition & 0 deletions skema/text_reading/scala/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ libraryDependencies ++= {
"com.lihaoyi" %% "requests" % "0.1.8",
"io.cequence" %% "openai-scala-client" % "1.0.0.RC.1",
"org.scalatest" %% "scalatest" % "3.0.9" % Test,
"org.clulab" %% "scala-grounders" % "0.0.35",
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import org.clulab.processors.Processor
import org.clulab.processors.fastnlp.FastNLPProcessor

import scala.collection.JavaConverters._
import scala.io.Source
import org.ml4ai.skema.grounding.scala_grounders.ScalaGroundersAdapter


object GrounderFactory {

Expand Down Expand Up @@ -52,7 +55,11 @@ object GrounderFactory {
new PipelineGrounder(Seq(manualGrounder, grounder))
else
grounder
case "manual" => manualGrounder
case "manual" => manualGrounder
case "scala-grounders" =>
// Similar to `miraembeddings`
val ontologyFilePath = domainConfig.getString("ontologyPath")
ScalaGroundersAdapter.fromFile(groundingConceptsPath=ontologyFilePath)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey, that's Python! ;-)

groundingConceptsPath = ontologyFilePath

case other =>
throw new RuntimeException(s"$other - is not implemented as a grounding engine")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import org.json4s.JsonDSL._
* @param concept instance returned by a grounder implementations
* @param score of the grounding algorithm given to concept
*/
case class GroundingCandidate(concept: GroundingConcept, score: Float) {
case class GroundingCandidate(concept: GroundingConcept, score: Float, details: Option[String] = None) {

def toJValue: JValue = {
("groundingConcept" -> concept.toJValue) ~
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package org.ml4ai.skema.grounding.scala_grounders

import org.clulab.scala_grounders.grounding.GroundingConfig
import org.ml4ai.skema.text_reading.grounding.Grounder
import org.ml4ai.skema.text_reading.grounding.GroundingCandidate
import org.ml4ai.skema.text_reading.grounding.GroundingConcept
import com.typesafe.config.Config
import org.clulab.scala_grounders.grounding.SequentialGrounder
import org.clulab.scala_grounders.model.DKG
import org.clulab.scala_grounders.model.DKGSynonym
import com.typesafe.config.ConfigFactory
import org.clulab.scala_grounders.using
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Recently we've been using the `using` from

"org.scala-lang.modules" %% "scala-collection-compat" % version

often looking like Using.resource() { resource => . It's in the standard library for newer Scala versions.

import org.clulab.scala_grounders.model.DKG
import scala.io.Source


/**
* This class adapts the data definitions from this project to work with scala-grounder's definition
* Concretely, the changes needed are:
* - SKEMA's GroundingConcept to scala-grounder's DKG (available in `fromConceptToDKG`)
* - scala-grounder's DKG to SKEMA's GroundingConcept (available in `fromDKGToConcept`)
* - Create the scala-grounder Grounder (`grounder = SequentialGrounder()`)
* - Changing `groundingCandidates` to call the right method from the scala-grounder side
*
* @param groundingConcepts -> The concepts which we will use to do the grounding
* Every candidate text for grounding (i.e. any text that we
* want to ground) will be grounded on these concepts
* (Note: depending on the implementation, it is possible that
* none of these groundingConcepts candidates are suitable, so
* we might not return anything; however, we will never return
* a concept that is outside this)
*/
class ScalaGroundersAdapter(groundingConcepts: Seq[GroundingConcept]) extends Grounder {
// The SKEMA concepts converted to scala-grounders' DKG representation; evaluated on first use.
lazy val concepts = groundingConcepts.map(concept => fromConceptToDKG(concept))
// The scala-grounders grounder, built from `SequentialGrounder().mkFast` over the converted concepts.
lazy val grounder = {
  val sequential = SequentialGrounder()
  sequential.mkFast(concepts)
}
def groundingCandidates(texts: Seq[String], k: Int): Seq[Seq[GroundingCandidate]] = {
texts.map { text =>
// TODO Maybe provide additional context (useful for NeuralGrounder)
grounder.ground(text, None, concepts, k)
.map { result =>
GroundingCandidate(fromDKGToConcept(result.dkg), result.score, details = Some(result.groundingDetails.grounderName))
}
.force.toSeq
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a clever use of Streams. IntelliJ says that the .toSeq is superfluous.

}
}

/**
 * Converts a SKEMA `GroundingConcept` into the equivalent scala-grounders `DKG`.
 *
 * The id, name and description are passed through unchanged. Each synonym string is
 * wrapped in a `DKGSynonym` whose second argument is `None`; a concept without
 * synonyms yields an empty synonym sequence.
 *
 * @param concept the SKEMA concept to convert
 * @return the `DKG` built from the fields of `concept`
 */
def fromConceptToDKG(concept: GroundingConcept): DKG = {
  val dkgSynonyms = concept.synonyms match {
    case Some(values) => values.map(value => DKGSynonym(value, None))
    case None         => Seq.empty
  }
  DKG(concept.id, concept.name, concept.description, dkgSynonyms)
}

/**
 * Converts a scala-grounders `DKG` back into a SKEMA `GroundingConcept`.
 *
 * The id, name and description are passed through unchanged, the synonyms are reduced
 * to their `value` and wrapped with `Option(...)`, and the final constructor argument
 * is always `None`.
 *
 * @param dkg the scala-grounders entry to convert
 * @return the `GroundingConcept` built from the fields of `dkg`
 */
def fromDKGToConcept(dkg: DKG): GroundingConcept = {
  val synonymValues = dkg.synonyms.map(synonym => synonym.value)
  GroundingConcept(dkg.id, dkg.name, dkg.description, Option(synonymValues), None)
}

}
/**
* Provides alternative ways of creating a `ScalaGroundersAdapter`
*/
object ScalaGroundersAdapter {
/** Builds an adapter directly from SKEMA grounding concepts. */
def apply(groundingConcepts: Seq[GroundingConcept]): ScalaGroundersAdapter = {
  new ScalaGroundersAdapter(groundingConcepts)
}
/**
 * Builds an adapter from scala-grounders `DKG` entries.
 *
 * Each entry becomes a `GroundingConcept` carrying the same id, name and description,
 * its synonym values wrapped with `Option(...)`, and `None` as the final constructor argument.
 */
def fromDkgs(dkgs: Seq[DKG]): ScalaGroundersAdapter = {
  val groundingConcepts = dkgs.map { entry =>
    val synonymValues = entry.synonyms.map(synonym => synonym.value)
    GroundingConcept(entry.id, entry.name, entry.description, Option(synonymValues), None)
  }
  new ScalaGroundersAdapter(groundingConcepts)
}
def fromFile(groundingConceptsPath: String): ScalaGroundersAdapter = {
val concepts = using(Source.fromFile(groundingConceptsPath)) { it =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Source.fromFile without an encoding specified may be a problem on machines where they have Java using an unexpected default encoding. It might be easier to play it safe here and save tracking it down later.

ujson.read(it.mkString).arr.map(it => DKG.fromJson(it))
}
ScalaGroundersAdapter.fromDkgs(concepts)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package org.ml4ai.skema.grounding.scala_grounders

import org.ml4ai.skema.test.Test

import java.nio.{Buffer, ByteBuffer, ByteOrder}

import org.ml4ai.skema.text_reading.grounding.Grounder
import org.ml4ai.skema.text_reading.grounding.GroundingCandidate
import org.ml4ai.skema.text_reading.grounding.GroundingConcept

/**
 * Smoke test for `ScalaGroundersAdapter` over a tiny hand-written set of concepts.
 *
 * Running command:
 *   sbt "testOnly org.ml4ai.skema.grounding.scala_grounders.TestAdapter"
 */
class TestAdapter extends Test {

  behavior of "ScalaGroundersAdapter"

  // Four small concepts; "id2" and "id4" intentionally share the same name and description.
  val gcs = Seq(
    GroundingConcept(
      id = "id1",
      name = "dog",
      description = Some("this is a cute dog"),
      synonyms = None,
      embedding = None
    ),
    GroundingConcept(
      id = "id2",
      name = "cat",
      description = Some("this is a cute cat"),
      synonyms = None,
      embedding = None
    ),
    GroundingConcept(
      id = "id3",
      name = "dog cat",
      description = Some("here we have a dog and a cat"),
      synonyms = None,
      embedding = None
    ),
    GroundingConcept(
      id = "id4",
      name = "cat",
      description = Some("this is a cute cat"),
      synonyms = None,
      embedding = None
    ),
  )

  val sga = new ScalaGroundersAdapter(gcs)

  // Lazy so the grounding call runs inside the test body: a failure in the grounder is then
  // reported as a failed test instead of aborting construction of the whole suite.
  lazy val result = sga.groundingCandidates(Seq("dog"), 10).head

  // Check that the first one is a GroundingCandidate with id1
  it should "ground" in {
    result.foreach(println)
    // Compare through headOption so an empty candidate list produces a readable matcher
    // failure rather than a NoSuchElementException from `.head`.
    result.headOption.map(_.concept.id) should be (Some("id1"))
  }

}
Loading