Skip to content

Commit

Permalink
Merge pull request #814 from clulab/kwalcock/debugNumbers
Browse files Browse the repository at this point in the history
Fail better on percentages
  • Loading branch information
kwalcock authored Jan 22, 2025
2 parents b19b571 + 64e93a1 commit 97c9cb9
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 42 deletions.
4 changes: 2 additions & 2 deletions main/src/main/resources/org/clulab/numeric/number.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ rules:
# numbers 10-19
(ten | eleven | twelve | dozen | thirteen | fourteen | fifteen | sixteen | seventeen | eighteen | nineteen |
# or numbers 20-99
twenty @WordDigit? | thirty @WordDigit? | fourty @WordDigit? | fifty @WordDigit? |
twenty @WordDigit? | thirty @WordDigit? | forty @WordDigit? | fifty @WordDigit? |
sixty @WordDigit? | seventy @WordDigit? | eighty @WordDigit? | ninety @WordDigit? |
"twenty-one" | "twenty-two" | "twenty-three" | "twenty-four" | "twenty-five" | "twenty-six" | "twenty-seven" | "twenty-eight" | "twenty-nine" |
"thirty-one" | "thirty-two" | "thirty-three" | "thirty-four" | "thirty-five" | "thirty-six" | "thirty-seven" | "thirty-eight" | "thirty-nine" |
Expand Down Expand Up @@ -91,7 +91,7 @@ rules:
# numbers 10-19
ten | eleven | twelve | dozen | thirteen | fourteen | fifteen | sixteen | seventeen | eighteen | nineteen |
# or numbers 20-99
twenty @WordDigit? | thirty @WordDigit? | fourty @WordDigit? | fifty @WordDigit? |
twenty @WordDigit? | thirty @WordDigit? | forty @WordDigit? | fifty @WordDigit? |
sixty @WordDigit? | seventy @WordDigit? | eighty @WordDigit? | ninety @WordDigit? |
"twenty-one" | "twenty-two" | "twenty-three" | "twenty-four" | "twenty-five" | "twenty-six" | "twenty-seven" | "twenty-eight" | "twenty-nine" |
"thirty-one" | "thirty-two" | "thirty-three" | "thirty-four" | "thirty-five" | "thirty-six" | "thirty-seven" | "thirty-eight" | "thirty-nine" |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ class NumericActions(seasonNormalizer: SeasonNormalizer, unitNormalizer: UnitNor
/** Converts a sequence of mentions to new types given the converter function */
private def convert(mentions: Seq[Mention], converter: Mention => Mention, converterName: String): Seq[Mention] = {
val convertedMentions = new ArrayBuffer[Mention]()
for(m <- mentions) {
for (m <- mentions) {
try {
convertedMentions += converter(m )
convertedMentions += converter(m)
} catch {
case e: Exception =>
// sometimes these conversions fail, mainly on broken texts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,38 @@ import org.clulab.odin.{Attachment, TextBoundMention}
import org.clulab.processors.Document
import org.clulab.struct.Interval

class MeasurementMention ( labels: Seq[String],
tokenInterval: Interval,
sentence: Int,
document: Document,
keep: Boolean,
foundBy: String,
attachments: Set[Attachment],
val value: Option[Seq[String]],
val unit: Option[Seq[String]],
val fromRange: Boolean,
unitNormalizer: BaseUnitNormalizer = NullUnitNormalizer)
extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm {

override def neNorm: String = {
class MeasurementMention(
labels: Seq[String],
tokenInterval: Interval,
sentence: Int,
document: Document,
keep: Boolean,
foundBy: String,
attachments: Set[Attachment],
val value: Option[Seq[String]],
val unit: Option[Seq[String]],
val fromRange: Boolean,
unitNormalizer: BaseUnitNormalizer = NullUnitNormalizer
) extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm {
val cachedNeNorm: String = calcNeNorm()

override def neNorm: String = cachedNeNorm

def calcNeNorm(): String = {
assert(value.nonEmpty)
assert(unit.nonEmpty)

val numValueOpt =
if(fromRange) {
Some(value.get.head)
} else {
NumberParser.parse(value.get)
}
if(numValueOpt.isEmpty)
throw new RuntimeException(s"ERROR: could not parse the number [${value.mkString(" ")}] in the measurement ${raw.mkString(" ")}!")
val unitNorm = unitNormalizer.norm(unit.get)

s"${numValueOpt.get} $unitNorm"
if (fromRange) Some(value.get.head)
else NumberParser.parse(value.get)

numValueOpt
.map { numValue =>
val unitNorm = unitNormalizer.norm(unit.get)

s"$numValue $unitNorm"
}
.getOrElse(throw new RuntimeException(s"ERROR: could not parse the number [${value.mkString(" ")}] in the measurement ${raw.mkString(" ")}!"))
}

override def neLabel: String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,30 @@ import org.clulab.odin.{Attachment, TextBoundMention}
import org.clulab.processors.Document
import org.clulab.struct.Interval

class PercentageMention ( labels: Seq[String],
tokenInterval: Interval,
sentence: Int,
document: Document,
keep: Boolean,
foundBy: String,
attachments: Set[Attachment],
val value: Option[Seq[String]])
extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm {
class PercentageMention(
labels: Seq[String],
tokenInterval: Interval,
sentence: Int,
document: Document,
keep: Boolean,
foundBy: String,
attachments: Set[Attachment],
val value: Option[Seq[String]]
) extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm {
val cachedNeNorm: String = calcNeNorm()

override def neNorm: String = {
override def neNorm: String = cachedNeNorm

// A PercentageMention is only valid if its neNorm can be calculated.
// Calculate it during construction, so an invalid mention fails immediately, and cache the value for reuse.
def calcNeNorm(): String = {
assert(value.nonEmpty)

val numValueOpt = NumberParser.parse(value.get)
if(numValueOpt.isEmpty)
throw new RuntimeException(s"ERROR: could not parse the number [${value.mkString(" ")}] in the percentage ${raw.mkString(" ")}!")

s"${numValueOpt.get} %"
numValueOpt
.map(numValue => s"$numValue %")
.getOrElse(throw new RuntimeException(s"ERROR: could not parse the number [${value.mkString(" ")}] in the percentage ${raw.mkString(" ")}!"))
}

override def neLabel: String = {
Expand Down
26 changes: 26 additions & 0 deletions main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.clulab.numeric

import org.clulab.processors.clu.CluProcessor
import org.clulab.utils.Test

/**
  * Smoke tests for numeric mentions: each test annotates one input text with a
  * [[CluProcessor]] and passes as long as annotation completes without throwing.
  * No assertions are made about the mentions that are produced.
  */
class TestNumericMentions extends Test {
  // A single processor instance is shared by both tests below.
  val processor = new CluProcessor()

  behavior of "MeasurementMention"

  it should "not throw an exception" in {
    // NOTE(review): the input contains the misspelling "fourty"; presumably it is the
    // text that originally triggered the failure, so confirm before "correcting" it.
    val licenceClause = "(c) a retention licence, fourty five days; and ."

    // The annotated document is the block's result; only successful completion matters.
    processor.annotate(licenceClause)
  }

  behavior of "PercentageMention"

  it should "not throw an exception" in {
    // Footnote-style text mixing bare numbers with URLs that contain percent-encoded
    // characters such as "%20".
    val footnoteText = "pdf 75 Some of the major sectoral plans include the national energy policy, transport policy, forest and wildlife policy, national environmental sanitation strategy etc. 76 https://www.thegasconsortium.com/documents/GMP-Final-Jun16.pdf 77 http://www.energycom.gov.gh/files/Renewable-Energy-Masterplan-February-2019.pdf 78 http://energycom.gov.gh/files/SE4ALL-GHANA%20ACTION%20PLAN.pdf 79 http://www.energycom.gov.gh/files/Ghana%20Integrated%20Power%20System%20Master%20Plan%20_Volume%202.pdf Page | 80 In 2017, the Ministry of Energy started a comprehensive review of the National Energy Policy, which is still ongoing."

    // The annotated document is the block's result; only successful completion matters.
    processor.annotate(footnoteText)
  }
}
2 changes: 1 addition & 1 deletion version.sbt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version in ThisBuild := "8.5.4"
version in ThisBuild := "8.5.5-SNAPSHOT"

0 comments on commit 97c9cb9

Please sign in to comment.