diff --git a/main/src/main/resources/org/clulab/numeric/number.yml b/main/src/main/resources/org/clulab/numeric/number.yml index 3909a24de..55ef5d768 100644 --- a/main/src/main/resources/org/clulab/numeric/number.yml +++ b/main/src/main/resources/org/clulab/numeric/number.yml @@ -27,7 +27,7 @@ rules: # numbers 10-19 (ten | eleven | twelve | dozen | thirteen | fourteen | fifteen | sixteen | seventeen | eighteen | nineteen | # or numbers 20-99 - twenty @WordDigit? | thirty @WordDigit? | fourty @WordDigit? | fifty @WordDigit? | + twenty @WordDigit? | thirty @WordDigit? | forty @WordDigit? | fifty @WordDigit? | sixty @WordDigit? | seventy @WordDigit? | eighty @WordDigit? | ninety @WordDigit? | "twenty-one" | "twenty-two" | "twenty-three" | "twenty-four" | "twenty-five" | "twenty-six" | "twenty-seven" | "twenty-eight" | "twenty-nine" | "thirty-one" | "thirty-two" | "thirty-three" | "thirty-four" | "thirty-five" | "thirty-six" | "thirty-seven" | "thirty-eight" | "thirty-nine" | @@ -91,7 +91,7 @@ rules: # numbers 10-19 ten | eleven | twelve | dozen | thirteen | fourteen | fifteen | sixteen | seventeen | eighteen | nineteen | # or numbers 20-99 - twenty @WordDigit? | thirty @WordDigit? | fourty @WordDigit? | fifty @WordDigit? | + twenty @WordDigit? | thirty @WordDigit? | forty @WordDigit? | fifty @WordDigit? | sixty @WordDigit? | seventy @WordDigit? | eighty @WordDigit? | ninety @WordDigit? 
| "twenty-one" | "twenty-two" | "twenty-three" | "twenty-four" | "twenty-five" | "twenty-six" | "twenty-seven" | "twenty-eight" | "twenty-nine" | "thirty-one" | "thirty-two" | "thirty-three" | "thirty-four" | "thirty-five" | "thirty-six" | "thirty-seven" | "thirty-eight" | "thirty-nine" | diff --git a/main/src/main/scala/org/clulab/numeric/actions/NumericActions.scala b/main/src/main/scala/org/clulab/numeric/actions/NumericActions.scala index a28a7e11e..bb9cfa6d9 100644 --- a/main/src/main/scala/org/clulab/numeric/actions/NumericActions.scala +++ b/main/src/main/scala/org/clulab/numeric/actions/NumericActions.scala @@ -16,9 +16,9 @@ class NumericActions(seasonNormalizer: SeasonNormalizer, unitNormalizer: UnitNor /** Converts a sequence of mentions to new types given the converter function */ private def convert(mentions: Seq[Mention], converter: Mention => Mention, converterName: String): Seq[Mention] = { val convertedMentions = new ArrayBuffer[Mention]() - for(m <- mentions) { + for (m <- mentions) { try { - convertedMentions += converter(m ) + convertedMentions += converter(m) } catch { case e: Exception => // sometimes these conversions fail, mainly on broken texts diff --git a/main/src/main/scala/org/clulab/numeric/mentions/MeasurementMention.scala b/main/src/main/scala/org/clulab/numeric/mentions/MeasurementMention.scala index d9e6feee0..560ccd199 100644 --- a/main/src/main/scala/org/clulab/numeric/mentions/MeasurementMention.scala +++ b/main/src/main/scala/org/clulab/numeric/mentions/MeasurementMention.scala @@ -5,34 +5,38 @@ import org.clulab.odin.{Attachment, TextBoundMention} import org.clulab.processors.Document import org.clulab.struct.Interval -class MeasurementMention ( labels: Seq[String], - tokenInterval: Interval, - sentence: Int, - document: Document, - keep: Boolean, - foundBy: String, - attachments: Set[Attachment], - val value: Option[Seq[String]], - val unit: Option[Seq[String]], - val fromRange: Boolean, - unitNormalizer: BaseUnitNormalizer = 
NullUnitNormalizer) - extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm { - - override def neNorm: String = { +class MeasurementMention( + labels: Seq[String], + tokenInterval: Interval, + sentence: Int, + document: Document, + keep: Boolean, + foundBy: String, + attachments: Set[Attachment], + val value: Option[Seq[String]], + val unit: Option[Seq[String]], + val fromRange: Boolean, + unitNormalizer: BaseUnitNormalizer = NullUnitNormalizer +) extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm { + val cachedNeNorm: String = calcNeNorm() + + override def neNorm: String = cachedNeNorm + + def calcNeNorm(): String = { assert(value.nonEmpty) assert(unit.nonEmpty) val numValueOpt = - if(fromRange) { - Some(value.get.head) - } else { - NumberParser.parse(value.get) - } - if(numValueOpt.isEmpty) - throw new RuntimeException(s"ERROR: could not parse the number [${value.mkString(" ")}] in the measurement ${raw.mkString(" ")}!") - val unitNorm = unitNormalizer.norm(unit.get) - - s"${numValueOpt.get} $unitNorm" + if (fromRange) Some(value.get.head) + else NumberParser.parse(value.get) + + numValueOpt + .map { numValue => + val unitNorm = unitNormalizer.norm(unit.get) + + s"$numValue $unitNorm" + } + .getOrElse(throw new RuntimeException(s"ERROR: could not parse the number [${value.getOrElse(Nil).mkString(" ")}] in the measurement ${raw.mkString(" ")}!")) } override def neLabel: String = { diff --git a/main/src/main/scala/org/clulab/numeric/mentions/PercentageMention.scala b/main/src/main/scala/org/clulab/numeric/mentions/PercentageMention.scala index 6e2057d69..669d54c3c 100644 --- a/main/src/main/scala/org/clulab/numeric/mentions/PercentageMention.scala +++ b/main/src/main/scala/org/clulab/numeric/mentions/PercentageMention.scala @@ -5,24 +5,30 @@ import org.clulab.odin.{Attachment, TextBoundMention} import org.clulab.processors.Document import org.clulab.struct.Interval -class
PercentageMention ( labels: Seq[String], - tokenInterval: Interval, - sentence: Int, - document: Document, - keep: Boolean, - foundBy: String, - attachments: Set[Attachment], - val value: Option[Seq[String]]) - extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm { +class PercentageMention( + labels: Seq[String], + tokenInterval: Interval, + sentence: Int, + document: Document, + keep: Boolean, + foundBy: String, + attachments: Set[Attachment], + val value: Option[Seq[String]] +) extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm { + val cachedNeNorm: String = calcNeNorm() - override def neNorm: String = { + override def neNorm: String = cachedNeNorm + + // This isn't a valid PercentageMention unless the neNorm can be calculated. + // See if it can be in the constructor and cache the calculated value for reuse. + def calcNeNorm(): String = { assert(value.nonEmpty) val numValueOpt = NumberParser.parse(value.get) - if(numValueOpt.isEmpty) - throw new RuntimeException(s"ERROR: could not parse the number [${value.mkString(" ")}] in the percentage ${raw.mkString(" ")}!") - s"${numValueOpt.get} %" + numValueOpt + .map(numValue => s"$numValue %") + .getOrElse(throw new RuntimeException(s"ERROR: could not parse the number [${value.getOrElse(Nil).mkString(" ")}] in the percentage ${raw.mkString(" ")}!")) } override def neLabel: String = { diff --git a/main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala b/main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala new file mode 100644 index 000000000..6b42a611f --- /dev/null +++ b/main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala @@ -0,0 +1,26 @@ +package org.clulab.numeric + +import org.clulab.processors.clu.CluProcessor +import org.clulab.utils.Test + +class TestNumericMentions extends Test { + val processor = new CluProcessor() + + behavior of "MeasurementMention" + + it should "not throw an
exception" in { + val text = "(c) a retention licence, fourty five days; and ." + val document = processor.annotate(text) + + document + } + + behavior of "PercentageMention" + + it should "not throw an exception" in { + val text = "pdf 75 Some of the major sectoral plans include the national energy policy, transport policy, forest and wildlife policy, national environmental sanitation strategy etc. 76 https://www.thegasconsortium.com/documents/GMP-Final-Jun16.pdf 77 http://www.energycom.gov.gh/files/Renewable-Energy-Masterplan-February-2019.pdf 78 http://energycom.gov.gh/files/SE4ALL-GHANA%20ACTION%20PLAN.pdf 79 http://www.energycom.gov.gh/files/Ghana%20Integrated%20Power%20System%20Master%20Plan%20_Volume%202.pdf Page | 80 In 2017, the Ministry of Energy started a comprehensive review of the National Energy Policy, which is still ongoing." + val document = processor.annotate(text) + + document + } +} diff --git a/version.sbt b/version.sbt index f278fda01..4ee80bf1f 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "8.5.4" +version in ThisBuild := "8.5.5-SNAPSHOT"