From b1a44b8931dd5b44658fda0807f5f60f669fb96a Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Sat, 5 Oct 2024 21:23:58 -0700 Subject: [PATCH 1/7] Adjust spacing --- .../scala/org/clulab/numeric/actions/NumericActions.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/scala/org/clulab/numeric/actions/NumericActions.scala b/main/src/main/scala/org/clulab/numeric/actions/NumericActions.scala index a28a7e11e..bb9cfa6d9 100644 --- a/main/src/main/scala/org/clulab/numeric/actions/NumericActions.scala +++ b/main/src/main/scala/org/clulab/numeric/actions/NumericActions.scala @@ -16,9 +16,9 @@ class NumericActions(seasonNormalizer: SeasonNormalizer, unitNormalizer: UnitNor /** Converts a sequence of mentions to new types given the converter function */ private def convert(mentions: Seq[Mention], converter: Mention => Mention, converterName: String): Seq[Mention] = { val convertedMentions = new ArrayBuffer[Mention]() - for(m <- mentions) { + for (m <- mentions) { try { - convertedMentions += converter(m ) + convertedMentions += converter(m) } catch { case e: Exception => // sometimes these conversions fail, mainly on broken texts From 461552dbdbac6e8dbf446bd165ab24f2226d033b Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Sat, 5 Oct 2024 21:33:18 -0700 Subject: [PATCH 2/7] Check for neNorm during construction --- .../numeric/mentions/PercentageMention.scala | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/main/src/main/scala/org/clulab/numeric/mentions/PercentageMention.scala b/main/src/main/scala/org/clulab/numeric/mentions/PercentageMention.scala index 6e2057d69..669d54c3c 100644 --- a/main/src/main/scala/org/clulab/numeric/mentions/PercentageMention.scala +++ b/main/src/main/scala/org/clulab/numeric/mentions/PercentageMention.scala @@ -5,24 +5,30 @@ import org.clulab.odin.{Attachment, TextBoundMention} import org.clulab.processors.Document import org.clulab.struct.Interval -class PercentageMention ( labels: Seq[String], - tokenInterval: Interval, - sentence: Int, - document: Document, - keep: Boolean, - foundBy: String, - attachments: Set[Attachment], - val value: Option[Seq[String]]) - extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm { +class PercentageMention( + labels: Seq[String], + tokenInterval: Interval, + sentence: Int, + document: Document, + keep: Boolean, + foundBy: String, + attachments: Set[Attachment], + val value: Option[Seq[String]] +) extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm { + val cachedNeNorm: String = calcNeNorm() - override def neNorm: String = { + override def neNorm: String = cachedNeNorm + + // This isn't a valid PercentageMention unless the neNorm can be calculated. + // See if it can be in the constructor and cache the calculated value for reuse. + def calcNeNorm(): String = { assert(value.nonEmpty) val numValueOpt = NumberParser.parse(value.get) - if(numValueOpt.isEmpty) - throw new RuntimeException(s"ERROR: could not parse the number [${value.mkString(" ")}] in the percentage ${raw.mkString(" ")}!") - s"${numValueOpt.get} %" + numValueOpt + .map(numValue => s"$numValue %") + .getOrElse(throw new RuntimeException(s"ERROR: could not parse the number [${value.mkString(" ")}] in the percentage ${raw.mkString(" ")}!")) } override def neLabel: String = { From 7256423e5d588100ff59b38862f6b80e5ac82774 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 7 Oct 2024 11:03:59 -0700 Subject: [PATCH 3/7] Add test --- .../clulab/numeric/TestNumericMentions.scala | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala diff --git a/main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala b/main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala new file mode 100644 index 000000000..cf21368e7 --- /dev/null +++ b/main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala @@ -0,0 +1,17 @@ +package org.clulab.numeric + +import org.clulab.processors.clu.CluProcessor +import org.clulab.utils.Test + +class TestNumericMentions extends Test { + val processor = new CluProcessor() + + behavior of "MeasurementMention" + + it should "not throw an exception" in { + val text = "(c) a retention licence, fourty five days; and ." + val document = processor.annotate(text) + + document + } +} From 7f5d98bedc097448a6fc473d0e577e76715b57e3 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 7 Oct 2024 11:04:14 -0700 Subject: [PATCH 4/7] Update version --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index f278fda01..4ee80bf1f 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "8.5.4" +version in ThisBuild := "8.5.5-SNAPSHOT" From 1f07179c20d831a1e911d397ee1f725aa120c1c5 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 7 Oct 2024 11:25:17 -0700 Subject: [PATCH 5/7] Correct fourty to forty --- main/src/main/resources/org/clulab/numeric/number.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/src/main/resources/org/clulab/numeric/number.yml b/main/src/main/resources/org/clulab/numeric/number.yml index 3909a24de..55ef5d768 100644 --- a/main/src/main/resources/org/clulab/numeric/number.yml +++ b/main/src/main/resources/org/clulab/numeric/number.yml @@ -27,7 +27,7 @@ rules: # numbers 10-19 (ten | eleven | twelve | dozen | thirteen | fourteen | fifteen | sixteen | seventeen | eighteen | nineteen | # or numbers 20-99 - twenty @WordDigit? | thirty @WordDigit? | fourty @WordDigit? | fifty @WordDigit? | + twenty @WordDigit? | thirty @WordDigit? | forty @WordDigit? | fifty @WordDigit? | sixty @WordDigit? | seventy @WordDigit? | eighty @WordDigit? | ninety @WordDigit? | "twenty-one" | "twenty-two" | "twenty-three" | "twenty-four" | "twenty-five" | "twenty-six" | "twenty-seven" | "twenty-eight" | "twenty-nine" | "thirty-one" | "thirty-two" | "thirty-three" | "thirty-four" | "thirty-five" | "thirty-six" | "thirty-seven" | "thirty-eight" | "thirty-nine" | @@ -91,7 +91,7 @@ rules: # numbers 10-19 ten | eleven | twelve | dozen | thirteen | fourteen | fifteen | sixteen | seventeen | eighteen | nineteen | # or numbers 20-99 - twenty @WordDigit? | thirty @WordDigit? | fourty @WordDigit? | fifty @WordDigit? | + twenty @WordDigit? | thirty @WordDigit? | forty @WordDigit? | fifty @WordDigit? | sixty @WordDigit? | seventy @WordDigit? | eighty @WordDigit? | ninety @WordDigit? | "twenty-one" | "twenty-two" | "twenty-three" | "twenty-four" | "twenty-five" | "twenty-six" | "twenty-seven" | "twenty-eight" | "twenty-nine" | "thirty-one" | "thirty-two" | "thirty-three" | "thirty-four" | "thirty-five" | "thirty-six" | "thirty-seven" | "thirty-eight" | "thirty-nine" | From f44716afc15d19d1e2cb59ed77a3e6245e86f143 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 7 Oct 2024 11:05:36 -0700 Subject: [PATCH 6/7] Throw exception early on fourty --- .../numeric/mentions/MeasurementMention.scala | 52 ++++++++++--------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/main/src/main/scala/org/clulab/numeric/mentions/MeasurementMention.scala b/main/src/main/scala/org/clulab/numeric/mentions/MeasurementMention.scala index d9e6feee0..560ccd199 100644 --- a/main/src/main/scala/org/clulab/numeric/mentions/MeasurementMention.scala +++ b/main/src/main/scala/org/clulab/numeric/mentions/MeasurementMention.scala @@ -5,34 +5,38 @@ import org.clulab.odin.{Attachment, TextBoundMention} import org.clulab.processors.Document import org.clulab.struct.Interval -class MeasurementMention ( labels: Seq[String], - tokenInterval: Interval, - sentence: Int, - document: Document, - keep: Boolean, - foundBy: String, - attachments: Set[Attachment], - val value: Option[Seq[String]], - val unit: Option[Seq[String]], - val fromRange: Boolean, - unitNormalizer: BaseUnitNormalizer = NullUnitNormalizer) - extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm { - - override def neNorm: String = { +class MeasurementMention( + labels: Seq[String], + tokenInterval: Interval, + sentence: Int, + document: Document, + keep: Boolean, + foundBy: String, + attachments: Set[Attachment], + val value: Option[Seq[String]], + val unit: Option[Seq[String]], + val fromRange: Boolean, + unitNormalizer: BaseUnitNormalizer = NullUnitNormalizer +) extends TextBoundMention(labels, tokenInterval, sentence, document, keep, foundBy, attachments) with Norm { + val cachedNeNorm: String = calcNeNorm() + + override def neNorm: String = cachedNeNorm + + def calcNeNorm(): String = { assert(value.nonEmpty) assert(unit.nonEmpty) val numValueOpt = - if(fromRange) { - Some(value.get.head) - } else { - NumberParser.parse(value.get) - } - if(numValueOpt.isEmpty) - throw new RuntimeException(s"ERROR: could not parse the number [${value.mkString(" ")}] in the measurement ${raw.mkString(" ")}!") - val unitNorm = unitNormalizer.norm(unit.get) - - s"${numValueOpt.get} $unitNorm" + if (fromRange) Some(value.get.head) + else NumberParser.parse(value.get) + + numValueOpt + .map { numValue => + val unitNorm = unitNormalizer.norm(unit.get) + + s"$numValue $unitNorm" + } + .getOrElse(throw new RuntimeException(s"ERROR: could not parse the number [${value.mkString(" ")}] in the measurement ${raw.mkString(" ")}!")) } override def neLabel: String = { From 64e93a16e97689c2d3a11bbb9b5ad6c32799b607 Mon Sep 17 00:00:00 2001 From: Keith Alcock Date: Mon, 7 Oct 2024 12:21:13 -0700 Subject: [PATCH 7/7] Test previous PercentageMention as well --- .../scala/org/clulab/numeric/TestNumericMentions.scala | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala b/main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala index cf21368e7..6b42a611f 100644 --- a/main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala +++ b/main/src/test/scala/org/clulab/numeric/TestNumericMentions.scala @@ -14,4 +14,13 @@ class TestNumericMentions extends Test { document } + + behavior of "PercentageMention" + + it should "not throw an exception" in { + val text = "pdf 75 Some of the major sectoral plans include the national energy policy, transport policy, forest and wildlife policy, national environmental sanitation strategy etc. 76 https://www.thegasconsortium.com/documents/GMP-Final-Jun16.pdf 77 http://www.energycom.gov.gh/files/Renewable-Energy-Masterplan-February-2019.pdf 78 http://energycom.gov.gh/files/SE4ALL-GHANA%20ACTION%20PLAN.pdf 79 http://www.energycom.gov.gh/files/Ghana%20Integrated%20Power%20System%20Master%20Plan%20_Volume%202.pdf Page | 80 In 2017, the Ministry of Energy started a comprehensive review of the National Energy Policy, which is still ongoing." + val document = processor.annotate(text) + + document + } }