Skip to content

Commit

Permalink
Use the negative pattern of textNumberPattern to resolve padding ambi…
Browse files Browse the repository at this point in the history
…guities

ICU only uses the positive pattern of the textNumberPattern property to
determine pad character and position, completely ignoring the negative
pattern. If the positive pattern has no affix associated with the pad
character, then there is an ambiguity if the pad character should appear
before or after the affix when unparsing. In this case, ICU defaults to
before the affix, with no way to change it via the pattern. This
effectively means it is not possible for ICU number padding to appear
after a negative affix if there is no positive affix.

To resolve this ambiguity and allow configuring where pad characters
appear in this case, we inspect the negative pattern. If both negative
and positive patterns define padding on the same affix, and the positive
pattern has an empty string for that affix, then we use the pad position
from the negative pattern. In all other cases, the pad character in the
negative pattern is ignored following usual ICU behavior.

For example, a textNumberPattern of "*0####0;-*00" formats a negative
number with zero padding after hyphen, whereas normal ICU behavior would
ignore the negative pattern and zero pad before the hyphen.
  • Loading branch information
stevedlawrence committed Jan 11, 2024
1 parent 9e0bdf6 commit 46cf5df
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import org.apache.daffodil.lib.schema.annotation.props.gen.TextNumberRounding
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe._
import org.apache.daffodil.lib.util.MaybeDouble
import org.apache.daffodil.lib.util.MaybeInt
import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
import org.apache.daffodil.runtime1.processors.Delimiter
import org.apache.daffodil.runtime1.processors.TextNumberFormatEv
Expand All @@ -38,6 +39,10 @@ import org.apache.daffodil.runtime1.processors.unparsers.Unparser
import org.apache.daffodil.unparsers.runtime1.ConvertTextCombinatorUnparser
import org.apache.daffodil.unparsers.runtime1.ConvertTextNumberUnparser

import com.ibm.icu.impl.number.AffixPatternProvider
import com.ibm.icu.impl.number.Padder.PadPosition
import com.ibm.icu.impl.number.PatternStringParser
import com.ibm.icu.impl.number.PatternStringParser.ParsedPatternInfo
import com.ibm.icu.text.DecimalFormat

case class ConvertTextCombinator(e: ElementBase, value: Gram, converter: Gram)
Expand Down Expand Up @@ -399,14 +404,20 @@ trait ConvertTextNumberMixin {
} else pattern
}

final protected def checkPatternWithICU(e: ElementBase) = {
// Load the pattern to make sure it is valid
/**
* Validates the textNumberPattern using ICU's PatternStringParser. Although this class is
* public, it is not part of the ICU API, so it maybe not be stable. However, this is what
* DecimalFormat uses internally to parse patterns and extract information for initialization,
* and that likely won't change significantly. Plus, by using this class instead parsing with
* DeciamlFormat we can return the ParsedPatternInfo to give callers raw access to what was in
* the pattern without having to parse it ourselves. This can be useful for additional
* validation or logic using parts of the pattern ICU might normally ignore.
*/
final protected def checkPatternWithICU(e: ElementBase): ParsedPatternInfo = {
try {
if (hasV || hasP) {
new DecimalFormat(runtimePattern)
} else {
new DecimalFormat(pattern)
}
val patternToCheck = if (hasV || hasP) runtimePattern else pattern
val parsedPatternInfo = PatternStringParser.parseToPatternInfo(patternToCheck)
parsedPatternInfo
} catch {
case ex: IllegalArgumentException =>
if (hasV || hasP) {
Expand Down Expand Up @@ -551,7 +562,7 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
pattern.startsWith(";"),
"The positive part of the dfdl:textNumberPattern is required. The dfdl:textNumberPattern cannot begin with ';'.",
)
checkPatternWithICU(e)
val parsedPatternInfo = checkPatternWithICU(e)

val (roundingIncrement: MaybeDouble, roundingMode) =
e.textNumberRounding match {
Expand Down Expand Up @@ -586,6 +597,29 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
Nope
}

// ICU does not have a way to set the pad position to after an affix if the positive pattern
// does not have that affix. For example, "* 0", will always have a pad position of
// BEFORE_PREFIX, with no way to set it to AFTER_PREFIX because the positive pattern has no
// prefix. In cases where formats do not have a postive affix but want to specify the pad
// position to AFTER, we allow them to do so in the negative pattern. For example, a pattern
// of "* 0;-* 0" will have a pad position of AFTER_PREFIX. ICU normally ignores the negative
// pattern for pad position. Note that we require the pad char to be defined on the same
// affix or else it is ignored.
val posPadLoc = parsedPatternInfo.positive.paddingLocation
val negPadLoc =
if (parsedPatternInfo.negative != null) parsedPatternInfo.negative.paddingLocation
else null
val posPrefix = parsedPatternInfo.getString(AffixPatternProvider.FLAG_POS_PREFIX)
val posSuffix = parsedPatternInfo.getString(AffixPatternProvider.FLAG_POS_SUFFIX)
val icuPadPosition =
(posPadLoc, negPadLoc, posPrefix, posSuffix) match {
case (PadPosition.BEFORE_PREFIX, PadPosition.AFTER_PREFIX, "", _) =>
MaybeInt(DecimalFormat.PAD_AFTER_PREFIX)
case (PadPosition.BEFORE_SUFFIX, PadPosition.AFTER_SUFFIX, _, "") =>
MaybeInt(DecimalFormat.PAD_AFTER_SUFFIX)
case _ => MaybeInt.Nope
}

val ev = new TextNumberFormatEv(
e.tci,
decSep,
Expand All @@ -599,6 +633,7 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
roundingMode,
roundingIncrement,
zeroRepsRaw,
icuPadPosition,
e.primType,
)
ev.compile(tunable)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import org.apache.daffodil.lib.util.DecimalUtils.OverpunchLocation
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe._
import org.apache.daffodil.lib.util.MaybeDouble
import org.apache.daffodil.lib.util.MaybeInt
import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
import org.apache.daffodil.runtime1.processors.TextNumberFormatEv
import org.apache.daffodil.runtime1.processors.parsers.ConvertZonedCombinatorParser
Expand Down Expand Up @@ -179,6 +180,7 @@ case class ConvertZonedNumberPrim(e: ElementBase)
roundingMode,
roundingIncrement,
Nil,
MaybeInt.Nope,
e.primType,
)
ev.compile(tunable)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe._
import org.apache.daffodil.lib.util.MaybeChar
import org.apache.daffodil.lib.util.MaybeDouble
import org.apache.daffodil.lib.util.MaybeInt
import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
import org.apache.daffodil.runtime1.dsom._

Expand Down Expand Up @@ -81,6 +82,7 @@ class TextNumberFormatEv(
roundingMode: Maybe[TextNumberRoundingMode],
roundingIncrement: MaybeDouble,
zeroRepsRaw: List[String],
icuPadPosition: MaybeInt,
primType: PrimType,
) extends Evaluatable[DecimalFormat](tci)
with InfosetCachedEvaluatable[DecimalFormat] {
Expand Down Expand Up @@ -186,6 +188,10 @@ class TextNumberFormatEv(
}
}

if (icuPadPosition.isDefined) {
df.setPadPosition(icuPadPosition.get)
}

df
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,20 @@
<xs:element name="tnp103" type="xs:integer" dfdl:textNumberPattern="###0.0##"
dfdl:textNumberCheckPolicy="strict" />

<!--
pad position is after prefix because of ambiguity in the postive pattern
resolved by the negative pattern. Otherwise ICU defaults to before prefix
-->
<xs:element name="tnp104" type="xs:integer" dfdl:textNumberPattern="*_####0;(*_0)"
dfdl:textNumberCheckPolicy="strict" />

<!--
pad position is after suffix because of ambiguity in the postive pattern
resolved by the negative pattern. Otherwise ICU defaults to before suffix
-->
<xs:element name="tnp105" type="xs:integer" dfdl:textNumberPattern="####0*_;(0)*_"
dfdl:textNumberCheckPolicy="strict" />

</tdml:defineSchema>

<tdml:defineSchema name="textNumberPattern2" elementFormDefault="qualified">
Expand Down Expand Up @@ -4504,4 +4518,26 @@
</tdml:errors>
</tdml:parserTestCase>

<tdml:parserTestCase name="textNumberPaddingAmbiguity01" root="tnp104" model="textNumberPattern">
<tdml:document>
<tdml:documentPart type="text">(__1)</tdml:documentPart>
</tdml:document>
<tdml:infoset>
<tdml:dfdlInfoset>
<tnp104>-1</tnp104>
</tdml:dfdlInfoset>
</tdml:infoset>
</tdml:parserTestCase>

<tdml:parserTestCase name="textNumberPaddingAmbiguity02" root="tnp105" model="textNumberPattern">
<tdml:document>
<tdml:documentPart type="text">(1)__</tdml:documentPart>
</tdml:document>
<tdml:infoset>
<tdml:dfdlInfoset>
<tnp105>-1</tnp105>
</tdml:dfdlInfoset>
</tdml:infoset>
</tdml:parserTestCase>

</tdml:testSuite>
Original file line number Diff line number Diff line change
Expand Up @@ -523,4 +523,11 @@ class TestTextNumberProps {
@Test def test_textNumberIntegerWithDecimal04(): Unit = {
runner.runOneTest("textNumberIntegerWithDecimal04")
}

@Test def test_textNumberPaddingAmbiguity01(): Unit = {
runner.runOneTest("textNumberPaddingAmbiguity01")
}
@Test def test_textNumberPaddingAmbiguity02(): Unit = {
runner.runOneTest("textNumberPaddingAmbiguity02")
}
}

0 comments on commit 46cf5df

Please sign in to comment.