Skip to content

Commit

Permalink
allow trailing punct in scripts (#1588)
Browse files Browse the repository at this point in the history
* allow trailing puncts in scripts

* actually come to terms with endPunct(?) handling

* clarify comments
  • Loading branch information
dginev authored Jul 29, 2021
1 parent 3f1fb75 commit 9c84912
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 16 deletions.
25 changes: 13 additions & 12 deletions lib/LaTeXML/Core/Document.pm
Original file line number Diff line number Diff line change
Expand Up @@ -221,14 +221,14 @@ sub computeIndirectModel_aux {
# Can an element $tag contain $child, either directly according to the
# document model, or indirectly according to canContainIndirect?
# $tag and $child may each be given as a qualified name or as a node;
# a node (any reference) is first converted to its qualified name by the model.
# Returns the (true or false) result of the first check that succeeds.
sub canContainSomehow {
  my ($self, $tag, $child) = @_;
  my $model = $$self{model};
  $tag   = $model->getNodeQName($tag)   if ref $tag;      # In case tag is a node.
  $child = $model->getNodeQName($child) if ref $child;    # In case child is a node.
  return $model->canContain($tag, $child) || $self->canContainIndirect($tag, $child); }

# Can an element $tag carry the attribute $attrib, according to the document model?
# $tag may be given as a qualified name or as a node; a node (any reference)
# is first converted to its qualified name by the model.
# Returns whatever the model's canHaveAttribute returns.
sub canHaveAttribute {
  my ($self, $tag, $attrib) = @_;
  my $model = $$self{model};
  $tag = $model->getNodeQName($tag) if ref $tag;    # In case tag is a node.
  return $model->canHaveAttribute($tag, $attrib); }

sub canAutoOpen {
Expand Down Expand Up @@ -257,7 +257,7 @@ sub canAutoClose {
# get the actions that should be performed on afterOpen or afterClose
sub getTagActionList {
my ($self, $tag, $when) = @_;
$tag = $$self{model}->getNodeQName($tag) if ref $tag; # In case tag is a node.
$tag = $$self{model}->getNodeQName($tag) if ref $tag; # In case tag is a node.
my ($p, $n) = (undef, $tag);
if ($tag =~ /^([^:]+):(.+)$/) {
($p, $n) = ($1, $2); }
Expand Down Expand Up @@ -435,11 +435,11 @@ sub finalize_rec {
# Add (or combine) attributes
foreach my $attr (keys %pending_declaration) {
my $value = $pending_declaration{$attr}{value};
if ($attr eq 'class') { # Generalize?
if ($attr eq 'class') { # Generalize?
if (my $ovalue = $text->getAttribute('class')) {
$value .= ' ' . $ovalue; } }
$self->setAttribute($text, $attr => $value); }
$self->finalize_rec($text); # Now have to clean up the new node!
$self->finalize_rec($text); # Now have to clean up the new node!
}
} }

Expand Down Expand Up @@ -667,7 +667,7 @@ sub insertMathToken {
$self->setNodeFont($node, $font);
$self->setNodeBox($node, $box);
$self->openMathText_internal($string) if defined $string;
$self->closeNode_internal($node); # Should be safe.
$self->closeNode_internal($node); # Should be safe.
return $node; } }

# Insert a new comment, or append to previous comment.
Expand Down Expand Up @@ -1108,7 +1108,7 @@ sub openText_internal {
if $LaTeXML::DEBUG{document};
$point->appendChild($node);
$self->setNode($node); }
return $$self{node}; } # return the text node (current)
return $$self{node}; } # return the text node (current)

# Since xml text nodes don't have attributes to record the originating box,
# we need to manage the accumulation of autoOpen'ed boxes
Expand Down Expand Up @@ -1201,7 +1201,7 @@ sub closeText_internal {
next if ($fonttest = $$ligature{fontTest}) && !&$fonttest($font);
$string = &{ $$ligature{code} }($string); } }
$node->setData($string) unless $string eq $ostring;
$self->setNode($parent); # Now, effectively Closed
$self->setNode($parent); # Now, effectively Closed
return $parent; }
else {
return $node; } }
Expand Down Expand Up @@ -1363,7 +1363,7 @@ sub setAttribute {
sub addSSValues {
my ($self, $node, $key, $values) = @_;
$values = $values->toAttribute if ref $values;
if ((defined $values) && ($values ne '')) { # Skip if `empty'; but 0 is OK!
if ((defined $values) && ($values ne '')) { # Skip if `empty'; but 0 is OK!
my @values = split(/\s/, $values);
if (my $oldvalues = $node->getAttribute($key)) { # previous values?
my @old = split(/\s/, $oldvalues);
Expand Down Expand Up @@ -1574,7 +1574,7 @@ sub collapseXMDual {
# The other branch is not visible, nor referenced,
# but the dual may have an id and be referenced
if (my $dualid = $dual->getAttribute('xml:id')) {
$self->unRecordID($dualid); # We'll move or remove the ID from the dual
$self->unRecordID($dualid); # We'll move or remove the ID from the dual
if (my $branchid = $branch->getAttribute('xml:id')) { # branch has id too!
foreach my $ref ($self->findnodes("//*[\@idref='$dualid']")) {
$ref->setAttribute(idref => $branchid); } } # Change dualid refs to branchid
Expand All @@ -1595,6 +1595,7 @@ sub setNodeBox {

sub getNodeBox {
my ($self, $node) = @_;
return unless $node;
my $t = $node->nodeType;
return if $t != XML_ELEMENT_NODE;
if (my $boxid = $node->getAttribute('_box')) {
Expand Down Expand Up @@ -1664,7 +1665,7 @@ sub removeNode {
sub removeNode_aux {
my ($self, $node) = @_;
my $chopped = $$self{node}->isSameNode($node);
if ($node->nodeType == XML_ELEMENT_NODE) { # If an element, do ID bookkeeping.
if ($node->nodeType == XML_ELEMENT_NODE) { # If an element, do ID bookkeeping.
if (my $id = $node->getAttribute('xml:id')) {
$self->unRecordID($id); }
$chopped ||= grep { $self->removeNode_aux($_) } $node->childNodes; }
Expand Down
8 changes: 4 additions & 4 deletions lib/LaTeXML/MathGrammar
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,13 @@ AnythingAny :
# a top level rule for sub and superscripts that can accept all sorts of junk.
# Subscript: an aSubscript, then zero or more further aSubscript items, each
# optionally preceded by PUNCT (InvisibleComma() is substituted when no PUNCT
# was matched), then an optional trailing endPunct.
# In the final action, $item[3] is the endPunct(?) match; Absent() is passed
# to NewList in its place when no trailing punctuation was matched.
Subscript : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Subscript :
    aSubscript (PUNCT(?) aSubscript {[$item[1]->[0] || InvisibleComma(),$item[2]]; })(s?) endPunct(?)
    { NewList($item[1],map(@$_,@{$item[2]}),$item[3]->[0]||Absent()); }

# Superscript: an aSuperscript, then zero or more further aSuperscript items,
# each optionally preceded by PUNCT (InvisibleComma() is substituted when no
# PUNCT was matched), then an optional trailing endPunct.
# In the final action, $item[3] is the endPunct(?) match; Absent() is passed
# to NewList in its place when no trailing punctuation was matched.
Superscript : <rulevar: local $MaxAbsDepth = $LaTeXML::MathParser::MAX_ABS_DEPTH>
Superscript :
    aSuperscript (PUNCT(?) aSuperscript {[$item[1]->[0] || InvisibleComma(),$item[2]]; })(s?) endPunct(?)
    { NewList($item[1],map(@$_,@{$item[2]}),$item[3]->[0]||Absent()); }

aSubscript :
Formulae
Expand Down
4 changes: 4 additions & 0 deletions lib/LaTeXML/MathParser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1236,6 +1236,7 @@ sub extract_separators {
. p_getValue($p)
. spacingToString(getXMHintSpacing(p_getAttribute($p, 'rpadding')));
push(@args, shift(@stuff)); } } # Collect the next expression.
@args = grep { $_ } @args; # drop all undef args, trailing punct could have added an undef
return ($punct, @args); }

# ================================================================================
Expand Down Expand Up @@ -1363,6 +1364,9 @@ sub NewFormula {

sub NewList {
my (@stuff) = @_;
# drop placeholder token for missing trailing punct, if any
if (scalar(@stuff) > 1 && (p_getTokenMeaning($stuff[-1]) eq 'absent')) {
pop(@stuff); }
if (@stuff == 1) {
return $stuff[0]; }
else {
Expand Down
4 changes: 4 additions & 0 deletions t/170_grammar_coverage.t
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ delete $grammar_dependencies{'argPunct'}{'VERTBAR'};
delete $grammar_dependencies{'Expression'}{'punctExpr'}; # Unreachable, due to Formula -> punctExpr
delete $grammar_dependencies{'aSuperscri'}{'AnyOp'};
delete $grammar_dependencies{'aSuperscri'}{'Expression'};
# These are odd to have been recorded, since at least the intention is that they are preceded by
# aSubscript/aSuperscript. We have explicit tests for the relevant cases.
delete $grammar_dependencies{'Superscrip'}{'endPunct'};
delete $grammar_dependencies{'Subscript'}{'endPunct'};
# forbid rules should never match, don't check them here.
# TODO: We need tests for the always-failing productions!
delete $grammar_dependencies{'doubtArgs'}{'forbidArgs'};
Expand Down
Binary file modified t/parse/scripts.pdf
Binary file not shown.
7 changes: 7 additions & 0 deletions t/parse/scripts.tex
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,11 @@
$ a ^{(\neq 0)} $
$ a _{(>0)} $

% scripts with trailing puncts
$ a_{n.n.n.} $
$ b^{n.n.n.} $
$ a_{i,j,} $
$ b^{-,} $
$ a_{k,} $

\end{document}
131 changes: 131 additions & 0 deletions t/parse/scripts.xml
Original file line number Diff line number Diff line change
Expand Up @@ -286,4 +286,135 @@
</XMath>
</Math></p>
</para>
<para xml:id="p17">
<p><Math mode="inline" tex="a_{n.n.n.}" text="a _ (list@(formulae@(n, n, n)))" xml:id="p17.m1">
<XMath>
<XMApp>
<XMTok role="SUBSCRIPTOP" scriptpos="post1"/>
<XMTok font="italic" role="UNKNOWN">a</XMTok>
<XMDual>
<XMApp>
<XMTok meaning="list"/>
<XMRef idref="p17.m1.4"/>
</XMApp>
<XMWrap>
<XMDual xml:id="p17.m1.4">
<XMApp>
<XMTok meaning="formulae"/>
<XMRef idref="p17.m1.1"/>
<XMRef idref="p17.m1.2"/>
<XMRef idref="p17.m1.3"/>
</XMApp>
<XMWrap>
<XMTok font="italic" fontsize="70%" role="UNKNOWN" xml:id="p17.m1.1">n</XMTok>
<XMTok fontsize="70%" role="PERIOD">.</XMTok>
<XMTok font="italic" fontsize="70%" role="UNKNOWN" xml:id="p17.m1.2">n</XMTok>
<XMTok fontsize="70%" role="PERIOD">.</XMTok>
<XMTok font="italic" fontsize="70%" role="UNKNOWN" xml:id="p17.m1.3">n</XMTok>
</XMWrap>
</XMDual>
<XMTok fontsize="70%" role="PERIOD">.</XMTok>
</XMWrap>
</XMDual>
</XMApp>
</XMath>
</Math>
<Math mode="inline" tex="b^{n.n.n.}" text="b ^ (list@(formulae@(n, n, n)))" xml:id="p17.m2">
<XMath>
<XMApp>
<XMTok role="SUPERSCRIPTOP" scriptpos="post1"/>
<XMTok font="italic" role="UNKNOWN">b</XMTok>
<XMDual>
<XMApp>
<XMTok meaning="list"/>
<XMRef idref="p17.m2.4"/>
</XMApp>
<XMWrap>
<XMDual xml:id="p17.m2.4">
<XMApp>
<XMTok meaning="formulae"/>
<XMRef idref="p17.m2.1"/>
<XMRef idref="p17.m2.2"/>
<XMRef idref="p17.m2.3"/>
</XMApp>
<XMWrap>
<XMTok font="italic" fontsize="70%" role="UNKNOWN" xml:id="p17.m2.1">n</XMTok>
<XMTok fontsize="70%" role="PERIOD">.</XMTok>
<XMTok font="italic" fontsize="70%" role="UNKNOWN" xml:id="p17.m2.2">n</XMTok>
<XMTok fontsize="70%" role="PERIOD">.</XMTok>
<XMTok font="italic" fontsize="70%" role="UNKNOWN" xml:id="p17.m2.3">n</XMTok>
</XMWrap>
</XMDual>
<XMTok fontsize="70%" role="PERIOD">.</XMTok>
</XMWrap>
</XMDual>
</XMApp>
</XMath>
</Math>
<Math mode="inline" tex="a_{i,j,}" text="a _ (list@(list@(i, j)))" xml:id="p17.m3">
<XMath>
<XMApp>
<XMTok role="SUBSCRIPTOP" scriptpos="post1"/>
<XMTok font="italic" role="UNKNOWN">a</XMTok>
<XMDual>
<XMApp>
<XMTok meaning="list"/>
<XMRef idref="p17.m3.3"/>
</XMApp>
<XMWrap>
<XMDual xml:id="p17.m3.3">
<XMApp>
<XMTok meaning="list"/>
<XMRef idref="p17.m3.1"/>
<XMRef idref="p17.m3.2"/>
</XMApp>
<XMWrap>
<XMTok font="italic" fontsize="70%" role="UNKNOWN" xml:id="p17.m3.1">i</XMTok>
<XMTok fontsize="70%" role="PUNCT">,</XMTok>
<XMTok font="italic" fontsize="70%" role="UNKNOWN" xml:id="p17.m3.2">j</XMTok>
</XMWrap>
</XMDual>
<XMTok fontsize="70%" role="PUNCT">,</XMTok>
</XMWrap>
</XMDual>
</XMApp>
</XMath>
</Math>
<Math mode="inline" tex="b^{-,}" text="b ^ (list@(-))" xml:id="p17.m4">
<XMath>
<XMApp>
<XMTok role="SUPERSCRIPTOP" scriptpos="post1"/>
<XMTok font="italic" role="UNKNOWN">b</XMTok>
<XMDual>
<XMApp>
<XMTok meaning="list"/>
<XMRef idref="p17.m4.1"/>
</XMApp>
<XMWrap>
<XMTok fontsize="70%" meaning="minus" role="ADDOP" xml:id="p17.m4.1">-</XMTok>
<XMTok fontsize="70%" role="PUNCT">,</XMTok>
</XMWrap>
</XMDual>
</XMApp>
</XMath>
</Math>
<Math mode="inline" tex="a_{k,}" text="a _ (list@(k))" xml:id="p17.m5">
<XMath>
<XMApp>
<XMTok role="SUBSCRIPTOP" scriptpos="post1"/>
<XMTok font="italic" role="UNKNOWN">a</XMTok>
<XMDual>
<XMApp>
<XMTok meaning="list"/>
<XMRef idref="p17.m5.1"/>
</XMApp>
<XMWrap>
<XMTok font="italic" fontsize="70%" role="UNKNOWN" xml:id="p17.m5.1">k</XMTok>
<XMTok fontsize="70%" role="PUNCT">,</XMTok>
</XMWrap>
</XMDual>
</XMApp>
</XMath>
</Math></p>
</para>
</document>

0 comments on commit 9c84912

Please sign in to comment.