Skip to content

Commit

Permalink
Fix #107 – postgenerator fails when ~ is in the middle of a wblanked word
Browse files Browse the repository at this point in the history
There was already support for [[w:text:foo]]~zzy[[/]], but not for
[[w:text:foo]]xy~zzy[[/]].

This change should keep the old feature of moving the wblank start after
the post-generated part of [[w:text:foo]]~zzy[[/]] (so if there was
a rule turning ~z into Z, we get Z[[w:text:foo]]zy[[/]]). But if the
wake-up mark ~ is seen later in the wblanked word, the word stays
surrounded by the wblank, so [[w:text:foo]]xy~zzy[[/]] turns into
[[w:text:foo]]xyZzy[[/]].

+ tests for #107
  • Loading branch information
unhammer committed Jun 11, 2021
1 parent 51b0651 commit 2bb8ae0
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 13 deletions.
30 changes: 19 additions & 11 deletions lttoolbox/fst_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,28 @@ FSTProcessor::wblankPostGen(InputFile& input, UFILE *output)
{
UString result = WBLANK_START;
UChar32 c = 0;
bool in_content = false;

while(!input.eof())
{
c = input.get();
result += c;

if(in_content && c == '~')
{
if(result[result.size()-1]==']') {
// We just saw the end of a wblank, may want to merge
wblankqueue.push(result);
}
else {
// wake-up-mark happened some characters into the wblanked word
write(result, output);
}
return true;
}
else
{
result += c;
}

if(c == '\\')
{
Expand All @@ -309,16 +326,7 @@ FSTProcessor::wblankPostGen(InputFile& input, UFILE *output)
}
else
{
c = input.get();
if(c == '~')
{
wblankqueue.push(result);
return true;
}
else
{
result += c;
}
in_content = true; // Assumption: No nested wblanks, always balanced
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions tests/data/postgen.dix
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@
</p>
</e>

<e><p><l><a/>sss</l><r>ss</r></p></e>

</section>

</dictionary>
Expand Down
8 changes: 6 additions & 2 deletions tests/lt_proc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,9 @@ class PostgenerationWordboundBlankTest(ProcTest):
"[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456]]~les[[/]] [[t:i:4_tPUA]]~de[[/]] [[t:b:Z9eiLA]]el[[/]] [[t:i:wSM6RQ]]testword[[/]]",
"[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456]]~les pes[[/]] [[t:i:4_tPUA]]~de[[/]] [[t:b:Z9eiLA]]el[[/]] [[t:i:wSM6RQ]]testword[[/]]",
"[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456]]~les[[/]] [[t:b:12bsa23]]pes[[/]] [[t:i:4_tPUA]]~de[[/]] [[t:b:Z9eiLA]]el[[/]] [[t:i:wSM6RQ]]testword[[/]]",
"[[t:b:Z9eiLA]]abc[[/]] ~les [[t:b:12bsa23]]pes[[/]] [[t:i:4_tPUA]]~de[[/]] [[t:b:Z9eiLA]]el[[/]] [[t:i:wSM6RQ]]testword[[/]]"]
"[[t:b:Z9eiLA]]abc[[/]] ~les [[t:b:12bsa23]]pes[[/]] [[t:i:4_tPUA]]~de[[/]] [[t:b:Z9eiLA]]el[[/]] [[t:i:wSM6RQ]]testword[[/]]",
"[[t:text:NaNaNa]]pla~ssar[[/]]",
"[[t:text:NaNaNa]]pla~sssar[[/]]"]

expectedOutputs = [ "xyz ejemplo [[t:i:123456; t:b:abc123; t:i:123456]]u ho[[/]] [[t:b:iopmnb]]nombre[[/]].",
"xyz ejemplo [[t:b:poim230]]u ho[[/]] [[t:i:mnbj203]]nombre[[/]].",
Expand All @@ -193,7 +195,9 @@ class PostgenerationWordboundBlankTest(ProcTest):
"[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456]]le pe test[[/]] [[t:i:4_tPUA; t:b:Z9eiLA]]del[[/]] [[t:i:wSM6RQ]]testword[[/]]",
"[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456]]les pes test[[/]] [[t:i:4_tPUA; t:b:Z9eiLA]]del[[/]] [[t:i:wSM6RQ]]testword[[/]]",
"[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456; t:b:12bsa23]]les pes test[[/]] [[t:i:4_tPUA; t:b:Z9eiLA]]del[[/]] [[t:i:wSM6RQ]]testword[[/]]",
"[[t:b:Z9eiLA]]abc[[/]] [[t:b:12bsa23]]les pes test[[/]] [[t:i:4_tPUA; t:b:Z9eiLA]]del[[/]] [[t:i:wSM6RQ]]testword[[/]]"]
"[[t:b:Z9eiLA]]abc[[/]] [[t:b:12bsa23]]les pes test[[/]] [[t:i:4_tPUA; t:b:Z9eiLA]]del[[/]] [[t:i:wSM6RQ]]testword[[/]]",
"[[t:text:NaNaNa]]plassar[[/]]",
"[[t:text:NaNaNa]]plassar[[/]]"]


class PostgenerationWordboundBlankEscapingTest(ProcTest):
Expand Down

0 comments on commit 2bb8ae0

Please sign in to comment.