diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc index a676a6d9..d46ba019 100644 --- a/lttoolbox/fst_processor.cc +++ b/lttoolbox/fst_processor.cc @@ -280,11 +280,28 @@ FSTProcessor::wblankPostGen(FILE *input, FILE *output) wstring result = L""; result += L"[["; wchar_t c = 0; + bool in_content = false; while(!feof(input)) { c = static_cast(fgetwc_unlocked(input)); - result += c; + + if(in_content && c == L'~') + { + if(result[result.size()-1] == L']') { + // We just saw the end of a wblank, may want to merge + wblankqueue.push(result); + } + else { + // wake-up-mark happened some characters into the wblanked word + fputws(result.c_str(), output); + } + return true; + } + else + { + result += c; + } if(c == L'\\') { @@ -305,16 +322,7 @@ FSTProcessor::wblankPostGen(FILE *input, FILE *output) } else { - c = static_cast(fgetwc_unlocked(input)); - if(c == L'~') - { - wblankqueue.push(result); - return true; - } - else - { - result += c; - } + in_content = true; // Assumption: No nested wblanks, always balanced } } } diff --git a/tests/data/postgen.dix b/tests/data/postgen.dix index 1bea34b0..6f90d270 100644 --- a/tests/data/postgen.dix +++ b/tests/data/postgen.dix @@ -92,6 +92,8 @@

+

sssss

+ diff --git a/tests/lt_proc/__init__.py b/tests/lt_proc/__init__.py index ffc9236b..e46b8ad2 100644 --- a/tests/lt_proc/__init__.py +++ b/tests/lt_proc/__init__.py @@ -173,7 +173,9 @@ class PostgenerationWordboundBlankTest(ProcTest): "[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456]]~les[[/]] [[t:i:4_tPUA]]~de[[/]] [[t:b:Z9eiLA]]el[[/]] [[t:i:wSM6RQ]]testword[[/]]", "[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456]]~les pes[[/]] [[t:i:4_tPUA]]~de[[/]] [[t:b:Z9eiLA]]el[[/]] [[t:i:wSM6RQ]]testword[[/]]", "[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456]]~les[[/]] [[t:b:12bsa23]]pes[[/]] [[t:i:4_tPUA]]~de[[/]] [[t:b:Z9eiLA]]el[[/]] [[t:i:wSM6RQ]]testword[[/]]", - "[[t:b:Z9eiLA]]abc[[/]] ~les [[t:b:12bsa23]]pes[[/]] [[t:i:4_tPUA]]~de[[/]] [[t:b:Z9eiLA]]el[[/]] [[t:i:wSM6RQ]]testword[[/]]"] + "[[t:b:Z9eiLA]]abc[[/]] ~les [[t:b:12bsa23]]pes[[/]] [[t:i:4_tPUA]]~de[[/]] [[t:b:Z9eiLA]]el[[/]] [[t:i:wSM6RQ]]testword[[/]]", + "[[t:text:NaNaNa]]pla~ssar[[/]]", + "[[t:text:NaNaNa]]pla~sssar[[/]]"] expectedOutputs = [ "xyz ejemplo [[t:i:123456; t:b:abc123; t:i:123456]]u ho[[/]] [[t:b:iopmnb]]nombre[[/]].", "xyz ejemplo [[t:b:poim230]]u ho[[/]] [[t:i:mnbj203]]nombre[[/]].", @@ -193,7 +195,9 @@ class PostgenerationWordboundBlankTest(ProcTest): "[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456]]le pe test[[/]] [[t:i:4_tPUA; t:b:Z9eiLA]]del[[/]] [[t:i:wSM6RQ]]testword[[/]]", "[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456]]les pes test[[/]] [[t:i:4_tPUA; t:b:Z9eiLA]]del[[/]] [[t:i:wSM6RQ]]testword[[/]]", "[[t:b:Z9eiLA]]abc[[/]] [[t:i:123456; t:b:12bsa23]]les pes test[[/]] [[t:i:4_tPUA; t:b:Z9eiLA]]del[[/]] [[t:i:wSM6RQ]]testword[[/]]", - "[[t:b:Z9eiLA]]abc[[/]] [[t:b:12bsa23]]les pes test[[/]] [[t:i:4_tPUA; t:b:Z9eiLA]]del[[/]] [[t:i:wSM6RQ]]testword[[/]]"] + "[[t:b:Z9eiLA]]abc[[/]] [[t:b:12bsa23]]les pes test[[/]] [[t:i:4_tPUA; t:b:Z9eiLA]]del[[/]] [[t:i:wSM6RQ]]testword[[/]]", + "[[t:text:NaNaNa]]plassar[[/]]", + "[[t:text:NaNaNa]]plassar[[/]]"] class PostgenerationWordboundBlankEscapingTest(ProcTest):