Skip to content

Commit

Permalink
Make -C option instead of mode; works with -g/-d & handles *some* unk…
Browse files Browse the repository at this point in the history
…nowns

fix #34 - Carefulcase option -C not compatible with -g

but #35 - Carefulcase eats words it can't generate -
still doesn't work if we get started on an ambiguous path
  • Loading branch information
unhammer committed Nov 17, 2018
1 parent 0926efb commit fd6e6dc
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 13 deletions.
9 changes: 7 additions & 2 deletions lttoolbox/fst_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1877,7 +1877,6 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
writeEscapedWithTags(sf, output);
fputwc_unlocked(L'$', output);
}

}
else
{
Expand Down Expand Up @@ -1932,7 +1931,7 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
{
if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
{
if(mode == gm_carefulcase)
if(carefulCase)
{
current_state.step_careful(val, towlower(val));
}
Expand Down Expand Up @@ -3523,6 +3522,12 @@ FSTProcessor::setCaseSensitiveMode(bool const value)
caseSensitive = value;
}

void
FSTProcessor::setCarefulCaseMode(bool const value)
{
carefulCase = value;
}

void
FSTProcessor::setDictionaryCaseMode(bool const value)
{
Expand Down
7 changes: 7 additions & 0 deletions lttoolbox/fst_processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ class FSTProcessor
*/
bool caseSensitive;

/**
* if true, when generating from uppercase input, prefer the case of
* the dictionary lemma if we can; otherwise fall back to uppercasing
*/
bool carefulCase;

/**
* if true, uses the dictionary case, discarding surface case
* information
Expand Down Expand Up @@ -444,6 +450,7 @@ class FSTProcessor
bool valid() const;

void setCaseSensitiveMode(bool const value);
void setCarefulCaseMode(bool const value);
void setDictionaryCaseMode(bool const value);
void setBiltransSurfaceForms(bool const value);
void setIgnoredChars(bool const value);
Expand Down
15 changes: 6 additions & 9 deletions lttoolbox/lt_proc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ using namespace std;
void endProgram(char *name)
{
cout << basename(name) << ": process a stream with a letter transducer" << endl;
cout << "USAGE: " << basename(name) << " [ -a | -b | -c | -d | -e | -g | -n | -p | -x | -s | -t | -v | -h -z -w ] [-W] [-N N] [-L N] [ -i icx_file ] [ -r rcx_file ] fst_file [input_file [output_file]]" << endl;
cout << "USAGE: " << basename(name) << " [ -a | -b | -d | -e | -g | -n | -p | -x | -s | -t | -v | -h ] [-c] [-w] [-z] [-C] [-W] [-N N] [-L N] [ -i icx_file ] [ -r rcx_file ] fst_file [input_file [output_file]]" << endl;
cout << "Options:" << endl;
#if HAVE_GETOPT_LONG
cout << " -a, --analysis: morphological analysis (default behavior)" << endl;
Expand Down Expand Up @@ -160,6 +160,10 @@ int main(int argc, char *argv[])
fstp.setCaseSensitiveMode(true);
break;

case 'C':
fstp.setCarefulCaseMode(true);
break;

case 'i':
fstp.setIgnoredChars(true);
fstp.parseICX(optarg);
Expand All @@ -168,10 +172,10 @@ int main(int argc, char *argv[])
case 'r':
fstp.setRestoreChars(true);
fstp.parseRCX(optarg);
break;

case 'I':
fstp.setUseDefaultIgnoredChars(false);

break;

case 'W':
Expand Down Expand Up @@ -211,7 +215,6 @@ int main(int argc, char *argv[])
case 'x':
case 't':
case 's':
case 'C':
if(cmd == 0)
{
cmd = c;
Expand Down Expand Up @@ -344,12 +347,6 @@ int main(int argc, char *argv[])
fstp.generation(input, output, gm_tagged_nm);
break;

case 'C':
fstp.initGeneration();
checkValidity(fstp);
fstp.generation(input, output, gm_carefulcase);
break;

case 'p':
fstp.initPostgeneration();
checkValidity(fstp);
Expand Down
21 changes: 21 additions & 0 deletions tests/data/careful-ambig-mono.dix
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Test dictionary used by the careful-case tests. It mixes an entry that
     exists only in lowercase, an entry that exists only in uppercase, and
     a pair of entries that differ only in letter case. -->
<dictionary>
<alphabet/>
<sdefs>
<sdef n="n"/>
<sdef n="np"/>
<sdef n="m"/>
<sdef n="f"/>
<sdef n="pl"/>
<sdef n="def"/>
</sdefs>
<section id="main" type="standard">

<!-- lowercase only: no uppercase variant of this pair is listed -->
<e><p><l>kakene</l><r>kake<s n="n"/><s n="f"/><s n="pl"/><s n="def"/></r></p></e>
<!-- uppercase only: no lowercase "kk" pair is listed -->
<e><p><l>KK</l><r>KK<s n="np"/></r></p></e>

<!-- same tags in two casings; the left sides differ in case as well -->
<e><p><l>pc-ane</l><r>pc<s n="n"/><s n="m"/><s n="pl"/><s n="def"/></r></p></e>
<e><p><l>PC-ane</l><r>PC<s n="n"/><s n="m"/><s n="pl"/><s n="def"/></r></p></e>

</section>
</dictionary>
19 changes: 19 additions & 0 deletions tests/data/careful-unknown-mono.dix
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Test dictionary used by the careful-case tests. It has one entry that
     exists only in lowercase and one pair of entries that differ only in
     letter case; there is no all-uppercase-only entry here. -->
<dictionary>
<alphabet/>
<sdefs>
<sdef n="n"/>
<sdef n="m"/>
<sdef n="f"/>
<sdef n="pl"/>
<sdef n="def"/>
</sdefs>
<section id="main" type="standard">

<!-- lowercase only: no uppercase variant of this pair is listed -->
<e><p><l>kakene</l><r>kake<s n="n"/><s n="f"/><s n="pl"/><s n="def"/></r></p></e>

<!-- same tags in two casings; the left sides differ in case as well -->
<e><p><l>pc-ane</l><r>pc<s n="n"/><s n="m"/><s n="pl"/><s n="def"/></r></p></e>
<e><p><l>PC-ane</l><r>PC<s n="n"/><s n="m"/><s n="pl"/><s n="def"/></r></p></e>

</section>
</dictionary>
25 changes: 23 additions & 2 deletions tests/lt_proc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import unittest
from proctest import ProcTest

# These fail on some systems:
#from null_flush_invalid_stream_format import *

class ValidInput(unittest.TestCase, ProcTest):
inputs = ["ab",
"ABC jg",
Expand Down Expand Up @@ -92,5 +95,23 @@ class Intergeneration(unittest.TestCase, ProcTest):
inputs = ["la dona ~dóna tot"]
expectedOutputs = ["la dona dona tot"]

# These fail on some systems:
#from null_flush_invalid_stream_format import *
class CarefulUnknownD(unittest.TestCase, ProcTest):
    # Runs with -C together with -d against careful-unknown-mono.dix.
    # The dictionary lists "kake" only in lowercase, so the uppercase KAKE
    # input is expected to come out uppercased (KAKENE); "pc"/"PC" each have
    # their own entry and are expected to keep the dictionary's casing.
    procflags = ['-C', '-d', '-z']
    procdir = 'rl'
    procdix = 'data/careful-unknown-mono.dix'
    inputs = [
        "^kake<n><f><pl><def>$ ^KAKE<n><f><pl><def>$ ^pc<n><m><pl><def>$ ^PC<n><m><pl><def>$",
    ]
    expectedOutputs = [
        'kakene KAKENE pc-ane PC-ane',
    ]

class CarefulUnknownG(unittest.TestCase, ProcTest):
    # Same inputs and expectations as the -d variant of this test, but run
    # with -C together with -g against careful-unknown-mono.dix.
    procflags = ['-C', '-g', '-z']
    procdir = 'rl'
    procdix = 'data/careful-unknown-mono.dix'
    inputs = [
        "^kake<n><f><pl><def>$ ^KAKE<n><f><pl><def>$ ^pc<n><m><pl><def>$ ^PC<n><m><pl><def>$",
    ]
    expectedOutputs = [
        'kakene KAKENE pc-ane PC-ane',
    ]

class CarefulAmbig(unittest.TestCase, ProcTest):
    # Runs with -C together with -g against careful-ambig-mono.dix, which
    # additionally lists an uppercase-only "KK" entry. The lowercase "kk"
    # input has no entry of its own and is expected back marked as "#kk".
    procflags = ['-C', '-g', '-z']
    procdir = 'rl'
    procdix = 'data/careful-ambig-mono.dix'
    inputs = [
        "^KK<np>$ ^kk<np>$ ^kake<n><f><pl><def>$ ^KAKE<n><f><pl><def>$ ^pc<n><m><pl><def>$ ^PC<n><m><pl><def>$",
    ]
    expectedOutputs = [
        'KK #kk kakene KAKENE pc-ane PC-ane',
    ]

0 comments on commit fd6e6dc

Please sign in to comment.