Skip to content

Commit

Permalink
Don't fail if no initial epsilons! But do fail on initial whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
unhammer committed Aug 28, 2020
1 parent c016057 commit 340887b
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 12 deletions.
4 changes: 2 additions & 2 deletions lttoolbox/alphabet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,10 +261,10 @@ Alphabet::decode(int const code) const
}

set<int>
Alphabet::getLeftEpsilons() const {
Alphabet::symbolsWhereLeftIs(wchar_t l) const {
set<int> eps;
for(const auto& sp: spair) { // [(l, r) : tag]
if(sp.first.first == 0) {
if(sp.first.first == l) {
eps.insert(sp.second);
}
}
Expand Down
5 changes: 4 additions & 1 deletion lttoolbox/alphabet.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,10 @@ class Alphabet
*/
pair<int, int> const & decode(int const code) const;

set<int> getLeftEpsilons() const;
/**
* Get all symbols where the left-hand side of the symbol-pair is l.
*
* @param l the left-hand side to match; callers pass 0 to select pairs
*          whose left-hand side is epsilon, or e.g. L' ' for a space
* @return the codes of the known symbol-pairs whose left-hand side equals l
*/
set<int> symbolsWhereLeftIs(wchar_t l) const;

enum Side
{
Expand Down
12 changes: 9 additions & 3 deletions lttoolbox/compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,16 +138,22 @@ Compiler::parse(string const &file, wstring const &dir)
bool
Compiler::valid(wstring const& dir) const
{
const wstring side = dir == COMPILER_RESTRICTION_RL_VAL ? L"right" : L"left";
const set<int> epsilonSymbols = alphabet.symbolsWhereLeftIs(0);
const set<int> spaceSymbols = alphabet.symbolsWhereLeftIs(L' ');
for (auto &section : sections) {
auto &fst = section.second;
set<int> initialClosure = fst.closure(fst.getInitial(), alphabet.getLeftEpsilons());
auto finals = fst.getFinals();
for(const auto i : initialClosure) {
auto initial = fst.getInitial();
for(const auto i : fst.closure(initial, epsilonSymbols)) {
if (finals.count(i)) {
const wstring side = dir == COMPILER_RESTRICTION_RL_VAL ? L"right" : L"left";
wcerr << L"Error: Invalid dictionary (hint: the " << side << " side of an entry is empty)" << endl;
return false;
}
if(fst.closure(i, spaceSymbols).size() > 1) { // >1 since closure always includes self
wcerr << L"Error: Invalid dictionary (hint: entry on the " << side << " beginning with whitespace)" << endl;
return false;
}
}
}
return true;
Expand Down
16 changes: 10 additions & 6 deletions lttoolbox/transducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,17 @@ Transducer::closure(int const state, set<int> const &epsilon_tags) const
while (nonvisited.size() > 0) {
int auxest = *nonvisited.begin();
for (const int epsilon_tag : epsilon_tags) {
auto range = transitions.at(auxest).equal_range(epsilon_tag);
while (range.first != range.second) {
if (result.find(range.first->second.first) == result.end()) {
result.insert(range.first->second.first);
nonvisited.insert(range.first->second.first);
try {
auto range = transitions.at(auxest).equal_range(epsilon_tag);
while (range.first != range.second) {
if (result.find(range.first->second.first) == result.end()) {
result.insert(range.first->second.first);
nonvisited.insert(range.first->second.first);
}
range.first++;
}
range.first++;
} catch (out_of_range const &e) {
// transitions.at(auxest) threw: this state has no entry in transitions (no outgoing transitions recorded), so there is nothing to follow – this is fine
}
}
nonvisited.erase(auxest);
Expand Down
12 changes: 12 additions & 0 deletions tests/data/lhs-ws-mono.dix
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<dictionary>
<alphabet>ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÄÅÆÇÈÉÊËÍÑÒÓÔÕÖØÙÚÜČĐŊŠŦŽabcdefghijklmnopqrstuvwxyzàáâäåæçèéêëíñòóôõöøùúüčđŋšŧž­-</alphabet>
<sdefs>
<sdef n="n" c="Noun"/>
</sdefs>
<pardefs>
</pardefs>
<section id="main" type="standard">
<!-- Deliberately invalid fixture: the left side of this entry starts with a blank (<b/>).
     tests/lt_comp expects compiling this dictionary to fail. -->
<e><p><l><b/>a</l><r>a<s n="n"/></r></p></e>
</section>
</dictionary>
12 changes: 12 additions & 0 deletions tests/data/rhs-ws-mono.dix
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<dictionary>
<alphabet>ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÄÅÆÇÈÉÊËÍÑÒÓÔÕÖØÙÚÜČĐŊŠŦŽabcdefghijklmnopqrstuvwxyzàáâäåæçèéêëíñòóôõöøùúüčđŋšŧž­-</alphabet>
<sdefs>
<sdef n="n" c="Noun"/>
</sdefs>
<pardefs>
</pardefs>
<section id="main" type="standard">
<!-- Deliberately invalid fixture: the right side of this entry starts with a blank (<b/>).
     tests/lt_comp compiles it with direction "rl" and expects compilation to fail. -->
<e><p><l>a<s n="n"/></l><r><b/>a</r></p></e>
</section>
</dictionary>
11 changes: 11 additions & 0 deletions tests/lt_comp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,14 @@ class CompEmptyRhsShouldError(unittest.TestCase, ProcTest):
procdir = "rl"
procdix = "data/rhs-empty-mono.dix"
expectedCompRetCodeFail = True


class CompLhsInitialSpaceShouldError(unittest.TestCase, ProcTest):
    # The fixture's only entry has a left side that begins with a blank (<b/>),
    # so the compile step must report failure. No procdir is set here, so
    # whatever direction ProcTest uses by default applies (not visible here).
    expectedCompRetCodeFail = True
    procdix = "data/lhs-ws-mono.dix"


class CompRhsInitialSpaceShouldError(unittest.TestCase, ProcTest):
    # The fixture's only entry has a right side that begins with a blank (<b/>).
    # It is compiled in the "rl" direction, and the compile step must report failure.
    procdir = "rl"
    procdix = "data/rhs-ws-mono.dix"
    expectedCompRetCodeFail = True

0 comments on commit 340887b

Please sign in to comment.