Skip to content

Commit

Permalink
lt-proc: Implement option to output n-best paths.
Browse files Browse the repository at this point in the history
Using the same option names as `hfst-proc`, we add options to
lt-proc that output the n-best paths according to their weight values.

Closes apertium#3
  • Loading branch information
Techievena committed Jun 26, 2018
1 parent 6a50cde commit be903a2
Show file tree
Hide file tree
Showing 8 changed files with 534 additions and 156 deletions.
16 changes: 8 additions & 8 deletions lttoolbox/att_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ AttCompiler::parse(string const &file_name, wstring const &dir)
}
split(line, L'\t', tokens);

from = convert(tokens[0]);
from = static_cast<int>(convert(tokens[0]));

AttNode* source = get_node(from);
/* First line: the initial state is of both types. */
Expand All @@ -174,7 +174,7 @@ AttCompiler::parse(string const &file_name, wstring const &dir)
}
else
{
to = convert(tokens[1]);
to = static_cast<int>(convert(tokens[1]));
if(dir == L"RL")
{
upper = tokens[3];
Expand Down Expand Up @@ -266,8 +266,8 @@ AttCompiler::extract_transducer(TransducerType type)
*/
void
AttCompiler::_extract_transducer(TransducerType type, int from,
Transducer& transducer, map<int, int>& corr,
set<int>& visited, double& cost)
Transducer& transducer, map<int, int>& corr,
set<int>& visited, double& cost)
{
if (visited.find(from) != visited.end())
{
Expand All @@ -287,7 +287,7 @@ AttCompiler::_extract_transducer(TransducerType type, int from,
for (vector<Transduction>::const_iterator it = source->transductions.begin();
it != source->transductions.end(); ++it)
{
if ((it->type & type) != type)
if ((it->type & type) != type)
{
continue; // Not the right type
}
Expand All @@ -310,7 +310,7 @@ AttCompiler::_extract_transducer(TransducerType type, int from,
to_t = corr[it->to];
transducer.linkStates(from_t, to_t, it->tag, it->weight);
}
else
else
{
/* We haven't seen it yet: add a new state! */
to_t = transducer.insertNewSingleTransduction(it->tag, from_t, it->weight);
Expand All @@ -336,7 +336,7 @@ AttCompiler::_extract_transducer(TransducerType type, int from,
*/
void
AttCompiler::classify(int from, map<int, TransducerType>& visited, bool path,
TransducerType type)
TransducerType type)
{
AttNode* source = get_node(from);
if (visited.find(from) != visited.end())
Expand Down Expand Up @@ -368,7 +368,7 @@ AttCompiler::classify(int from, map<int, TransducerType>& visited, bool path,
if (upper_word) next_type |= WORD;
if (upper_punct) next_type |= PUNCT;
next_path = true;
}
}
else
{
/* Otherwise (not yet, already): target's type is the same as ours. */
Expand Down
8 changes: 3 additions & 5 deletions lttoolbox/att_compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ namespace
}

/** Converts a string to a number. Slow, but at this point I don't care. */
int convert(const wstring& s)
double convert(const wstring& s)
{
int ret;
double ret;
wistringstream ss(s);
ss >> ret;
return ret;
Expand Down Expand Up @@ -134,9 +134,7 @@ class AttCompiler
*/
int starting_state;
/**
* Value of the final weight after a complete valid transduction. We assume it
* is the default value of weight for an entry if not specified otherwise or
* for the case of unweighted automata/transducers.
* Default value of weight of a transduction unless specified.
*/
double default_weight;

Expand Down
8 changes: 4 additions & 4 deletions lttoolbox/compression.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ Compression::long_multibyte_write(const double& value, FILE *output)
{
int exp = 0;

long unsigned int mantissa = static_cast<long unsigned int>(std::numeric_limits<long int>::max() * frexp(value, &exp));
long unsigned int mantissa = static_cast<long unsigned int>(0x40000000 * frexp(value, &exp));
unsigned int exponent = static_cast<unsigned int>(exp);

multibyte_write(mantissa, output);
Expand All @@ -319,7 +319,7 @@ Compression::long_multibyte_write(const double& value, ostream &output)
{
int exp = 0;

long unsigned int mantissa = static_cast<long unsigned int>(std::numeric_limits<long int>::max() * frexp(value, &exp));
long unsigned int mantissa = static_cast<long unsigned int>(0x40000000 * frexp(value, &exp));
unsigned int exponent = static_cast<unsigned int>(exp);

multibyte_write(mantissa, output);
Expand All @@ -335,7 +335,7 @@ Compression::long_multibyte_read(FILE *input)
long unsigned int mantissa = multibyte_read(input);
int exponent = multibyte_read(input);

double value = static_cast<double>(static_cast<long int>(mantissa)) / std::numeric_limits<long int>::max();
double value = static_cast<double>(static_cast<long int>(mantissa)) / 0x40000000;
result = ldexp(value, exponent);

return result;
Expand All @@ -349,7 +349,7 @@ Compression::long_multibyte_read(istream &input)
long unsigned int mantissa = multibyte_read(input);
int exponent = multibyte_read(input);

double value = static_cast<double>(static_cast<long int>(mantissa)) / std::numeric_limits<long int>::max();
double value = static_cast<double>(static_cast<long int>(mantissa)) / 0x40000000;
result = ldexp(value, exponent);

return result;
Expand Down
Loading

0 comments on commit be903a2

Please sign in to comment.