Skip to content

Commit

Permalink
Merge branch 'master' of github.com:apertium/lttoolbox
Browse files Browse the repository at this point in the history
  • Loading branch information
Tommi A Pirinen committed Mar 14, 2019
2 parents f7e64b1 + 85fbf5c commit f73c541
Show file tree
Hide file tree
Showing 14 changed files with 555 additions and 554 deletions.
8 changes: 4 additions & 4 deletions lttoolbox/compression.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,8 @@ Compression::long_multibyte_write(const double& value, FILE *output)
{
int exp = 0;

unsigned int mantissa = static_cast<unsigned int>(0x40000000 * frexp(value, &exp));
unsigned int exponent = static_cast<unsigned int>(exp);
unsigned int mantissa = static_cast<unsigned int>(static_cast<int>(0x40000000 * frexp(value, &exp)));
unsigned int exponent = static_cast<unsigned int>(static_cast<int>(exp));

if(mantissa < 0x04000000)
{
Expand Down Expand Up @@ -344,8 +344,8 @@ Compression::long_multibyte_write(const double& value, ostream &output)
{
int exp = 0;

unsigned int mantissa = static_cast<unsigned int>(0x40000000 * frexp(value, &exp));
unsigned int exponent = static_cast<unsigned int>(exp);
unsigned int mantissa = static_cast<unsigned int>(static_cast<int>(0x40000000 * frexp(value, &exp)));
unsigned int exponent = static_cast<unsigned int>(static_cast<int>(exp));

if(mantissa < 0x04000000)
{
Expand Down
75 changes: 14 additions & 61 deletions lttoolbox/fst_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -882,22 +882,19 @@ FSTProcessor::lsx(FILE *input, FILE *output)

alive_states.push_back(initial_state);

while(!feof(input))
{
int val = fgetwc_unlocked(input);
int val = -1;

if (val == 0) {
blankqueue.push(blank);
break;
}
while(!feof(input) && val != 0)
{
val = fgetwc_unlocked(input);

if(val == L'+' && isEscaped(val) && !outOfWord)
{
val = L'$';
plus_thing = true;
}

if((val == L'^' && isEscaped(val) && outOfWord) || feof(input))
if((val == L'^' && isEscaped(val) && outOfWord) || feof(input) || val == 0)
{
blankqueue.push(blank);

Expand Down Expand Up @@ -940,15 +937,15 @@ FSTProcessor::lsx(FILE *input, FILE *output)
continue;
}

//wcerr << L"\n[!] " << (wchar_t)val << L" ||| " << outOfWord << endl;
// wcerr << L"\n[!] " << (wchar_t)val << L" ||| " << outOfWord << endl;

if(outOfWord)
{
blank += val;
continue;
}

if((feof(input) || val == L'$') && !outOfWord) // && isEscaped(val)
if((val == 0 || feof(input) || val == L'$') && !outOfWord) // && isEscaped(val)
{
new_states.clear();
for(vector<State>::const_iterator it = alive_states.begin(); it != alive_states.end(); it++)
Expand Down Expand Up @@ -2150,11 +2147,14 @@ FSTProcessor::intergeneration(FILE *input, FILE *output)
}
else
{
if (isEscaped(val))
if(val != L'\0')
{
fputwc_unlocked(L'\\', output);
if (isEscaped(val))
{
fputwc_unlocked(L'\\', output);
}
fputwc_unlocked(val, output);
}
fputwc_unlocked(val, output);
}
}
else
Expand All @@ -2166,56 +2166,9 @@ FSTProcessor::intergeneration(FILE *input, FILE *output)
bool uppercase = source.size() > 1 && firstupper && iswupper(source[2]);
target = current_state.filterFinals(all_finals, alphabet,
empty_escaped_chars,
displayWeightsMode, maxAnalyses, maxWeightClasses,
uppercase, firstupper, 0);

// case of the beginning of the next word

wstring mybuf = L"";
for (size_t i = source.size(); i > 0; --i)
{
if (!isalpha(source[i - 1]))
{
break;
}
else
{
mybuf = source[i - 1] + mybuf;
}
}

if (mybuf.size() > 0)
{
bool myfirstupper = iswupper(mybuf[0]);
bool myuppercase = mybuf.size() > 1 && iswupper(mybuf[1]);

for (size_t i = target.size(); i > 0; --i)
{
if (!isalpha(target[i - 1]))
{
if (myfirstupper && i != target.size())
{
target[i] = towupper(target[i]);
}
else
{
target[i] = towlower(target[i]);
}
break;
}
else
{
if (myuppercase)
{
target[i - 1] = towupper(target[i - 1]);
}
else
{
target[i - 1] = towlower(target[i - 1]);
}
}
}
}

last = input_buffer.getPos();
}

Expand Down
170 changes: 80 additions & 90 deletions lttoolbox/lt-comp.1
Original file line number Diff line number Diff line change
@@ -1,97 +1,87 @@
.TH lt-comp 1 2006-03-08 "" ""
.SH NAME
lt-comp \- This application is part of the lexical processing modules
and tools (
.B lttoolbox
)
.PP
This tool is part of the apertium machine translation
architecture: \fBhttp://www.apertium.org\fR.
.SH SYNOPSIS
.B lt-comp
[
.B \-a \fR|
.B \-v \fR|
.B \-l \fR|
.B \-r \fR|
.B \-h
]
[
.B lr \fR|
.B rl
] dictionary_file output_file [acx_file]
.PP
.B lt-comp
[
.B \-\-alt \fR|
.B \-\-var \fR|
.B \-\-var\-left \fR|
.B \-\-var\-right \fR|
.B \-\-help
]
[
.B lr \fR|
.B rl
] dictionary_file output_file [acx_file]
.PP
.SH DESCRIPTION
.BR lt-comp
is the application responsible for compiling dictionaries used by
\fBlt-proc\fR in \fIApertium\fR into a compact and efficient
representation (a class of finite-state transducers called augmented
letter transducers).
.PP
.SH OPTIONS
.TP
.B \-a, \-\-alt
Sets the value of the \fIalt\fR attribute to use in compilation.

.Dd March 8, 2006
.Dt LT-COMP 1
.Os Apertium
.Sh NAME
.Nm lt-comp
.Nd augmented letter transducer compiler for Apertium
.Sh SYNOPSIS
.Nm lt-comp
.Op Fl a | v | l | r | h
.Cm lr | rl
.Ar dictionary_file
.Ar output_file
.Op Ar acx_file
.Sh DESCRIPTION
.Nm lt-comp
is the application responsible for compiling dictionaries used by
.Xr lt-proc 1
in Apertium into a compact and efficient representation
(a class of finite-state transducers called augmented letter transducers).
.Sh OPTIONS
.Bl -tag -width Ds
.It Fl a , Fl Fl alt
Sets the value of the
.Sy alt
attribute to use in compilation.
.Pp
Note that if no value is set, all entries containing an \fIalt\fR
attribute are omitted.
.TP
.B \-v, \-\-var
Sets the value of the \fIv\fR attribute to use in compilation.
This should only be used with monodixes; for bidixes, see \-l and \-r.

Note that if no value is set, all entries containing a \fIv\fR
attribute are considered to be \fIleft-to-right\fR.
.TP
.B \-l, \-\-var\-left
Sets the value of the \fIvl\fR attribute for use in compilation of bidixes.
"Left" here refers to the side of the dictionary, so this option is only valid
in \fIrl\fR mode.
.TP
.B \-r, \-\-var\-right
Sets the value of the \fIvr\fR attribute for use in compilation of bidixes.
"Right" here refers to the side of the dictionary, so this option is only valid
in \fIlr\fR mode.
.TP
.B \-h, \-\-help
Prints a short help message.
.TP
.B lr
.It Fl v , Fl Fl var
Sets the value of the
.Sy v
attribute to use in compilation.
This should only be used with monodixes; for bidixes, see
.Fl l
and
.Fl r .
.Pp
Note that if no value is set, all entries containing a
.Sy v
attribute are considered to be
.Em left-to-right .
.It Fl l , Fl Fl var-left
Sets the value of the
.Sy vl
attribute for use in compilation of bidixes.
.Dq Left
here refers to the side of the dictionary, so this option is only valid in
.Cm rl
mode.
.It Fl r , Fl Fl var-right
Sets the value of the
.Sy vr
attribute for use in compilation of bidixes.
.Dq Right
here refers to the side of the dictionary, so this option is only valid in
.Cm lr
mode.
.It Fl h , Fl Fl help
Prints a short help message.
.It Cm lr
The resulting transducer will process dictionary entries
\fIleft-to-right\fR.
.TP
.B rl
.Em left-to-right .
.It Cm rl
The resulting transducer will process dictionary entries
\fIright-to-left\fR.
.SH FILES
.B dictionary_file
.Em right-to-left .
.El
.Sh FILES
.Bl -tag -width Ds
.It Ar dictionary_file
The input dictionary.
.PP
.B output_file
.It Ar output_file
The compiled dictionary (a finite state transducer).
.PP
.B acx_file
.It Ar acx_file
Optional XML file of equivalent characters in monodixes.

.SH SEE ALSO
.I lt-proc\fR(1),
.I lt-expand\fR(1),
.I apertium-tagger\fR(1),
.I apertium\fR(1).
.SH BUGS
Lots of...lurking in the dark and waiting for you!
.SH AUTHOR
(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
.El
.Sh SEE ALSO
.Xr apertium 1 ,
.Xr apertium-tagger 1 ,
.Xr lt-expand 1 ,
.Xr lt-proc 1
.Sh COPYRIGHT
Copyright \(co 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
This is free software.
You may redistribute copies of it under the terms of
.Lk https://www.gnu.org/licenses/gpl.html the GNU General Public License .
.Sh BUGS
Many... lurking in the dark and waiting for you!
Loading

0 comments on commit f73c541

Please sign in to comment.