Skip to content

Commit

Permalink
modified to accommodate proycon/foliapy#25
Browse files Browse the repository at this point in the history
  • Loading branch information
kosloot committed Feb 14, 2023
1 parent 70d7a59 commit 90c4bb6
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 26 deletions.
14 changes: 8 additions & 6 deletions src/FoLiA-2text.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ void usage( const string& name ){
cerr << "\t FoLiA-2text will produce a text from a FoLiA file, " << endl;
cerr << "\t or a whole directory of FoLiA files " << endl;
cerr << "\t-c OR --class='name'\t use 'name' as the folia class for <t> nodes. (default is 'current')" << endl;
cerr << "\t--retaintok\t retain tokenization. Default is attempt to remove." << endl;
cerr << "\t--retaintok\t Retain tokenization. Default is attempt to remove." << endl;
cerr << "\t--restore-formatting \tAttempt to restore the original formatting." << endl;
cerr << "\t\t\t Will insert (soft-)hypens and such." << endl;
cerr << "\t-t 'threads' or\n\t--threads='threads' Number of threads to run on." << endl;
cerr << "\t\t\t If 'threads' has the value \"max\", the number of threads is set to a" << endl;
cerr << "\t\t\t reasonable value. (OMP_NUM_TREADS - 2)" << endl;
Expand All @@ -70,7 +72,7 @@ UnicodeString handle_token_tag( const folia::FoliaElement *d,
int main( int argc, char *argv[] ){
TiCC::CL_Options opts( "hVvpe:t:o:c:",
"class:,help,version,retaintok,threads:,"
// "hyphens,"
"restore-formatting,"
"honour-tags,correction-handling:" );
try {
opts.init(argc,argv);
Expand Down Expand Up @@ -99,7 +101,7 @@ int main( int argc, char *argv[] ){
}
opts.extract( 'o', outputPrefix );
bool retaintok = opts.extract( "retaintok" );
// bool add_hyphens = opts.extract( "hyphens" );
bool restore = opts.extract( "restore-formatting" );
bool honour_tags = opts.extract( "honour-tags" );
CORRECTION_HANDLING ch = CORRECTION_HANDLING::CURRENT;
string handling;
Expand Down Expand Up @@ -210,9 +212,9 @@ int main( int argc, char *argv[] ){
if ( retaintok ){
tp.set( folia::TEXT_FLAGS::RETAIN );
}
// if ( add_hyphens ){
// tp.set( folia::TEXT_FLAGS::ADD_FORMATTING );
// }
if ( restore ){
tp.set( folia::TEXT_FLAGS::ADD_FORMATTING );
}
tp.set_correction_handling( ch );
tp.set_debug( verbosity > 0 );
if ( honour_tags ){
Expand Down
56 changes: 39 additions & 17 deletions src/FoLiA-abby.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -362,14 +362,13 @@ void update_formatting_info( formatting_info& line_font,

//! bookkeeping record for one part of a line
/*!
  NOTE(review): process_line() appears to fill these records and push them
  onto its line_parts list, which process_paragraph() then walks - confirm
  against the full file, only fragments of both functions are visible here.
*/
struct line_info {
line_info():
_line(0),
_spaces(0)
_line(0)
{};
UnicodeString _value;
formatting_info _fi;
xmlNode *_line; // initialised to 0 by the constructor
UnicodeString _hyph; // process_paragraph() compares this against "¬" and "-"
int _spaces;
UnicodeString _spaces; // process_line() stores the part's text here when that text is whitespace-only
};

void process_line( xmlNode *block,
Expand Down Expand Up @@ -410,7 +409,7 @@ void process_line( xmlNode *block,
UnicodeString tmp = uresult;
tmp.trim();
if ( tmp.isEmpty() ){
li._spaces = uresult.length();
li._spaces = uresult;
}
line_parts.push_back( li );
}
Expand Down Expand Up @@ -499,14 +498,19 @@ folia::TextMarkupStyle* make_style_content( const formatting_info& info,
return content;
}

void add_hspace( folia::FoliaElement *content ){
void add_hspace( folia::FoliaElement *content,
const UnicodeString& value ){
//! insert a <t-hspace> node to a FoliaElement
/*!
\param content the node to connect to
\param value the text to store inside the new <t-hspace> node. When empty,
the node is added without a text child.
*/
folia::KWargs args;
args["class"] = "space";
content->add_child<folia::TextMarkupHSpace>( args );
folia::FoliaElement *hs = content->add_child<folia::TextMarkupHSpace>( args );
// only attach a text child when there is something to store
if ( !value.isEmpty() ){
folia::XmlText *te = hs->add_child<folia::XmlText>();
// the text child is given the value converted to UTF-8
te->setvalue( TiCC::UnicodeToUTF8(value) );
}
}

void add_value( folia::FoliaElement *content,
Expand All @@ -516,20 +520,38 @@ void add_value( folia::FoliaElement *content,
\param content the Folia to extend
\param value the Unicode string to add
this function will replace leading and trailing spaces by <t-hspace> nodes
*/
*/
if ( !value.isEmpty() ){
bool begin_space = u_isspace( value[0] );
bool end_space = u_isspace( value[value.length()-1] );
UnicodeString start_spaces;
for ( int i=0;i < value.length(); ++i ){
if ( u_isspace(value[i] ) ){
start_spaces += value[i];
}
else {
break;
}
}
UnicodeString end_spaces;
for ( int i=value.length()-1; i>0; --i ){
if ( u_isspace(value[i] ) ){
end_spaces = value[i] + end_spaces;
}
else {
break;
}
}
bool begin_space = !start_spaces.isEmpty();
bool end_space = !end_spaces.isEmpty();
UnicodeString out = value;
out.trim();
if ( begin_space ){
// represent ALL leading spaces as 1 TextMarkupHSpace
add_hspace( content );
add_hspace( content, start_spaces );
}
content->add_child<folia::XmlText>( TiCC::UnicodeToUTF8(out) );
if ( end_space ){
// represent ALL trailing spaces as 1 TextMarkupHSpace
add_hspace( content );
add_hspace( content, end_spaces );
}
}
}
Expand Down Expand Up @@ -643,21 +665,21 @@ bool process_paragraph( folia::Paragraph *paragraph,
else {
// a 'true' hyphen: add the value + <t-hbr/>
// cerr << "HYPH= '" << it._hyph << "'" << endl;
folia::KWargs args;
add_value( content, value );
folia::Hyphbreak *hb = content->add_child<folia::Hyphbreak>();
if ( it._hyph == "¬"
|| ( it._hyph == "-"
&& &it == &line_parts.back() ) ){
args["class"] = TiCC::UnicodeToUTF8(it._hyph);
folia::XmlText *e = hb->add_child<folia::XmlText>();
e->setvalue( TiCC::UnicodeToUTF8(it._hyph) );
previous_hyphen = true;
}
add_value( content, value );
content->add_child<folia::Hyphbreak>(args);
// cerr << "content now: " << content << endl;
no_break = true;
}
}
else if ( it._spaces > 0 ){
add_hspace( content );
else if ( !it._spaces.isEmpty() ){
add_hspace( content, it._spaces );
}
else {
add_value( content, value );
Expand Down
6 changes: 3 additions & 3 deletions src/FoLiA-txt.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,9 @@ int main( int argc, char *argv[] ){
bool add_space = true;
if ( !hyp.isEmpty() ){
// add an extra HyphBreak to the stack
folia::KWargs args;
args["class"] = TiCC::UnicodeToUTF8(hyp);
FoliaElement *hb = new folia::Hyphbreak(args,d);
FoliaElement *hb = new folia::Hyphbreak();
XmlText *e = hb->add_child<folia::XmlText>(); // create partial text
e->setvalue( TiCC::UnicodeToUTF8(hyp) );
par_stack.push_back( hb );
add_space = false;
}
Expand Down

0 comments on commit 90c4bb6

Please sign in to comment.