Skip to content

Commit

Permalink
working on a fix for problems with leading/trailing whitespace (proyc…
Browse files Browse the repository at this point in the history
  • Loading branch information
proycon committed Dec 10, 2020
1 parent 685402f commit f538b60
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 5 deletions.
4 changes: 4 additions & 0 deletions include/libfolia/folia_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2929,6 +2929,10 @@ namespace folia {
std::string VersionName();
std::string Version();

/// return a copy of \e in with leading and trailing spaces (U+0020) removed;
/// other whitespace such as newline and tab is retained
UnicodeString trim_space( const UnicodeString& in );
/// return a copy of \e in with leading whitespace (space, tab, newline,
/// carriage return) removed
UnicodeString ltrim( const UnicodeString& in );
/// return a copy of \e in with trailing whitespace (space, tab, newline,
/// carriage return) removed
UnicodeString rtrim( const UnicodeString& in );

} // namespace folia

#endif
58 changes: 53 additions & 5 deletions src/folia_impl.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1581,14 +1581,26 @@ namespace folia {
return "";
}
UnicodeString result;
int i = 0;
for ( const auto& d : _data ){
if ( d->printable() ){
if (d->isinstance( XmlText_t)) {
if ((i == 0) && (i == (int) _data.size() -1)) {
result += rtrim(ltrim(d->text( cls )));
} else if (i == 0) {
result += ltrim(d->text( cls ));
} else if (i == (int) _data.size() - 1) {
result += rtrim(d->text( cls ));
} else {
result += d->text( cls );
}
} else if ( d->printable() ){
if ( !result.isEmpty() ){
const string& delim = d->get_delimiter( retaintok );
result += TiCC::UnicodeFromUTF8(delim);
}
result += d->text( cls );
result += d->text( cls );
}
i++;
}
#ifdef DEBUG_TEXT
cerr << "TEXT(" << cls << ") on a textcontainer :" << xmltag()
Expand Down Expand Up @@ -1751,20 +1763,20 @@ namespace folia {
* \return a UnicodeString with all leading and trailing spaces removed.
* Other 'whitespace' characters like newline and tab are retained!
*/
UnicodeString cmp = " ";
const char16_t space = 0x0020;
// cerr << "in = '" << in << "'" << endl;
UnicodeString out;
int i = 0;
for( ; i < in.length(); ++i ){
// cerr << "start: bekijk:" << UnicodeString(in[i]) << endl;
if ( in[i] != cmp[0] ){
if ( in[i] != space ){
break;
}
}
int j = in.length()-1;
for( ; j >= 0; --j ){
// cerr << "end: bekijk:" << UnicodeString(in[j]) << endl;
if ( in[j] != cmp[0] ){
if ( in[j] != space ){
break;
}
}
Expand All @@ -1779,6 +1791,42 @@ namespace folia {
return out;
}

UnicodeString ltrim( const UnicodeString& in ){
  /// strip whitespace (space, tab, newline, carriage return) from the front
  /*!
   * \param in the UnicodeString to trim
   * \return a copy of \e in without its leading whitespace. When \e in does
   * not start with whitespace it is returned as is; when it consists of
   * whitespace only, an empty string is returned.
   */
  const int len = in.length();
  // advance to the first character that is NOT one of the four whitespace
  // code units; ends at len when no such character exists
  int first = 0;
  while ( first < len
	  && ( in[first] == 0x0020 || in[first] == 0x0009
	       || in[first] == 0x000a || in[first] == 0x000d ) ){
    ++first;
  }
  if ( first == 0 ){
    // nothing to strip (this also covers the empty input)
    return in;
  }
  if ( first == len ){
    // whitespace only
    return "";
  }
  return UnicodeString( in, first, len - first );
}

UnicodeString rtrim( const UnicodeString& in ){
  /// remove trailing whitespace (space, tab, newline and carriage return)
  /*!
   * \param in the UnicodeString to trim
   * \return a copy of \e in without its trailing whitespace. When \e in does
   * not end with whitespace it is returned as is; when it is empty or
   * consists of whitespace only, an empty string is returned.
   */
  const int last = in.length() - 1;
  int end = -1; // index of the last non-whitespace character, -1 when none
  for ( int i = last; i >= 0; --i ){
    if ( (in[i] != 0x0020) && (in[i] != 0x0009) && (in[i] != 0x000a) && (in[i] != 0x000d) ){
      end = i;
      break;
    }
  }
  if ( end == -1 ){
    // empty input, or whitespace only
    return "";
  }
  else if ( end == last ){
    // nothing to strip. The index can be at most length()-1, so it must be
    // compared against the last valid index, not against length() itself.
    return in;
  }
  else {
    return UnicodeString( in, 0, end+1 );
  }
}

bool check_end( const UnicodeString& us, bool& only ){
/// check for newline characters at the end
/*!
Expand Down

0 comments on commit f538b60

Please sign in to comment.