Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
added support for the latest and greatest libfolia (version 221)
  • Loading branch information
kosloot committed Dec 5, 2024
1 parent 50c3655 commit f0d9020
Show file tree
Hide file tree
Showing 8 changed files with 270 additions and 77 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/foliatest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,13 @@ jobs:

- uses: LanguageMachines/ticcactions/cpp-submodule-build@v1
with:
branch: ${{ github.ref_name }}
module: ticcutils

- uses: LanguageMachines/ticcactions/cpp-submodule-build@v1
with:
module: libfolia
branch: ${{ github.ref_name }}

- name: Static Code-check
if: ${{ env.action_status == '' }}
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ PKG_CHECK_MODULES([ICU], [icu-uc >= 50 icu-io] )
CXXFLAGS="$CXXFLAGS $ICU_CFLAGS"
LIBS="$ICU_LIBS $LIBS"

PKG_CHECK_MODULES([ticcutils], [ticcutils >= 0.29] )
PKG_CHECK_MODULES([ticcutils], [ticcutils >= 0.36] )
CXXFLAGS="$CXXFLAGS $ticcutils_CFLAGS"
LIBS="$LIBS $ticcutils_LIBS"

Expand Down
11 changes: 9 additions & 2 deletions src/edit_tests.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,11 @@ void edit_test001a( ){

// attribute check
assertTrue( s->index(0)->annotator() == "testscript" );
#if FOLIA_INT_VERSION < 221
assertTrue( s->index(2)->annotatortype() == AUTO );

#else
assertTrue( s->index(2)->annotatortype() == AnnotatorType::AUTO );
#endif
// addition to paragraph correct?
assertEqual( p->size(), (tmp+1) );
assertTrue( p->rindex(0) == s );
Expand Down Expand Up @@ -347,7 +350,11 @@ void edit_test005a( ){
assertTrue( p->isinstance<PosAnnotation>() );

std::vector<Alternative *> alt3;
#if FOLIA_INT_VERSION < 221
assertNoThrow( alt3 = w->alternatives(PosAnnotation_t, pos_set) );
#else
assertNoThrow( alt3 = w->alternatives(ElementType::PosAnnotation_t, pos_set) );
#endif
assertEqual( alt3.size(), 1 );
assertEqual( alt[0] , alt3[0] );

Expand Down Expand Up @@ -544,7 +551,7 @@ void edit_test011(){
f = new Feature( getArgs("subset='function', class='plural'"), &editDoc );
m->append( f );
assertTrue( len(l) == 2 ); // 2 morphemes
assertTrue( isinstance( l->index(0), Morpheme_t ) );
assertTrue( l->index(0)->isinstance<Morpheme>() );
assertTrue( l->index(0)->text() == "handschrift" );
assertTrue( l->index(0)->feat("type") == "stem" );
assertTrue( l->index(0)->feat("function") == "lexical" );
Expand Down
32 changes: 31 additions & 1 deletion src/engine_tests.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <map>
#include <algorithm>
#include <stdexcept>
#include <filesystem>
#include <unicode/unistr.h>
#include "libxml/tree.h"
#include "ticcutils/StringOps.h"
Expand Down Expand Up @@ -286,7 +287,7 @@ void engine_test004(){
}
}

void engine_test005(){
void engine_test005a(){
startTestSerie( " enumerate a document on xml_element nodes " );
Engine proc;
// proc.set_debug(true);
Expand All @@ -308,6 +309,35 @@ void engine_test005(){
}
}

#if FOLIA_INT_VERSION >= 221
// Enumerates tests/example.xml on xml_element nodes with Engine debugging
// enabled, compares the resulting tree with the stored reference, and then
// checks that a substantial amount of debug output reached the log file.
void engine_test005b(){
  startTestSerie( " enumerate a document on xml_element nodes with debugging" );
  // Single source of truth for the debug log location: the file that is
  // written must be the very same file whose size is verified at the end.
  const std::filesystem::path dbg_path( "/tmp/foliaengine.dbg" );
  Engine proc;
  // NOTE(review): 'd' is not referenced below; kept as in the original in
  // case constructing it matters -- confirm whether it is needed.
  Document d("debug='PARSING|SERIALIZE'");
  ofstream os( dbg_path );
  TiCC::LogStream ds( os );
  ds.set_level( LogHeavy );
  proc.set_debug(true);
  proc.set_dbg_stream( &ds );
  assertNoThrow( proc.init_doc( "tests/example.xml" ) );
  if ( proc.ok() ){
    xml_tree *result = proc.create_simple_tree("tests/example.xml");
    // Distinct name: do not shadow the debug stream 'os' declared above.
    ofstream tree_os( "/tmp/enum.tree" );
    print( tree_os, result );
    // Close before the external diff so it sees the complete file.
    tree_os.close();
    delete result;
    int stat = system( "diff /tmp/enum.tree tests/enum.tree.216.ok" );
    assertMessage( "/tmp/enum.tree tests/enum.tree.216.ok differ!",
		   (stat == 0) );
  }
  // Push buffered debug output to disk; file_size() only reports what has
  // actually been written to the file.
  os.flush();
  size_t size = std::filesystem::file_size( dbg_path );
  assertTrue( size > 5000 );
}
#else
// Debug streams for the Engine are not available before libfolia 221.
void engine_test005b(){
  // noop
}
#endif

void engine_test006a(){
startTestSerie( " enumerate a document on text node parents" );
TextEngine proc;
Expand Down
51 changes: 47 additions & 4 deletions src/foliapy_tests.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void Test_E001_Tokens_Structure(){
{
startTestSerie("Simple Token & Structure Test - First word");
FoliaElement *w = words[0];
assertTrue( w->isinstance( Word_t ) );
assertTrue( w->isinstance<Word>() );
assertEqual( w->id(), "example.p.1.s.1.w.1" );
assertEqual( w->text(), "Hello" );
assertEqual( str(w), "Hello" );
Expand All @@ -114,7 +114,7 @@ void Test_E001_Tokens_Structure(){
startTestSerie( "Simple Token & Structure Test - Sentence" );
//grab second sentence
FoliaElement *s = doc.sentences(1);
assertTrue( isinstance( s, Sentence_t) );
assertTrue( s->isinstance<Sentence>() );
assertEqual( s->id(), "example.p.1.s.2" );
assertFalse( s->hastext() ); //no explicit text
assertEqual( str(s), "This is an example." );
Expand All @@ -123,7 +123,7 @@ void Test_E001_Tokens_Structure(){
startTestSerie( "Simple Token & Structure Test - Index" );
// grab something using the index
FoliaElement *w = doc["example.p.1.s.1.w.1"];
assertTrue( isinstance( w, Word_t ) );
assertTrue( w->isinstance<Word>() );
assertEqual( doc["example.p.1.s.1.w.1"],
doc.index("example.p.1.s.1.w.1") );
assertEqual( w->id(), "example.p.1.s.1.w.1" );
Expand All @@ -132,11 +132,18 @@ void Test_E001_Tokens_Structure(){
{
startTestSerie( "Simple Token & Structure Test - Declarations" );
assertTrue( doc.declared(AnnotationType::TOKEN) );
assertTrue( doc.declared(Word_t) ); // same as above, resolves automatically
assertTrue( doc.declared(AnnotationType::TEXT) );
#if FOLIA_INT_VERSION < 221
assertTrue( doc.declared(Word_t) ); // same as above, resolves automatically
assertTrue( doc.declared(TextContent_t) ); //same as above, resolves automatically
assertTrue( doc.declared(Sentence_t) );
assertTrue( doc.declared(Paragraph_t) );
#else
assertTrue( doc.declared(ElementType::Word_t) ); // same as above, resolves automatically
assertTrue( doc.declared(ElementType::TextContent_t) ); //same as above, resolves automatically
assertTrue( doc.declared(ElementType::Sentence_t) );
assertTrue( doc.declared(ElementType::Paragraph_t) );
#endif
}
}

Expand Down Expand Up @@ -286,13 +293,25 @@ void Test_Provenance(){
assertEqual((*provenance)["p0"]->name(), "ucto" );
assertEqual((*provenance)["p0.1"]->name(), "libfolia");
assertEqual((*provenance)["p1"]->name(), "frog");
#if FOLIA_INT_VERSION < 221
assertEqual((*provenance)["p1"]->type(), AUTO );
#else
assertEqual((*provenance)["p1"]->type(), AnnotatorType::AUTO );
#endif
assertEqual(provenance->index("p1")->version(), "0.16");
assertEqual((*provenance)["p1.0"]->name(), "libfolia");
#if FOLIA_INT_VERSION < 221
assertEqual((*provenance)["p1.0"]->type(), GENERATOR );
#else
assertEqual((*provenance)["p1.0"]->type(), AnnotatorType::GENERATOR );
#endif
assertEqual((*provenance)["p1.0"]->name(), "libfolia");
assertEqual((*provenance)["p2.1"]->name(), "proycon");
#if FOLIA_INT_VERSION < 221
assertEqual((*provenance)["p2.1"]->type(), MANUAL );
#else
assertEqual((*provenance)["p2.1"]->type(), AnnotatorType::MANUAL );
#endif
auto annotators = doc.get_annotators( AnnotationType::POS,
"http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" );
assertEqual(len(annotators), 3 );
Expand All @@ -303,9 +322,17 @@ void Test_Provenance(){
// let's see if we got the right ones:
assertEqual( processors[0]->id(), "p1.1" );
assertEqual( processors[0]->name(), "mbpos" );
#if FOLIA_INT_VERSION < 221
assertEqual( processors[0]->type(), AUTO );
#else
assertEqual( processors[0]->type(), AnnotatorType::AUTO );
#endif
assertEqual( processors[1]->name(), "proycon");
#if FOLIA_INT_VERSION < 221
assertEqual( processors[1]->type(), MANUAL );
#else
assertEqual( processors[1]->type(), AnnotatorType::MANUAL );
#endif
}
{
startTestSerie( "Provenance - Annotation sanity check" );
Expand All @@ -315,20 +342,32 @@ void Test_Provenance(){
auto proc = doc.get_processor( pid );
assertEqual( proc->id(), "p1.1" );
assertEqual( proc->name(), "mbpos" );
#if FOLIA_INT_VERSION < 221
assertEqual( proc->type(), AUTO );
#else
assertEqual( proc->type(), AnnotatorType::AUTO );
#endif
// The old annotator attribute can also still be used and refers to the
// processor name (for backward API compatibility)
assertEqual( proc->annotator(), "mbpos" );
// The old annotatortype attribute can also still be used and refers
// to the processor type:
#if FOLIA_INT_VERSION < 221
assertEqual( proc->annotatortype(), AUTO );
#else
assertEqual( proc->annotatortype(), AnnotatorType::AUTO );
#endif
word = doc["untitled.p.1.s.1.w.2"];
pid = word->annotation<PosAnnotation>()->processor();
assertEqual( pid, "p2.1" );
proc = doc.get_processor( pid );
assertEqual( proc->id(), "p2.1" );
assertEqual( proc->name(), "proycon" );
#if FOLIA_INT_VERSION < 221
assertEqual( proc->type(), MANUAL );
#else
assertEqual( proc->type(), AnnotatorType::MANUAL );
#endif
}
{
startTestSerie("Provenance - Checking default/implicit processor/annotator" );
Expand All @@ -337,7 +376,11 @@ void Test_Provenance(){
auto proc = doc.get_processor( pid );
assertEqual( proc->id(), "p1.2" );
assertEqual( proc->name(), "mblem" );
#if FOLIA_INT_VERSION < 221
assertEqual( proc->type(), AUTO );
#else
assertEqual( proc->type(), AnnotatorType::AUTO );
#endif
// The old annotator attribute can also still be used and refers to
// the processor name
assertEqual( proc->annotator(), "mblem");
Expand Down
54 changes: 49 additions & 5 deletions src/foliatest.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <vector>
#include <map>
#include <algorithm>
#include <filesystem>
#include <stdexcept>
#include <unicode/unistr.h>
#include "libxml/tree.h"
Expand Down Expand Up @@ -148,7 +149,7 @@ void test0() {
void test1() {
startTestSerie( " Test lezen van een FoLiA file " );
Document d;
assertNoThrow( d.read_from_file( "tests/example.xml" ) );
assertNoThrow( d.read_from_file("tests/example.xml") );
assertNoThrow( d.save( "/tmp/example.xml" ) );
int stat = system( "./tests/foliadiff.sh /tmp/example.xml tests/example.xml" );
assertMessage( "/tmp/example.xml tests/example.xml differ!",
Expand Down Expand Up @@ -215,6 +216,28 @@ void test1f() {
(stat == 0) );
}

#if FOLIA_INT_VERSION >= 221
// Reads and saves tests/example.xml with Document debugging routed to a log
// file, checks the round trip against the original, and then verifies that
// a substantial amount of debug output was produced.
void test1g() {
  startTestSerie( " Test testing document debugging " );
  // One path object for both writing and measuring the debug log.
  const std::filesystem::path dbg_path( "/tmp/foliatest.dbg" );
  Document d("debug='PARSING|SERIALIZE'");
  ofstream os( dbg_path );
  TiCC::LogStream ds( os );
  ds.set_level( LogHeavy );
  d.set_dbg_stream( &ds );
  assertNoThrow( d.read_from_file("tests/example.xml") );
  assertNoThrow( d.save( "/tmp/example.xml" ) );
  int stat = system( "./tests/foliadiff.sh /tmp/example.xml tests/example.xml" );
  assertMessage( "/tmp/example.xml tests/example.xml differ!",
		 (stat == 0) );
  // The stream is still open here: flush it so that file_size() counts the
  // bytes that would otherwise still sit in the stream buffer.
  os.flush();
  size_t size = std::filesystem::file_size( dbg_path );
  assertTrue( size > 5000 );
}
#else
// Document debug streams are not available before libfolia 221.
void test1g() {
  // noop
}
#endif

void test2() {
startTestSerie( " Test lezen van een FoLiA string " );
string s;
Expand Down Expand Up @@ -509,7 +532,8 @@ extern void engine_test002b();
extern void engine_test002c();
extern void engine_test003();
extern void engine_test004();
extern void engine_test005();
extern void engine_test005a();
extern void engine_test005b();
extern void engine_test006a();
extern void engine_test006b();
extern void engine_test006c();
Expand Down Expand Up @@ -797,7 +821,7 @@ void correction_test002(){
assertEqual( s->text(), "De site staat online ." );
// incorrection() test, check if newly added word correctly reports being part of a correction
FoliaElement *w = corDoc.index(corDoc.id() + ".s.1.w.4-5");
assertTrue( isinstance(w->incorrection(), Correction_t) );
assertTrue( w->incorrection()->isinstance<Correction>() );
//incorrection return the correction the word is part of, or None if not part of a correction,
assertEqual( s->xmlstring(), "<s xmlns=\"http://ilk.uvt.nl/folia\" xml:id=\"example.s.1\"><w xml:id=\"example.s.1.w.1\"><t>De</t></w><w xml:id=\"example.s.1.w.2\"><t>site</t></w><w xml:id=\"example.s.1.w.3\"><t>staat</t></w><correction xml:id=\"example.s.1.correction.1\"><new><w xml:id=\"example.s.1.w.4-5\"><t>online</t></w></new><original auth=\"no\"><w xml:id=\"example.s.1.w.4\"><t>on</t></w><w xml:id=\"example.s.1.w.5\"><t>line</t></w></original></correction><w xml:id=\"example.s.1.w.6\"><t>.</t></w></s>" );
}
Expand Down Expand Up @@ -880,8 +904,11 @@ void correction_test005(){
assertEqual( w->annotation<Correction>()->suggestions(0)->text(), "stippellijn" );
assertEqual( w->annotation<Correction>()->getNew()->text(), "stippellijn" );
assertEqual( w->annotation<Correction>()->annotator(), "John Doe" );
#if FOLIA_INT_VERSION < 221
assertEqual( w->annotation<Correction>()->annotatortype(), MANUAL );

#else
assertEqual( w->annotation<Correction>()->annotatortype(), AnnotatorType::MANUAL );
#endif
assertEqual( w->xmlstring(), "<w xmlns=\"http://ilk.uvt.nl/folia\" xml:id=\"WR-P-E-J-0000000001.p.1.s.8.w.11\"><pos class=\"FOUTN(soort,ev,basis,zijd,stan)\" set=\"https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn\"/><lemma class=\"stippelijn\"/><correction xml:id=\"WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1\" annotator=\"John Doe\" class=\"spelling\"><suggestion auth=\"no\"><t>stippellijn</t></suggestion><new><t>stippellijn</t></new><original auth=\"no\"><t>stippelijn</t></original></correction></w>" );
delete corDoc;
}
Expand Down Expand Up @@ -1103,8 +1130,13 @@ void query_test002(){
void query_test003(){
startTestSerie( " Find Words by annotation " );
vector<string> words = { "de", "historisch", "wetenschap", "worden" };
#if FOLIA_INT_VERSION < 221
vector<vector<Word*> >matches = qDoc.findwords( Pattern( words,
LemmaAnnotation_t ) );
#else
vector<vector<Word*> >matches = qDoc.findwords( Pattern( words,
ElementType::LemmaAnnotation_t ) );
#endif
assertEqual( matches.size(), 1 );
assertEqual( len(matches[0]), 4 );

Expand All @@ -1117,7 +1149,12 @@ void query_test003(){
void query_test004(){
startTestSerie( " Find Words using multiple patterns " );
Pattern p1( { "de", "historische", "*", "wordt" } );
#if FOLIA_INT_VERSION < 221
Pattern p2( { "de", "historisch", "wetenschap", "worden" }, LemmaAnnotation_t );
#else
Pattern p2( { "de", "historisch", "wetenschap", "worden" },
ElementType::LemmaAnnotation_t );
#endif
list<Pattern> l;
l.push_back( p1 );
l.push_back( p2 );
Expand Down Expand Up @@ -1244,8 +1281,13 @@ void query_test010b(){
void query_test011(){
startTestSerie( " Find Words by non existing annotation " );
vector<string> words = { "bli", "bla", "blu" };
#if FOLIA_INT_VERSION < 221
vector<vector<Word*> >matches = qDoc.findwords( Pattern(words,
SenseAnnotation_t ) );
#else
vector<vector<Word*> >matches = qDoc.findwords( Pattern(words,
ElementType::SenseAnnotation_t ) );
#endif
assertEqual( matches.size(), 0 );
}

Expand Down Expand Up @@ -1398,6 +1440,7 @@ int main( int argc, char* argv[] ){
test1d();
test1e();
test1f();
test1g();
test2();
test3();
test4();
Expand Down Expand Up @@ -1636,7 +1679,8 @@ int main( int argc, char* argv[] ){
engine_test002c();
engine_test003();
engine_test004();
engine_test005();
engine_test005a();
engine_test005b();
engine_test006a();
engine_test006b();
engine_test006c();
Expand Down
Loading

0 comments on commit f0d9020

Please sign in to comment.