From 5b9aa96ffe2e5b319197552e4981bcf0fdf2df54 Mon Sep 17 00:00:00 2001 From: Joan Moreau Date: Thu, 27 Feb 2020 17:57:23 +0000 Subject: [PATCH] Cleanup --- README.md | 2 ++ src/fts-backend-xapian-functions.cpp | 52 ++++++++++++---------------- src/fts-backend-xapian.cpp | 18 +++++----- 3 files changed, 33 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 4d2eb92..6de9186 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,8 @@ Set "attachments=1" if you want to index attachments (this works only for text a If you face memory issues, you may set : ``` +default_vsz_limit = 0 + service indexer-worker { vsz_limit = 0 } diff --git a/src/fts-backend-xapian-functions.cpp b/src/fts-backend-xapian-functions.cpp index 067fc9d..ce7b94a 100644 --- a/src/fts-backend-xapian-functions.cpp +++ b/src/fts-backend-xapian-functions.cpp @@ -381,10 +381,6 @@ class XNGram } d->trim(); - long l = d->length(); - - if(lindexOf(" "); if(i>0) @@ -394,8 +390,10 @@ class XNGram delete(r); d->truncate(i); d->trim(); - l=d->length(); } + + long l = d->length(); + if(ltrim(); - long l=d->length(); if(lhardlimit) { if(verbose>0) i_warning("FTS Xapian: Term too long to be indexed (%s ...)",s.substr(0,100).c_str()); - /* this is indeed useless and timeconsuming - icu::UnicodeString * r = new icu::UnicodeString(*d,1); - add_stem(r); - delete(r); - r = new icu::UnicodeString(*d,0,d->length()-1); - add_stem(r); - delete(r); - */ return; } @@ -716,8 +705,21 @@ XResultSet * fts_backend_xapian_query(Xapian::Database * dbx, XQuerySet * query, return set; } -bool fts_backend_xapian_index_hdr(Xapian::WritableDatabase * dbx, uint uid, const char* field, const char* data,long p, long f) +bool fts_backend_xapian_index_hdr(Xapian::WritableDatabase * dbx, uint uid, const char* field, icu::UnicodeString* data,long p, long f) { + if(data->length()=HDRS_NB) return true; + + const char * h=hdrs_xapian[i]; + try { XQuerySet * xq = new XQuerySet(); @@ -743,16 +745,6 @@ bool fts_backend_xapian_index_hdr(Xapian::WritableDatabase * dbx, uint uid, cons delete(result); delete(xq); - if(strlen(field)<1) { return true; } - long i=0; - while((i=HDRS_NB) return true; - const char * h=hdrs_xapian[i]; - XNGram * ngram = new XNGram(p,f,h); ngram->add(data); @@ -784,8 +776,10 @@ bool fts_backend_xapian_index_hdr(Xapian::WritableDatabase * dbx, uint uid, cons return false; } -bool fts_backend_xapian_index_text(Xapian::WritableDatabase * dbx,uint uid, const char * field, const char * data,long p, long f) +bool fts_backend_xapian_index_text(Xapian::WritableDatabase * dbx,uint uid, const char * field, icu::UnicodeString * data,long p, long f) { + if(data->length()toUTF8String(s); termgenerator.set_stemming_strategy(Xapian::TermGenerator::STEM_ALL); - termgenerator.index_text(d, 1, h); + termgenerator.index_text(s, 1, h); long l = strlen(h); long n = doc2.termlist_count(); Xapian::TermIterator ti = doc2.termlist_begin(); XNGram * ngram = new XNGram(p,f,h); - std::string s; const char * c; while(n>0) { diff --git a/src/fts-backend-xapian.cpp b/src/fts-backend-xapian.cpp index e2c0a9f..59d9ac7 100644 --- a/src/fts-backend-xapian.cpp +++ b/src/fts-backend-xapian.cpp @@ -310,7 +310,7 @@ static bool fts_backend_xapian_update_set_build_key(struct fts_backend_update_co } // Verify content-disposition - if((disposition != NULL) && (!backend->attachments) && (strstr(disposition,"filename=")!=NULL)) + if((disposition != NULL) && (!backend->attachments) && ((strstr(disposition,"filename=")!=NULL) || (strstr(disposition,"attachment")!=NULL))) { if(verbose>0) i_info("FTS Xapian: Skipping part of type '%s' and disposition '%s'",type,disposition); return FALSE; @@ -347,6 +347,8 @@ static bool fts_backend_xapian_update_set_build_key(struct fts_backend_update_co return FALSE; } +// if(verbose>0) { i_info("FTS Xapian: Indexing FIELD=%s TYPE=%s DESC=%s",field,type,disposition); } + switch (key->type) { case FTS_BACKEND_BUILD_KEY_HDR: @@ -411,7 +413,9 @@ static int fts_backend_xapian_update_build_more(struct fts_backend_update_contex if(ctx->tbi_uid<1) return 0; if(data == NULL) return 0; - if(size<1) return 0; + icu::StringPiece sp_d((const char *)data,(int32_t )size); + icu::UnicodeString d2 = icu::UnicodeString::fromUTF8(sp_d); + if(d2.length() < backend->partial) return 0; if((backend->oldbox == NULL) || (strcmp(backend->oldbox,backend->box->name)!=0)) { @@ -426,23 +430,17 @@ static int fts_backend_xapian_update_build_more(struct fts_backend_update_contex return -1; } - char * s = (char*)i_malloc(sizeof(char)*(size+1)); - strncpy(s,(char *)data,size); - s[size]=0; - bool ok=true; if(ctx->tbi_isfield) { - ok=fts_backend_xapian_index_hdr(backend->dbw,ctx->tbi_uid,ctx->tbi_field, s, backend->partial,backend->full); + ok=fts_backend_xapian_index_hdr(backend->dbw,ctx->tbi_uid,ctx->tbi_field, &d2, backend->partial,backend->full); } else { - ok=fts_backend_xapian_index_text(backend->dbw,ctx->tbi_uid,ctx->tbi_field, s, backend->partial,backend->full); + ok=fts_backend_xapian_index_text(backend->dbw,ctx->tbi_uid,ctx->tbi_field, &d2, backend->partial,backend->full); } - i_free(s); - backend->nb_updates++; if(backend->nb_updates>XAPIAN_COMMIT_LIMIT) {