Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
grosjo committed Mar 2, 2020
1 parent 9f95ddc commit 5b9aa96
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 39 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ Set "attachments=1" if you want to index attachments (this works only for text a

If you face memory issues, you may set:
```
default_vsz_limit = 0
service indexer-worker {
vsz_limit = 0
}
Expand Down
52 changes: 23 additions & 29 deletions src/fts-backend-xapian-functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,10 +381,6 @@ class XNGram
}

d->trim();
long l = d->length();

if(l<partial) return;

i = d->indexOf(" ");

if(i>0)
Expand All @@ -394,8 +390,10 @@ class XNGram
delete(r);
d->truncate(i);
d->trim();
l=d->length();
}

long l = d->length();
if(l<partial) return;

if(onlyone)
{
Expand All @@ -418,7 +416,6 @@ class XNGram
void add_stem(icu::UnicodeString *d)
{
d->trim();

long l=d->length();
if(l<partial) return;

Expand All @@ -428,14 +425,6 @@ class XNGram
if(l>hardlimit)
{
if(verbose>0) i_warning("FTS Xapian: Term too long to be indexed (%s ...)",s.substr(0,100).c_str());
/* this is indeed useless and timeconsuming
icu::UnicodeString * r = new icu::UnicodeString(*d,1);
add_stem(r);
delete(r);
r = new icu::UnicodeString(*d,0,d->length()-1);
add_stem(r);
delete(r);
*/
return;
}

Expand Down Expand Up @@ -716,8 +705,21 @@ XResultSet * fts_backend_xapian_query(Xapian::Database * dbx, XQuerySet * query,
return set;
}

bool fts_backend_xapian_index_hdr(Xapian::WritableDatabase * dbx, uint uid, const char* field, const char* data,long p, long f)
bool fts_backend_xapian_index_hdr(Xapian::WritableDatabase * dbx, uint uid, const char* field, icu::UnicodeString* data,long p, long f)
{
if(data->length()<p) { return true; }

if(strlen(field)<1) { return true; }

long i=0;
while((i<HDRS_NB) && (strcmp(field,hdrs_emails[i])!=0))
{
i++;
}
if(i>=HDRS_NB) return true;

const char * h=hdrs_xapian[i];

try
{
XQuerySet * xq = new XQuerySet();
Expand All @@ -743,16 +745,6 @@ bool fts_backend_xapian_index_hdr(Xapian::WritableDatabase * dbx, uint uid, cons
delete(result);
delete(xq);

if(strlen(field)<1) { return true; }
long i=0;
while((i<HDRS_NB) && (strcmp(field,hdrs_emails[i])!=0))
{
i++;
}

if(i>=HDRS_NB) return true;
const char * h=hdrs_xapian[i];

XNGram * ngram = new XNGram(p,f,h);
ngram->add(data);

Expand Down Expand Up @@ -784,8 +776,10 @@ bool fts_backend_xapian_index_hdr(Xapian::WritableDatabase * dbx, uint uid, cons
return false;
}

bool fts_backend_xapian_index_text(Xapian::WritableDatabase * dbx,uint uid, const char * field, const char * data,long p, long f)
bool fts_backend_xapian_index_text(Xapian::WritableDatabase * dbx,uint uid, const char * field, icu::UnicodeString * data,long p, long f)
{
if(data->length()<p) { return true; }

try
{
XQuerySet * xq = new XQuerySet();
Expand Down Expand Up @@ -827,15 +821,15 @@ bool fts_backend_xapian_index_text(Xapian::WritableDatabase * dbx,uint uid, cons
{
h="XBDY";
}
std::string d(data);
std::string s;
data->toUTF8String(s);
termgenerator.set_stemming_strategy(Xapian::TermGenerator::STEM_ALL);
termgenerator.index_text(d, 1, h);
termgenerator.index_text(s, 1, h);

long l = strlen(h);
long n = doc2.termlist_count();
Xapian::TermIterator ti = doc2.termlist_begin();
XNGram * ngram = new XNGram(p,f,h);
std::string s;
const char * c;
while(n>0)
{
Expand Down
18 changes: 8 additions & 10 deletions src/fts-backend-xapian.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ static bool fts_backend_xapian_update_set_build_key(struct fts_backend_update_co
}

// Verify content-disposition
if((disposition != NULL) && (!backend->attachments) && (strstr(disposition,"filename=")!=NULL))
if((disposition != NULL) && (!backend->attachments) && ((strstr(disposition,"filename=")!=NULL) || (strstr(disposition,"attachment")!=NULL)))
{
if(verbose>0) i_info("FTS Xapian: Skipping part of type '%s' and disposition '%s'",type,disposition);
return FALSE;
Expand Down Expand Up @@ -347,6 +347,8 @@ static bool fts_backend_xapian_update_set_build_key(struct fts_backend_update_co
return FALSE;
}

// if(verbose>0) { i_info("FTS Xapian: Indexing FIELD=%s TYPE=%s DESC=%s",field,type,disposition); }

switch (key->type)
{
case FTS_BACKEND_BUILD_KEY_HDR:
Expand Down Expand Up @@ -411,7 +413,9 @@ static int fts_backend_xapian_update_build_more(struct fts_backend_update_contex
if(ctx->tbi_uid<1) return 0;

if(data == NULL) return 0;
if(size<1) return 0;
icu::StringPiece sp_d((const char *)data,(int32_t )size);
icu::UnicodeString d2 = icu::UnicodeString::fromUTF8(sp_d);
if(d2.length() < backend->partial) return 0;

if((backend->oldbox == NULL) || (strcmp(backend->oldbox,backend->box->name)!=0))
{
Expand All @@ -426,23 +430,17 @@ static int fts_backend_xapian_update_build_more(struct fts_backend_update_contex
return -1;
}

char * s = (char*)i_malloc(sizeof(char)*(size+1));
strncpy(s,(char *)data,size);
s[size]=0;

bool ok=true;

if(ctx->tbi_isfield)
{
ok=fts_backend_xapian_index_hdr(backend->dbw,ctx->tbi_uid,ctx->tbi_field, s, backend->partial,backend->full);
ok=fts_backend_xapian_index_hdr(backend->dbw,ctx->tbi_uid,ctx->tbi_field, &d2, backend->partial,backend->full);
}
else
{
ok=fts_backend_xapian_index_text(backend->dbw,ctx->tbi_uid,ctx->tbi_field, s, backend->partial,backend->full);
ok=fts_backend_xapian_index_text(backend->dbw,ctx->tbi_uid,ctx->tbi_field, &d2, backend->partial,backend->full);
}

i_free(s);

backend->nb_updates++;
if(backend->nb_updates>XAPIAN_COMMIT_LIMIT)
{
Expand Down

0 comments on commit 5b9aa96

Please sign in to comment.