From 261281ab5cad1e565ed127725c834e4c9397f517 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 21 Oct 2021 21:39:29 +0800 Subject: [PATCH 01/10] add c interface --- include/lucene++/Lucene_c.h | 32 ++++++++++++++++ src/core/CMakeLists.txt | 1 + src/core/c/.Lucene_c.cc.swp | Bin 0 -> 12288 bytes src/core/c/Lucene_c.cc | 72 ++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+) create mode 100644 include/lucene++/Lucene_c.h create mode 100644 src/core/c/.Lucene_c.cc.swp create mode 100644 src/core/c/Lucene_c.cc diff --git a/include/lucene++/Lucene_c.h b/include/lucene++/Lucene_c.h new file mode 100644 index 00000000..a763dd6e --- /dev/null +++ b/include/lucene++/Lucene_c.h @@ -0,0 +1,32 @@ +#ifndef _LUCENE_C_H +#define _LUCENE_C_H + + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + + +typedef struct index_t index_t; + +__attribute__((visibility("default"))) index_t* index_open(const char *path); + + +__attribute__((visibility("default"))) int index_put(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int32_t uid); + +__attribute__((visibility("default"))) int index_search(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int type, int **result, int32_t *nResult); + +__attribute__((visibility("default"))) void index_close(index_t *index); + +__attribute__((visibility("default"))) int index_optimize(index_t *index); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 690f3826..844e81e4 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -23,6 +23,7 @@ file(GLOB_RECURSE lucene_sources "queryparser/*.cpp" "store/*.cpp" "util/*.c*" + "c/*.cc" ) file(GLOB_RECURSE lucene_internal_headers diff --git a/src/core/c/.Lucene_c.cc.swp b/src/core/c/.Lucene_c.cc.swp new file mode 100644 index 0000000000000000000000000000000000000000..9b14535f30de3335c5dc3f90fba20dc3f793e852 GIT binary patch literal 12288 zcmeHNO>7%Q6rLdQ-vR4u}I1XRb&f)B_0AGZ!Roa4QAM87LCp%&hHoNK#eeQZcK1+1;5pZ{GXf z&nU{iQ94~;rt?!X1lLo9ynp_p^Uj_eIs6qNJc@)IzT4G#v2*yP1@F-O+(KtJ*=tha z^tvym8;Q$8b}~p}iRqZT({9Asw42Vf-6_}IO_iB53>XIPg@HtLit~HP-0VzAF591b zfj;we>s~Y%S;K%~z%XDKFbo(53GnK4(DmC&*jy;ephTGHFkl!k3>XIPLk1j=kWU{ZLz{QZCHNkV=GZUWx{ z*MU!fcL4#c0ZpI+8~~mJ_5rt_AmlgTTj0)KLcRw+1KtA?zy%!Ob>R0sgnR>h3`_uj zKTgO;zy{C*E&%6&A08v*25<=Y<55Ds2Cf17feGOHBhUeS2=svGfqlT8hY9%$_zCzC zxB+|#d;xq8d;q)+bb(Vq16Tygzyk0r@F#e=4g3oH0^9-=SNCD8jmd@q!+>Gn|H!~5 zoZcjqawZZU(wPdL{m^4qiZ*Bcq8;^x-}A3BD_<6p1!HIc`RUp zXUy6J$wLr`5uzl8?gDQ!{c(S}WO-msQ{_P6ClX>z{)=O~L(ey(Ro0E|*1A4}}#o zhr34OPtuuEsRTmebbXu>QkPlSOeuuu zP#N@w?6Uq;a&b(`w&0@2I5^d$s@YXFCN=0;tXFxHW;g<&C9FJ6w>~AUlna!(WBV;d zVJw_bC~uXVZFQ74gFv3-tm9u9dPw!`n3AlBZH=5+W4vcuTQW(%T2`Usv%t%rEu8~r zJIP#N8%jc^6)r^4?+o+4B%?Me;xr_Ri(v^*QdfFN-LAuFLGL*UzQo)tWWr<8hAlGK zGb8bG`bK57sd1>$l`@hVB$YI1*Y@Lh9#SglF_aUEC=D#0iZ-S))G5PkSy4NYllUHp zQ*z~UbM3@xt$n7}9FEjismuKIpPl$mW8}*i?H(j0a#w5C+DBK8HqN|O ztHLz=tS&dxjZlad zwKu9v7<-xfXcMXz4G#m^z~#{aC4=SaBuUSh@od%(r$>euD$;s0n$iP9DN>=Y%jWZn z+Fo)uSg^J6XbIao)~xy*eK6u1<#N<#;SNRIcSJXj)EJd~6V1ePxOX&kf{m+;+m>!@ zrx}mYRmE-yRiHz>>o3)e-rQD3gF)vD^ih8Jy7 zNC{AeWy@QcBZCn`sXI8_1(?mK*NwAZp_@20*%bsNq`9RWmgyp(s5$W5$h8hs>(yL^ zBry&`8ah1|_Z^o}?GP64_d_>GJci}}>Y`KKTxNdRk6m??jt`@$vTcU-2FF#KkH=Ol d`Vm}XPS4IL7Y;|yzr?r{&03}P;2V34{{wQlJ!}8~ literal 0 HcmV?d00001 diff --git a/src/core/c/Lucene_c.cc b/src/core/c/Lucene_c.cc new file mode 100644 index 00000000..caa30090 --- /dev/null +++ b/src/core/c/Lucene_c.cc @@ -0,0 +1,72 @@ +#include "Lucene_c.h" + + +#include "targetver.h" +#include +#include "LuceneHeaders.h" +#include "FileUtils.h" +#include "MiscUtils.h" +#include "ConstantScoreQuery.h" + +using namespace Lucene; +String UID = L"U$DID"; +extern "C" { + +struct index_t { IndexWriterPtr rep; }; + +index_t* index_open(const char *path) { + IndexWriterPtr writer = newLucene(FSDirectory::open(StringUtils::toString(path)), newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); + if (writer == NULL) {return NULL;} + index_t *index = new index_t; + if (index == NULL) { return NULL; } + index->rep = writer; + return index; +} + +int index_put(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int32_t uid) { + DocumentPtr doc = newLucene(); + for (int i = 0; i < nField; i++) { + doc->add(newLucene(StringUtils::toString((*field)[i]), StringUtils::toString((*key)[i]), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); + } + doc->add(newLucene(UID, StringUtils::toString(uid), Field::STORE_YES, Field::INDEX_NO)); + index->rep->addDocument(doc); + return 1; +} + +int index_search(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int type, int **result, int32_t *nResult) { + if(type == 0) { + PrefixFilterPtr filter = newLucene(newLucene(StringUtils::toString((*field)[0]), StringUtils::toString((*key)[0]))); + QueryPtr query = newLucene(filter); + IndexReaderPtr reader = index->rep->getReader() ; + IndexSearcherPtr searcher = newLucene(reader); + Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; + if (*nResult < hits.size()) { + *result = (int *)realloc(*result, hits.size() * sizeof(int)); + *nResult = hits.size(); + } + for (int i = 0; i < hits.size(); i++) { + (*result)[i] = StringUtils::toInt(searcher->doc(hits[i]->doc)->get(UID)); + } + } else if (type == 1) { + + } else if (type == 2) { + + } else if (type == 3); + return 1; +} + +void index_close(index_t *index) { + if (index->rep) { + index->rep->close(); + index->rep = NULL; + } + delete index; +} + +int index_optimize(index_t *index) { + index->rep->optimize(); + return 1; +} + + +} From ab5230a12d223bcf79ebed62c4fa1320396015bc Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 21 Oct 2021 21:40:07 +0800 Subject: [PATCH 02/10] add c interface --- src/core/c/.Lucene_c.cc.swp | Bin 12288 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/core/c/.Lucene_c.cc.swp diff --git a/src/core/c/.Lucene_c.cc.swp b/src/core/c/.Lucene_c.cc.swp deleted file mode 100644 index 9b14535f30de3335c5dc3f90fba20dc3f793e852..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeHNO>7%Q6rLdQ-vR4u}I1XRb&f)B_0AGZ!Roa4QAM87LCp%&hHoNK#eeQZcK1+1;5pZ{GXf z&nU{iQ94~;rt?!X1lLo9ynp_p^Uj_eIs6qNJc@)IzT4G#v2*yP1@F-O+(KtJ*=tha z^tvym8;Q$8b}~p}iRqZT({9Asw42Vf-6_}IO_iB53>XIPg@HtLit~HP-0VzAF591b zfj;we>s~Y%S;K%~z%XDKFbo(53GnK4(DmC&*jy;ephTGHFkl!k3>XIPLk1j=kWU{ZLz{QZCHNkV=GZUWx{ z*MU!fcL4#c0ZpI+8~~mJ_5rt_AmlgTTj0)KLcRw+1KtA?zy%!Ob>R0sgnR>h3`_uj zKTgO;zy{C*E&%6&A08v*25<=Y<55Ds2Cf17feGOHBhUeS2=svGfqlT8hY9%$_zCzC zxB+|#d;xq8d;q)+bb(Vq16Tygzyk0r@F#e=4g3oH0^9-=SNCD8jmd@q!+>Gn|H!~5 zoZcjqawZZU(wPdL{m^4qiZ*Bcq8;^x-}A3BD_<6p1!HIc`RUp zXUy6J$wLr`5uzl8?gDQ!{c(S}WO-msQ{_P6ClX>z{)=O~L(ey(Ro0E|*1A4}}#o zhr34OPtuuEsRTmebbXu>QkPlSOeuuu zP#N@w?6Uq;a&b(`w&0@2I5^d$s@YXFCN=0;tXFxHW;g<&C9FJ6w>~AUlna!(WBV;d zVJw_bC~uXVZFQ74gFv3-tm9u9dPw!`n3AlBZH=5+W4vcuTQW(%T2`Usv%t%rEu8~r zJIP#N8%jc^6)r^4?+o+4B%?Me;xr_Ri(v^*QdfFN-LAuFLGL*UzQo)tWWr<8hAlGK zGb8bG`bK57sd1>$l`@hVB$YI1*Y@Lh9#SglF_aUEC=D#0iZ-S))G5PkSy4NYllUHp zQ*z~UbM3@xt$n7}9FEjismuKIpPl$mW8}*i?H(j0a#w5C+DBK8HqN|O ztHLz=tS&dxjZlad zwKu9v7<-xfXcMXz4G#m^z~#{aC4=SaBuUSh@od%(r$>euD$;s0n$iP9DN>=Y%jWZn z+Fo)uSg^J6XbIao)~xy*eK6u1<#N<#;SNRIcSJXj)EJd~6V1ePxOX&kf{m+;+m>!@ zrx}mYRmE-yRiHz>>o3)e-rQD3gF)vD^ih8Jy7 zNC{AeWy@QcBZCn`sXI8_1(?mK*NwAZp_@20*%bsNq`9RWmgyp(s5$W5$h8hs>(yL^ zBry&`8ah1|_Z^o}?GP64_d_>GJci}}>Y`KKTxNdRk6m??jt`@$vTcU-2FF#KkH=Ol d`Vm}XPS4IL7Y;|yzr?r{&03}P;2V34{{wQlJ!}8~ From 40f89d3966e1eaacd0f0ee4d48b820cea5b9c0ad Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 21 Oct 2021 22:01:18 +0800 Subject: [PATCH 03/10] remove cmake option --- CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 838b25ad..62589f5f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,11 +101,11 @@ configure_file( IMMEDIATE @ONLY ) -add_custom_target( - uninstall - "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" - VERBATIM -) +#add_custom_target( +# uninstall +# "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" +# VERBATIM +#) if(ENABLE_PACKAGING) include(CreateLucene++Packages) From 2be0d1fd04a6ca2cf12893350e44dc2efb421830 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 22 Oct 2021 12:27:05 +0800 Subject: [PATCH 04/10] add interface --- include/lucene++/Lucene_c.h | 4 +++- src/core/c/Lucene_c.cc | 25 ++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/include/lucene++/Lucene_c.h b/include/lucene++/Lucene_c.h index a763dd6e..9b8b4c8b 100644 --- a/include/lucene++/Lucene_c.h +++ b/include/lucene++/Lucene_c.h @@ -18,7 +18,9 @@ __attribute__((visibility("default"))) index_t* index_open(const char *path); __attribute__((visibility("default"))) int index_put(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int32_t uid); -__attribute__((visibility("default"))) int index_search(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int type, int **result, int32_t *nResult); +__attribute__((visibility("default"))) int index_search(index_t *index, const char *field, int32_t nField, const char *key, int32_t nKey, int type, int **result, int32_t *nResult); + +__attribute__((visibility("default"))) int index_multi_search(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int type, int **result, int32_t *nResult); __attribute__((visibility("default"))) void index_close(index_t *index); diff --git a/src/core/c/Lucene_c.cc b/src/core/c/Lucene_c.cc index caa30090..034c8c35 100644 --- a/src/core/c/Lucene_c.cc +++ b/src/core/c/Lucene_c.cc @@ -33,9 +33,10 @@ int index_put(index_t *index, const char **field, int32_t nField, const char **k return 1; } -int index_search(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int type, int **result, int32_t *nResult) { +int index_mulit_search(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int type, int **result, int32_t *nResult) { if(type == 0) { - PrefixFilterPtr filter = newLucene(newLucene(StringUtils::toString((*field)[0]), StringUtils::toString((*key)[0]))); + } else if (type == 1) { + PrefixFilterPtr filter = newLucene(newLucene(StringUtils::toString((*field)), StringUtils::toString((*key)))); QueryPtr query = newLucene(filter); IndexReaderPtr reader = index->rep->getReader() ; IndexSearcherPtr searcher = newLucene(reader); @@ -47,8 +48,26 @@ int index_search(index_t *index, const char **field, int32_t nField, const char for (int i = 0; i < hits.size(); i++) { (*result)[i] = StringUtils::toInt(searcher->doc(hits[i]->doc)->get(UID)); } - } else if (type == 1) { + } else if (type == 2) { + } else if (type == 3); + return 1; +} +int index_search(index_t *index, const char *field, int32_t nField, const char *key, int32_t nKey, int type, int **result, int32_t *nResult) { + if(type == 0) { + } else if (type == 1) { + PrefixFilterPtr filter = newLucene(newLucene(StringUtils::toString((field)), StringUtils::toString((key)))); + QueryPtr query = newLucene(filter); + IndexReaderPtr reader = index->rep->getReader() ; + IndexSearcherPtr searcher = newLucene(reader); + Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; + if (*nResult < hits.size()) { + *result = (int *)realloc(*result, hits.size() * sizeof(int)); + *nResult = hits.size(); + } + for (int i = 0; i < hits.size(); i++) { + (*result)[i] = StringUtils::toInt(searcher->doc(hits[i]->doc)->get(UID)); + } } else if (type == 2) { } else if (type == 3); From f26c68ce487a081f72fdf228c271190b8eb33fba Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 22 Oct 2021 14:08:18 +0800 Subject: [PATCH 05/10] add interface --- include/lucene++/Lucene_c.h | 9 +++++++-- src/core/c/Lucene_c.cc | 32 +++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/include/lucene++/Lucene_c.h b/include/lucene++/Lucene_c.h index 9b8b4c8b..a36d0d12 100644 --- a/include/lucene++/Lucene_c.h +++ b/include/lucene++/Lucene_c.h @@ -12,11 +12,11 @@ extern "C" { typedef struct index_t index_t; - +typedef struct index_document_t index_document_t; __attribute__((visibility("default"))) index_t* index_open(const char *path); -__attribute__((visibility("default"))) int index_put(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int32_t uid); +__attribute__((visibility("default"))) int index_put(index_t *index, index_document_t *idoc); __attribute__((visibility("default"))) int index_search(index_t *index, const char *field, int32_t nField, const char *key, int32_t nKey, int type, int **result, int32_t *nResult); @@ -27,6 +27,11 @@ __attribute__((visibility("default"))) void index_close(index_t *index); __attribute__((visibility("default"))) int index_optimize(index_t *index); +__attribute__((visibility("default"))) index_document_t* index_document_create(); + +__attribute__((visibility("default"))) void index_document_add(index_document_t *idoc, const char **field, int nFields, const char **val, int nVals, int32_t uid); + +__attribute__((visibility("default"))) void index_document_destory(index_document_t *doc); #ifdef __cplusplus } #endif diff --git a/src/core/c/Lucene_c.cc b/src/core/c/Lucene_c.cc index 034c8c35..1903e9d2 100644 --- a/src/core/c/Lucene_c.cc +++ b/src/core/c/Lucene_c.cc @@ -13,6 +13,9 @@ String UID = L"U$DID"; extern "C" { struct index_t { IndexWriterPtr rep; }; +struct index_document_t { DocumentPtr rep;}; + + index_t* index_open(const char *path) { IndexWriterPtr writer = newLucene(FSDirectory::open(StringUtils::toString(path)), newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); @@ -23,13 +26,8 @@ index_t* index_open(const char *path) { return index; } -int index_put(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int32_t uid) { - DocumentPtr doc = newLucene(); - for (int i = 0; i < nField; i++) { - doc->add(newLucene(StringUtils::toString((*field)[i]), StringUtils::toString((*key)[i]), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); - } - doc->add(newLucene(UID, StringUtils::toString(uid), Field::STORE_YES, Field::INDEX_NO)); - index->rep->addDocument(doc); +int index_put(index_t *index, index_document_t *idoc) { + index->rep->addDocument(idoc->rep); return 1; } @@ -82,10 +80,30 @@ void index_close(index_t *index) { delete index; } + + int index_optimize(index_t *index) { index->rep->optimize(); return 1; } +index_document_t* index_document_create() { + DocumentPtr doc = newLucene(); + if (doc == NULL) { return NULL; } + index_document_t *idoc = new index_document_t; + idoc->rep = doc; + return idoc; +} +void index_document_destory(index_document_t *idoc) { + if (idoc == NULL) { return; } + idoc->rep = NULL; + delete idoc; +} +void index_document_add(index_document_t *idoc, const char **field, int nFields, const char **val, int nVals, int32_t uid) { + for (int i = 0; i < nFields; i++) { + idoc->rep->add(newLucene(StringUtils::toString(field[i]), StringUtils::toString(val[i]), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); + } + idoc->rep->add(newLucene(UID, StringUtils::toString(uid), Field::STORE_YES, Field::INDEX_NO)); +} } From 00f331bb64bf708ee857c306d162ea23f4735fda Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 22 Oct 2021 14:14:15 +0800 Subject: [PATCH 06/10] add interface --- include/lucene++/Lucene_c.h | 2 +- src/core/c/Lucene_c.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/lucene++/Lucene_c.h b/include/lucene++/Lucene_c.h index a36d0d12..073eb2c2 100644 --- a/include/lucene++/Lucene_c.h +++ b/include/lucene++/Lucene_c.h @@ -31,7 +31,7 @@ __attribute__((visibility("default"))) index_document_t* index_document_create() __attribute__((visibility("default"))) void index_document_add(index_document_t *idoc, const char **field, int nFields, const char **val, int nVals, int32_t uid); -__attribute__((visibility("default"))) void index_document_destory(index_document_t *doc); +__attribute__((visibility("default"))) void index_document_destroyy(index_document_t *doc); #ifdef __cplusplus } #endif diff --git a/src/core/c/Lucene_c.cc b/src/core/c/Lucene_c.cc index 1903e9d2..2424b420 100644 --- a/src/core/c/Lucene_c.cc +++ b/src/core/c/Lucene_c.cc @@ -95,7 +95,7 @@ index_document_t* index_document_create() { } -void index_document_destory(index_document_t *idoc) { +void index_document_destroy(index_document_t *idoc) { if (idoc == NULL) { return; } idoc->rep = NULL; delete idoc; From 87df6df1871d4e892120418a1762c12b061db127 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 22 Oct 2021 14:17:18 +0800 Subject: [PATCH 07/10] add interface --- include/lucene++/Lucene_c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/lucene++/Lucene_c.h b/include/lucene++/Lucene_c.h index 073eb2c2..9be9f80c 100644 --- a/include/lucene++/Lucene_c.h +++ b/include/lucene++/Lucene_c.h @@ -31,7 +31,7 @@ __attribute__((visibility("default"))) index_document_t* index_document_create() __attribute__((visibility("default"))) void index_document_add(index_document_t *idoc, const char **field, int nFields, const char **val, int nVals, int32_t uid); -__attribute__((visibility("default"))) void index_document_destroyy(index_document_t *doc); +__attribute__((visibility("default"))) void index_document_destroy(index_document_t *doc); #ifdef __cplusplus } #endif From ced8a506dd8bc3624a94a8b33ac86a37f31599e5 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Fri, 22 Oct 2021 16:15:52 +0800 Subject: [PATCH 08/10] add interface --- include/lucene++/Lucene_c.h | 2 +- src/core/c/Lucene_c.cc | 9 +++-- src/demo/indexfiles/main.cpp | 75 ++++++++++++++++++++++-------------- 3 files changed, 53 insertions(+), 33 deletions(-) diff --git a/include/lucene++/Lucene_c.h b/include/lucene++/Lucene_c.h index 9be9f80c..144d22f9 100644 --- a/include/lucene++/Lucene_c.h +++ b/include/lucene++/Lucene_c.h @@ -29,7 +29,7 @@ __attribute__((visibility("default"))) int index_optimize(index_t *index); __attribute__((visibility("default"))) index_document_t* index_document_create(); -__attribute__((visibility("default"))) void index_document_add(index_document_t *idoc, const char **field, int nFields, const char **val, int nVals, int32_t uid); +__attribute__((visibility("default"))) void index_document_add(index_document_t *idoc, const char *field, int nFields, const char *val, int nVals, int index); __attribute__((visibility("default"))) void index_document_destroy(index_document_t *doc); #ifdef __cplusplus diff --git a/src/core/c/Lucene_c.cc b/src/core/c/Lucene_c.cc index 2424b420..9254de93 100644 --- a/src/core/c/Lucene_c.cc +++ b/src/core/c/Lucene_c.cc @@ -100,10 +100,11 @@ void index_document_destroy(index_document_t *idoc) { idoc->rep = NULL; delete idoc; } -void index_document_add(index_document_t *idoc, const char **field, int nFields, const char **val, int nVals, int32_t uid) { - for (int i = 0; i < nFields; i++) { - idoc->rep->add(newLucene(StringUtils::toString(field[i]), StringUtils::toString(val[i]), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); +void index_document_add(index_document_t *idoc, const char *field, int nFields, const char *val, int nVals, int32_t index) { + if (index) { + idoc->rep->add(newLucene(StringUtils::toString(field), StringUtils::toString(val), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); + } else { + idoc->rep->add(newLucene(UID, StringUtils::toString(val), Field::STORE_YES, Field::INDEX_NO)); } - idoc->rep->add(newLucene(UID, StringUtils::toString(uid), Field::STORE_YES, Field::INDEX_NO)); } } diff --git a/src/demo/indexfiles/main.cpp b/src/demo/indexfiles/main.cpp index e6911f48..c253b3f5 100644 --- a/src/demo/indexfiles/main.cpp +++ b/src/demo/indexfiles/main.cpp @@ -17,7 +17,7 @@ #include "LuceneHeaders.h" #include "FileUtils.h" #include "MiscUtils.h" - +#include "ConstantScoreQuery.h" using namespace Lucene; int32_t docNumber = 0; @@ -42,6 +42,14 @@ DocumentPtr fileDocument(const String& docFile) { return doc; } +int addDoc(IndexWriterPtr& writer) { + DocumentPtr doc = newLucene(); + doc->add(newLucene(L"tag1", L"cpu", Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); + doc->add(newLucene(L"uid", StringUtils::toString(10), Field::STORE_YES, Field::INDEX_NO)); + writer->addDocument(doc); + return 0; + +} void indexDocs(const IndexWriterPtr& writer, const String& sourceDir) { HashSet dirList(HashSet::newInstance()); if (!FileUtils::listDirectory(sourceDir, false, dirList)) { @@ -65,47 +73,58 @@ void indexDocs(const IndexWriterPtr& writer, const String& sourceDir) { /// Index all text files under a directory. int main(int argc, char* argv[]) { - if (argc != 3) { + if (argc != 2) { std::wcout << L"Usage: indexfiles.exe \n"; return 1; } - String sourceDir(StringUtils::toUnicode(argv[1])); - String indexDir(StringUtils::toUnicode(argv[2])); + //String sourceDir(StringUtils::toUnicode(argv[1])); + String indexDir(StringUtils::toUnicode(argv[1])); - if (!FileUtils::isDirectory(sourceDir)) { - std::wcout << L"Source directory doesn't exist: " << sourceDir << L"\n"; - return 1; - } + //if (!FileUtils::isDirectory(sourceDir)) { + // std::wcout << L"Source directory doesn't exist: " << sourceDir << L"\n"; + // return 1; + //} - if (!FileUtils::isDirectory(indexDir)) { - if (!FileUtils::createDirectory(indexDir)) { - std::wcout << L"Unable to create directory: " << indexDir << L"\n"; - return 1; - } - } + //if (!FileUtils::isDirectory(indexDir)) { + // if (!FileUtils::createDirectory(indexDir)) { + // std::wcout << L"Unable to create directory: " << indexDir << L"\n"; + // return 1; + // } + //} uint64_t beginIndex = MiscUtils::currentTimeMillis(); try { IndexWriterPtr writer = newLucene(FSDirectory::open(indexDir), newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); std::wcout << L"Indexing to directory: " << indexDir << L"...\n"; - - indexDocs(writer, sourceDir); - - uint64_t endIndex = MiscUtils::currentTimeMillis(); - uint64_t indexDuration = endIndex - beginIndex; - std::wcout << L"Index time: " << indexDuration << L" milliseconds\n"; - std::wcout << L"Optimizing...\n"; - - writer->optimize(); - - uint64_t optimizeDuration = MiscUtils::currentTimeMillis() - endIndex; - std::wcout << L"Optimize time: " << optimizeDuration << L" milliseconds\n"; - + for (int i = 0; i < 10000; i++) { + addDoc(writer); + } + IndexReaderPtr reader = writer->getReader(); + // PrefixFilter combined with ConstantScoreQuery + PrefixFilterPtr filter = newLucene(newLucene(L"tag1", L"cp")); + QueryPtr query = newLucene(filter); + IndexSearcherPtr searcher = newLucene(reader); + Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; + std::wcout << "size: " << hits.size() << std::endl; + //EXPECT_EQ(4, hits.size()); + + ///indexDocs(writer, sourceDir); + + //uint64_t endIndex = MiscUtils::currentTimeMillis(); + //uint64_t indexDuration = endIndex - beginIndex; + //std::wcout << L"Index time: " << indexDuration << L" milliseconds\n"; + //std::wcout << L"Optimizing...\n"; + + //writer->optimize(); + + //uint64_t optimizeDuration = MiscUtils::currentTimeMillis() - endIndex; + //std::wcout << L"Optimize time: " << optimizeDuration << L" milliseconds\n"; + writer->close(); - std::wcout << L"Total time: " << indexDuration + optimizeDuration << L" milliseconds\n"; + //std::wcout << L"Total time: " << indexDuration + optimizeDuration << L" milliseconds\n"; } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1; From efa7cd6f4bc5b63a54b5018790107f5e64c2731c Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 23 Oct 2021 11:48:19 +0800 Subject: [PATCH 09/10] add lucene test --- include/lucene++/Lucene_c.h | 2 +- src/core/c/Lucene_c.cc | 60 +++++++++++++++++++++++++++--------- src/demo/indexfiles/main.cpp | 29 ++++++++++++++++- 3 files changed, 74 insertions(+), 17 deletions(-) diff --git a/include/lucene++/Lucene_c.h b/include/lucene++/Lucene_c.h index 144d22f9..47f161e7 100644 --- a/include/lucene++/Lucene_c.h +++ b/include/lucene++/Lucene_c.h @@ -20,7 +20,7 @@ __attribute__((visibility("default"))) int index_put(index_t *index, index_docum __attribute__((visibility("default"))) int index_search(index_t *index, const char *field, int32_t nField, const char *key, int32_t nKey, int type, int **result, int32_t *nResult); -__attribute__((visibility("default"))) int index_multi_search(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int type, int **result, int32_t *nResult); +__attribute__((visibility("default"))) int index_multi_search(index_t *index, const char **field, const char **key, int *qSet, int nQuery, int opera, int **result, int32_t *nResult); __attribute__((visibility("default"))) void index_close(index_t *index); diff --git a/src/core/c/Lucene_c.cc b/src/core/c/Lucene_c.cc index 9254de93..1e2381e6 100644 --- a/src/core/c/Lucene_c.cc +++ b/src/core/c/Lucene_c.cc @@ -10,6 +10,8 @@ using namespace Lucene; String UID = L"U$DID"; +static const int MAX_NUM_OF_OUTPUT = 1000*10000; + extern "C" { struct index_t { IndexWriterPtr rep; }; @@ -31,14 +33,47 @@ int index_put(index_t *index, index_document_t *idoc) { return 1; } -int index_mulit_search(index_t *index, const char **field, int32_t nField, const char **key, int32_t nKey, int type, int **result, int32_t *nResult) { +int index_mulit_search(index_t *index, const char **field, const char **key, int *qSet, int nQuery, int opera, int **result, int32_t *nResult) { + if (index->rep == NULL) { return -1; } + IndexReaderPtr reader = index->rep->getReader() ; + IndexSearcherPtr searcher = newLucene(reader); + BooleanQueryPtr bQuery = newLucene(); + BooleanClause::Occur occur; + + if (opera == 0) { + occur = BooleanClause::MUST; + } else if (opera == 1) { + occur = BooleanClause::SHOULD; + } else if (opera == 2) { + occur = BooleanClause::MUST_NOT; + } + for (int i = 0; i < nQuery; i++) { + if (qSet[i] == 0) { + bQuery->add(newLucene(newLucene(StringUtils::toString(field[i]),StringUtils::toString(key[i]))), occur); + } else if (qSet[i] == 1) { + bQuery->add(newLucene(newLucene(StringUtils::toString(field[i]),StringUtils::toString(key[i]))), occur); + } else if (qSet[i] == 2) { + //other query type + } else if (qSet[i] == 3) { + + } + } + Collection hits = searcher->search(bQuery, FilterPtr(), MAX_NUM_OF_OUTPUT)->scoreDocs; + if (*nResult < hits.size()) { + *result = (int *)realloc(*result, hits.size() * sizeof(int)); + *nResult = hits.size(); + } + for (int i = 0; i < hits.size(); i++) { + (*result)[i] = StringUtils::toInt(searcher->doc(hits[i]->doc)->get(UID)); + } + return 0; +} +int index_search(index_t *index, const char *field, int32_t nField, const char *key, int32_t nKey, int type, int **result, int32_t *nResult) { if(type == 0) { - } else if (type == 1) { - PrefixFilterPtr filter = newLucene(newLucene(StringUtils::toString((*field)), StringUtils::toString((*key)))); - QueryPtr query = newLucene(filter); IndexReaderPtr reader = index->rep->getReader() ; IndexSearcherPtr searcher = newLucene(reader); - Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; + QueryPtr query = newLucene(newLucene(StringUtils::toString(*field),StringUtils::toString(*key))); + Collection hits = searcher->search(query, FilterPtr(), MAX_NUM_OF_OUTPUT)->scoreDocs; if (*nResult < hits.size()) { *result = (int *)realloc(*result, hits.size() * sizeof(int)); *nResult = hits.size(); @@ -46,19 +81,14 @@ int index_mulit_search(index_t *index, const char **field, int32_t nField, const for (int i = 0; i < hits.size(); i++) { (*result)[i] = StringUtils::toInt(searcher->doc(hits[i]->doc)->get(UID)); } - } else if (type == 2) { - - } else if (type == 3); - return 1; -} -int index_search(index_t *index, const char *field, int32_t nField, const char *key, int32_t nKey, int type, int **result, int32_t *nResult) { - if(type == 0) { + } else if (type == 1) { - PrefixFilterPtr filter = newLucene(newLucene(StringUtils::toString((field)), StringUtils::toString((key)))); - QueryPtr query = newLucene(filter); + //PrefixFilterPtr filter = newLucene(newLucene(StringUtils::toString((*field)), StringUtils::toString((*key)))); + //QueryPtr query = newLucene(filter); + PrefixQueryPtr query = newLucene(newLucene(StringUtils::toString(*field), StringUtils::toString(*key))); IndexReaderPtr reader = index->rep->getReader() ; IndexSearcherPtr searcher = newLucene(reader); - Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; + Collection hits = searcher->search(query, FilterPtr(), MAX_NUM_OF_OUTPUT)->scoreDocs; if (*nResult < hits.size()) { *result = (int *)realloc(*result, hits.size() * sizeof(int)); *nResult = hits.size(); diff --git a/src/demo/indexfiles/main.cpp b/src/demo/indexfiles/main.cpp index c253b3f5..7b247938 100644 --- a/src/demo/indexfiles/main.cpp +++ b/src/demo/indexfiles/main.cpp @@ -18,6 +18,7 @@ #include "FileUtils.h" #include "MiscUtils.h" #include "ConstantScoreQuery.h" +#include "BooleanQuery.h" using namespace Lucene; int32_t docNumber = 0; @@ -44,7 +45,8 @@ DocumentPtr fileDocument(const String& docFile) { int addDoc(IndexWriterPtr& writer) { DocumentPtr doc = newLucene(); - doc->add(newLucene(L"tag1", L"cpu", Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); + doc->add(newLucene(L"tag1", L"cpu1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); + doc->add(newLucene(L"tag2", L"cpu2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS)); doc->add(newLucene(L"uid", StringUtils::toString(10), Field::STORE_YES, Field::INDEX_NO)); writer->addDocument(doc); return 0; @@ -108,6 +110,31 @@ int main(int argc, char* argv[]) { IndexSearcherPtr searcher = newLucene(reader); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; std::wcout << "size: " << hits.size() << std::endl; + + + BooleanQueryPtr q = newLucene(); + q->add(newLucene(newLucene(L"tag1", L"cpu1")), BooleanClause::SHOULD); + q->add(newLucene(newLucene(L"tag2", L"cpu2")), BooleanClause::SHOULD); + hits = searcher->search(q, FilterPtr(), 100000000)->scoreDocs; + std::wcout << "size: " << hits.size() << std::endl; + + q->add(newLucene(newLucene(L"tag1", L"cpu1")), BooleanClause::SHOULD); + q->add(newLucene(newLucene(L"tag1", L"cpu1")), BooleanClause::MUST); + hits = searcher->search(q, 100000000)->scoreDocs; + std::wcout << "size: " << hits.size() << std::endl; + + q->add(newLucene(newLucene(L"tag1", L"cpu1")), BooleanClause::MUST); + q->add(newLucene(newLucene(L"tag2", L"cpu1")), BooleanClause::MUST); + hits = searcher->search(q, 10000000)->scoreDocs; + + + + BooleanQueryPtr bquery = newLucene(); + bquery->add(newLucene(newLucene(L"tag1", L"xxx")), BooleanClause::SHOULD); + bquery->add(newLucene(newLucene(L"tag2", L"cpuxxx")), BooleanClause::SHOULD); + hits = searcher->search(bquery, FilterPtr(), 10000000)->scoreDocs; + std::wcout << "size: " << hits.size() << std::endl; + //EXPECT_EQ(4, hits.size()); ///indexDocs(writer, sourceDir); From 4787b682b5293852d4d8febb7128ee18e65b7c65 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Sat, 23 Oct 2021 12:03:34 +0800 Subject: [PATCH 10/10] add lucene test --- src/core/c/Lucene_c.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/c/Lucene_c.cc b/src/core/c/Lucene_c.cc index 1e2381e6..f9817c69 100644 --- a/src/core/c/Lucene_c.cc +++ b/src/core/c/Lucene_c.cc @@ -33,7 +33,7 @@ int index_put(index_t *index, index_document_t *idoc) { return 1; } -int index_mulit_search(index_t *index, const char **field, const char **key, int *qSet, int nQuery, int opera, int **result, int32_t *nResult) { +int index_multi_search(index_t *index, const char **field, const char **key, int *qSet, int nQuery, int opera, int **result, int32_t *nResult) { if (index->rep == NULL) { return -1; } IndexReaderPtr reader = index->rep->getReader() ; IndexSearcherPtr searcher = newLucene(reader);