From ffe5737f7d7b954a87073b03b8b547a61e640d77 Mon Sep 17 00:00:00 2001 From: KevinHuSh Date: Sat, 11 May 2024 19:47:53 +0800 Subject: [PATCH] let index be batchly. (#733) ### What problem does this PR solve? Index chunks into Elasticsearch in batches of 32 instead of one bulk request, reporting progress every 128 chunks and logging the elapsed indexing time. ### Type of change - [x] Refactoring --- rag/svr/task_executor.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index a7ebd0d88a4..7c25ce0d190 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -285,7 +285,12 @@ def main(): init_kb(r) chunk_count = len(set([c["_id"] for c in cks])) st = timer() - es_r = ELASTICSEARCH.bulk(cks, search.index_name(r["tenant_id"])) + es_r = "" + for b in range(0, len(cks), 32): + es_r = ELASTICSEARCH.bulk(cks[b:b+32], search.index_name(r["tenant_id"])) + if b % 128 == 0: + callback(prog=0.8 + 0.1 * (b + 1) / len(cks), msg="") + cron_logger.info("Indexing elapsed({}): {}".format(r["name"], timer()-st)) if es_r: callback(-1, "Index failure!")