diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index b155f8fe02f..fe62e468634 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -78,7 +78,7 @@ jobs:
echo "Waiting for service to be available..."
sleep 5
done
- cd sdk/python && poetry install && source .venv/bin/activate && cd test && pytest t_dataset.py t_chat.py t_session.py t_document.py t_chunk.py
+ cd sdk/python && poetry install && source .venv/bin/activate && cd test && pytest --tb=short t_dataset.py t_chat.py t_session.py t_document.py t_chunk.py
- name: Stop ragflow:dev
if: always() # always run this step even if previous steps failed
diff --git a/README.md b/README.md
index 997362ecc8c..bfae7f92989 100644
--- a/README.md
+++ b/README.md
@@ -285,7 +285,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev .
git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
export POETRY_VIRTUALENVS_CREATE=true POETRY_VIRTUALENVS_IN_PROJECT=true
- ~/.local/bin/poetry install --sync --no-root # install RAGFlow dependent python modules
+ ~/.local/bin/poetry install --sync --no-root --with=full # install RAGFlow dependent python modules
```
3. Launch the dependent services (MinIO, Elasticsearch, Redis, and MySQL) using Docker Compose:
@@ -295,7 +295,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev .
Add the following line to `/etc/hosts` to resolve all hosts specified in **docker/service_conf.yaml** to `127.0.0.1`:
```
- 127.0.0.1 es01 mysql minio redis
+ 127.0.0.1 es01 infinity mysql minio redis
```
In **docker/service_conf.yaml**, update mysql port to `5455` and es port to `1200`, as specified in **docker/.env**.
diff --git a/README_ja.md b/README_ja.md
index acec08b0e44..4442d13dfd6 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -250,7 +250,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev .
`/etc/hosts` に以下の行を追加して、**docker/service_conf.yaml** に指定されたすべてのホストを `127.0.0.1` に解決します:
```
- 127.0.0.1 es01 mysql minio redis
+ 127.0.0.1 es01 infinity mysql minio redis
```
**docker/service_conf.yaml** で mysql のポートを `5455` に、es のポートを `1200` に更新します(**docker/.env** に指定された通り).
diff --git a/README_ko.md b/README_ko.md
index 3007beac6cd..a7577d0d03a 100644
--- a/README_ko.md
+++ b/README_ko.md
@@ -254,7 +254,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev .
`/etc/hosts` 에 다음 줄을 추가하여 **docker/service_conf.yaml** 에 지정된 모든 호스트를 `127.0.0.1` 로 해결합니다:
```
- 127.0.0.1 es01 mysql minio redis
+ 127.0.0.1 es01 infinity mysql minio redis
```
**docker/service_conf.yaml** 에서 mysql 포트를 `5455` 로, es 포트를 `1200` 으로 업데이트합니다( **docker/.env** 에 지정된 대로).
diff --git a/README_zh.md b/README_zh.md
index ad4a15a598b..e3e8cc755c9 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -252,7 +252,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev .
在 `/etc/hosts` 中添加以下代码,将 **docker/service_conf.yaml** 文件中的所有 host 地址都解析为 `127.0.0.1`:
```
- 127.0.0.1 es01 mysql minio redis
+ 127.0.0.1 es01 infinity mysql minio redis
```
在文件 **docker/service_conf.yaml** 中,对照 **docker/.env** 的配置将 mysql 端口更新为 `5455`,es 端口更新为 `1200`。
diff --git a/api/apps/api_app.py b/api/apps/api_app.py
index 3bf14590f06..b21f129d2e0 100644
--- a/api/apps/api_app.py
+++ b/api/apps/api_app.py
@@ -529,13 +529,14 @@ def list_chunks():
return get_json_result(
data=False, message="Can't find doc_name or doc_id"
)
+ kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
- res = retrievaler.chunk_list(doc_id=doc_id, tenant_id=tenant_id)
+ res = retrievaler.chunk_list(doc_id, tenant_id, kb_ids)
res = [
{
"content": res_item["content_with_weight"],
"doc_name": res_item["docnm_kwd"],
- "img_id": res_item["img_id"]
+ "image_id": res_item["img_id"]
} for res_item in res
]
diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py
index 02abad1ddb6..365577687a7 100644
--- a/api/apps/chunk_app.py
+++ b/api/apps/chunk_app.py
@@ -18,12 +18,10 @@
from flask import request
from flask_login import login_required, current_user
-from elasticsearch_dsl import Q
from api.db.services.dialog_service import keyword_extraction
from rag.app.qa import rmPrefix, beAdoc
from rag.nlp import search, rag_tokenizer
-from rag.utils.es_conn import ELASTICSEARCH
from rag.utils import rmSpace
from api.db import LLMType, ParserType
from api.db.services.knowledgebase_service import KnowledgebaseService
@@ -31,12 +29,11 @@
from api.db.services.user_service import UserTenantService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.db.services.document_service import DocumentService
-from api.settings import RetCode, retrievaler, kg_retrievaler
+from api.settings import RetCode, retrievaler, kg_retrievaler, docStoreConn
from api.utils.api_utils import get_json_result
import hashlib
import re
-
@manager.route('/list', methods=['POST'])
@login_required
@validate_request("doc_id")
@@ -53,12 +50,13 @@ def list_chunk():
e, doc = DocumentService.get_by_id(doc_id)
if not e:
return get_data_error_result(message="Document not found!")
+ kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
query = {
"doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
}
if "available_int" in req:
query["available_int"] = int(req["available_int"])
- sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
+ sres = retrievaler.search(query, search.index_name(tenant_id), kb_ids, highlight=True)
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
for id in sres.ids:
d = {
@@ -69,16 +67,12 @@ def list_chunk():
"doc_id": sres.field[id]["doc_id"],
"docnm_kwd": sres.field[id]["docnm_kwd"],
"important_kwd": sres.field[id].get("important_kwd", []),
- "img_id": sres.field[id].get("img_id", ""),
+ "image_id": sres.field[id].get("img_id", ""),
"available_int": sres.field[id].get("available_int", 1),
- "positions": sres.field[id].get("position_int", "").split("\t")
+ "positions": json.loads(sres.field[id].get("position_list", "[]")),
}
- if len(d["positions"]) % 5 == 0:
- poss = []
- for i in range(0, len(d["positions"]), 5):
- poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
- float(d["positions"][i + 3]), float(d["positions"][i + 4])])
- d["positions"] = poss
+ assert isinstance(d["positions"], list)
+ assert len(d["positions"])==0 or (isinstance(d["positions"][0], list) and len(d["positions"][0]) == 5)
res["chunks"].append(d)
return get_json_result(data=res)
except Exception as e:
@@ -96,22 +90,20 @@ def get():
tenants = UserTenantService.query(user_id=current_user.id)
if not tenants:
return get_data_error_result(message="Tenant not found!")
- res = ELASTICSEARCH.get(
- chunk_id, search.index_name(
- tenants[0].tenant_id))
- if not res.get("found"):
+ tenant_id = tenants[0].tenant_id
+
+ kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
+ chunk = docStoreConn.get(chunk_id, search.index_name(tenant_id), kb_ids)
+ if chunk is None:
return server_error_response("Chunk not found")
- id = res["_id"]
- res = res["_source"]
- res["chunk_id"] = id
k = []
- for n in res.keys():
+ for n in chunk.keys():
if re.search(r"(_vec$|_sm_|_tks|_ltks)", n):
k.append(n)
for n in k:
- del res[n]
+ del chunk[n]
- return get_json_result(data=res)
+ return get_json_result(data=chunk)
except Exception as e:
if str(e).find("NotFoundError") >= 0:
return get_json_result(data=False, message='Chunk not found!',
@@ -162,7 +154,7 @@ def set():
v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
d["q_%d_vec" % len(v)] = v.tolist()
- ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
+ docStoreConn.insert([d], search.index_name(tenant_id), doc.kb_id)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@@ -174,11 +166,11 @@ def set():
def switch():
req = request.json
try:
- tenant_id = DocumentService.get_tenant_id(req["doc_id"])
- if not tenant_id:
- return get_data_error_result(message="Tenant not found!")
- if not ELASTICSEARCH.upsert([{"id": i, "available_int": int(req["available_int"])} for i in req["chunk_ids"]],
- search.index_name(tenant_id)):
+ e, doc = DocumentService.get_by_id(req["doc_id"])
+ if not e:
+ return get_data_error_result(message="Document not found!")
+ if not docStoreConn.update({"id": req["chunk_ids"]}, {"available_int": int(req["available_int"])},
+ search.index_name(doc.tenant_id), doc.kb_id):
return get_data_error_result(message="Index updating failure")
return get_json_result(data=True)
except Exception as e:
@@ -191,12 +183,11 @@ def switch():
def rm():
req = request.json
try:
- if not ELASTICSEARCH.deleteByQuery(
- Q("ids", values=req["chunk_ids"]), search.index_name(current_user.id)):
- return get_data_error_result(message="Index updating failure")
e, doc = DocumentService.get_by_id(req["doc_id"])
if not e:
return get_data_error_result(message="Document not found!")
+ if not docStoreConn.delete({"id": req["chunk_ids"]}, search.index_name(current_user.id), doc.kb_id):
+ return get_data_error_result(message="Index updating failure")
deleted_chunk_ids = req["chunk_ids"]
chunk_number = len(deleted_chunk_ids)
DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
@@ -239,7 +230,7 @@ def create():
v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
v = 0.1 * v[0] + 0.9 * v[1]
d["q_%d_vec" % len(v)] = v.tolist()
- ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
+ docStoreConn.insert([d], search.index_name(tenant_id), doc.kb_id)
DocumentService.increment_chunk_num(
doc.id, doc.kb_id, c, 1, 0)
@@ -256,8 +247,9 @@ def retrieval_test():
page = int(req.get("page", 1))
size = int(req.get("size", 30))
question = req["question"]
- kb_id = req["kb_id"]
- if isinstance(kb_id, str): kb_id = [kb_id]
+ kb_ids = req["kb_id"]
+ if isinstance(kb_ids, str):
+ kb_ids = [kb_ids]
doc_ids = req.get("doc_ids", [])
similarity_threshold = float(req.get("similarity_threshold", 0.0))
vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
@@ -265,17 +257,17 @@ def retrieval_test():
try:
tenants = UserTenantService.query(user_id=current_user.id)
- for kid in kb_id:
+ for kb_id in kb_ids:
for tenant in tenants:
if KnowledgebaseService.query(
- tenant_id=tenant.tenant_id, id=kid):
+ tenant_id=tenant.tenant_id, id=kb_id):
break
else:
return get_json_result(
data=False, message='Only owner of knowledgebase authorized for this operation.',
code=RetCode.OPERATING_ERROR)
- e, kb = KnowledgebaseService.get_by_id(kb_id[0])
+ e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
if not e:
return get_data_error_result(message="Knowledgebase not found!")
@@ -290,7 +282,7 @@ def retrieval_test():
question += keyword_extraction(chat_mdl, question)
retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
- ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, kb_id, page, size,
+ ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, kb_ids, page, size,
similarity_threshold, vector_similarity_weight, top,
doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"))
for c in ranks["chunks"]:
@@ -309,12 +301,16 @@ def retrieval_test():
@login_required
def knowledge_graph():
doc_id = request.args["doc_id"]
+ e, doc = DocumentService.get_by_id(doc_id)
+ if not e:
+ return get_data_error_result(message="Document not found!")
+ tenant_id = DocumentService.get_tenant_id(doc_id)
+ kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
req = {
"doc_ids":[doc_id],
"knowledge_graph_kwd": ["graph", "mind_map"]
}
- tenant_id = DocumentService.get_tenant_id(doc_id)
- sres = retrievaler.search(req, search.index_name(tenant_id))
+    sres = retrievaler.search(req, search.index_name(tenant_id), kb_ids)
obj = {"graph": {}, "mind_map": {}}
for id in sres.ids[:2]:
ty = sres.field[id]["knowledge_graph_kwd"]
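
A note on the `positions` change in `list_chunk()` above: the removed branch rebuilt 5-tuples from a flat, tab-separated `position_int` string, whereas the new code expects `position_list` to already hold a JSON array of five-element arrays, so the asserts only sanity-check that shape. Below is a minimal sketch of the two encodings for comparison; the (page, left, right, top, bottom) field order is an assumption, not something this patch states.

```python
import json

def decode_position_int(position_int: str) -> list[list[float]]:
    # Legacy ES-era encoding: one flat, tab-separated string whose values
    # come in groups of five (field order assumed, see note above).
    flat = [float(x) for x in position_int.split("\t") if x]
    assert len(flat) % 5 == 0
    return [flat[i:i + 5] for i in range(0, len(flat), 5)]

def decode_position_list(position_list: str) -> list[list[float]]:
    # New encoding used above: a JSON string holding a list of 5-element lists,
    # so no regrouping is needed -- only a shape check, mirroring the asserts.
    positions = json.loads(position_list or "[]")
    assert all(isinstance(p, list) and len(p) == 5 for p in positions)
    return positions
```
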
diff --git a/api/apps/document_app.py b/api/apps/document_app.py
index 9699f12621b..19f5e20b461 100644
--- a/api/apps/document_app.py
+++ b/api/apps/document_app.py
@@ -17,7 +17,6 @@
import re
import flask
-from elasticsearch_dsl import Q
from flask import request
from flask_login import login_required, current_user
@@ -27,14 +26,13 @@
from api.db.services.task_service import TaskService, queue_tasks
from api.db.services.user_service import UserTenantService
from rag.nlp import search
-from rag.utils.es_conn import ELASTICSEARCH
from api.db.services import duplicate_name
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.utils import get_uuid
from api.db import FileType, TaskStatus, ParserType, FileSource
from api.db.services.document_service import DocumentService, doc_upload_and_parse
-from api.settings import RetCode
+from api.settings import RetCode, docStoreConn
from api.utils.api_utils import get_json_result
from rag.utils.storage_factory import STORAGE_IMPL
from api.utils.file_utils import filename_type, thumbnail
@@ -275,18 +273,8 @@ def change_status():
return get_data_error_result(
message="Database error (Document update)!")
- if str(req["status"]) == "0":
- ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]),
- scripts="ctx._source.available_int=0;",
- idxnm=search.index_name(
- kb.tenant_id)
- )
- else:
- ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]),
- scripts="ctx._source.available_int=1;",
- idxnm=search.index_name(
- kb.tenant_id)
- )
+ status = int(req["status"])
+ docStoreConn.update({"doc_id": req["doc_id"]}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id)
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
@@ -365,8 +353,11 @@ def run():
tenant_id = DocumentService.get_tenant_id(id)
if not tenant_id:
return get_data_error_result(message="Tenant not found!")
- ELASTICSEARCH.deleteByQuery(
- Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
+ e, doc = DocumentService.get_by_id(id)
+ if not e:
+ return get_data_error_result(message="Document not found!")
+ if docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
+ docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), doc.kb_id)
if str(req["run"]) == TaskStatus.RUNNING.value:
TaskService.filter_delete([Task.doc_id == id])
@@ -490,8 +481,8 @@ def change_parser():
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
if not tenant_id:
return get_data_error_result(message="Tenant not found!")
- ELASTICSEARCH.deleteByQuery(
- Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
+ if docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
+ docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
return get_json_result(data=True)
except Exception as e:
diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py
index 7807550cd7e..8bbd1c7ca18 100644
--- a/api/apps/kb_app.py
+++ b/api/apps/kb_app.py
@@ -28,6 +28,8 @@
from api.db.db_models import File
from api.settings import RetCode
from api.utils.api_utils import get_json_result
+from api.settings import docStoreConn
+from rag.nlp import search
@manager.route('/create', methods=['post'])
@@ -166,6 +168,9 @@ def rm():
if not KnowledgebaseService.delete_by_id(req["kb_id"]):
return get_data_error_result(
message="Database error (Knowledgebase removal)!")
+ tenants = UserTenantService.query(user_id=current_user.id)
+ for tenant in tenants:
+ docStoreConn.deleteIdx(search.index_name(tenant.tenant_id), req["kb_id"])
return get_json_result(data=True)
except Exception as e:
return server_error_response(e)
diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py
index 1bc9b056d55..3383d2a1c17 100644
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@@ -30,7 +30,6 @@
from api.utils.api_utils import server_error_response
from api.utils.api_utils import get_result, get_error_data_result
from io import BytesIO
-from elasticsearch_dsl import Q
from flask import request, send_file
from api.db import FileSource, TaskStatus, FileType
from api.db.db_models import File
@@ -42,7 +41,7 @@
from api.utils.api_utils import construct_json_result, get_parser_config
from rag.nlp import search
from rag.utils import rmSpace
-from rag.utils.es_conn import ELASTICSEARCH
+from api.settings import docStoreConn
from rag.utils.storage_factory import STORAGE_IMPL
import os
@@ -293,9 +292,7 @@ def update_doc(tenant_id, dataset_id, document_id):
)
if not e:
return get_error_data_result(message="Document not found!")
- ELASTICSEARCH.deleteByQuery(
- Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)
- )
+ docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), dataset_id)
return get_result()
@@ -647,9 +644,7 @@ def parse(tenant_id, dataset_id):
info["chunk_num"] = 0
info["token_num"] = 0
DocumentService.update_by_id(id, info)
- ELASTICSEARCH.deleteByQuery(
- Q("match", doc_id=id), idxnm=search.index_name(tenant_id)
- )
+ docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), dataset_id)
TaskService.filter_delete([Task.doc_id == id])
e, doc = DocumentService.get_by_id(id)
doc = doc.to_dict()
@@ -713,9 +708,7 @@ def stop_parsing(tenant_id, dataset_id):
)
info = {"run": "2", "progress": 0, "chunk_num": 0}
DocumentService.update_by_id(id, info)
- ELASTICSEARCH.deleteByQuery(
- Q("match", doc_id=id), idxnm=search.index_name(tenant_id)
- )
+ docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), dataset_id)
return get_result()
@@ -812,7 +805,6 @@ def list_chunks(tenant_id, dataset_id, document_id):
"question": question,
"sort": True,
}
- sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
key_mapping = {
"chunk_num": "chunk_count",
"kb_id": "dataset_id",
@@ -833,51 +825,56 @@ def list_chunks(tenant_id, dataset_id, document_id):
renamed_doc[new_key] = value
if key == "run":
renamed_doc["run"] = run_mapping.get(str(value))
- res = {"total": sres.total, "chunks": [], "doc": renamed_doc}
+
+ res = {"total": 0, "chunks": [], "doc": renamed_doc}
origin_chunks = []
- sign = 0
- for id in sres.ids:
- d = {
- "chunk_id": id,
- "content_with_weight": (
- rmSpace(sres.highlight[id])
- if question and id in sres.highlight
- else sres.field[id].get("content_with_weight", "")
- ),
- "doc_id": sres.field[id]["doc_id"],
- "docnm_kwd": sres.field[id]["docnm_kwd"],
- "important_kwd": sres.field[id].get("important_kwd", []),
- "img_id": sres.field[id].get("img_id", ""),
- "available_int": sres.field[id].get("available_int", 1),
- "positions": sres.field[id].get("position_int", "").split("\t"),
- }
- if len(d["positions"]) % 5 == 0:
- poss = []
- for i in range(0, len(d["positions"]), 5):
- poss.append(
- [
- float(d["positions"][i]),
- float(d["positions"][i + 1]),
- float(d["positions"][i + 2]),
- float(d["positions"][i + 3]),
- float(d["positions"][i + 4]),
- ]
- )
- d["positions"] = poss
+ if docStoreConn.indexExist(search.index_name(tenant_id), dataset_id):
+ sres = retrievaler.search(query, search.index_name(tenant_id), [dataset_id], emb_mdl=None, highlight=True)
+ res["total"] = sres.total
+ sign = 0
+ for id in sres.ids:
+ d = {
+ "id": id,
+ "content_with_weight": (
+ rmSpace(sres.highlight[id])
+ if question and id in sres.highlight
+ else sres.field[id].get("content_with_weight", "")
+ ),
+ "doc_id": sres.field[id]["doc_id"],
+ "docnm_kwd": sres.field[id]["docnm_kwd"],
+ "important_kwd": sres.field[id].get("important_kwd", []),
+ "img_id": sres.field[id].get("img_id", ""),
+ "available_int": sres.field[id].get("available_int", 1),
+ "positions": sres.field[id].get("position_int", "").split("\t"),
+ }
+ if len(d["positions"]) % 5 == 0:
+ poss = []
+ for i in range(0, len(d["positions"]), 5):
+ poss.append(
+ [
+ float(d["positions"][i]),
+ float(d["positions"][i + 1]),
+ float(d["positions"][i + 2]),
+ float(d["positions"][i + 3]),
+ float(d["positions"][i + 4]),
+ ]
+ )
+ d["positions"] = poss
- origin_chunks.append(d)
+ origin_chunks.append(d)
+ if req.get("id"):
+ if req.get("id") == id:
+ origin_chunks.clear()
+ origin_chunks.append(d)
+ sign = 1
+ break
if req.get("id"):
- if req.get("id") == id:
- origin_chunks.clear()
- origin_chunks.append(d)
- sign = 1
- break
- if req.get("id"):
- if sign == 0:
- return get_error_data_result(f"Can't find this chunk {req.get('id')}")
+ if sign == 0:
+ return get_error_data_result(f"Can't find this chunk {req.get('id')}")
+
for chunk in origin_chunks:
key_mapping = {
- "chunk_id": "id",
+ "id": "id",
"content_with_weight": "content",
"doc_id": "document_id",
"important_kwd": "important_keywords",
@@ -996,9 +993,9 @@ def add_chunk(tenant_id, dataset_id, document_id):
)
d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
- d["kb_id"] = [doc.kb_id]
+ d["kb_id"] = dataset_id
d["docnm_kwd"] = doc.name
- d["doc_id"] = doc.id
+ d["doc_id"] = document_id
embd_id = DocumentService.get_embd_id(document_id)
embd_mdl = TenantLLMService.model_instance(
tenant_id, LLMType.EMBEDDING.value, embd_id
@@ -1006,14 +1003,12 @@ def add_chunk(tenant_id, dataset_id, document_id):
v, c = embd_mdl.encode([doc.name, req["content"]])
v = 0.1 * v[0] + 0.9 * v[1]
d["q_%d_vec" % len(v)] = v.tolist()
- ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
+ docStoreConn.insert([d], search.index_name(tenant_id), dataset_id)
DocumentService.increment_chunk_num(doc.id, doc.kb_id, c, 1, 0)
- d["chunk_id"] = chunk_id
- d["kb_id"] = doc.kb_id
# rename keys
key_mapping = {
- "chunk_id": "id",
+ "id": "id",
"content_with_weight": "content",
"doc_id": "document_id",
"important_kwd": "important_keywords",
@@ -1079,36 +1074,16 @@ def rm_chunk(tenant_id, dataset_id, document_id):
"""
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
- doc = DocumentService.query(id=document_id, kb_id=dataset_id)
- if not doc:
- return get_error_data_result(
- message=f"You don't own the document {document_id}."
- )
- doc = doc[0]
req = request.json
- if not req.get("chunk_ids"):
- return get_error_data_result("`chunk_ids` is required")
- query = {"doc_ids": [doc.id], "page": 1, "size": 1024, "question": "", "sort": True}
- sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
- if not req:
- chunk_ids = None
- else:
- chunk_ids = req.get("chunk_ids")
- if not chunk_ids:
- chunk_list = sres.ids
- else:
- chunk_list = chunk_ids
- for chunk_id in chunk_list:
- if chunk_id not in sres.ids:
- return get_error_data_result(f"Chunk {chunk_id} not found")
- if not ELASTICSEARCH.deleteByQuery(
- Q("ids", values=chunk_list), search.index_name(tenant_id)
- ):
- return get_error_data_result(message="Index updating failure")
- deleted_chunk_ids = chunk_list
- chunk_number = len(deleted_chunk_ids)
- DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
- return get_result()
+ condition = {"doc_id": document_id}
+ if "chunk_ids" in req:
+ condition["id"] = req["chunk_ids"]
+ chunk_number = docStoreConn.delete(condition, search.index_name(tenant_id), dataset_id)
+ if chunk_number != 0:
+ DocumentService.decrement_chunk_num(document_id, dataset_id, 1, chunk_number, 0)
+ if "chunk_ids" in req and chunk_number != len(req["chunk_ids"]):
+ return get_error_data_result(message=f"rm_chunk deleted chunks {chunk_number}, expect {len(req["chunk_ids"])}")
+ return get_result(message=f"deleted {chunk_number} chunks")
@manager.route(
@@ -1168,9 +1143,8 @@ def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
schema:
type: object
"""
- try:
- res = ELASTICSEARCH.get(chunk_id, search.index_name(tenant_id))
- except Exception:
+ chunk = docStoreConn.get(chunk_id, search.index_name(tenant_id), [dataset_id])
+ if chunk is None:
return get_error_data_result(f"Can't find this chunk {chunk_id}")
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
return get_error_data_result(message=f"You don't own the dataset {dataset_id}.")
@@ -1180,19 +1154,12 @@ def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
message=f"You don't own the document {document_id}."
)
doc = doc[0]
- query = {
- "doc_ids": [document_id],
- "page": 1,
- "size": 1024,
- "question": "",
- "sort": True,
- }
- sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
- if chunk_id not in sres.ids:
- return get_error_data_result(f"You don't own the chunk {chunk_id}")
req = request.json
- content = res["_source"].get("content_with_weight")
- d = {"id": chunk_id, "content_with_weight": req.get("content", content)}
+ if "content" in req:
+ content = req["content"]
+ else:
+ content = chunk.get("content_with_weight", "")
+ d = {"id": chunk_id, "content_with_weight": content}
d["content_ltks"] = rag_tokenizer.tokenize(d["content_with_weight"])
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
if "important_keywords" in req:
@@ -1220,7 +1187,7 @@ def update_chunk(tenant_id, dataset_id, document_id, chunk_id):
v, c = embd_mdl.encode([doc.name, d["content_with_weight"]])
v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
d["q_%d_vec" % len(v)] = v.tolist()
- ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
+ docStoreConn.update({"id": chunk_id}, d, search.index_name(tenant_id), dataset_id)
return get_result()
diff --git a/api/apps/system_app.py b/api/apps/system_app.py
index 0ebe075ab23..33f89fc2fdf 100644
--- a/api/apps/system_app.py
+++ b/api/apps/system_app.py
@@ -31,7 +31,7 @@
generate_confirmation_token,
)
from api.versions import get_rag_version
-from rag.utils.es_conn import ELASTICSEARCH
+from api.settings import docStoreConn
from rag.utils.storage_factory import STORAGE_IMPL, STORAGE_IMPL_TYPE
from timeit import default_timer as timer
@@ -98,10 +98,11 @@ def status():
res = {}
st = timer()
try:
- res["es"] = ELASTICSEARCH.health()
- res["es"]["elapsed"] = "{:.1f}".format((timer() - st) * 1000.0)
+ res["doc_store"] = docStoreConn.health()
+ res["doc_store"]["elapsed"] = "{:.1f}".format((timer() - st) * 1000.0)
except Exception as e:
- res["es"] = {
+ res["doc_store"] = {
+ "type": "unknown",
"status": "red",
"elapsed": "{:.1f}".format((timer() - st) * 1000.0),
"error": str(e),
diff --git a/api/db/db_models.py b/api/db/db_models.py
index 1ce8b51283d..3938cb3a306 100644
--- a/api/db/db_models.py
+++ b/api/db/db_models.py
@@ -470,7 +470,7 @@ class User(DataBaseModel, UserMixin):
status = CharField(
max_length=1,
null=True,
- help_text="is it validate(0: wasted,1: validate)",
+ help_text="is it validate(0: wasted, 1: validate)",
default="1",
index=True)
is_superuser = BooleanField(null=True, help_text="is root", default=False, index=True)
@@ -525,7 +525,7 @@ class Tenant(DataBaseModel):
status = CharField(
max_length=1,
null=True,
- help_text="is it validate(0: wasted,1: validate)",
+ help_text="is it validate(0: wasted, 1: validate)",
default="1",
index=True)
@@ -542,7 +542,7 @@ class UserTenant(DataBaseModel):
status = CharField(
max_length=1,
null=True,
- help_text="is it validate(0: wasted,1: validate)",
+ help_text="is it validate(0: wasted, 1: validate)",
default="1",
index=True)
@@ -559,7 +559,7 @@ class InvitationCode(DataBaseModel):
status = CharField(
max_length=1,
null=True,
- help_text="is it validate(0: wasted,1: validate)",
+ help_text="is it validate(0: wasted, 1: validate)",
default="1",
index=True)
@@ -582,7 +582,7 @@ class LLMFactories(DataBaseModel):
status = CharField(
max_length=1,
null=True,
- help_text="is it validate(0: wasted,1: validate)",
+ help_text="is it validate(0: wasted, 1: validate)",
default="1",
index=True)
@@ -616,7 +616,7 @@ class LLM(DataBaseModel):
status = CharField(
max_length=1,
null=True,
- help_text="is it validate(0: wasted,1: validate)",
+ help_text="is it validate(0: wasted, 1: validate)",
default="1",
index=True)
@@ -703,7 +703,7 @@ class Knowledgebase(DataBaseModel):
status = CharField(
max_length=1,
null=True,
- help_text="is it validate(0: wasted,1: validate)",
+ help_text="is it validate(0: wasted, 1: validate)",
default="1",
index=True)
@@ -767,7 +767,7 @@ class Document(DataBaseModel):
status = CharField(
max_length=1,
null=True,
- help_text="is it validate(0: wasted,1: validate)",
+ help_text="is it validate(0: wasted, 1: validate)",
default="1",
index=True)
@@ -904,7 +904,7 @@ class Dialog(DataBaseModel):
status = CharField(
max_length=1,
null=True,
- help_text="is it validate(0: wasted,1: validate)",
+ help_text="is it validate(0: wasted, 1: validate)",
default="1",
index=True)
@@ -987,7 +987,7 @@ def migrate_db():
help_text="where dose this document come from",
index=True))
)
- except Exception as e:
+ except Exception:
pass
try:
migrate(
@@ -996,7 +996,7 @@ def migrate_db():
help_text="default rerank model ID"))
)
- except Exception as e:
+ except Exception:
pass
try:
migrate(
@@ -1004,59 +1004,59 @@ def migrate_db():
help_text="default rerank model ID"))
)
- except Exception as e:
+ except Exception:
pass
try:
migrate(
migrator.add_column('dialog', 'top_k', IntegerField(default=1024))
)
- except Exception as e:
+ except Exception:
pass
try:
migrate(
migrator.alter_column_type('tenant_llm', 'api_key',
CharField(max_length=1024, null=True, help_text="API KEY", index=True))
)
- except Exception as e:
+ except Exception:
pass
try:
migrate(
migrator.add_column('api_token', 'source',
CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True))
)
- except Exception as e:
+ except Exception:
pass
try:
migrate(
migrator.add_column("tenant","tts_id",
CharField(max_length=256,null=True,help_text="default tts model ID",index=True))
)
- except Exception as e:
+ except Exception:
pass
try:
migrate(
migrator.add_column('api_4_conversation', 'source',
CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True))
)
- except Exception as e:
+ except Exception:
pass
try:
DB.execute_sql('ALTER TABLE llm DROP PRIMARY KEY;')
DB.execute_sql('ALTER TABLE llm ADD PRIMARY KEY (llm_name,fid);')
- except Exception as e:
+ except Exception:
pass
try:
migrate(
migrator.add_column('task', 'retry_count', IntegerField(default=0))
)
- except Exception as e:
+ except Exception:
pass
try:
migrate(
migrator.alter_column_type('api_token', 'dialog_id',
CharField(max_length=32, null=True, index=True))
)
- except Exception as e:
+ except Exception:
pass
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index d9ab1365e1c..51fd28c4ddb 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -15,7 +15,6 @@
#
import hashlib
import json
-import os
import random
import re
import traceback
@@ -24,16 +23,13 @@
from datetime import datetime
from io import BytesIO
-from elasticsearch_dsl import Q
from peewee import fn
from api.db.db_utils import bulk_insert_into_db
-from api.settings import stat_logger
+from api.settings import stat_logger, docStoreConn
from api.utils import current_timestamp, get_format_time, get_uuid
-from api.utils.file_utils import get_project_base_directory
from graphrag.mind_map_extractor import MindMapExtractor
from rag.settings import SVR_QUEUE_NAME
-from rag.utils.es_conn import ELASTICSEARCH
from rag.utils.storage_factory import STORAGE_IMPL
from rag.nlp import search, rag_tokenizer
@@ -112,8 +108,7 @@ def insert(cls, doc):
@classmethod
@DB.connection_context()
def remove_document(cls, doc, tenant_id):
- ELASTICSEARCH.deleteByQuery(
- Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
+ docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
cls.clear_chunk_num(doc.id)
return cls.delete_by_id(doc.id)
@@ -225,6 +220,15 @@ def get_tenant_id(cls, doc_id):
return
return docs[0]["tenant_id"]
+ @classmethod
+ @DB.connection_context()
+ def get_knowledgebase_id(cls, doc_id):
+ docs = cls.model.select(cls.model.kb_id).where(cls.model.id == doc_id)
+ docs = docs.dicts()
+ if not docs:
+ return
+ return docs[0]["kb_id"]
+
@classmethod
@DB.connection_context()
def get_tenant_id_by_name(cls, name):
@@ -438,11 +442,6 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
if not e:
raise LookupError("Can't find this knowledgebase!")
- idxnm = search.index_name(kb.tenant_id)
- if not ELASTICSEARCH.indexExist(idxnm):
- ELASTICSEARCH.createIdx(idxnm, json.load(
- open(os.path.join(get_project_base_directory(), "conf", "mapping.json"), "r")))
-
embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING, llm_name=kb.embd_id, lang=kb.language)
err, files = FileService.upload_document(kb, file_objs, user_id)
@@ -486,7 +485,7 @@ def dummy(prog=None, msg=""):
md5 = hashlib.md5()
md5.update((ck["content_with_weight"] +
str(d["doc_id"])).encode("utf-8"))
- d["_id"] = md5.hexdigest()
+ d["id"] = md5.hexdigest()
d["create_time"] = str(datetime.now()).replace("T", " ")[:19]
d["create_timestamp_flt"] = datetime.now().timestamp()
if not d.get("image"):
@@ -499,8 +498,8 @@ def dummy(prog=None, msg=""):
else:
d["image"].save(output_buffer, format='JPEG')
- STORAGE_IMPL.put(kb.id, d["_id"], output_buffer.getvalue())
- d["img_id"] = "{}-{}".format(kb.id, d["_id"])
+ STORAGE_IMPL.put(kb.id, d["id"], output_buffer.getvalue())
+ d["img_id"] = "{}-{}".format(kb.id, d["id"])
del d["image"]
docs.append(d)
@@ -520,6 +519,9 @@ def embedding(doc_id, cnts, batch_size=16):
token_counts[doc_id] += c
return vects
+ idxnm = search.index_name(kb.tenant_id)
+ try_create_idx = True
+
_, tenant = TenantService.get_by_id(kb.tenant_id)
llm_bdl = LLMBundle(kb.tenant_id, LLMType.CHAT, tenant.llm_id)
for doc_id in docids:
@@ -550,7 +552,11 @@ def embedding(doc_id, cnts, batch_size=16):
v = vects[i]
d["q_%d_vec" % len(v)] = v
for b in range(0, len(cks), es_bulk_size):
- ELASTICSEARCH.bulk(cks[b:b + es_bulk_size], idxnm)
+ if try_create_idx:
+ if not docStoreConn.indexExist(idxnm, kb_id):
+ docStoreConn.createIdx(idxnm, kb_id, len(vects[0]))
+ try_create_idx = False
+ docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id)
DocumentService.increment_chunk_num(
doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0)
diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py
index 0920c753c6b..93a1a7c740b 100644
--- a/api/db/services/knowledgebase_service.py
+++ b/api/db/services/knowledgebase_service.py
@@ -66,6 +66,16 @@ def get_by_tenant_ids(cls, joined_tenant_ids, user_id,
return list(kbs.dicts())
+ @classmethod
+ @DB.connection_context()
+ def get_kb_ids(cls, tenant_id):
+ fields = [
+ cls.model.id,
+ ]
+ kbs = cls.model.select(*fields).where(cls.model.tenant_id == tenant_id)
+ kb_ids = [kb["id"] for kb in kbs]
+ return kb_ids
+
@classmethod
@DB.connection_context()
def get_detail(cls, kb_id):
diff --git a/api/settings.py b/api/settings.py
index 7fb6c41c4ca..46fb612c448 100644
--- a/api/settings.py
+++ b/api/settings.py
@@ -18,6 +18,8 @@
from enum import IntEnum, Enum
from api.utils.file_utils import get_project_base_directory
from api.utils.log_utils import LoggerFactory, getLogger
+import rag.utils.es_conn
+import rag.utils.infinity_conn
# Logger
LoggerFactory.set_directory(
@@ -33,7 +35,7 @@
database_logger = getLogger("database")
chat_logger = getLogger("chat")
-from rag.utils.es_conn import ELASTICSEARCH
+import rag.utils
from rag.nlp import search
from graphrag import search as kg_search
from api.utils import get_base_config, decrypt_database_config
@@ -206,8 +208,12 @@
PRIVILEGE_COMMAND_WHITELIST = []
CHECK_NODES_IDENTITY = False
-retrievaler = search.Dealer(ELASTICSEARCH)
-kg_retrievaler = kg_search.KGSearch(ELASTICSEARCH)
+if 'username' in get_base_config("es", {}):
+ docStoreConn = rag.utils.es_conn.ESConnection()
+else:
+ docStoreConn = rag.utils.infinity_conn.InfinityConnection()
+retrievaler = search.Dealer(docStoreConn)
+kg_retrievaler = kg_search.KGSearch(docStoreConn)
class CustomEnum(Enum):
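
The `api/settings.py` hunk above is where the storage backend is chosen: `ESConnection` when the `es` section of the service config carries a `username`, `InfinityConnection` otherwise, and every former `ELASTICSEARCH.*` call elsewhere in this patch is rewritten against the shared `docStoreConn` object. The sketch below only reconstructs that interface from the call sites in this patch; the class name and exact signatures are assumptions (the real definitions presumably live alongside `OrderByExpr`/`FusionExpr` in `rag/utils/doc_store_conn.py`).

```python
from abc import ABC, abstractmethod

class DocStoreConnection(ABC):
    # Illustrative only -- reconstructed from how docStoreConn is called in this
    # patch, not copied from rag/utils/doc_store_conn.py.

    @abstractmethod
    def health(self) -> dict: ...  # system_app.py: reported as res["doc_store"]

    @abstractmethod
    def indexExist(self, indexName: str, knowledgebaseId: str) -> bool: ...

    @abstractmethod
    def createIdx(self, indexName: str, knowledgebaseId: str, vectorSize: int): ...

    @abstractmethod
    def deleteIdx(self, indexName: str, knowledgebaseId: str): ...  # kb_app.py rm()

    @abstractmethod
    def insert(self, documents: list[dict], indexName: str, knowledgebaseId: str): ...

    @abstractmethod
    def update(self, condition: dict, newValue: dict,
               indexName: str, knowledgebaseId: str) -> bool: ...

    @abstractmethod
    def delete(self, condition: dict, indexName: str, knowledgebaseId: str) -> int: ...

    @abstractmethod
    def get(self, chunkId: str, indexName: str,
            knowledgebaseIds: list[str]) -> dict | None: ...
```

Note that `delete()` is relied on both for truthiness (`chunk_app.py` `rm()`) and as a deleted-row count (`sdk/doc.py` `rm_chunk()`), and `update()` for a success flag, which is why the return types above are annotated that way.
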
diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py
index 6a8b30bf52a..815506a27ed 100644
--- a/api/utils/api_utils.py
+++ b/api/utils/api_utils.py
@@ -126,10 +126,6 @@ def server_error_response(e):
if len(e.args) > 1:
return get_json_result(
code=RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=e.args[1])
- if repr(e).find("index_not_found_exception") >= 0:
- return get_json_result(code=RetCode.EXCEPTION_ERROR,
- message="No chunk found, please upload file and parse it.")
-
return get_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
@@ -270,10 +266,6 @@ def construct_error_response(e):
pass
if len(e.args) > 1:
return construct_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=e.args[1])
- if repr(e).find("index_not_found_exception") >= 0:
- return construct_json_result(code=RetCode.EXCEPTION_ERROR,
- message="No chunk found, please upload file and parse it.")
-
return construct_json_result(code=RetCode.EXCEPTION_ERROR, message=repr(e))
@@ -295,7 +287,7 @@ def decorated_function(*args, **kwargs):
return decorated_function
-def get_result(code=RetCode.SUCCESS, message='error', data=None):
+def get_result(code=RetCode.SUCCESS, message="", data=None):
if code == 0:
if data is not None:
response = {"code": code, "data": data}
diff --git a/conf/infinity_mapping.json b/conf/infinity_mapping.json
new file mode 100644
index 00000000000..17f5e86c979
--- /dev/null
+++ b/conf/infinity_mapping.json
@@ -0,0 +1,26 @@
+{
+ "id": {"type": "varchar", "default": ""},
+ "doc_id": {"type": "varchar", "default": ""},
+ "kb_id": {"type": "varchar", "default": ""},
+ "create_time": {"type": "varchar", "default": ""},
+ "create_timestamp_flt": {"type": "float", "default": 0.0},
+ "img_id": {"type": "varchar", "default": ""},
+ "docnm_kwd": {"type": "varchar", "default": ""},
+ "title_tks": {"type": "varchar", "default": ""},
+ "title_sm_tks": {"type": "varchar", "default": ""},
+ "name_kwd": {"type": "varchar", "default": ""},
+ "important_kwd": {"type": "varchar", "default": ""},
+ "important_tks": {"type": "varchar", "default": ""},
+ "content_with_weight": {"type": "varchar", "default": ""},
+ "content_ltks": {"type": "varchar", "default": ""},
+ "content_sm_ltks": {"type": "varchar", "default": ""},
+ "page_num_list": {"type": "varchar", "default": ""},
+ "top_list": {"type": "varchar", "default": ""},
+ "position_list": {"type": "varchar", "default": ""},
+ "weight_int": {"type": "integer", "default": 0},
+ "weight_flt": {"type": "float", "default": 0.0},
+ "rank_int": {"type": "integer", "default": 0},
+ "available_int": {"type": "integer", "default": 1},
+ "knowledge_graph_kwd": {"type": "varchar", "default": ""},
+ "entities_kwd": {"type": "varchar", "default": ""}
+}
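
Unlike the Elasticsearch mapping below, which still relies on dynamic templates, this Infinity mapping is a flat column schema with explicit types and defaults. No `q_*_vec` column is listed because the embedding width is only known when `createIdx()` is called with `len(vects[0])` (see the `document_service.py` hunk later in this patch). A rough sketch of how such a file could be expanded into column definitions; the helper and the `"vector,<dim>,float"` type string are assumptions about the Infinity SDK, not taken from this patch.

```python
import json

def load_infinity_columns(mapping_path: str, vector_size: int) -> dict[str, dict]:
    # Illustrative only: read conf/infinity_mapping.json as a flat column schema
    # and append the embedding column whose width is only known at createIdx() time.
    with open(mapping_path, "r") as f:
        columns: dict[str, dict] = json.load(f)
    # Hypothetical vector column; the exact type string Infinity expects is an assumption.
    columns[f"q_{vector_size}_vec"] = {"type": f"vector,{vector_size},float"}
    return columns

# e.g. load_infinity_columns("conf/infinity_mapping.json", 1024)
```
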
diff --git a/conf/mapping.json b/conf/mapping.json
index c8831346aaf..b4331f2be19 100644
--- a/conf/mapping.json
+++ b/conf/mapping.json
@@ -1,200 +1,203 @@
- {
+{
"settings": {
"index": {
"number_of_shards": 2,
"number_of_replicas": 0,
- "refresh_interval" : "1000ms"
+ "refresh_interval": "1000ms"
},
"similarity": {
- "scripted_sim": {
- "type": "scripted",
- "script": {
- "source": "double idf = Math.log(1+(field.docCount-term.docFreq+0.5)/(term.docFreq + 0.5))/Math.log(1+((field.docCount-0.5)/1.5)); return query.boost * idf * Math.min(doc.freq, 1);"
- }
+ "scripted_sim": {
+ "type": "scripted",
+ "script": {
+ "source": "double idf = Math.log(1+(field.docCount-term.docFreq+0.5)/(term.docFreq + 0.5))/Math.log(1+((field.docCount-0.5)/1.5)); return query.boost * idf * Math.min(doc.freq, 1);"
}
+ }
}
},
"mappings": {
- "properties": {
- "lat_lon": {"type": "geo_point", "store":"true"}
- },
- "date_detection": "true",
- "dynamic_templates": [
- {
- "int": {
- "match": "*_int",
- "mapping": {
- "type": "integer",
- "store": "true"
- }
- }
- },
- {
- "ulong": {
- "match": "*_ulong",
- "mapping": {
- "type": "unsigned_long",
- "store": "true"
- }
- }
- },
- {
- "long": {
- "match": "*_long",
- "mapping": {
- "type": "long",
- "store": "true"
- }
- }
- },
- {
- "short": {
- "match": "*_short",
- "mapping": {
- "type": "short",
- "store": "true"
- }
- }
- },
- {
- "numeric": {
- "match": "*_flt",
- "mapping": {
- "type": "float",
- "store": true
- }
- }
- },
- {
- "tks": {
- "match": "*_tks",
- "mapping": {
- "type": "text",
- "similarity": "scripted_sim",
- "analyzer": "whitespace",
- "store": true
- }
- }
- },
- {
- "ltks":{
- "match": "*_ltks",
- "mapping": {
- "type": "text",
- "analyzer": "whitespace",
- "store": true
- }
- }
- },
- {
- "kwd": {
- "match_pattern": "regex",
- "match": "^(.*_(kwd|id|ids|uid|uids)|uid)$",
- "mapping": {
- "type": "keyword",
- "similarity": "boolean",
- "store": true
- }
- }
- },
- {
- "dt": {
- "match_pattern": "regex",
- "match": "^.*(_dt|_time|_at)$",
- "mapping": {
- "type": "date",
- "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM-dd_HH:mm:ss",
- "store": true
- }
- }
- },
- {
- "nested": {
- "match": "*_nst",
- "mapping": {
- "type": "nested"
- }
- }
- },
- {
- "object": {
- "match": "*_obj",
- "mapping": {
- "type": "object",
- "dynamic": "true"
- }
- }
- },
- {
- "string": {
- "match": "*_with_weight",
- "mapping": {
- "type": "text",
- "index": "false",
- "store": true
- }
- }
- },
- {
- "string": {
- "match": "*_fea",
- "mapping": {
- "type": "rank_feature"
- }
- }
- },
- {
- "dense_vector": {
- "match": "*_512_vec",
- "mapping": {
- "type": "dense_vector",
- "index": true,
- "similarity": "cosine",
- "dims": 512
- }
- }
- },
- {
- "dense_vector": {
- "match": "*_768_vec",
- "mapping": {
- "type": "dense_vector",
- "index": true,
- "similarity": "cosine",
- "dims": 768
- }
- }
- },
- {
- "dense_vector": {
- "match": "*_1024_vec",
- "mapping": {
- "type": "dense_vector",
- "index": true,
- "similarity": "cosine",
- "dims": 1024
- }
- }
- },
- {
- "dense_vector": {
- "match": "*_1536_vec",
- "mapping": {
- "type": "dense_vector",
- "index": true,
- "similarity": "cosine",
- "dims": 1536
- }
- }
- },
- {
- "binary": {
- "match": "*_bin",
- "mapping": {
- "type": "binary"
- }
- }
- }
- ]
- }
-}
+ "properties": {
+ "lat_lon": {
+ "type": "geo_point",
+ "store": "true"
+ }
+ },
+ "date_detection": "true",
+ "dynamic_templates": [
+ {
+ "int": {
+ "match": "*_int",
+ "mapping": {
+ "type": "integer",
+ "store": "true"
+ }
+ }
+ },
+ {
+ "ulong": {
+ "match": "*_ulong",
+ "mapping": {
+ "type": "unsigned_long",
+ "store": "true"
+ }
+ }
+ },
+ {
+ "long": {
+ "match": "*_long",
+ "mapping": {
+ "type": "long",
+ "store": "true"
+ }
+ }
+ },
+ {
+ "short": {
+ "match": "*_short",
+ "mapping": {
+ "type": "short",
+ "store": "true"
+ }
+ }
+ },
+ {
+ "numeric": {
+ "match": "*_flt",
+ "mapping": {
+ "type": "float",
+ "store": true
+ }
+ }
+ },
+ {
+ "tks": {
+ "match": "*_tks",
+ "mapping": {
+ "type": "text",
+ "similarity": "scripted_sim",
+ "analyzer": "whitespace",
+ "store": true
+ }
+ }
+ },
+ {
+ "ltks": {
+ "match": "*_ltks",
+ "mapping": {
+ "type": "text",
+ "analyzer": "whitespace",
+ "store": true
+ }
+ }
+ },
+ {
+ "kwd": {
+ "match_pattern": "regex",
+ "match": "^(.*_(kwd|id|ids|uid|uids)|uid)$",
+ "mapping": {
+ "type": "keyword",
+ "similarity": "boolean",
+ "store": true
+ }
+ }
+ },
+ {
+ "dt": {
+ "match_pattern": "regex",
+ "match": "^.*(_dt|_time|_at)$",
+ "mapping": {
+ "type": "date",
+ "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM-dd_HH:mm:ss",
+ "store": true
+ }
+ }
+ },
+ {
+ "nested": {
+ "match": "*_nst",
+ "mapping": {
+ "type": "nested"
+ }
+ }
+ },
+ {
+ "object": {
+ "match": "*_obj",
+ "mapping": {
+ "type": "object",
+ "dynamic": "true"
+ }
+ }
+ },
+ {
+ "string": {
+ "match": "*_(with_weight|list)$",
+ "mapping": {
+ "type": "text",
+ "index": "false",
+ "store": true
+ }
+ }
+ },
+ {
+ "string": {
+ "match": "*_fea",
+ "mapping": {
+ "type": "rank_feature"
+ }
+ }
+ },
+ {
+ "dense_vector": {
+ "match": "*_512_vec",
+ "mapping": {
+ "type": "dense_vector",
+ "index": true,
+ "similarity": "cosine",
+ "dims": 512
+ }
+ }
+ },
+ {
+ "dense_vector": {
+ "match": "*_768_vec",
+ "mapping": {
+ "type": "dense_vector",
+ "index": true,
+ "similarity": "cosine",
+ "dims": 768
+ }
+ }
+ },
+ {
+ "dense_vector": {
+ "match": "*_1024_vec",
+ "mapping": {
+ "type": "dense_vector",
+ "index": true,
+ "similarity": "cosine",
+ "dims": 1024
+ }
+ }
+ },
+ {
+ "dense_vector": {
+ "match": "*_1536_vec",
+ "mapping": {
+ "type": "dense_vector",
+ "index": true,
+ "similarity": "cosine",
+ "dims": 1536
+ }
+ }
+ },
+ {
+ "binary": {
+ "match": "*_bin",
+ "mapping": {
+ "type": "binary"
+ }
+ }
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/docker/.env b/docker/.env
index eccde03849a..b8d82a32b84 100644
--- a/docker/.env
+++ b/docker/.env
@@ -19,6 +19,11 @@ KIBANA_PASSWORD=infini_rag_flow
# Update it according to the available memory in the host machine.
MEM_LIMIT=8073741824
+# Port to expose Infinity API to the host
+INFINITY_THRIFT_PORT=23817
+INFINITY_HTTP_PORT=23820
+INFINITY_PSQL_PORT=5432
+
# The password for MySQL.
# When updated, you must revise the `mysql.password` entry in service_conf.yaml.
MYSQL_PASSWORD=infini_rag_flow
diff --git a/docker/docker-compose-base.yml b/docker/docker-compose-base.yml
index 300aa62ab77..88ee12307b9 100644
--- a/docker/docker-compose-base.yml
+++ b/docker/docker-compose-base.yml
@@ -6,6 +6,7 @@ services:
- esdata01:/usr/share/elasticsearch/data
ports:
- ${ES_PORT}:9200
+ env_file: .env
environment:
- node.name=es01
- ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
@@ -27,12 +28,40 @@ services:
retries: 120
networks:
- ragflow
- restart: always
+ restart: on-failure
+
+ # infinity:
+ # container_name: ragflow-infinity
+ # image: infiniflow/infinity:v0.5.0-dev2
+ # volumes:
+ # - infinity_data:/var/infinity
+ # ports:
+ # - ${INFINITY_THRIFT_PORT}:23817
+ # - ${INFINITY_HTTP_PORT}:23820
+ # - ${INFINITY_PSQL_PORT}:5432
+ # env_file: .env
+ # environment:
+ # - TZ=${TIMEZONE}
+ # mem_limit: ${MEM_LIMIT}
+ # ulimits:
+ # nofile:
+ # soft: 500000
+ # hard: 500000
+ # networks:
+ # - ragflow
+ # healthcheck:
+ # test: ["CMD", "curl", "http://localhost:23820/admin/node/current"]
+ # interval: 10s
+ # timeout: 10s
+ # retries: 120
+ # restart: on-failure
+
mysql:
# mysql:5.7 linux/arm64 image is unavailable.
image: mysql:8.0.39
container_name: ragflow-mysql
+ env_file: .env
environment:
- MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD}
- TZ=${TIMEZONE}
@@ -55,7 +84,7 @@ services:
interval: 10s
timeout: 10s
retries: 3
- restart: always
+ restart: on-failure
minio:
image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
@@ -64,6 +93,7 @@ services:
ports:
- ${MINIO_PORT}:9000
- ${MINIO_CONSOLE_PORT}:9001
+ env_file: .env
environment:
- MINIO_ROOT_USER=${MINIO_USER}
- MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
@@ -72,25 +102,28 @@ services:
- minio_data:/data
networks:
- ragflow
- restart: always
+ restart: on-failure
redis:
image: valkey/valkey:8
container_name: ragflow-redis
command: redis-server --requirepass ${REDIS_PASSWORD} --maxmemory 128mb --maxmemory-policy allkeys-lru
+ env_file: .env
ports:
- ${REDIS_PORT}:6379
volumes:
- redis_data:/data
networks:
- ragflow
- restart: always
+ restart: on-failure
volumes:
esdata01:
driver: local
+ infinity_data:
+ driver: local
mysql_data:
driver: local
minio_data:
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 24592c3f4b8..b8707935c8a 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -1,6 +1,5 @@
include:
- - path: ./docker-compose-base.yml
- env_file: ./.env
+ - ./docker-compose-base.yml
services:
ragflow:
@@ -15,19 +14,21 @@ services:
- ${SVR_HTTP_PORT}:9380
- 80:80
- 443:443
- - 5678:5678
volumes:
- ./service_conf.yaml:/ragflow/conf/service_conf.yaml
- ./ragflow-logs:/ragflow/logs
- ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf
- ./nginx/proxy.conf:/etc/nginx/proxy.conf
- ./nginx/nginx.conf:/etc/nginx/nginx.conf
+ env_file: .env
environment:
- TZ=${TIMEZONE}
- HF_ENDPOINT=${HF_ENDPOINT}
- MACOS=${MACOS}
networks:
- ragflow
- restart: always
+ restart: on-failure
+ # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
+      # If you're using Docker Desktop, the --add-host flag is optional. It makes sure that the host's internal IP gets exposed to the ragflow container.
extra_hosts:
- "host.docker.internal:host-gateway"
diff --git a/docs/guides/develop/launch_ragflow_from_source.md b/docs/guides/develop/launch_ragflow_from_source.md
index 0584aa14a50..160c704abe1 100644
--- a/docs/guides/develop/launch_ragflow_from_source.md
+++ b/docs/guides/develop/launch_ragflow_from_source.md
@@ -67,7 +67,7 @@ docker compose -f docker/docker-compose-base.yml up -d
1. Add the following line to `/etc/hosts` to resolve all hosts specified in **docker/service_conf.yaml** to `127.0.0.1`:
```
- 127.0.0.1 es01 mysql minio redis
+ 127.0.0.1 es01 infinity mysql minio redis
```
2. In **docker/service_conf.yaml**, update mysql port to `5455` and es port to `1200`, as specified in **docker/.env**.
diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md
index be6467a8809..1c4f53f4146 100644
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
@@ -1280,7 +1280,7 @@ Success:
"document_keyword": "1.txt",
"highlight": "ragflow content",
"id": "d78435d142bd5cf6704da62c778795c5",
- "img_id": "",
+ "image_id": "",
"important_keywords": [
""
],
diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md
index ae39b0d2696..ff3682e1993 100644
--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
@@ -1351,7 +1351,7 @@ A list of `Chunk` objects representing references to the message, each containin
The chunk ID.
- `content` `str`
The content of the chunk.
-- `image_id` `str`
+- `img_id` `str`
The ID of the snapshot of the chunk. Applicable only when the source of the chunk is an image, PPT, PPTX, or PDF file.
- `document_id` `str`
The ID of the referenced document.
diff --git a/graphrag/claim_extractor.py b/graphrag/claim_extractor.py
index d3c68642967..d986b24546a 100644
--- a/graphrag/claim_extractor.py
+++ b/graphrag/claim_extractor.py
@@ -254,9 +254,12 @@ def pull_field(index: int, fields: list[str]) -> str | None:
from api.db import LLMType
from api.db.services.llm_service import LLMBundle
from api.settings import retrievaler
+ from api.db.services.knowledgebase_service import KnowledgebaseService
+
+ kb_ids = KnowledgebaseService.get_kb_ids(args.tenant_id)
ex = ClaimExtractor(LLMBundle(args.tenant_id, LLMType.CHAT))
- docs = [d["content_with_weight"] for d in retrievaler.chunk_list(args.doc_id, args.tenant_id, max_count=12, fields=["content_with_weight"])]
+ docs = [d["content_with_weight"] for d in retrievaler.chunk_list(args.doc_id, args.tenant_id, kb_ids, max_count=12, fields=["content_with_weight"])]
info = {
"input_text": docs,
"entity_specs": "organization, person",
diff --git a/graphrag/search.py b/graphrag/search.py
index a5574466aff..c5ce08b3fe9 100644
--- a/graphrag/search.py
+++ b/graphrag/search.py
@@ -15,95 +15,90 @@
#
import json
from copy import deepcopy
+from typing import Dict
import pandas as pd
-from elasticsearch_dsl import Q, Search
+from rag.utils.doc_store_conn import OrderByExpr, FusionExpr
from rag.nlp.search import Dealer
class KGSearch(Dealer):
- def search(self, req, idxnm, emb_mdl=None, highlight=False):
- def merge_into_first(sres, title=""):
- df,texts = [],[]
- for d in sres["hits"]["hits"]:
+ def search(self, req, idxnm, kb_ids, emb_mdl, highlight=False):
+ def merge_into_first(sres, title="") -> Dict[str, str]:
+ if not sres:
+ return {}
+ content_with_weight = ""
+ df, texts = [],[]
+ for d in sres.values():
try:
- df.append(json.loads(d["_source"]["content_with_weight"]))
- except Exception as e:
- texts.append(d["_source"]["content_with_weight"])
- pass
- if not df and not texts: return False
+ df.append(json.loads(d["content_with_weight"]))
+ except Exception:
+ texts.append(d["content_with_weight"])
if df:
- try:
- sres["hits"]["hits"][0]["_source"]["content_with_weight"] = title + "\n" + pd.DataFrame(df).to_csv()
- except Exception as e:
- pass
+ content_with_weight = title + "\n" + pd.DataFrame(df).to_csv()
else:
- sres["hits"]["hits"][0]["_source"]["content_with_weight"] = title + "\n" + "\n".join(texts)
- return True
+ content_with_weight = title + "\n" + "\n".join(texts)
+ first_id = ""
+ first_source = {}
+ for k, v in sres.items():
+ first_id = id
+ first_source = deepcopy(v)
+ break
+ first_source["content_with_weight"] = content_with_weight
+ first_id = next(iter(sres))
+ return {first_id: first_source}
+
+ qst = req.get("question", "")
+ matchText, keywords = self.qryr.question(qst, min_match=0.05)
+ condition = self.get_filters(req)
+ ## Entity retrieval
+ condition.update({"knowledge_graph_kwd": ["entity"]})
+ assert emb_mdl, "No embedding model selected"
+ matchDense = self.get_vector(qst, emb_mdl, 1024, req.get("similarity", 0.1))
+ q_vec = matchDense.embedding_data
src = req.get("fields", ["docnm_kwd", "content_ltks", "kb_id", "img_id", "title_tks", "important_kwd",
- "image_id", "doc_id", "q_512_vec", "q_768_vec", "position_int", "name_kwd",
+ "doc_id", f"q_{len(q_vec)}_vec", "position_list", "name_kwd",
"q_1024_vec", "q_1536_vec", "available_int", "content_with_weight",
"weight_int", "weight_flt", "rank_int"
])
- qst = req.get("question", "")
- binary_query, keywords = self.qryr.question(qst, min_match="5%")
- binary_query = self._add_filters(binary_query, req)
+ fusionExpr = FusionExpr("weighted_sum", 32, {"weights": "0.5, 0.5"})
- ## Entity retrieval
- bqry = deepcopy(binary_query)
- bqry.filter.append(Q("terms", knowledge_graph_kwd=["entity"]))
- s = Search()
- s = s.query(bqry)[0: 32]
-
- s = s.to_dict()
- q_vec = []
- if req.get("vector"):
- assert emb_mdl, "No embedding model selected"
- s["knn"] = self._vector(
- qst, emb_mdl, req.get(
- "similarity", 0.1), 1024)
- s["knn"]["filter"] = bqry.to_dict()
- q_vec = s["knn"]["query_vector"]
-
- ent_res = self.es.search(deepcopy(s), idxnms=idxnm, timeout="600s", src=src)
- entities = [d["name_kwd"] for d in self.es.getSource(ent_res)]
- ent_ids = self.es.getDocIds(ent_res)
- if merge_into_first(ent_res, "-Entities-"):
- ent_ids = ent_ids[0:1]
+ ent_res = self.dataStore.search(src, list(), condition, [matchText, matchDense, fusionExpr], OrderByExpr(), 0, 32, idxnm, kb_ids)
+ ent_res_fields = self.dataStore.getFields(ent_res, src)
+ entities = [d["name_kwd"] for d in ent_res_fields.values()]
+ ent_ids = self.dataStore.getChunkIds(ent_res)
+ ent_content = merge_into_first(ent_res_fields, "-Entities-")
+ if ent_content:
+ ent_ids = list(ent_content.keys())
## Community retrieval
- bqry = deepcopy(binary_query)
- bqry.filter.append(Q("terms", entities_kwd=entities))
- bqry.filter.append(Q("terms", knowledge_graph_kwd=["community_report"]))
- s = Search()
- s = s.query(bqry)[0: 32]
- s = s.to_dict()
- comm_res = self.es.search(deepcopy(s), idxnms=idxnm, timeout="600s", src=src)
- comm_ids = self.es.getDocIds(comm_res)
- if merge_into_first(comm_res, "-Community Report-"):
- comm_ids = comm_ids[0:1]
+ condition = self.get_filters(req)
+ condition.update({"entities_kwd": entities, "knowledge_graph_kwd": ["community_report"]})
+ comm_res = self.dataStore.search(src, list(), condition, [matchText, matchDense, fusionExpr], OrderByExpr(), 0, 32, idxnm, kb_ids)
+ comm_res_fields = self.dataStore.getFields(comm_res, src)
+ comm_ids = self.dataStore.getChunkIds(comm_res)
+ comm_content = merge_into_first(comm_res_fields, "-Community Report-")
+ if comm_content:
+ comm_ids = list(comm_content.keys())
## Text content retrieval
- bqry = deepcopy(binary_query)
- bqry.filter.append(Q("terms", knowledge_graph_kwd=["text"]))
- s = Search()
- s = s.query(bqry)[0: 6]
- s = s.to_dict()
- txt_res = self.es.search(deepcopy(s), idxnms=idxnm, timeout="600s", src=src)
- txt_ids = self.es.getDocIds(txt_res)
- if merge_into_first(txt_res, "-Original Content-"):
- txt_ids = txt_ids[0:1]
+ condition = self.get_filters(req)
+ condition.update({"knowledge_graph_kwd": ["text"]})
+ txt_res = self.dataStore.search(src, list(), condition, [matchText, matchDense, fusionExpr], OrderByExpr(), 0, 6, idxnm, kb_ids)
+ txt_res_fields = self.dataStore.getFields(txt_res, src)
+ txt_ids = self.dataStore.getChunkIds(txt_res)
+ txt_content = merge_into_first(txt_res_fields, "-Original Content-")
+ if txt_content:
+ txt_ids = list(txt_content.keys())
return self.SearchResult(
total=len(ent_ids) + len(comm_ids) + len(txt_ids),
ids=[*ent_ids, *comm_ids, *txt_ids],
query_vector=q_vec,
- aggregation=None,
highlight=None,
- field={**self.getFields(ent_res, src), **self.getFields(comm_res, src), **self.getFields(txt_res, src)},
+ field={**ent_content, **comm_content, **txt_content},
keywords=[]
)
-
diff --git a/graphrag/smoke.py b/graphrag/smoke.py
index b2efbc91fd2..3d0ae370a23 100644
--- a/graphrag/smoke.py
+++ b/graphrag/smoke.py
@@ -31,10 +31,13 @@
from api.db import LLMType
from api.db.services.llm_service import LLMBundle
from api.settings import retrievaler
+ from api.db.services.knowledgebase_service import KnowledgebaseService
+
+ kb_ids = KnowledgebaseService.get_kb_ids(args.tenant_id)
ex = GraphExtractor(LLMBundle(args.tenant_id, LLMType.CHAT))
docs = [d["content_with_weight"] for d in
- retrievaler.chunk_list(args.doc_id, args.tenant_id, max_count=6, fields=["content_with_weight"])]
+ retrievaler.chunk_list(args.doc_id, args.tenant_id, kb_ids, max_count=6, fields=["content_with_weight"])]
graph = ex(docs)
er = EntityResolution(LLMBundle(args.tenant_id, LLMType.CHAT))
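A hedged usage sketch of the updated retrievaler.chunk_list() call shown in this smoke-test change: the tenant's knowledge-base ids are now resolved up front via KnowledgebaseService.get_kb_ids() and passed as the third positional argument. The wrapper name `fetch_doc_contents` is illustrative only; the imports and the call itself mirror the hunk above.

    from api.db.services.knowledgebase_service import KnowledgebaseService
    from api.settings import retrievaler

    def fetch_doc_contents(doc_id: str, tenant_id: str, max_count: int = 6):
        # chunk_list() now requires the tenant's kb_ids so retrieval can be scoped
        # to the right knowledge bases regardless of the configured doc store.
        kb_ids = KnowledgebaseService.get_kb_ids(tenant_id)
        chunks = retrievaler.chunk_list(doc_id, tenant_id, kb_ids,
                                        max_count=max_count,
                                        fields=["content_with_weight"])
        return [c["content_with_weight"] for c in chunks]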
diff --git a/poetry.lock b/poetry.lock
index 6dcaa034bfe..a40e666fdca 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,13 +1,14 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]]
name = "accelerate"
-version = "1.1.0"
+version = "1.1.1"
description = "Accelerate"
optional = false
python-versions = ">=3.9.0"
files = [
- {file = "accelerate-1.1.0-py3-none-any.whl", hash = "sha256:babee98bd3692cfb0537db3f96f36b5d4e29809776b406d32aaf593d4eeb574c"},
+ {file = "accelerate-1.1.1-py3-none-any.whl", hash = "sha256:61edd81762131b8d4bede008643fa1e1f3bf59bec710ebda9771443e24feae02"},
+ {file = "accelerate-1.1.1.tar.gz", hash = "sha256:0d39dfac557052bc735eb2703a0e87742879e1e40b88af8a2f9a93233d4cd7db"},
]
[package.dependencies]
@@ -208,17 +209,17 @@ files = [
[[package]]
name = "akshare"
-version = "1.15.12"
+version = "1.15.20"
description = "AKShare is an elegant and simple financial data interface library for Python, built for human beings!"
optional = false
python-versions = ">=3.8"
files = [
- {file = "akshare-1.15.12-py3-none-any.whl", hash = "sha256:1f3bae6ce885498cf1b05ffc20800651e2c5e4bedfc90fa0cfd944d1de457a81"},
- {file = "akshare-1.15.12.tar.gz", hash = "sha256:089029ffc9dc6d98e39071961d394bbb89b4fdcd76820f598d3743c0a61dd0ae"},
+ {file = "akshare-1.15.20-py3-none-any.whl", hash = "sha256:525338e6b28babf3d4e22d80958faafe8019155642f7dd82be132ebfc269ff6e"},
+ {file = "akshare-1.15.20.tar.gz", hash = "sha256:dca6a70c3424996b4305d4cf4e985c98007bb7ebbb943c0948134224c8c22523"},
]
[package.dependencies]
-akracer = {version = ">=0.0.13", markers = "platform_system == \"Linux\""}
+akracer = {version = ">=0.0.13", extras = ["py-mini-racer"], markers = "platform_system == \"Linux\""}
beautifulsoup4 = ">=4.9.1"
decorator = ">=4.4.2"
html5lib = ">=1.0.1"
@@ -423,27 +424,27 @@ requests = ">=2.32.0,<2.33.0"
[[package]]
name = "aspose-slides"
-version = "24.10.0"
+version = "24.11.0"
description = "Aspose.Slides for Python via .NET is a presentation file formats processing library for working with Microsoft PowerPoint files without using Microsoft PowerPoint."
optional = false
-python-versions = ">=3.5,<3.13"
+python-versions = ">=3.5,<3.14"
files = [
- {file = "Aspose.Slides-24.10.0-py3-none-macosx_10_14_x86_64.whl", hash = "sha256:a9d810ee3277b286c8218fbfa776f9ab46f1de285cb120954ff5031c1a59d959"},
- {file = "Aspose.Slides-24.10.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0ff7ee9a8c00855dc315a7e6cd14022547e5015828b182124ceda0de3d6e3a94"},
- {file = "Aspose.Slides-24.10.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:506a5f4c1955599eacaeeb3a3badb273c11ed8a4a208035af83a243b8c8cdc8a"},
- {file = "Aspose.Slides-24.10.0-py3-none-win32.whl", hash = "sha256:f2b2770af6470d4ed4160f7b16b0666c80db11e611c6a4d60f841be87afdcbf1"},
- {file = "Aspose.Slides-24.10.0-py3-none-win_amd64.whl", hash = "sha256:8980015fbc32c1e70e80444c70a642597511300ead6b352183bf74ba3da67f2d"},
+ {file = "Aspose.Slides-24.11.0-py3-none-macosx_10_14_x86_64.whl", hash = "sha256:b4819364497f9e075e00e63ee8fba8745dda4c910e199d5201e4abeebdcdec89"},
+ {file = "Aspose.Slides-24.11.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:bbeb5f0b14901f29f209beeac694a183f8d36c9475556ddeed3b2edb8107536a"},
+ {file = "Aspose.Slides-24.11.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:24d6287401863de1251fb366be4845e1693ff1c70f09f04fed1e2086561401f5"},
+ {file = "Aspose.Slides-24.11.0-py3-none-win32.whl", hash = "sha256:9e07bcb1c6b17f01d51d9d9c88b1fbc40da580e54ccabb6373e884e64f406a8b"},
+ {file = "Aspose.Slides-24.11.0-py3-none-win_amd64.whl", hash = "sha256:2b249848a0432cd2746d94011fe6258038c04615ef8606ddd1bb238f5e9d4f2f"},
]
[[package]]
name = "async-timeout"
-version = "5.0.0"
+version = "5.0.1"
description = "Timeout context manager for asyncio programs"
optional = false
python-versions = ">=3.8"
files = [
- {file = "async_timeout-5.0.0-py3-none-any.whl", hash = "sha256:904719a4bd6e0520047d0ddae220aabee67b877f7ca17bf8cea20f67f6247ae0"},
- {file = "async_timeout-5.0.0.tar.gz", hash = "sha256:49675ec889daacfe65ff66d2dde7dd1447a6f4b2f23721022e4ba121f8772a85"},
+ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
+ {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
]
[[package]]
@@ -578,7 +579,7 @@ name = "bce-python-sdk"
version = "0.9.23"
description = "BCE SDK for python"
optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,<4,>=2.7"
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, <4"
files = [
{file = "bce_python_sdk-0.9.23-py3-none-any.whl", hash = "sha256:8debe21a040e00060f6044877d594765ed7b18bc765c6bf16b878bca864140a3"},
{file = "bce_python_sdk-0.9.23.tar.gz", hash = "sha256:19739fed5cd0725356fc5ffa2acbdd8fb23f2a81edb91db21a03174551d0cf41"},
@@ -735,13 +736,13 @@ jsonld = ["PyLD (>=0.7.2)"]
[[package]]
name = "blinker"
-version = "1.8.2"
+version = "1.9.0"
description = "Fast, simple object-to-object and broadcast signaling"
optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
files = [
- {file = "blinker-1.8.2-py3-none-any.whl", hash = "sha256:1779309f71bf239144b9399d06ae925637cf6634cf6bd131104184531bf67c01"},
- {file = "blinker-1.8.2.tar.gz", hash = "sha256:8f77b09d3bf7c795e969e9486f39c2c5e9c39d4ee07424be2bc594ece9642d83"},
+ {file = "blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc"},
+ {file = "blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf"},
]
[[package]]
@@ -1913,7 +1914,7 @@ name = "fastembed"
version = "0.3.6"
description = "Fast, light, accurate library built for retrieval embedding generation"
optional = false
-python-versions = "<3.13,>=3.8.0"
+python-versions = ">=3.8.0,<3.13"
files = [
{file = "fastembed-0.3.6-py3-none-any.whl", hash = "sha256:2bf70edae28bb4ccd9e01617098c2075b0ba35b88025a3d22b0e1e85b2c488ce"},
{file = "fastembed-0.3.6.tar.gz", hash = "sha256:c93c8ec99b8c008c2d192d6297866b8d70ec7ac8f5696b34eb5ea91f85efd15f"},
@@ -2228,12 +2229,12 @@ woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"]
[[package]]
name = "free-proxy"
-version = "1.1.2"
+version = "1.1.3"
description = "Proxy scraper for further use"
optional = false
python-versions = ">=3.6"
files = [
- {file = "free_proxy-1.1.2.tar.gz", hash = "sha256:f7c361d3b6c82918ab845163c46ea574e6bd4ce54b1342e89804aaf07e53b848"},
+ {file = "free_proxy-1.1.3.tar.gz", hash = "sha256:6d82aa112e3df7725bdbf177e2110bccdf5f3bbd6e1c70b8616ec12ae3bbf98c"},
]
[package.dependencies]
@@ -2517,13 +2518,13 @@ uritemplate = ">=3.0.1,<5"
[[package]]
name = "google-auth"
-version = "2.35.0"
+version = "2.36.0"
description = "Google Authentication Library"
optional = false
python-versions = ">=3.7"
files = [
- {file = "google_auth-2.35.0-py2.py3-none-any.whl", hash = "sha256:25df55f327ef021de8be50bad0dfd4a916ad0de96da86cd05661c9297723ad3f"},
- {file = "google_auth-2.35.0.tar.gz", hash = "sha256:f4c64ed4e01e8e8b646ef34c018f8bf3338df0c8e37d8b3bba40e7f574a3278a"},
+ {file = "google_auth-2.36.0-py2.py3-none-any.whl", hash = "sha256:51a15d47028b66fd36e5c64a82d2d57480075bccc7da37cde257fc94177a61fb"},
+ {file = "google_auth-2.36.0.tar.gz", hash = "sha256:545e9618f2df0bcbb7dcbc45a546485b1212624716975a1ea5ae8149ce769ab1"},
]
[package.dependencies]
@@ -2821,7 +2822,7 @@ name = "graspologic"
version = "3.4.1"
description = "A set of Python modules for graph statistics"
optional = false
-python-versions = "<3.13,>=3.9"
+python-versions = ">=3.9,<3.13"
files = [
{file = "graspologic-3.4.1-py3-none-any.whl", hash = "sha256:c6563e087eda599bad1de831d4b7321c0daa7a82f4e85a7d7737ff67e07cdda2"},
{file = "graspologic-3.4.1.tar.gz", hash = "sha256:7561f0b852a2bccd351bff77e8db07d9892f9dfa35a420fdec01690e4fdc8075"},
@@ -3466,13 +3467,13 @@ type = ["pytest-mypy"]
[[package]]
name = "infinity-emb"
-version = "0.0.51"
-description = "Infinity is a high-throughput, low-latency REST API for serving vector embeddings, supporting a wide range of sentence-transformer models and frameworks."
+version = "0.0.66"
+description = "Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip."
optional = false
-python-versions = "<4,>=3.9"
+python-versions = ">=3.9,<4"
files = [
- {file = "infinity_emb-0.0.51-py3-none-any.whl", hash = "sha256:d4384e398189b619699c300dfc144160344e5de2c7b57da5831881688d6c2842"},
- {file = "infinity_emb-0.0.51.tar.gz", hash = "sha256:11b09959c15f9456cbfc9f3d8516ad97485ef54dc5c68f5d9d6279083b8fba9d"},
+ {file = "infinity_emb-0.0.66-py3-none-any.whl", hash = "sha256:1dc6ed9fa48e6cbe83650a7583dbbb4bc393900c39c326bb0aff2ddc090ac018"},
+ {file = "infinity_emb-0.0.66.tar.gz", hash = "sha256:9c9a361ccebf8e8f626c1f685286518d03d0c35e7d14179ae7c2500b4fc68b98"},
]
[package.dependencies]
@@ -3481,17 +3482,41 @@ huggingface_hub = "*"
numpy = ">=1.20.0,<2"
[package.extras]
-all = ["ctranslate2 (>=4.0.0,<5.0.0)", "diskcache", "einops", "fastapi (>=0.103.2)", "optimum[onnxruntime] (>=1.16.2)", "orjson (>=3.9.8,!=3.10.0)", "pillow", "prometheus-fastapi-instrumentator (>=6.1.0)", "pydantic (>=2.4.0,<3)", "rich (>=13,<14)", "sentence-transformers (>=3.0.1,<4.0.0)", "timm", "torch (>=2.2.1)", "typer[all] (>=0.9.0,<0.10.0)", "uvicorn[standard] (>=0.23.2,<0.24.0)"]
+all = ["colpali-engine (>=0.3.1,<0.4.0)", "ctranslate2 (>=4.0.0,<5.0.0)", "diskcache", "einops", "fastapi (>=0.103.2)", "optimum[onnxruntime] (>=1.23.1)", "orjson (>=3.9.8,!=3.10.0)", "pillow", "posthog", "prometheus-fastapi-instrumentator (>=6.1.0)", "pydantic (>=2.4.0,<3)", "rich (>=13,<14)", "sentence-transformers (>=3.0.1,<4.0.0)", "soundfile (>=0.12.1,<0.13.0)", "timm", "torch (>=2.2.1)", "torchvision", "typer[all] (>=0.12.5,<0.13.0)", "uvicorn[standard] (>=0.32.0,<0.33.0)"]
+audio = ["soundfile (>=0.12.1,<0.13.0)"]
cache = ["diskcache"]
ct2 = ["ctranslate2 (>=4.0.0,<5.0.0)", "sentence-transformers (>=3.0.1,<4.0.0)", "torch (>=2.2.1)", "transformers (>4.34.0,<=5.0)"]
einops = ["einops"]
logging = ["rich (>=13,<14)"]
onnxruntime-gpu = ["onnxruntime-gpu"]
-optimum = ["optimum[onnxruntime] (>=1.16.2)"]
-server = ["fastapi (>=0.103.2)", "orjson (>=3.9.8,!=3.10.0)", "prometheus-fastapi-instrumentator (>=6.1.0)", "pydantic (>=2.4.0,<3)", "rich (>=13,<14)", "typer[all] (>=0.9.0,<0.10.0)", "uvicorn[standard] (>=0.23.2,<0.24.0)"]
+optimum = ["optimum[onnxruntime] (>=1.23.1)"]
+server = ["fastapi (>=0.103.2)", "orjson (>=3.9.8,!=3.10.0)", "posthog", "prometheus-fastapi-instrumentator (>=6.1.0)", "pydantic (>=2.4.0,<3)", "rich (>=13,<14)", "typer[all] (>=0.12.5,<0.13.0)", "uvicorn[standard] (>=0.32.0,<0.33.0)"]
tensorrt = ["tensorrt (>=8.6.1,<9.0.0)"]
torch = ["sentence-transformers (>=3.0.1,<4.0.0)", "torch (>=2.2.1)"]
-vision = ["pillow", "timm"]
+vision = ["colpali-engine (>=0.3.1,<0.4.0)", "pillow", "timm", "torchvision"]
+
+[[package]]
+name = "infinity-sdk"
+version = "0.5.0.dev2"
+description = "infinity"
+optional = false
+python-versions = ">=3.10"
+files = [
+ {file = "infinity_sdk-0.5.0.dev2-py3-none-any.whl", hash = "sha256:7f26cdb913e88094b805fb5f76e55408341d8303558c5ebda6385a90d4e94bc0"},
+]
+
+[package.dependencies]
+numpy = ">=1.26.0,<1.27.0"
+openpyxl = ">=3.1.0,<3.2.0"
+pandas = ">=2.2.0,<2.3.0"
+polars = ">=1.9.0,<1.10.0"
+pyarrow = ">=17.0.0,<17.1.0"
+pydantic = ">=2.9.0,<2.10.0"
+pytest = ">=8.3.0,<8.4.0"
+requests = ">=2.32.0,<2.33.0"
+setuptools = ">=75.2.0,<75.3.0"
+sqlglot = ">=11.7.0,<11.8.0"
+thrift = ">=0.20.0,<0.21.0"
[[package]]
name = "iniconfig"
@@ -3525,7 +3550,7 @@ web-service = ["fastapi (>=0.109.0,<0.110.0)", "uvicorn (>=0.25.0,<0.26.0)"]
[[package]]
name = "intel-openmp"
version = "2021.4.0"
-description = "Intel® OpenMP* Runtime Library"
+description = "Intel OpenMP* Runtime Library"
optional = false
python-versions = "*"
files = [
@@ -3538,13 +3563,13 @@ files = [
[[package]]
name = "ir-datasets"
-version = "0.5.8"
+version = "0.5.9"
description = "provides a common interface to many IR ad-hoc ranking benchmarks, training datasets, etc."
optional = false
python-versions = ">=3.8"
files = [
- {file = "ir_datasets-0.5.8-py3-none-any.whl", hash = "sha256:fb8ded3f79d238da3d90e266f44adc7ca976dc867bf1f95a432cbd066b18bd6c"},
- {file = "ir_datasets-0.5.8.tar.gz", hash = "sha256:5120807dadbcb1c38a37cd9fe9be71edee617f862ba05695155b0931327984e0"},
+ {file = "ir_datasets-0.5.9-py3-none-any.whl", hash = "sha256:07c9bed07f31031f1da1bc02afc7a1077b1179a3af402d061f83bf6fb833b90a"},
+ {file = "ir_datasets-0.5.9.tar.gz", hash = "sha256:35c90980fbd0f4ea8fe22a1ab16d2bb6be3dc373cbd6dfab1d905f176a70e5ac"},
]
[package.dependencies]
@@ -5221,13 +5246,13 @@ numpy = [
[[package]]
name = "openpyxl"
-version = "3.1.2"
+version = "3.1.5"
description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
files = [
- {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"},
- {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"},
+ {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"},
+ {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"},
]
[package.dependencies]
@@ -5351,51 +5376,64 @@ attrs = ">=19.2.0"
[[package]]
name = "packaging"
-version = "24.1"
+version = "24.2"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.8"
files = [
- {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"},
- {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
+ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
+ {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
]
[[package]]
name = "pandas"
-version = "2.2.2"
+version = "2.2.3"
description = "Powerful data structures for data analysis, time series, and statistics"
optional = false
python-versions = ">=3.9"
files = [
- {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"},
- {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"},
- {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"},
- {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"},
- {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"},
- {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"},
- {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"},
- {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"},
- {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"},
- {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"},
- {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"},
- {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"},
- {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"},
- {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"},
- {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"},
- {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"},
- {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"},
- {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"},
- {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"},
- {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"},
- {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"},
- {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"},
- {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"},
- {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"},
- {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"},
- {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"},
- {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"},
- {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"},
- {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"},
+ {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"},
+ {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"},
+ {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"},
+ {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"},
+ {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"},
+ {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"},
+ {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"},
+ {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"},
+ {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"},
+ {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"},
+ {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"},
+ {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"},
+ {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"},
+ {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"},
+ {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"},
+ {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"},
+ {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"},
+ {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"},
+ {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"},
+ {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"},
+ {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"},
+ {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"},
+ {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"},
+ {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"},
+ {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"},
+ {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"},
+ {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"},
+ {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"},
+ {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"},
+ {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"},
+ {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"},
+ {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"},
+ {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"},
+ {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"},
+ {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"},
+ {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"},
+ {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"},
+ {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"},
+ {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"},
+ {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"},
+ {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"},
+ {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"},
]
[package.dependencies]
@@ -5675,6 +5713,47 @@ files = [
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
+[[package]]
+name = "polars"
+version = "1.9.0"
+description = "Blazingly fast DataFrame library"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "polars-1.9.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:a471d2ce96f6fa5dd0ef16bcdb227f3dbe3af8acb776ca52f9e64ef40c7489a0"},
+ {file = "polars-1.9.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94b12d731cd200d2c50b13fc070d6353f708e632bca6529c5a72aa6a69e5285d"},
+ {file = "polars-1.9.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f85f132732aa63c6f3b502b0fdfc3ba9f0b78cc6330059b5a2d6f9fd78508acb"},
+ {file = "polars-1.9.0-cp38-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:f753c8941a3b3249d59262d68a856714a96a7d4e16977aefbb196be0c192e151"},
+ {file = "polars-1.9.0-cp38-abi3-win_amd64.whl", hash = "sha256:95de07066cd797dd940fa2783708a7bef93c827a57be0f4dfad3575a6144212b"},
+ {file = "polars-1.9.0.tar.gz", hash = "sha256:8e1206ef876f61c1d50a81e102611ea92ee34631cb135b46ad314bfefd3cb122"},
+]
+
+[package.extras]
+adbc = ["adbc-driver-manager[dbapi]", "adbc-driver-sqlite[dbapi]"]
+all = ["polars[async,cloudpickle,database,deltalake,excel,fsspec,graph,iceberg,numpy,pandas,plot,pyarrow,pydantic,style,timezone]"]
+async = ["gevent"]
+calamine = ["fastexcel (>=0.9)"]
+cloudpickle = ["cloudpickle"]
+connectorx = ["connectorx (>=0.3.2)"]
+database = ["nest-asyncio", "polars[adbc,connectorx,sqlalchemy]"]
+deltalake = ["deltalake (>=0.15.0)"]
+excel = ["polars[calamine,openpyxl,xlsx2csv,xlsxwriter]"]
+fsspec = ["fsspec"]
+gpu = ["cudf-polars-cu12"]
+graph = ["matplotlib"]
+iceberg = ["pyiceberg (>=0.5.0)"]
+numpy = ["numpy (>=1.16.0)"]
+openpyxl = ["openpyxl (>=3.0.0)"]
+pandas = ["pandas", "polars[pyarrow]"]
+plot = ["altair (>=5.4.0)"]
+pyarrow = ["pyarrow (>=7.0.0)"]
+pydantic = ["pydantic"]
+sqlalchemy = ["polars[pandas]", "sqlalchemy"]
+style = ["great-tables (>=0.8.0)"]
+timezone = ["backports-zoneinfo", "tzdata"]
+xlsx2csv = ["xlsx2csv (>=0.8.0)"]
+xlsxwriter = ["xlsxwriter"]
+
[[package]]
name = "pooch"
version = "1.8.2"
@@ -5717,52 +5796,52 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p
[[package]]
name = "pot"
-version = "0.9.4"
+version = "0.9.5"
description = "Python Optimal Transport Library"
optional = false
python-versions = ">=3.7"
files = [
- {file = "POT-0.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8418ab9c24f549290fdc452caebb58ded05b986a024063fe3354cfd2e704b378"},
- {file = "POT-0.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:096cd3b454f87ff9c8f48d8e221bc26509d8f9355ce99d9fefe83560f82278b5"},
- {file = "POT-0.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6e67d420a479ed66f4549c785e157bb3dce2c5489bf81a44ac922a6e9471fe69"},
- {file = "POT-0.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:107bc7225906a3fa3aafdb441e1d24c55eaf1ee3badd1c93ab6199865f689221"},
- {file = "POT-0.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfee044f744b1722912654c8b905bc289ce160524db5ca0e853f1aa442ffea55"},
- {file = "POT-0.9.4-cp310-cp310-win32.whl", hash = "sha256:421c3efb8da2f1ce9605f9f2068047ea629b95de87baa15b8786b3e664de9dbd"},
- {file = "POT-0.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:6e76194a62f29ddadc975e18cf7f07d22060735bd3fb9a023781e0e126a05822"},
- {file = "POT-0.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:148040b89063790ab784458d5d200ba4a7f36c54fdb62ea0842f8d5d4c5c6ccb"},
- {file = "POT-0.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1109fc77989834a1467be731ff957d90c2b558e772cff6c06fb90f7cbe58b014"},
- {file = "POT-0.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f8218cd419e8457b37fe2b8060b5bf9bd07d4671d5f5fa4d5ac98c58b5be8c0"},
- {file = "POT-0.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ea0055f18e26917ff326f39dd5e5fd43bcc9eccaab4b09a4f8d7785c8921250"},
- {file = "POT-0.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f03b4af3f56234787d2a34e0637dac9c1e1de4cb3f7386ca4f91331f0c4af187"},
- {file = "POT-0.9.4-cp311-cp311-win32.whl", hash = "sha256:a69f6d554df1de47dd312fc065d9171bdbedf48c90c8409889485945ffaaeacf"},
- {file = "POT-0.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:8791c8f09a852901e03e2dc1c6aec4f374b58b3ee905a90349713587aa16e26a"},
- {file = "POT-0.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1a7a55b3fd528e6808f99de0165dcacf185eb86ae3aff4d358b850479b76a8ba"},
- {file = "POT-0.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a9bbd2507605be6eae4f0f0d6f6f0ff91ce3eeb5b7c8eeb350e4eb76bcc6940a"},
- {file = "POT-0.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5594ab0e78329307ce4cd293f2de409513876695d60fb4c1013b5dd46069f256"},
- {file = "POT-0.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0ca658105d129b752c8d20751ff2cb965d1bdcaecec319ae489b135c58d9da9"},
- {file = "POT-0.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6732f1acd94959b8fa13a4fa250ad49c1e6894ece488a81f4427dbf13df4776"},
- {file = "POT-0.9.4-cp312-cp312-win32.whl", hash = "sha256:bf7f5253fee6ba7df5dd854b739540f701153cabab80dd25332dfac93d84bec1"},
- {file = "POT-0.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:597ff64b06a157871feb84e6e82b3f5dfbfff57161c14660ab2ddbcc93c940e6"},
- {file = "POT-0.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:385b41606943fbc73f1ab96fd994117d79c4ad782c91bbb7ba74c0359e9de887"},
- {file = "POT-0.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3f697e084243b9fe0a975331e471fd09610343c6aa28172232958e39100ede6"},
- {file = "POT-0.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b967fb9cafc6ad30a6d51b21d6307b384491a106d6dc75f37bee52a3f63575c3"},
- {file = "POT-0.9.4-cp37-cp37m-win32.whl", hash = "sha256:35926c2f4d2ee49309dce340f7f6646fe451ca1e0d11b2d017a851d482bf4468"},
- {file = "POT-0.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:f29fa1fcd591d1940e2279dc1422ff46c0c273f6be4ecbcaa819d91dd196573e"},
- {file = "POT-0.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:63f8b50f448e32a4ae274dd1e68e22b1a2bc291c53c5c6ec5afadfb930b6a809"},
- {file = "POT-0.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cabd13a728d2db40b3989ad57810769dfba8972b686ae7f4881dbd315252e5d9"},
- {file = "POT-0.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5984157d5a819bd6c55db3b0d8fe631ff398c243e80a9e9e933cbd1ee7c7588c"},
- {file = "POT-0.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b8da4e3268eeee40dff96364f0a9f0326979d565d970ec74a1688b8ad338022"},
- {file = "POT-0.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ede957083299e4904627621f4d2c8a6b56af108fef9b486330f65519a395f10a"},
- {file = "POT-0.9.4-cp38-cp38-win32.whl", hash = "sha256:79716079d7970c6c0bf909f986c65d7103135e36b364ba5fa5caed97d7aa6464"},
- {file = "POT-0.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:3246493745bcf2b353312183b9ab547466cae711936f991a6754b62f55ff1dec"},
- {file = "POT-0.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:223c4ca199b679e4c2b8a79fb49d31f2c7fab2975c2c37d1e68a0a7fbe2bc55d"},
- {file = "POT-0.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c30d77994d02984ad32665f5975e272e8c02e8d5288c4edfbec08617c5c38f91"},
- {file = "POT-0.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5b0fe5be45112c12cc0f6ab61fb85ed9161ca540b24a4037e5d53ab86f390a49"},
- {file = "POT-0.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab68bdfeae54719d202e923f18ec29869c09b105e42f94568221fc92996f0f4d"},
- {file = "POT-0.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2847015e3bb65171eb70eca786f8cebe806960af40625ebc17c858b6314a9e0b"},
- {file = "POT-0.9.4-cp39-cp39-win32.whl", hash = "sha256:2e35d68c6e676108d6eeb7e6b119c4c19dca364992bca57f3f513660bfb1810c"},
- {file = "POT-0.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:e7d029f024ed58f8d10b3e4d517df551bb9758ac12d0503be793f44258f2dffc"},
- {file = "pot-0.9.4.tar.gz", hash = "sha256:4cf8b46bf4992c37529161c32dd5e3334e0c089399142f08ed6d455b57015edd"},
+ {file = "POT-0.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:34d766c38e65a69c087b01a854fe89fbd152c3e8af93da2227b6c40aed6d37b9"},
+ {file = "POT-0.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5407377256de11b6fdc94bbba9b50ea5a2301570905fc9014541cc8473806d9"},
+ {file = "POT-0.9.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f37039cd356198c1fb994e7d935b9bf75d44f2a40319d298bf8cc149eb360d5"},
+ {file = "POT-0.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00a18427c9abdd107a2285ea0a814c6b22e95a1af8f88a37c56f23cd216f7a6b"},
+ {file = "POT-0.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0dc608cea1107289a58dec33cddc1b0a3fea77ff36d66e2c8ac7aeea543969a"},
+ {file = "POT-0.9.5-cp310-cp310-win32.whl", hash = "sha256:8312bee055389db47adab063749c8d77b5981534177ca6cd9b91e4fb68f69d00"},
+ {file = "POT-0.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:043706d69202ac87e140121ba32ed1b038f2b3fc4a5549586187239a583cd50d"},
+ {file = "POT-0.9.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b5f000da00e408ff781672a4895bfa8daacec055bd534c9e66ead479f3c6d83c"},
+ {file = "POT-0.9.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9eddd9ff29bdb17d4db8ba00ba18d42656c694a128591502bf59afc1369e1bb3"},
+ {file = "POT-0.9.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7eb9b88c73387a9966775a6f6d077d9d071814783701d2656dc05b5032a9662d"},
+ {file = "POT-0.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9f44446056f5fc9d132ed8e431732c33cbe754fb1e6d73636f1b6ae811be7df"},
+ {file = "POT-0.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7f5d27bc9063e01b03d906bb77e7b3428065fdd72ed64233b249584ead2e2bf"},
+ {file = "POT-0.9.5-cp311-cp311-win32.whl", hash = "sha256:cd79a8b4d35b706f2124f73ebff3bb1ce3450e01cc8f610eda3b6ce13616b829"},
+ {file = "POT-0.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:6680aadb69df2f75a413fe9c58bd1c5cb744d017a7c8ba8841654fd0dc75433b"},
+ {file = "POT-0.9.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7d57f96b333c9816a2af7817753108739b38155e52648c5967681dbd89d92ed2"},
+ {file = "POT-0.9.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:afad647c78f999439f8c5cbcf74b03c5c0afefb08727cd7d68994130fabfc761"},
+ {file = "POT-0.9.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bca891c28592d6e0e8f04b35989de7005f0fb9b3923f00537f1b269c5084aa7b"},
+ {file = "POT-0.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:088c930a5fcd1e8e36fb6af710df47ce6e9331b6b5a28eb09c673df4186dcb10"},
+ {file = "POT-0.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfb18268fac1e982e21821a03f802802a0d579c4690988b764115dd886dc38f5"},
+ {file = "POT-0.9.5-cp312-cp312-win32.whl", hash = "sha256:931fa46ff8e01d47309207243988c783a2d8364452bc080b130c5d319349ad3f"},
+ {file = "POT-0.9.5-cp312-cp312-win_amd64.whl", hash = "sha256:be786612b391c2e4d3b5db4e7d51cdb2360284e3a6949990051c2eb102f60d3c"},
+ {file = "POT-0.9.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:844820020240bad66ca07255289df9ed1e46c5f71ba2401852833c0dd114c660"},
+ {file = "POT-0.9.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a76a5bed3af51db1a10c59ba376f500a743f8e20c2a6d4851c4535dbbed17714"},
+ {file = "POT-0.9.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a03da3283cb04a1fa3258f0096ad9cfa3311192d5a6bee3a2ca0e15304f8652"},
+ {file = "POT-0.9.5-cp37-cp37m-win32.whl", hash = "sha256:dc50b8005b4dfa3478f0bf841c22d8b3500a8a04e5673da146d71f7039607e3a"},
+ {file = "POT-0.9.5-cp37-cp37m-win_amd64.whl", hash = "sha256:a9cab787bcb3ce6d23ef297c115baad34ed578a98b4a02afba8cb4e30e39d171"},
+ {file = "POT-0.9.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:926ba491b5b1f43fb0f3bc6e9d92b6cc634c12e2fa778eba88d9350e82fc2c88"},
+ {file = "POT-0.9.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1b77b630a303868ee14015a4306d7e852b174d4a734815c67e27cd45fd59cc07"},
+ {file = "POT-0.9.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:db0dd974328cbdd7b20477fb5757326dda22d77cb639f4759296fcd206db380f"},
+ {file = "POT-0.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb29c375d02bb5aadad527133e9c20dd73930d8e2294434dc5306fb740a49d9e"},
+ {file = "POT-0.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:293e0993d66b09db69c2282edbf859e1de57a3f15b99bd909609ce120380b398"},
+ {file = "POT-0.9.5-cp38-cp38-win32.whl", hash = "sha256:5996d538885b834e36a3838bc73adeb747bd54ab0a2b3178addbb35b3edafa45"},
+ {file = "POT-0.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:0131aab58d57bf5876d826461d0968d1a655b611cc8c0297c38ab8a235e0d627"},
+ {file = "POT-0.9.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:95c29ee3e647b272bfcb35c3c4cb7409326a0a6d3bf3ed8460495e9ac3f3a76d"},
+ {file = "POT-0.9.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b1bca1b3465eadab9d5e1c075122963da3e921102555d1c6b7ff3c1f437d3e18"},
+ {file = "POT-0.9.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e64f5d8890e21eb1e7decac694c34820496238e7d9c95309411e58cb0b04d384"},
+ {file = "POT-0.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fa190662670868126a2372499aec513bd4ac50b4565fe2014525c7cef11e2bf"},
+ {file = "POT-0.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9b775daf69cb4043897050961f9b654c30261543e531d53248a99e5599db0c8"},
+ {file = "POT-0.9.5-cp39-cp39-win32.whl", hash = "sha256:ceea4cffebce88211cd63bfddc878e2f29a6b6347125cbac40fa214308315878"},
+ {file = "POT-0.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:2f6af660505772833d4ccc189d9de264b429d9ec8e0cb564f33d2181e6f1bbce"},
+ {file = "pot-0.9.5.tar.gz", hash = "sha256:9644ee7ff51c3cffa3c2632b9dd9dff4f3520266f9fb771450935ffb646d6042"},
]
[package.dependencies]
@@ -6085,54 +6164,51 @@ files = [
[[package]]
name = "pyarrow"
-version = "18.0.0"
+version = "17.0.0"
description = "Python library for Apache Arrow"
optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.8"
files = [
- {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2333f93260674e185cfbf208d2da3007132572e56871f451ba1a556b45dae6e2"},
- {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:4c381857754da44326f3a49b8b199f7f87a51c2faacd5114352fc78de30d3aba"},
- {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:603cd8ad4976568954598ef0a6d4ed3dfb78aff3d57fa8d6271f470f0ce7d34f"},
- {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58a62549a3e0bc9e03df32f350e10e1efb94ec6cf63e3920c3385b26663948ce"},
- {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bc97316840a349485fbb137eb8d0f4d7057e1b2c1272b1a20eebbbe1848f5122"},
- {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:2e549a748fa8b8715e734919923f69318c953e077e9c02140ada13e59d043310"},
- {file = "pyarrow-18.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:606e9a3dcb0f52307c5040698ea962685fb1c852d72379ee9412be7de9c5f9e2"},
- {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d5795e37c0a33baa618c5e054cd61f586cf76850a251e2b21355e4085def6280"},
- {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:5f0510608ccd6e7f02ca8596962afb8c6cc84c453e7be0da4d85f5f4f7b0328a"},
- {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616ea2826c03c16e87f517c46296621a7c51e30400f6d0a61be645f203aa2b93"},
- {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1824f5b029ddd289919f354bc285992cb4e32da518758c136271cf66046ef22"},
- {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd1b52d0d58dd8f685ced9971eb49f697d753aa7912f0a8f50833c7a7426319"},
- {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:320ae9bd45ad7ecc12ec858b3e8e462578de060832b98fc4d671dee9f10d9954"},
- {file = "pyarrow-18.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:2c992716cffb1088414f2b478f7af0175fd0a76fea80841b1706baa8fb0ebaad"},
- {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:e7ab04f272f98ebffd2a0661e4e126036f6936391ba2889ed2d44c5006237802"},
- {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:03f40b65a43be159d2f97fd64dc998f769d0995a50c00f07aab58b0b3da87e1f"},
- {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be08af84808dff63a76860847c48ec0416928a7b3a17c2f49a072cac7c45efbd"},
- {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c70c1965cde991b711a98448ccda3486f2a336457cf4ec4dca257a926e149c9"},
- {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:00178509f379415a3fcf855af020e3340254f990a8534294ec3cf674d6e255fd"},
- {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a71ab0589a63a3e987beb2bc172e05f000a5c5be2636b4b263c44034e215b5d7"},
- {file = "pyarrow-18.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:fe92efcdbfa0bcf2fa602e466d7f2905500f33f09eb90bf0bcf2e6ca41b574c8"},
- {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:907ee0aa8ca576f5e0cdc20b5aeb2ad4d3953a3b4769fc4b499e00ef0266f02f"},
- {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:66dcc216ebae2eb4c37b223feaf82f15b69d502821dde2da138ec5a3716e7463"},
- {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc1daf7c425f58527900876354390ee41b0ae962a73ad0959b9d829def583bb1"},
- {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:871b292d4b696b09120ed5bde894f79ee2a5f109cb84470546471df264cae136"},
- {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:082ba62bdcb939824ba1ce10b8acef5ab621da1f4c4805e07bfd153617ac19d4"},
- {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:2c664ab88b9766413197733c1720d3dcd4190e8fa3bbdc3710384630a0a7207b"},
- {file = "pyarrow-18.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc892be34dbd058e8d189b47db1e33a227d965ea8805a235c8a7286f7fd17d3a"},
- {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:28f9c39a56d2c78bf6b87dcc699d520ab850919d4a8c7418cd20eda49874a2ea"},
- {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:f1a198a50c409ab2d009fbf20956ace84567d67f2c5701511d4dd561fae6f32e"},
- {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5bd7fd32e3ace012d43925ea4fc8bd1b02cc6cc1e9813b518302950e89b5a22"},
- {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:336addb8b6f5208be1b2398442c703a710b6b937b1a046065ee4db65e782ff5a"},
- {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:45476490dd4adec5472c92b4d253e245258745d0ccaabe706f8d03288ed60a79"},
- {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420"},
- {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb7e3abcda7e1e6b83c2dc2909c8d045881017270a119cc6ee7fdcfe71d02df8"},
- {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:09f30690b99ce34e0da64d20dab372ee54431745e4efb78ac938234a282d15f9"},
- {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5ca5d707e158540312e09fd907f9f49bacbe779ab5236d9699ced14d2293b8"},
- {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6331f280c6e4521c69b201a42dd978f60f7e129511a55da9e0bfe426b4ebb8d"},
- {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3ac24b2be732e78a5a3ac0b3aa870d73766dd00beba6e015ea2ea7394f8b4e55"},
- {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b30a927c6dff89ee702686596f27c25160dd6c99be5bcc1513a763ae5b1bfc03"},
- {file = "pyarrow-18.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:8f40ec677e942374e3d7f2fad6a67a4c2811a8b975e8703c6fd26d3b168a90e2"},
- {file = "pyarrow-18.0.0.tar.gz", hash = "sha256:a6aa027b1a9d2970cf328ccd6dbe4a996bc13c39fd427f502782f5bdb9ca20f5"},
-]
+ {file = "pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07"},
+ {file = "pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655"},
+ {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545"},
+ {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2"},
+ {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8"},
+ {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047"},
+ {file = "pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087"},
+ {file = "pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977"},
+ {file = "pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3"},
+ {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15"},
+ {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597"},
+ {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420"},
+ {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4"},
+ {file = "pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03"},
+ {file = "pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22"},
+ {file = "pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053"},
+ {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a"},
+ {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc"},
+ {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a"},
+ {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b"},
+ {file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"},
+ {file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"},
+ {file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"},
+ {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"},
+ {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"},
+ {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"},
+ {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"},
+ {file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"},
+ {file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"},
+ {file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"},
+ {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"},
+ {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"},
+ {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"},
+ {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"},
+ {file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"},
+ {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"},
+]
+
+[package.dependencies]
+numpy = ">=1.16.6"
[package.extras]
test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"]
@@ -6786,20 +6862,20 @@ files = [
[[package]]
name = "pytest"
-version = "8.2.2"
+version = "8.3.3"
description = "pytest: simple powerful testing with Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"},
- {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"},
+ {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"},
+ {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"},
]
[package.dependencies]
colorama = {version = "*", markers = "sys_platform == \"win32\""}
iniconfig = "*"
packaging = "*"
-pluggy = ">=1.5,<2.0"
+pluggy = ">=1.5,<2"
[package.extras]
dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
@@ -7078,105 +7154,105 @@ rpds-py = ">=0.7.0"
[[package]]
name = "regex"
-version = "2024.9.11"
+version = "2024.11.6"
description = "Alternative regular expression module, to replace re."
optional = false
python-versions = ">=3.8"
files = [
- {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1494fa8725c285a81d01dc8c06b55287a1ee5e0e382d8413adc0a9197aac6408"},
- {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0e12c481ad92d129c78f13a2a3662317e46ee7ef96c94fd332e1c29131875b7d"},
- {file = "regex-2024.9.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:16e13a7929791ac1216afde26f712802e3df7bf0360b32e4914dca3ab8baeea5"},
- {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46989629904bad940bbec2106528140a218b4a36bb3042d8406980be1941429c"},
- {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a906ed5e47a0ce5f04b2c981af1c9acf9e8696066900bf03b9d7879a6f679fc8"},
- {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a091b0550b3b0207784a7d6d0f1a00d1d1c8a11699c1a4d93db3fbefc3ad35"},
- {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ddcd9a179c0a6fa8add279a4444015acddcd7f232a49071ae57fa6e278f1f71"},
- {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6b41e1adc61fa347662b09398e31ad446afadff932a24807d3ceb955ed865cc8"},
- {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ced479f601cd2f8ca1fd7b23925a7e0ad512a56d6e9476f79b8f381d9d37090a"},
- {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:635a1d96665f84b292e401c3d62775851aedc31d4f8784117b3c68c4fcd4118d"},
- {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c0256beda696edcf7d97ef16b2a33a8e5a875affd6fa6567b54f7c577b30a137"},
- {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:3ce4f1185db3fbde8ed8aa223fc9620f276c58de8b0d4f8cc86fd1360829edb6"},
- {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:09d77559e80dcc9d24570da3745ab859a9cf91953062e4ab126ba9d5993688ca"},
- {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a22ccefd4db3f12b526eccb129390942fe874a3a9fdbdd24cf55773a1faab1a"},
- {file = "regex-2024.9.11-cp310-cp310-win32.whl", hash = "sha256:f745ec09bc1b0bd15cfc73df6fa4f726dcc26bb16c23a03f9e3367d357eeedd0"},
- {file = "regex-2024.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:01c2acb51f8a7d6494c8c5eafe3d8e06d76563d8a8a4643b37e9b2dd8a2ff623"},
- {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2cce2449e5927a0bf084d346da6cd5eb016b2beca10d0013ab50e3c226ffc0df"},
- {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b37fa423beefa44919e009745ccbf353d8c981516e807995b2bd11c2c77d268"},
- {file = "regex-2024.9.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:64ce2799bd75039b480cc0360907c4fb2f50022f030bf9e7a8705b636e408fad"},
- {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4cc92bb6db56ab0c1cbd17294e14f5e9224f0cc6521167ef388332604e92679"},
- {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d05ac6fa06959c4172eccd99a222e1fbf17b5670c4d596cb1e5cde99600674c4"},
- {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:040562757795eeea356394a7fb13076ad4f99d3c62ab0f8bdfb21f99a1f85664"},
- {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6113c008a7780792efc80f9dfe10ba0cd043cbf8dc9a76ef757850f51b4edc50"},
- {file = "regex-2024.9.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e5fb5f77c8745a60105403a774fe2c1759b71d3e7b4ca237a5e67ad066c7199"},
- {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:54d9ff35d4515debf14bc27f1e3b38bfc453eff3220f5bce159642fa762fe5d4"},
- {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df5cbb1fbc74a8305b6065d4ade43b993be03dbe0f8b30032cced0d7740994bd"},
- {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7fb89ee5d106e4a7a51bce305ac4efb981536301895f7bdcf93ec92ae0d91c7f"},
- {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a738b937d512b30bf75995c0159c0ddf9eec0775c9d72ac0202076c72f24aa96"},
- {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e28f9faeb14b6f23ac55bfbbfd3643f5c7c18ede093977f1df249f73fd22c7b1"},
- {file = "regex-2024.9.11-cp311-cp311-win32.whl", hash = "sha256:18e707ce6c92d7282dfce370cd205098384b8ee21544e7cb29b8aab955b66fa9"},
- {file = "regex-2024.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:313ea15e5ff2a8cbbad96ccef6be638393041b0a7863183c2d31e0c6116688cf"},
- {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b0d0a6c64fcc4ef9c69bd5b3b3626cc3776520a1637d8abaa62b9edc147a58f7"},
- {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:49b0e06786ea663f933f3710a51e9385ce0cba0ea56b67107fd841a55d56a231"},
- {file = "regex-2024.9.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5b513b6997a0b2f10e4fd3a1313568e373926e8c252bd76c960f96fd039cd28d"},
- {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee439691d8c23e76f9802c42a95cfeebf9d47cf4ffd06f18489122dbb0a7ad64"},
- {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8f877c89719d759e52783f7fe6e1c67121076b87b40542966c02de5503ace42"},
- {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23b30c62d0f16827f2ae9f2bb87619bc4fba2044911e2e6c2eb1af0161cdb766"},
- {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85ab7824093d8f10d44330fe1e6493f756f252d145323dd17ab6b48733ff6c0a"},
- {file = "regex-2024.9.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dee5b4810a89447151999428fe096977346cf2f29f4d5e29609d2e19e0199c9"},
- {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:98eeee2f2e63edae2181c886d7911ce502e1292794f4c5ee71e60e23e8d26b5d"},
- {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:57fdd2e0b2694ce6fc2e5ccf189789c3e2962916fb38779d3e3521ff8fe7a822"},
- {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d552c78411f60b1fdaafd117a1fca2f02e562e309223b9d44b7de8be451ec5e0"},
- {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a0b2b80321c2ed3fcf0385ec9e51a12253c50f146fddb2abbb10f033fe3d049a"},
- {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:18406efb2f5a0e57e3a5881cd9354c1512d3bb4f5c45d96d110a66114d84d23a"},
- {file = "regex-2024.9.11-cp312-cp312-win32.whl", hash = "sha256:e464b467f1588e2c42d26814231edecbcfe77f5ac414d92cbf4e7b55b2c2a776"},
- {file = "regex-2024.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:9e8719792ca63c6b8340380352c24dcb8cd7ec49dae36e963742a275dfae6009"},
- {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c157bb447303070f256e084668b702073db99bbb61d44f85d811025fcf38f784"},
- {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4db21ece84dfeefc5d8a3863f101995de646c6cb0536952c321a2650aa202c36"},
- {file = "regex-2024.9.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:220e92a30b426daf23bb67a7962900ed4613589bab80382be09b48896d211e92"},
- {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1ae19e64c14c7ec1995f40bd932448713d3c73509e82d8cd7744dc00e29e86"},
- {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f47cd43a5bfa48f86925fe26fbdd0a488ff15b62468abb5d2a1e092a4fb10e85"},
- {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9d4a76b96f398697fe01117093613166e6aa8195d63f1b4ec3f21ab637632963"},
- {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ea51dcc0835eea2ea31d66456210a4e01a076d820e9039b04ae8d17ac11dee6"},
- {file = "regex-2024.9.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7aaa315101c6567a9a45d2839322c51c8d6e81f67683d529512f5bcfb99c802"},
- {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c57d08ad67aba97af57a7263c2d9006d5c404d721c5f7542f077f109ec2a4a29"},
- {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f8404bf61298bb6f8224bb9176c1424548ee1181130818fcd2cbffddc768bed8"},
- {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dd4490a33eb909ef5078ab20f5f000087afa2a4daa27b4c072ccb3cb3050ad84"},
- {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:eee9130eaad130649fd73e5cd92f60e55708952260ede70da64de420cdcad554"},
- {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6a2644a93da36c784e546de579ec1806bfd2763ef47babc1b03d765fe560c9f8"},
- {file = "regex-2024.9.11-cp313-cp313-win32.whl", hash = "sha256:e997fd30430c57138adc06bba4c7c2968fb13d101e57dd5bb9355bf8ce3fa7e8"},
- {file = "regex-2024.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:042c55879cfeb21a8adacc84ea347721d3d83a159da6acdf1116859e2427c43f"},
- {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:35f4a6f96aa6cb3f2f7247027b07b15a374f0d5b912c0001418d1d55024d5cb4"},
- {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:55b96e7ce3a69a8449a66984c268062fbaa0d8ae437b285428e12797baefce7e"},
- {file = "regex-2024.9.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cb130fccd1a37ed894824b8c046321540263013da72745d755f2d35114b81a60"},
- {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:323c1f04be6b2968944d730e5c2091c8c89767903ecaa135203eec4565ed2b2b"},
- {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be1c8ed48c4c4065ecb19d882a0ce1afe0745dfad8ce48c49586b90a55f02366"},
- {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b5b029322e6e7b94fff16cd120ab35a253236a5f99a79fb04fda7ae71ca20ae8"},
- {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6fff13ef6b5f29221d6904aa816c34701462956aa72a77f1f151a8ec4f56aeb"},
- {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:587d4af3979376652010e400accc30404e6c16b7df574048ab1f581af82065e4"},
- {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:079400a8269544b955ffa9e31f186f01d96829110a3bf79dc338e9910f794fca"},
- {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f9268774428ec173654985ce55fc6caf4c6d11ade0f6f914d48ef4719eb05ebb"},
- {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:23f9985c8784e544d53fc2930fc1ac1a7319f5d5332d228437acc9f418f2f168"},
- {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ae2941333154baff9838e88aa71c1d84f4438189ecc6021a12c7573728b5838e"},
- {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e93f1c331ca8e86fe877a48ad64e77882c0c4da0097f2212873a69bbfea95d0c"},
- {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:846bc79ee753acf93aef4184c040d709940c9d001029ceb7b7a52747b80ed2dd"},
- {file = "regex-2024.9.11-cp38-cp38-win32.whl", hash = "sha256:c94bb0a9f1db10a1d16c00880bdebd5f9faf267273b8f5bd1878126e0fbde771"},
- {file = "regex-2024.9.11-cp38-cp38-win_amd64.whl", hash = "sha256:2b08fce89fbd45664d3df6ad93e554b6c16933ffa9d55cb7e01182baaf971508"},
- {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:07f45f287469039ffc2c53caf6803cd506eb5f5f637f1d4acb37a738f71dd066"},
- {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4838e24ee015101d9f901988001038f7f0d90dc0c3b115541a1365fb439add62"},
- {file = "regex-2024.9.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6edd623bae6a737f10ce853ea076f56f507fd7726bee96a41ee3d68d347e4d16"},
- {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c69ada171c2d0e97a4b5aa78fbb835e0ffbb6b13fc5da968c09811346564f0d3"},
- {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02087ea0a03b4af1ed6ebab2c54d7118127fee8d71b26398e8e4b05b78963199"},
- {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69dee6a020693d12a3cf892aba4808fe168d2a4cef368eb9bf74f5398bfd4ee8"},
- {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:297f54910247508e6e5cae669f2bc308985c60540a4edd1c77203ef19bfa63ca"},
- {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ecea58b43a67b1b79805f1a0255730edaf5191ecef84dbc4cc85eb30bc8b63b9"},
- {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eab4bb380f15e189d1313195b062a6aa908f5bd687a0ceccd47c8211e9cf0d4a"},
- {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0cbff728659ce4bbf4c30b2a1be040faafaa9eca6ecde40aaff86f7889f4ab39"},
- {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:54c4a097b8bc5bb0dfc83ae498061d53ad7b5762e00f4adaa23bee22b012e6ba"},
- {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:73d6d2f64f4d894c96626a75578b0bf7d9e56dcda8c3d037a2118fdfe9b1c664"},
- {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:e53b5fbab5d675aec9f0c501274c467c0f9a5d23696cfc94247e1fb56501ed89"},
- {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0ffbcf9221e04502fc35e54d1ce9567541979c3fdfb93d2c554f0ca583a19b35"},
- {file = "regex-2024.9.11-cp39-cp39-win32.whl", hash = "sha256:e4c22e1ac1f1ec1e09f72e6c44d8f2244173db7eb9629cc3a346a8d7ccc31142"},
- {file = "regex-2024.9.11-cp39-cp39-win_amd64.whl", hash = "sha256:faa3c142464efec496967359ca99696c896c591c56c53506bac1ad465f66e919"},
- {file = "regex-2024.9.11.tar.gz", hash = "sha256:6c188c307e8433bcb63dc1915022deb553b4203a70722fc542c363bf120a01fd"},
+ {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"},
+ {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"},
+ {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"},
+ {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"},
+ {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"},
+ {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"},
+ {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"},
+ {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"},
+ {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"},
+ {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"},
+ {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"},
+ {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"},
+ {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"},
+ {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"},
+ {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"},
+ {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"},
+ {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"},
+ {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"},
+ {file = "regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"},
+ {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"},
+ {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"},
+ {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"},
+ {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"},
+ {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"},
+ {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"},
+ {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"},
+ {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"},
+ {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"},
+ {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"},
+ {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"},
+ {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"},
+ {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"},
+ {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"},
+ {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"},
+ {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"},
+ {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"},
+ {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"},
+ {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"},
+ {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"},
+ {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"},
+ {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"},
+ {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"},
+ {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"},
+ {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"},
+ {file = "regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"},
+ {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"},
+ {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"},
+ {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"},
+ {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"},
+ {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"},
+ {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"},
+ {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"},
+ {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"},
+ {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"},
+ {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"},
+ {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"},
+ {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"},
+ {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"},
+ {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"},
+ {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"},
+ {file = "regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"},
+ {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"},
+ {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"},
+ {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"},
+ {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"},
+ {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"},
+ {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"},
+ {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"},
+ {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"},
+ {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"},
+ {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"},
+ {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"},
+ {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"},
+ {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"},
+ {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"},
+ {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"},
+ {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"},
+ {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"},
+ {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"},
+ {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"},
+ {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"},
+ {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"},
+ {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"},
+ {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"},
+ {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"},
+ {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"},
+ {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"},
+ {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"},
+ {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"},
+ {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"},
+ {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"},
+ {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"},
+ {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"},
+ {file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"},
]
[[package]]
@@ -7264,114 +7340,101 @@ files = [
[[package]]
name = "rpds-py"
-version = "0.20.1"
+version = "0.21.0"
description = "Python bindings to Rust's persistent data structures (rpds)"
optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
files = [
- {file = "rpds_py-0.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a649dfd735fff086e8a9d0503a9f0c7d01b7912a333c7ae77e1515c08c146dad"},
- {file = "rpds_py-0.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f16bc1334853e91ddaaa1217045dd7be166170beec337576818461268a3de67f"},
- {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14511a539afee6f9ab492b543060c7491c99924314977a55c98bfa2ee29ce78c"},
- {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3ccb8ac2d3c71cda472b75af42818981bdacf48d2e21c36331b50b4f16930163"},
- {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c142b88039b92e7e0cb2552e8967077e3179b22359e945574f5e2764c3953dcf"},
- {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f19169781dddae7478a32301b499b2858bc52fc45a112955e798ee307e294977"},
- {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13c56de6518e14b9bf6edde23c4c39dac5b48dcf04160ea7bce8fca8397cdf86"},
- {file = "rpds_py-0.20.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:925d176a549f4832c6f69fa6026071294ab5910e82a0fe6c6228fce17b0706bd"},
- {file = "rpds_py-0.20.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:78f0b6877bfce7a3d1ff150391354a410c55d3cdce386f862926a4958ad5ab7e"},
- {file = "rpds_py-0.20.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3dd645e2b0dcb0fd05bf58e2e54c13875847687d0b71941ad2e757e5d89d4356"},
- {file = "rpds_py-0.20.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4f676e21db2f8c72ff0936f895271e7a700aa1f8d31b40e4e43442ba94973899"},
- {file = "rpds_py-0.20.1-cp310-none-win32.whl", hash = "sha256:648386ddd1e19b4a6abab69139b002bc49ebf065b596119f8f37c38e9ecee8ff"},
- {file = "rpds_py-0.20.1-cp310-none-win_amd64.whl", hash = "sha256:d9ecb51120de61e4604650666d1f2b68444d46ae18fd492245a08f53ad2b7711"},
- {file = "rpds_py-0.20.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:762703bdd2b30983c1d9e62b4c88664df4a8a4d5ec0e9253b0231171f18f6d75"},
- {file = "rpds_py-0.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0b581f47257a9fce535c4567782a8976002d6b8afa2c39ff616edf87cbeff712"},
- {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:842c19a6ce894493563c3bd00d81d5100e8e57d70209e84d5491940fdb8b9e3a"},
- {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42cbde7789f5c0bcd6816cb29808e36c01b960fb5d29f11e052215aa85497c93"},
- {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c8e9340ce5a52f95fa7d3b552b35c7e8f3874d74a03a8a69279fd5fca5dc751"},
- {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ba6f89cac95c0900d932c9efb7f0fb6ca47f6687feec41abcb1bd5e2bd45535"},
- {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a916087371afd9648e1962e67403c53f9c49ca47b9680adbeef79da3a7811b0"},
- {file = "rpds_py-0.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:200a23239781f46149e6a415f1e870c5ef1e712939fe8fa63035cd053ac2638e"},
- {file = "rpds_py-0.20.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:58b1d5dd591973d426cbb2da5e27ba0339209832b2f3315928c9790e13f159e8"},
- {file = "rpds_py-0.20.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6b73c67850ca7cae0f6c56f71e356d7e9fa25958d3e18a64927c2d930859b8e4"},
- {file = "rpds_py-0.20.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d8761c3c891cc51e90bc9926d6d2f59b27beaf86c74622c8979380a29cc23ac3"},
- {file = "rpds_py-0.20.1-cp311-none-win32.whl", hash = "sha256:cd945871335a639275eee904caef90041568ce3b42f402c6959b460d25ae8732"},
- {file = "rpds_py-0.20.1-cp311-none-win_amd64.whl", hash = "sha256:7e21b7031e17c6b0e445f42ccc77f79a97e2687023c5746bfb7a9e45e0921b84"},
- {file = "rpds_py-0.20.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:36785be22066966a27348444b40389f8444671630063edfb1a2eb04318721e17"},
- {file = "rpds_py-0.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:142c0a5124d9bd0e2976089484af5c74f47bd3298f2ed651ef54ea728d2ea42c"},
- {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dbddc10776ca7ebf2a299c41a4dde8ea0d8e3547bfd731cb87af2e8f5bf8962d"},
- {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15a842bb369e00295392e7ce192de9dcbf136954614124a667f9f9f17d6a216f"},
- {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be5ef2f1fc586a7372bfc355986226484e06d1dc4f9402539872c8bb99e34b01"},
- {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbcf360c9e3399b056a238523146ea77eeb2a596ce263b8814c900263e46031a"},
- {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecd27a66740ffd621d20b9a2f2b5ee4129a56e27bfb9458a3bcc2e45794c96cb"},
- {file = "rpds_py-0.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0b937b2a1988f184a3e9e577adaa8aede21ec0b38320d6009e02bd026db04fa"},
- {file = "rpds_py-0.20.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6889469bfdc1eddf489729b471303739bf04555bb151fe8875931f8564309afc"},
- {file = "rpds_py-0.20.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:19b73643c802f4eaf13d97f7855d0fb527fbc92ab7013c4ad0e13a6ae0ed23bd"},
- {file = "rpds_py-0.20.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3c6afcf2338e7f374e8edc765c79fbcb4061d02b15dd5f8f314a4af2bdc7feb5"},
- {file = "rpds_py-0.20.1-cp312-none-win32.whl", hash = "sha256:dc73505153798c6f74854aba69cc75953888cf9866465196889c7cdd351e720c"},
- {file = "rpds_py-0.20.1-cp312-none-win_amd64.whl", hash = "sha256:8bbe951244a838a51289ee53a6bae3a07f26d4e179b96fc7ddd3301caf0518eb"},
- {file = "rpds_py-0.20.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6ca91093a4a8da4afae7fe6a222c3b53ee4eef433ebfee4d54978a103435159e"},
- {file = "rpds_py-0.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b9c2fe36d1f758b28121bef29ed1dee9b7a2453e997528e7d1ac99b94892527c"},
- {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f009c69bc8c53db5dfab72ac760895dc1f2bc1b62ab7408b253c8d1ec52459fc"},
- {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6740a3e8d43a32629bb9b009017ea5b9e713b7210ba48ac8d4cb6d99d86c8ee8"},
- {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:32b922e13d4c0080d03e7b62991ad7f5007d9cd74e239c4b16bc85ae8b70252d"},
- {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe00a9057d100e69b4ae4a094203a708d65b0f345ed546fdef86498bf5390982"},
- {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49fe9b04b6fa685bd39237d45fad89ba19e9163a1ccaa16611a812e682913496"},
- {file = "rpds_py-0.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aa7ac11e294304e615b43f8c441fee5d40094275ed7311f3420d805fde9b07b4"},
- {file = "rpds_py-0.20.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aa97af1558a9bef4025f8f5d8c60d712e0a3b13a2fe875511defc6ee77a1ab7"},
- {file = "rpds_py-0.20.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:483b29f6f7ffa6af845107d4efe2e3fa8fb2693de8657bc1849f674296ff6a5a"},
- {file = "rpds_py-0.20.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:37fe0f12aebb6a0e3e17bb4cd356b1286d2d18d2e93b2d39fe647138458b4bcb"},
- {file = "rpds_py-0.20.1-cp313-none-win32.whl", hash = "sha256:a624cc00ef2158e04188df5e3016385b9353638139a06fb77057b3498f794782"},
- {file = "rpds_py-0.20.1-cp313-none-win_amd64.whl", hash = "sha256:b71b8666eeea69d6363248822078c075bac6ed135faa9216aa85f295ff009b1e"},
- {file = "rpds_py-0.20.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:5b48e790e0355865197ad0aca8cde3d8ede347831e1959e158369eb3493d2191"},
- {file = "rpds_py-0.20.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3e310838a5801795207c66c73ea903deda321e6146d6f282e85fa7e3e4854804"},
- {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249280b870e6a42c0d972339e9cc22ee98730a99cd7f2f727549af80dd5a963"},
- {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e79059d67bea28b53d255c1437b25391653263f0e69cd7dec170d778fdbca95e"},
- {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b431c777c9653e569986ecf69ff4a5dba281cded16043d348bf9ba505486f36"},
- {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da584ff96ec95e97925174eb8237e32f626e7a1a97888cdd27ee2f1f24dd0ad8"},
- {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02a0629ec053fc013808a85178524e3cb63a61dbc35b22499870194a63578fb9"},
- {file = "rpds_py-0.20.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fbf15aff64a163db29a91ed0868af181d6f68ec1a3a7d5afcfe4501252840bad"},
- {file = "rpds_py-0.20.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:07924c1b938798797d60c6308fa8ad3b3f0201802f82e4a2c41bb3fafb44cc28"},
- {file = "rpds_py-0.20.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:4a5a844f68776a7715ecb30843b453f07ac89bad393431efbf7accca3ef599c1"},
- {file = "rpds_py-0.20.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:518d2ca43c358929bf08f9079b617f1c2ca6e8848f83c1225c88caeac46e6cbc"},
- {file = "rpds_py-0.20.1-cp38-none-win32.whl", hash = "sha256:3aea7eed3e55119635a74bbeb80b35e776bafccb70d97e8ff838816c124539f1"},
- {file = "rpds_py-0.20.1-cp38-none-win_amd64.whl", hash = "sha256:7dca7081e9a0c3b6490a145593f6fe3173a94197f2cb9891183ef75e9d64c425"},
- {file = "rpds_py-0.20.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b41b6321805c472f66990c2849e152aff7bc359eb92f781e3f606609eac877ad"},
- {file = "rpds_py-0.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a90c373ea2975519b58dece25853dbcb9779b05cc46b4819cb1917e3b3215b6"},
- {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16d4477bcb9fbbd7b5b0e4a5d9b493e42026c0bf1f06f723a9353f5153e75d30"},
- {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:84b8382a90539910b53a6307f7c35697bc7e6ffb25d9c1d4e998a13e842a5e83"},
- {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4888e117dd41b9d34194d9e31631af70d3d526efc363085e3089ab1a62c32ed1"},
- {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5265505b3d61a0f56618c9b941dc54dc334dc6e660f1592d112cd103d914a6db"},
- {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e75ba609dba23f2c95b776efb9dd3f0b78a76a151e96f96cc5b6b1b0004de66f"},
- {file = "rpds_py-0.20.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1791ff70bc975b098fe6ecf04356a10e9e2bd7dc21fa7351c1742fdeb9b4966f"},
- {file = "rpds_py-0.20.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d126b52e4a473d40232ec2052a8b232270ed1f8c9571aaf33f73a14cc298c24f"},
- {file = "rpds_py-0.20.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c14937af98c4cc362a1d4374806204dd51b1e12dded1ae30645c298e5a5c4cb1"},
- {file = "rpds_py-0.20.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3d089d0b88996df627693639d123c8158cff41c0651f646cd8fd292c7da90eaf"},
- {file = "rpds_py-0.20.1-cp39-none-win32.whl", hash = "sha256:653647b8838cf83b2e7e6a0364f49af96deec64d2a6578324db58380cff82aca"},
- {file = "rpds_py-0.20.1-cp39-none-win_amd64.whl", hash = "sha256:fa41a64ac5b08b292906e248549ab48b69c5428f3987b09689ab2441f267d04d"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7a07ced2b22f0cf0b55a6a510078174c31b6d8544f3bc00c2bcee52b3d613f74"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:68cb0a499f2c4a088fd2f521453e22ed3527154136a855c62e148b7883b99f9a"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa3060d885657abc549b2a0f8e1b79699290e5d83845141717c6c90c2df38311"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95f3b65d2392e1c5cec27cff08fdc0080270d5a1a4b2ea1d51d5f4a2620ff08d"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2cc3712a4b0b76a1d45a9302dd2f53ff339614b1c29603a911318f2357b04dd2"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d4eea0761e37485c9b81400437adb11c40e13ef513375bbd6973e34100aeb06"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f5179583d7a6cdb981151dd349786cbc318bab54963a192692d945dd3f6435d"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2fbb0ffc754490aff6dabbf28064be47f0f9ca0b9755976f945214965b3ace7e"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:a94e52537a0e0a85429eda9e49f272ada715506d3b2431f64b8a3e34eb5f3e75"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:92b68b79c0da2a980b1c4197e56ac3dd0c8a149b4603747c4378914a68706979"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:93da1d3db08a827eda74356f9f58884adb254e59b6664f64cc04cdff2cc19b0d"},
- {file = "rpds_py-0.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:754bbed1a4ca48479e9d4182a561d001bbf81543876cdded6f695ec3d465846b"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ca449520e7484534a2a44faf629362cae62b660601432d04c482283c47eaebab"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:9c4cb04a16b0f199a8c9bf807269b2f63b7b5b11425e4a6bd44bd6961d28282c"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb63804105143c7e24cee7db89e37cb3f3941f8e80c4379a0b355c52a52b6780"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:55cd1fa4ecfa6d9f14fbd97ac24803e6f73e897c738f771a9fe038f2f11ff07c"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f8f741b6292c86059ed175d80eefa80997125b7c478fb8769fd9ac8943a16c0"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fc212779bf8411667234b3cdd34d53de6c2b8b8b958e1e12cb473a5f367c338"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ad56edabcdb428c2e33bbf24f255fe2b43253b7d13a2cdbf05de955217313e6"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a3a1e9ee9728b2c1734f65d6a1d376c6f2f6fdcc13bb007a08cc4b1ff576dc5"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:e13de156137b7095442b288e72f33503a469aa1980ed856b43c353ac86390519"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:07f59760ef99f31422c49038964b31c4dfcfeb5d2384ebfc71058a7c9adae2d2"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:59240685e7da61fb78f65a9f07f8108e36a83317c53f7b276b4175dc44151684"},
- {file = "rpds_py-0.20.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:83cba698cfb3c2c5a7c3c6bac12fe6c6a51aae69513726be6411076185a8b24a"},
- {file = "rpds_py-0.20.1.tar.gz", hash = "sha256:e1791c4aabd117653530dccd24108fa03cc6baf21f58b950d0a73c3b3b29a350"},
+ {file = "rpds_py-0.21.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a017f813f24b9df929674d0332a374d40d7f0162b326562daae8066b502d0590"},
+ {file = "rpds_py-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:20cc1ed0bcc86d8e1a7e968cce15be45178fd16e2ff656a243145e0b439bd250"},
+ {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad116dda078d0bc4886cb7840e19811562acdc7a8e296ea6ec37e70326c1b41c"},
+ {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:808f1ac7cf3b44f81c9475475ceb221f982ef548e44e024ad5f9e7060649540e"},
+ {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de552f4a1916e520f2703ec474d2b4d3f86d41f353e7680b597512ffe7eac5d0"},
+ {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:efec946f331349dfc4ae9d0e034c263ddde19414fe5128580f512619abed05f1"},
+ {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b80b4690bbff51a034bfde9c9f6bf9357f0a8c61f548942b80f7b66356508bf5"},
+ {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:085ed25baac88953d4283e5b5bd094b155075bb40d07c29c4f073e10623f9f2e"},
+ {file = "rpds_py-0.21.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:daa8efac2a1273eed2354397a51216ae1e198ecbce9036fba4e7610b308b6153"},
+ {file = "rpds_py-0.21.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:95a5bad1ac8a5c77b4e658671642e4af3707f095d2b78a1fdd08af0dfb647624"},
+ {file = "rpds_py-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3e53861b29a13d5b70116ea4230b5f0f3547b2c222c5daa090eb7c9c82d7f664"},
+ {file = "rpds_py-0.21.0-cp310-none-win32.whl", hash = "sha256:ea3a6ac4d74820c98fcc9da4a57847ad2cc36475a8bd9683f32ab6d47a2bd682"},
+ {file = "rpds_py-0.21.0-cp310-none-win_amd64.whl", hash = "sha256:b8f107395f2f1d151181880b69a2869c69e87ec079c49c0016ab96860b6acbe5"},
+ {file = "rpds_py-0.21.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5555db3e618a77034954b9dc547eae94166391a98eb867905ec8fcbce1308d95"},
+ {file = "rpds_py-0.21.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:97ef67d9bbc3e15584c2f3c74bcf064af36336c10d2e21a2131e123ce0f924c9"},
+ {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ab2c2a26d2f69cdf833174f4d9d86118edc781ad9a8fa13970b527bf8236027"},
+ {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4e8921a259f54bfbc755c5bbd60c82bb2339ae0324163f32868f63f0ebb873d9"},
+ {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a7ff941004d74d55a47f916afc38494bd1cfd4b53c482b77c03147c91ac0ac3"},
+ {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5145282a7cd2ac16ea0dc46b82167754d5e103a05614b724457cffe614f25bd8"},
+ {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de609a6f1b682f70bb7163da745ee815d8f230d97276db049ab447767466a09d"},
+ {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40c91c6e34cf016fa8e6b59d75e3dbe354830777fcfd74c58b279dceb7975b75"},
+ {file = "rpds_py-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d2132377f9deef0c4db89e65e8bb28644ff75a18df5293e132a8d67748397b9f"},
+ {file = "rpds_py-0.21.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0a9e0759e7be10109645a9fddaaad0619d58c9bf30a3f248a2ea57a7c417173a"},
+ {file = "rpds_py-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9e20da3957bdf7824afdd4b6eeb29510e83e026473e04952dca565170cd1ecc8"},
+ {file = "rpds_py-0.21.0-cp311-none-win32.whl", hash = "sha256:f71009b0d5e94c0e86533c0b27ed7cacc1239cb51c178fd239c3cfefefb0400a"},
+ {file = "rpds_py-0.21.0-cp311-none-win_amd64.whl", hash = "sha256:e168afe6bf6ab7ab46c8c375606298784ecbe3ba31c0980b7dcbb9631dcba97e"},
+ {file = "rpds_py-0.21.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:30b912c965b2aa76ba5168fd610087bad7fcde47f0a8367ee8f1876086ee6d1d"},
+ {file = "rpds_py-0.21.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ca9989d5d9b1b300bc18e1801c67b9f6d2c66b8fd9621b36072ed1df2c977f72"},
+ {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f54e7106f0001244a5f4cf810ba8d3f9c542e2730821b16e969d6887b664266"},
+ {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fed5dfefdf384d6fe975cc026886aece4f292feaf69d0eeb716cfd3c5a4dd8be"},
+ {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:590ef88db231c9c1eece44dcfefd7515d8bf0d986d64d0caf06a81998a9e8cab"},
+ {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f983e4c2f603c95dde63df633eec42955508eefd8d0f0e6d236d31a044c882d7"},
+ {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b229ce052ddf1a01c67d68166c19cb004fb3612424921b81c46e7ea7ccf7c3bf"},
+ {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ebf64e281a06c904a7636781d2e973d1f0926a5b8b480ac658dc0f556e7779f4"},
+ {file = "rpds_py-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:998a8080c4495e4f72132f3d66ff91f5997d799e86cec6ee05342f8f3cda7dca"},
+ {file = "rpds_py-0.21.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:98486337f7b4f3c324ab402e83453e25bb844f44418c066623db88e4c56b7c7b"},
+ {file = "rpds_py-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a78d8b634c9df7f8d175451cfeac3810a702ccb85f98ec95797fa98b942cea11"},
+ {file = "rpds_py-0.21.0-cp312-none-win32.whl", hash = "sha256:a58ce66847711c4aa2ecfcfaff04cb0327f907fead8945ffc47d9407f41ff952"},
+ {file = "rpds_py-0.21.0-cp312-none-win_amd64.whl", hash = "sha256:e860f065cc4ea6f256d6f411aba4b1251255366e48e972f8a347cf88077b24fd"},
+ {file = "rpds_py-0.21.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ee4eafd77cc98d355a0d02f263efc0d3ae3ce4a7c24740010a8b4012bbb24937"},
+ {file = "rpds_py-0.21.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:688c93b77e468d72579351a84b95f976bd7b3e84aa6686be6497045ba84be560"},
+ {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c38dbf31c57032667dd5a2f0568ccde66e868e8f78d5a0d27dcc56d70f3fcd3b"},
+ {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2d6129137f43f7fa02d41542ffff4871d4aefa724a5fe38e2c31a4e0fd343fb0"},
+ {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:520ed8b99b0bf86a176271f6fe23024323862ac674b1ce5b02a72bfeff3fff44"},
+ {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaeb25ccfb9b9014a10eaf70904ebf3f79faaa8e60e99e19eef9f478651b9b74"},
+ {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af04ac89c738e0f0f1b913918024c3eab6e3ace989518ea838807177d38a2e94"},
+ {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b9b76e2afd585803c53c5b29e992ecd183f68285b62fe2668383a18e74abe7a3"},
+ {file = "rpds_py-0.21.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5afb5efde74c54724e1a01118c6e5c15e54e642c42a1ba588ab1f03544ac8c7a"},
+ {file = "rpds_py-0.21.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:52c041802a6efa625ea18027a0723676a778869481d16803481ef6cc02ea8cb3"},
+ {file = "rpds_py-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee1e4fc267b437bb89990b2f2abf6c25765b89b72dd4a11e21934df449e0c976"},
+ {file = "rpds_py-0.21.0-cp313-none-win32.whl", hash = "sha256:0c025820b78817db6a76413fff6866790786c38f95ea3f3d3c93dbb73b632202"},
+ {file = "rpds_py-0.21.0-cp313-none-win_amd64.whl", hash = "sha256:320c808df533695326610a1b6a0a6e98f033e49de55d7dc36a13c8a30cfa756e"},
+ {file = "rpds_py-0.21.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:2c51d99c30091f72a3c5d126fad26236c3f75716b8b5e5cf8effb18889ced928"},
+ {file = "rpds_py-0.21.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cbd7504a10b0955ea287114f003b7ad62330c9e65ba012c6223dba646f6ffd05"},
+ {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6dcc4949be728ede49e6244eabd04064336012b37f5c2200e8ec8eb2988b209c"},
+ {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f414da5c51bf350e4b7960644617c130140423882305f7574b6cf65a3081cecb"},
+ {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9afe42102b40007f588666bc7de82451e10c6788f6f70984629db193849dced1"},
+ {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b929c2bb6e29ab31f12a1117c39f7e6d6450419ab7464a4ea9b0b417174f044"},
+ {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8404b3717da03cbf773a1d275d01fec84ea007754ed380f63dfc24fb76ce4592"},
+ {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e12bb09678f38b7597b8346983d2323a6482dcd59e423d9448108c1be37cac9d"},
+ {file = "rpds_py-0.21.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:58a0e345be4b18e6b8501d3b0aa540dad90caeed814c515e5206bb2ec26736fd"},
+ {file = "rpds_py-0.21.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c3761f62fcfccf0864cc4665b6e7c3f0c626f0380b41b8bd1ce322103fa3ef87"},
+ {file = "rpds_py-0.21.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c2b2f71c6ad6c2e4fc9ed9401080badd1469fa9889657ec3abea42a3d6b2e1ed"},
+ {file = "rpds_py-0.21.0-cp39-none-win32.whl", hash = "sha256:b21747f79f360e790525e6f6438c7569ddbfb1b3197b9e65043f25c3c9b489d8"},
+ {file = "rpds_py-0.21.0-cp39-none-win_amd64.whl", hash = "sha256:0626238a43152918f9e72ede9a3b6ccc9e299adc8ade0d67c5e142d564c9a83d"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6b4ef7725386dc0762857097f6b7266a6cdd62bfd209664da6712cb26acef035"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6bc0e697d4d79ab1aacbf20ee5f0df80359ecf55db33ff41481cf3e24f206919"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da52d62a96e61c1c444f3998c434e8b263c384f6d68aca8274d2e08d1906325c"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:98e4fe5db40db87ce1c65031463a760ec7906ab230ad2249b4572c2fc3ef1f9f"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30bdc973f10d28e0337f71d202ff29345320f8bc49a31c90e6c257e1ccef4333"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:faa5e8496c530f9c71f2b4e1c49758b06e5f4055e17144906245c99fa6d45356"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32eb88c30b6a4f0605508023b7141d043a79b14acb3b969aa0b4f99b25bc7d4a"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a89a8ce9e4e75aeb7fa5d8ad0f3fecdee813802592f4f46a15754dcb2fd6b061"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:241e6c125568493f553c3d0fdbb38c74babf54b45cef86439d4cd97ff8feb34d"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:3b766a9f57663396e4f34f5140b3595b233a7b146e94777b97a8413a1da1be18"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:af4a644bf890f56e41e74be7d34e9511e4954894d544ec6b8efe1e21a1a8da6c"},
+ {file = "rpds_py-0.21.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3e30a69a706e8ea20444b98a49f386c17b26f860aa9245329bab0851ed100677"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:031819f906bb146561af051c7cef4ba2003d28cff07efacef59da973ff7969ba"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b876f2bc27ab5954e2fd88890c071bd0ed18b9c50f6ec3de3c50a5ece612f7a6"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc5695c321e518d9f03b7ea6abb5ea3af4567766f9852ad1560f501b17588c7b"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b4de1da871b5c0fd5537b26a6fc6814c3cc05cabe0c941db6e9044ffbb12f04a"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:878f6fea96621fda5303a2867887686d7a198d9e0f8a40be100a63f5d60c88c9"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8eeec67590e94189f434c6d11c426892e396ae59e4801d17a93ac96b8c02a6c"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ff2eba7f6c0cb523d7e9cff0903f2fe1feff8f0b2ceb6bd71c0e20a4dcee271"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a429b99337062877d7875e4ff1a51fe788424d522bd64a8c0a20ef3021fdb6ed"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d167e4dbbdac48bd58893c7e446684ad5d425b407f9336e04ab52e8b9194e2ed"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:4eb2de8a147ffe0626bfdc275fc6563aa7bf4b6db59cf0d44f0ccd6ca625a24e"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e78868e98f34f34a88e23ee9ccaeeec460e4eaf6db16d51d7a9b883e5e785a5e"},
+ {file = "rpds_py-0.21.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4991ca61656e3160cdaca4851151fd3f4a92e9eba5c7a530ab030d6aee96ec89"},
+ {file = "rpds_py-0.21.0.tar.gz", hash = "sha256:ed6378c9d66d0de903763e7706383d60c33829581f0adff47b6535f1802fa6db"},
]
[[package]]
@@ -7788,18 +7851,23 @@ train = ["accelerate (>=0.20.3)", "datasets"]
[[package]]
name = "setuptools"
-version = "70.0.0"
+version = "75.2.0"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
optional = false
python-versions = ">=3.8"
files = [
- {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"},
- {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"},
+ {file = "setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8"},
+ {file = "setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec"},
]
[package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
-testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"]
+core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"]
+cover = ["pytest-cov"]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
+enabler = ["pytest-enabler (>=2.2)"]
+test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
+type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"]
[[package]]
name = "sgmllib3k"
@@ -8115,6 +8183,20 @@ lint = ["mypy", "ruff (==0.5.5)", "types-docutils"]
standalone = ["Sphinx (>=5)"]
test = ["pytest"]
+[[package]]
+name = "sqlglot"
+version = "11.7.1"
+description = "An easily customizable SQL parser and transpiler"
+optional = false
+python-versions = "*"
+files = [
+ {file = "sqlglot-11.7.1-py3-none-any.whl", hash = "sha256:1ed7f5965eb4c917821f8a324af6586432d0019628c2e067958d1470637e1398"},
+ {file = "sqlglot-11.7.1.tar.gz", hash = "sha256:72624837266f3760b17ea9d20abf86835735f17d7eb7351c919b2188dc7905aa"},
+]
+
+[package.extras]
+dev = ["autoflake", "black", "duckdb", "isort", "mypy (>=0.990)", "pandas", "pdoc", "pre-commit", "pyspark", "python-dateutil"]
+
[[package]]
name = "statsmodels"
version = "0.14.4"
@@ -8266,6 +8348,24 @@ files = [
{file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"},
]
+[[package]]
+name = "thrift"
+version = "0.20.0"
+description = "Python bindings for the Apache Thrift RPC system"
+optional = false
+python-versions = "*"
+files = [
+ {file = "thrift-0.20.0.tar.gz", hash = "sha256:4dd662eadf6b8aebe8a41729527bd69adf6ceaa2a8681cbef64d1273b3e8feba"},
+]
+
+[package.dependencies]
+six = ">=1.7.2"
+
+[package.extras]
+all = ["tornado (>=4.0)", "twisted"]
+tornado = ["tornado (>=4.0)"]
+twisted = ["twisted"]
+
[[package]]
name = "tika"
version = "2.6.0"
@@ -8519,13 +8619,13 @@ optree = ["optree (>=0.9.1)"]
[[package]]
name = "tqdm"
-version = "4.66.6"
+version = "4.67.0"
description = "Fast, Extensible Progress Meter"
optional = false
python-versions = ">=3.7"
files = [
- {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"},
- {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"},
+ {file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"},
+ {file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"},
]
[package.dependencies]
@@ -8533,6 +8633,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""}
[package.extras]
dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"]
+discord = ["requests"]
notebook = ["ipywidgets (>=6)"]
slack = ["slack-sdk"]
telegram = ["requests"]
@@ -8679,13 +8780,13 @@ tutorials = ["matplotlib", "pandas", "tabulate", "torch"]
[[package]]
name = "typer"
-version = "0.12.5"
+version = "0.13.0"
description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
optional = false
python-versions = ">=3.7"
files = [
- {file = "typer-0.12.5-py3-none-any.whl", hash = "sha256:62fe4e471711b147e3365034133904df3e235698399bc4de2b36c8579298d52b"},
- {file = "typer-0.12.5.tar.gz", hash = "sha256:f592f089bedcc8ec1b974125d64851029c3b1af145f04aca64d69410f0c9b722"},
+ {file = "typer-0.13.0-py3-none-any.whl", hash = "sha256:d85fe0b777b2517cc99c8055ed735452f2659cd45e451507c76f48ce5c1d00e2"},
+ {file = "typer-0.13.0.tar.gz", hash = "sha256:f1c7198347939361eec90139ffa0fd8b3df3a2259d5852a0f7400e476d95985c"},
]
[package.dependencies]
@@ -9459,13 +9560,13 @@ pyjwt = ">=2.8.0,<2.9.0"
[[package]]
name = "zipp"
-version = "3.20.2"
+version = "3.21.0"
description = "Backport of pathlib-compatible object wrapper for zip files"
optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
files = [
- {file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"},
- {file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"},
+ {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"},
+ {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"},
]
[package.extras]
@@ -9516,4 +9617,4 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = ">=3.11,<3.13"
-content-hash = "20e083f02ca704eafe149d84e1ad5d1c53cf19a413017c2ca3044117caf1e02e"
+content-hash = "ddd01c5c6f54ee067adc0e1fc10c04f772bde2685a3a42421ba23dfac8000369"
diff --git a/pyproject.toml b/pyproject.toml
index 3cb576492a3..afcfe777763 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,22 +46,23 @@ hanziconv = "0.3.2"
html-text = "0.6.2"
httpx = "0.27.0"
huggingface-hub = "^0.25.0"
-infinity-emb = "0.0.51"
+infinity-sdk = "0.5.0.dev2"
+infinity-emb = "^0.0.66"
itsdangerous = "2.1.2"
markdown = "3.6"
markdown-to-json = "2.1.1"
minio = "7.2.4"
mistralai = "0.4.2"
nltk = "3.9.1"
-numpy = "1.26.4"
+numpy = "^1.26.0"
ollama = "0.2.1"
onnxruntime = "1.19.2"
openai = "1.45.0"
opencv-python = "4.10.0.84"
opencv-python-headless = "4.10.0.84"
-openpyxl = "3.1.2"
+openpyxl = "^3.1.0"
ormsgpack = "1.5.0"
-pandas = "2.2.2"
+pandas = "^2.2.0"
pdfplumber = "0.10.4"
peewee = "3.17.1"
pillow = "10.4.0"
@@ -70,7 +71,7 @@ psycopg2-binary = "2.9.9"
pyclipper = "1.3.0.post5"
pycryptodomex = "3.20.0"
pypdf = "^5.0.0"
-pytest = "8.2.2"
+pytest = "^8.3.0"
python-dotenv = "1.0.1"
python-dateutil = "2.8.2"
python-pptx = "^1.0.2"
@@ -86,7 +87,7 @@ ruamel-base = "1.0.0"
scholarly = "1.7.11"
scikit-learn = "1.5.0"
selenium = "4.22.0"
-setuptools = "70.0.0"
+setuptools = "^75.2.0"
shapely = "2.0.5"
six = "1.16.0"
strenum = "0.4.15"
@@ -115,6 +116,7 @@ pymysql = "^1.1.1"
mini-racer = "^0.12.4"
pyicu = "^2.13.1"
flasgger = "^0.9.7.1"
+polars = "^1.9.0"
[tool.poetry.group.full]
diff --git a/rag/app/presentation.py b/rag/app/presentation.py
index af8c59387c6..54d897616f9 100644
--- a/rag/app/presentation.py
+++ b/rag/app/presentation.py
@@ -20,6 +20,7 @@
from rag.nlp import rag_tokenizer
from deepdoc.parser import PdfParser, PptParser, PlainParser
from PyPDF2 import PdfReader as pdf2_read
+import json
class Ppt(PptParser):
@@ -107,9 +108,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
d = copy.deepcopy(doc)
pn += from_page
d["image"] = img
- d["page_num_int"] = [pn + 1]
- d["top_int"] = [0]
- d["position_int"] = [(pn + 1, 0, img.size[0], 0, img.size[1])]
+ d["page_num_list"] = json.dumps([pn + 1])
+ d["top_list"] = json.dumps([0])
+ d["position_list"] = json.dumps([(pn + 1, 0, img.size[0], 0, img.size[1])])
tokenize(d, txt, eng)
res.append(d)
return res
@@ -123,10 +124,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
pn += from_page
if img:
d["image"] = img
- d["page_num_int"] = [pn + 1]
- d["top_int"] = [0]
- d["position_int"] = [
- (pn + 1, 0, img.size[0] if img else 0, 0, img.size[1] if img else 0)]
+ d["page_num_list"] = json.dumps([pn + 1])
+ d["top_list"] = json.dumps([0])
+ d["position_list"] = json.dumps([
+ (pn + 1, 0, img.size[0] if img else 0, 0, img.size[1] if img else 0)])
tokenize(d, txt, eng)
res.append(d)
return res
diff --git a/rag/app/table.py b/rag/app/table.py
index c03ccaff37c..b5148983ce7 100644
--- a/rag/app/table.py
+++ b/rag/app/table.py
@@ -74,7 +74,7 @@ def __call__(self, fnm, binary=None, from_page=0,
def trans_datatime(s):
try:
return datetime_parse(s.strip()).strftime("%Y-%m-%d %H:%M:%S")
- except Exception as e:
+ except Exception:
pass
@@ -112,7 +112,7 @@ def column_data_type(arr):
continue
try:
arr[i] = trans[ty](str(arr[i]))
- except Exception as e:
+ except Exception:
arr[i] = None
# if ty == "text":
# if len(arr) > 128 and uni / len(arr) < 0.1:
@@ -182,7 +182,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000,
"datetime": "_dt",
"bool": "_kwd"}
for df in dfs:
- for n in ["id", "_id", "index", "idx"]:
+ for n in ["id", "index", "idx"]:
if n in df.columns:
del df[n]
clmns = df.columns.values
diff --git a/rag/benchmark.py b/rag/benchmark.py
index ce1f2b4618d..c26ea8251e2 100644
--- a/rag/benchmark.py
+++ b/rag/benchmark.py
@@ -1,280 +1,310 @@
-#
-# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-import json
-import os
-from collections import defaultdict
-from concurrent.futures import ThreadPoolExecutor
-from copy import deepcopy
-
-from api.db import LLMType
-from api.db.services.llm_service import LLMBundle
-from api.db.services.knowledgebase_service import KnowledgebaseService
-from api.settings import retrievaler
-from api.utils import get_uuid
-from api.utils.file_utils import get_project_base_directory
-from rag.nlp import tokenize, search
-from rag.utils.es_conn import ELASTICSEARCH
-from ranx import evaluate
-import pandas as pd
-from tqdm import tqdm
-from ranx import Qrels, Run
-
-
-class Benchmark:
- def __init__(self, kb_id):
- e, self.kb = KnowledgebaseService.get_by_id(kb_id)
- self.similarity_threshold = self.kb.similarity_threshold
- self.vector_similarity_weight = self.kb.vector_similarity_weight
- self.embd_mdl = LLMBundle(self.kb.tenant_id, LLMType.EMBEDDING, llm_name=self.kb.embd_id, lang=self.kb.language)
-
- def _get_benchmarks(self, query, dataset_idxnm, count=16):
-
- req = {"question": query, "size": count, "vector": True, "similarity": self.similarity_threshold}
- sres = retrievaler.search(req, search.index_name(dataset_idxnm), self.embd_mdl)
- return sres
-
- def _get_retrieval(self, qrels, dataset_idxnm):
- run = defaultdict(dict)
- query_list = list(qrels.keys())
- for query in query_list:
-
- ranks = retrievaler.retrieval(query, self.embd_mdl,
- dataset_idxnm, [self.kb.id], 1, 30,
- 0.0, self.vector_similarity_weight)
- for c in ranks["chunks"]:
- if "vector" in c:
- del c["vector"]
- run[query][c["chunk_id"]] = c["similarity"]
-
- return run
-
- def embedding(self, docs, batch_size=16):
- vects = []
- cnts = [d["content_with_weight"] for d in docs]
- for i in range(0, len(cnts), batch_size):
- vts, c = self.embd_mdl.encode(cnts[i: i + batch_size])
- vects.extend(vts.tolist())
- assert len(docs) == len(vects)
- for i, d in enumerate(docs):
- v = vects[i]
- d["q_%d_vec" % len(v)] = v
- return docs
-
- @staticmethod
- def init_kb(index_name):
- idxnm = search.index_name(index_name)
- if ELASTICSEARCH.indexExist(idxnm):
- ELASTICSEARCH.deleteIdx(search.index_name(index_name))
-
- return ELASTICSEARCH.createIdx(idxnm, json.load(
- open(os.path.join(get_project_base_directory(), "conf", "mapping.json"), "r")))
-
- def ms_marco_index(self, file_path, index_name):
- qrels = defaultdict(dict)
- texts = defaultdict(dict)
- docs = []
- filelist = os.listdir(file_path)
- self.init_kb(index_name)
-
- max_workers = int(os.environ.get('MAX_WORKERS', 3))
- exe = ThreadPoolExecutor(max_workers=max_workers)
- threads = []
-
- def slow_actions(es_docs, idx_nm):
- es_docs = self.embedding(es_docs)
- ELASTICSEARCH.bulk(es_docs, idx_nm)
- return True
-
- for dir in filelist:
- data = pd.read_parquet(os.path.join(file_path, dir))
- for i in tqdm(range(len(data)), colour="green", desc="Tokenizing:" + dir):
-
- query = data.iloc[i]['query']
- for rel, text in zip(data.iloc[i]['passages']['is_selected'], data.iloc[i]['passages']['passage_text']):
- d = {
- "id": get_uuid(),
- "kb_id": self.kb.id,
- "docnm_kwd": "xxxxx",
- "doc_id": "ksksks"
- }
- tokenize(d, text, "english")
- docs.append(d)
- texts[d["id"]] = text
- qrels[query][d["id"]] = int(rel)
- if len(docs) >= 32:
- threads.append(
- exe.submit(slow_actions, deepcopy(docs), search.index_name(index_name)))
- docs = []
-
- threads.append(
- exe.submit(slow_actions, deepcopy(docs), search.index_name(index_name)))
-
- for i in tqdm(range(len(threads)), colour="red", desc="Indexing:" + dir):
- if not threads[i].result().output:
- print("Indexing error...")
-
- return qrels, texts
-
- def trivia_qa_index(self, file_path, index_name):
- qrels = defaultdict(dict)
- texts = defaultdict(dict)
- docs = []
- filelist = os.listdir(file_path)
- for dir in filelist:
- data = pd.read_parquet(os.path.join(file_path, dir))
- for i in tqdm(range(len(data)), colour="green", desc="Indexing:" + dir):
- query = data.iloc[i]['question']
- for rel, text in zip(data.iloc[i]["search_results"]['rank'],
- data.iloc[i]["search_results"]['search_context']):
- d = {
- "id": get_uuid(),
- "kb_id": self.kb.id,
- "docnm_kwd": "xxxxx",
- "doc_id": "ksksks"
- }
- tokenize(d, text, "english")
- docs.append(d)
- texts[d["id"]] = text
- qrels[query][d["id"]] = int(rel)
- if len(docs) >= 32:
- docs = self.embedding(docs)
- ELASTICSEARCH.bulk(docs, search.index_name(index_name))
- docs = []
-
- docs = self.embedding(docs)
- ELASTICSEARCH.bulk(docs, search.index_name(index_name))
- return qrels, texts
-
- def miracl_index(self, file_path, corpus_path, index_name):
-
- corpus_total = {}
- for corpus_file in os.listdir(corpus_path):
- tmp_data = pd.read_json(os.path.join(corpus_path, corpus_file), lines=True)
- for index, i in tmp_data.iterrows():
- corpus_total[i['docid']] = i['text']
-
- topics_total = {}
- for topics_file in os.listdir(os.path.join(file_path, 'topics')):
- if 'test' in topics_file:
- continue
- tmp_data = pd.read_csv(os.path.join(file_path, 'topics', topics_file), sep='\t', names=['qid', 'query'])
- for index, i in tmp_data.iterrows():
- topics_total[i['qid']] = i['query']
-
- qrels = defaultdict(dict)
- texts = defaultdict(dict)
- docs = []
- for qrels_file in os.listdir(os.path.join(file_path, 'qrels')):
- if 'test' in qrels_file:
- continue
-
- tmp_data = pd.read_csv(os.path.join(file_path, 'qrels', qrels_file), sep='\t',
- names=['qid', 'Q0', 'docid', 'relevance'])
- for i in tqdm(range(len(tmp_data)), colour="green", desc="Indexing:" + qrels_file):
- query = topics_total[tmp_data.iloc[i]['qid']]
- text = corpus_total[tmp_data.iloc[i]['docid']]
- rel = tmp_data.iloc[i]['relevance']
- d = {
- "id": get_uuid(),
- "kb_id": self.kb.id,
- "docnm_kwd": "xxxxx",
- "doc_id": "ksksks"
- }
- tokenize(d, text, 'english')
- docs.append(d)
- texts[d["id"]] = text
- qrels[query][d["id"]] = int(rel)
- if len(docs) >= 32:
- docs = self.embedding(docs)
- ELASTICSEARCH.bulk(docs, search.index_name(index_name))
- docs = []
-
- docs = self.embedding(docs)
- ELASTICSEARCH.bulk(docs, search.index_name(index_name))
-
- return qrels, texts
-
- def save_results(self, qrels, run, texts, dataset, file_path):
- keep_result = []
- run_keys = list(run.keys())
- for run_i in tqdm(range(len(run_keys)), desc="Calculating ndcg@10 for single query"):
- key = run_keys[run_i]
- keep_result.append({'query': key, 'qrel': qrels[key], 'run': run[key],
- 'ndcg@10': evaluate(Qrels({key: qrels[key]}), Run({key: run[key]}), "ndcg@10")})
- keep_result = sorted(keep_result, key=lambda kk: kk['ndcg@10'])
- with open(os.path.join(file_path, dataset + 'result.md'), 'w', encoding='utf-8') as f:
- f.write('## Score For Every Query\n')
- for keep_result_i in keep_result:
- f.write('### query: ' + keep_result_i['query'] + ' ndcg@10:' + str(keep_result_i['ndcg@10']) + '\n')
- scores = [[i[0], i[1]] for i in keep_result_i['run'].items()]
- scores = sorted(scores, key=lambda kk: kk[1])
- for score in scores[:10]:
- f.write('- text: ' + str(texts[score[0]]) + '\t qrel: ' + str(score[1]) + '\n')
- json.dump(qrels, open(os.path.join(file_path, dataset + '.qrels.json'), "w+"), indent=2)
- json.dump(run, open(os.path.join(file_path, dataset + '.run.json'), "w+"), indent=2)
- print(os.path.join(file_path, dataset + '_result.md'), 'Saved!')
-
- def __call__(self, dataset, file_path, miracl_corpus=''):
- if dataset == "ms_marco_v1.1":
- qrels, texts = self.ms_marco_index(file_path, "benchmark_ms_marco_v1.1")
- run = self._get_retrieval(qrels, "benchmark_ms_marco_v1.1")
- print(dataset, evaluate(Qrels(qrels), Run(run), ["ndcg@10", "map@5", "mrr"]))
- self.save_results(qrels, run, texts, dataset, file_path)
- if dataset == "trivia_qa":
- qrels, texts = self.trivia_qa_index(file_path, "benchmark_trivia_qa")
- run = self._get_retrieval(qrels, "benchmark_trivia_qa")
- print(dataset, evaluate((qrels), Run(run), ["ndcg@10", "map@5", "mrr"]))
- self.save_results(qrels, run, texts, dataset, file_path)
- if dataset == "miracl":
- for lang in ['ar', 'bn', 'de', 'en', 'es', 'fa', 'fi', 'fr', 'hi', 'id', 'ja', 'ko', 'ru', 'sw', 'te', 'th',
- 'yo', 'zh']:
- if not os.path.isdir(os.path.join(file_path, 'miracl-v1.0-' + lang)):
- print('Directory: ' + os.path.join(file_path, 'miracl-v1.0-' + lang) + ' not found!')
- continue
- if not os.path.isdir(os.path.join(file_path, 'miracl-v1.0-' + lang, 'qrels')):
- print('Directory: ' + os.path.join(file_path, 'miracl-v1.0-' + lang, 'qrels') + 'not found!')
- continue
- if not os.path.isdir(os.path.join(file_path, 'miracl-v1.0-' + lang, 'topics')):
- print('Directory: ' + os.path.join(file_path, 'miracl-v1.0-' + lang, 'topics') + 'not found!')
- continue
- if not os.path.isdir(os.path.join(miracl_corpus, 'miracl-corpus-v1.0-' + lang)):
- print('Directory: ' + os.path.join(miracl_corpus, 'miracl-corpus-v1.0-' + lang) + ' not found!')
- continue
- qrels, texts = self.miracl_index(os.path.join(file_path, 'miracl-v1.0-' + lang),
- os.path.join(miracl_corpus, 'miracl-corpus-v1.0-' + lang),
- "benchmark_miracl_" + lang)
- run = self._get_retrieval(qrels, "benchmark_miracl_" + lang)
- print(dataset, evaluate(Qrels(qrels), Run(run), ["ndcg@10", "map@5", "mrr"]))
- self.save_results(qrels, run, texts, dataset, file_path)
-
-
-if __name__ == '__main__':
- print('*****************RAGFlow Benchmark*****************')
- kb_id = input('Please input kb_id:\n')
- ex = Benchmark(kb_id)
- dataset = input(
- 'RAGFlow Benchmark Support:\n\tms_marco_v1.1:\n\ttrivia_qa:\n\tmiracl:\nPlease input dataset choice:\n')
- if dataset in ['ms_marco_v1.1', 'trivia_qa']:
- if dataset == "ms_marco_v1.1":
- print("Notice: Please provide the ms_marco_v1.1 dataset only. ms_marco_v2.1 is not supported!")
- dataset_path = input('Please input ' + dataset + ' dataset path:\n')
- ex(dataset, dataset_path)
- elif dataset == 'miracl':
- dataset_path = input('Please input ' + dataset + ' dataset path:\n')
- corpus_path = input('Please input ' + dataset + '-corpus dataset path:\n')
- ex(dataset, dataset_path, miracl_corpus=corpus_path)
- else:
- print("Dataset: ", dataset, "not supported!")
+#
+# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import json
+import os
+import sys
+import time
+import argparse
+from collections import defaultdict
+
+from api.db import LLMType
+from api.db.services.llm_service import LLMBundle
+from api.db.services.knowledgebase_service import KnowledgebaseService
+from api.settings import retrievaler, docStoreConn
+from api.utils import get_uuid
+from rag.nlp import tokenize, search
+from ranx import evaluate
+import pandas as pd
+from tqdm import tqdm
+
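+# max_docs is set from the command line and caps how many chunks are indexed per dataset.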
+global max_docs
+max_docs = sys.maxsize
+
+class Benchmark:
+ def __init__(self, kb_id):
+ self.kb_id = kb_id
+ e, self.kb = KnowledgebaseService.get_by_id(kb_id)
+ self.similarity_threshold = self.kb.similarity_threshold
+ self.vector_similarity_weight = self.kb.vector_similarity_weight
+ self.embd_mdl = LLMBundle(self.kb.tenant_id, LLMType.EMBEDDING, llm_name=self.kb.embd_id, lang=self.kb.language)
+ self.tenant_id = ''
+ self.index_name = ''
+ self.initialized_index = False
+
+ def _get_retrieval(self, qrels):
+ # Need to wait for the ES and Infinity index to be ready
+ time.sleep(20)
+ run = defaultdict(dict)
+ query_list = list(qrels.keys())
+ for query in query_list:
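+            # Retrieve for each query; queries with no hits are removed from qrels and skipped.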
+ ranks = retrievaler.retrieval(query, self.embd_mdl, self.tenant_id, [self.kb.id], 1, 30,
+ 0.0, self.vector_similarity_weight)
+ if len(ranks["chunks"]) == 0:
+ print(f"deleted query: {query}")
+ del qrels[query]
+ continue
+ for c in ranks["chunks"]:
+ if "vector" in c:
+ del c["vector"]
+ run[query][c["chunk_id"]] = c["similarity"]
+ return run
+
+ def embedding(self, docs, batch_size=16):
+ vects = []
+ cnts = [d["content_with_weight"] for d in docs]
+ for i in range(0, len(cnts), batch_size):
+ vts, c = self.embd_mdl.encode(cnts[i: i + batch_size])
+ vects.extend(vts.tolist())
+ assert len(docs) == len(vects)
+ vector_size = 0
+ for i, d in enumerate(docs):
+ v = vects[i]
+ vector_size = len(v)
+ d["q_%d_vec" % len(v)] = v
+ return docs, vector_size
+
+ def init_index(self, vector_size: int):
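+        # Lazily (re)create the index once the embedding dimension is known from the first encoded batch.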
+ if self.initialized_index:
+ return
+ if docStoreConn.indexExist(self.index_name, self.kb_id):
+ docStoreConn.deleteIdx(self.index_name, self.kb_id)
+ docStoreConn.createIdx(self.index_name, self.kb_id, vector_size)
+ self.initialized_index = True
+
+ def ms_marco_index(self, file_path, index_name):
+ qrels = defaultdict(dict)
+ texts = defaultdict(dict)
+ docs_count = 0
+ docs = []
+ filelist = sorted(os.listdir(file_path))
+
+ for fn in filelist:
+ if docs_count >= max_docs:
+ break
+ if not fn.endswith(".parquet"):
+ continue
+ data = pd.read_parquet(os.path.join(file_path, fn))
+ for i in tqdm(range(len(data)), colour="green", desc="Tokenizing:" + fn):
+ if docs_count >= max_docs:
+ break
+ query = data.iloc[i]['query']
+ for rel, text in zip(data.iloc[i]['passages']['is_selected'], data.iloc[i]['passages']['passage_text']):
+ d = {
+ "id": get_uuid(),
+ "kb_id": self.kb.id,
+ "docnm_kwd": "xxxxx",
+ "doc_id": "ksksks"
+ }
+ tokenize(d, text, "english")
+ docs.append(d)
+ texts[d["id"]] = text
+ qrels[query][d["id"]] = int(rel)
+ if len(docs) >= 32:
+ docs_count += len(docs)
+ docs, vector_size = self.embedding(docs)
+ self.init_index(vector_size)
+ docStoreConn.insert(docs, self.index_name, self.kb_id)
+ docs = []
+
+ if docs:
+ docs, vector_size = self.embedding(docs)
+ self.init_index(vector_size)
+ docStoreConn.insert(docs, self.index_name, self.kb_id)
+ return qrels, texts
+
+ def trivia_qa_index(self, file_path, index_name):
+ qrels = defaultdict(dict)
+ texts = defaultdict(dict)
+ docs_count = 0
+ docs = []
+ filelist = sorted(os.listdir(file_path))
+ for fn in filelist:
+ if docs_count >= max_docs:
+ break
+ if not fn.endswith(".parquet"):
+ continue
+ data = pd.read_parquet(os.path.join(file_path, fn))
+ for i in tqdm(range(len(data)), colour="green", desc="Indexing:" + fn):
+ if docs_count >= max_docs:
+ break
+ query = data.iloc[i]['question']
+ for rel, text in zip(data.iloc[i]["search_results"]['rank'],
+ data.iloc[i]["search_results"]['search_context']):
+ d = {
+ "id": get_uuid(),
+ "kb_id": self.kb.id,
+ "docnm_kwd": "xxxxx",
+ "doc_id": "ksksks"
+ }
+ tokenize(d, text, "english")
+ docs.append(d)
+ texts[d["id"]] = text
+ qrels[query][d["id"]] = int(rel)
+ if len(docs) >= 32:
+ docs_count += len(docs)
+ docs, vector_size = self.embedding(docs)
+ self.init_index(vector_size)
+                        docStoreConn.insert(docs, self.index_name, self.kb_id)
+ docs = []
+
+        if docs:
+            docs, vector_size = self.embedding(docs)
+            self.init_index(vector_size)
+            docStoreConn.insert(docs, self.index_name, self.kb_id)
+ return qrels, texts
+
+ def miracl_index(self, file_path, corpus_path, index_name):
+ corpus_total = {}
+ for corpus_file in os.listdir(corpus_path):
+ tmp_data = pd.read_json(os.path.join(corpus_path, corpus_file), lines=True)
+ for index, i in tmp_data.iterrows():
+ corpus_total[i['docid']] = i['text']
+
+ topics_total = {}
+ for topics_file in os.listdir(os.path.join(file_path, 'topics')):
+ if 'test' in topics_file:
+ continue
+ tmp_data = pd.read_csv(os.path.join(file_path, 'topics', topics_file), sep='\t', names=['qid', 'query'])
+ for index, i in tmp_data.iterrows():
+ topics_total[i['qid']] = i['query']
+
+ qrels = defaultdict(dict)
+ texts = defaultdict(dict)
+ docs_count = 0
+ docs = []
+ for qrels_file in os.listdir(os.path.join(file_path, 'qrels')):
+ if 'test' in qrels_file:
+ continue
+ if docs_count >= max_docs:
+ break
+
+ tmp_data = pd.read_csv(os.path.join(file_path, 'qrels', qrels_file), sep='\t',
+ names=['qid', 'Q0', 'docid', 'relevance'])
+ for i in tqdm(range(len(tmp_data)), colour="green", desc="Indexing:" + qrels_file):
+ if docs_count >= max_docs:
+ break
+ query = topics_total[tmp_data.iloc[i]['qid']]
+ text = corpus_total[tmp_data.iloc[i]['docid']]
+ rel = tmp_data.iloc[i]['relevance']
+ d = {
+ "id": get_uuid(),
+ "kb_id": self.kb.id,
+ "docnm_kwd": "xxxxx",
+ "doc_id": "ksksks"
+ }
+ tokenize(d, text, 'english')
+ docs.append(d)
+ texts[d["id"]] = text
+ qrels[query][d["id"]] = int(rel)
+ if len(docs) >= 32:
+ docs_count += len(docs)
+ docs, vector_size = self.embedding(docs)
+ self.init_index(vector_size)
+                    docStoreConn.insert(docs, self.index_name, self.kb_id)
+ docs = []
+
+        if docs:
+            docs, vector_size = self.embedding(docs)
+            self.init_index(vector_size)
+            docStoreConn.insert(docs, self.index_name, self.kb_id)
+ return qrels, texts
+
+ def save_results(self, qrels, run, texts, dataset, file_path):
+ keep_result = []
+ run_keys = list(run.keys())
+ for run_i in tqdm(range(len(run_keys)), desc="Calculating ndcg@10 for single query"):
+ key = run_keys[run_i]
+ keep_result.append({'query': key, 'qrel': qrels[key], 'run': run[key],
+ 'ndcg@10': evaluate({key: qrels[key]}, {key: run[key]}, "ndcg@10")})
+ keep_result = sorted(keep_result, key=lambda kk: kk['ndcg@10'])
+        with open(os.path.join(file_path, dataset + '_result.md'), 'w', encoding='utf-8') as f:
+ f.write('## Score For Every Query\n')
+ for keep_result_i in keep_result:
+ f.write('### query: ' + keep_result_i['query'] + ' ndcg@10:' + str(keep_result_i['ndcg@10']) + '\n')
+ scores = [[i[0], i[1]] for i in keep_result_i['run'].items()]
+ scores = sorted(scores, key=lambda kk: kk[1])
+ for score in scores[:10]:
+ f.write('- text: ' + str(texts[score[0]]) + '\t qrel: ' + str(score[1]) + '\n')
+ json.dump(qrels, open(os.path.join(file_path, dataset + '.qrels.json'), "w+"), indent=2)
+ json.dump(run, open(os.path.join(file_path, dataset + '.run.json'), "w+"), indent=2)
+ print(os.path.join(file_path, dataset + '_result.md'), 'Saved!')
+
+ def __call__(self, dataset, file_path, miracl_corpus=''):
+ if dataset == "ms_marco_v1.1":
+ self.tenant_id = "benchmark_ms_marco_v11"
+ self.index_name = search.index_name(self.tenant_id)
+ qrels, texts = self.ms_marco_index(file_path, "benchmark_ms_marco_v1.1")
+ run = self._get_retrieval(qrels)
+ print(dataset, evaluate(qrels, run, ["ndcg@10", "map@5", "mrr"]))
+ self.save_results(qrels, run, texts, dataset, file_path)
+ if dataset == "trivia_qa":
+ self.tenant_id = "benchmark_trivia_qa"
+ self.index_name = search.index_name(self.tenant_id)
+ qrels, texts = self.trivia_qa_index(file_path, "benchmark_trivia_qa")
+ run = self._get_retrieval(qrels)
+ print(dataset, evaluate(qrels, run, ["ndcg@10", "map@5", "mrr"]))
+ self.save_results(qrels, run, texts, dataset, file_path)
+ if dataset == "miracl":
+ for lang in ['ar', 'bn', 'de', 'en', 'es', 'fa', 'fi', 'fr', 'hi', 'id', 'ja', 'ko', 'ru', 'sw', 'te', 'th',
+ 'yo', 'zh']:
+ if not os.path.isdir(os.path.join(file_path, 'miracl-v1.0-' + lang)):
+ print('Directory: ' + os.path.join(file_path, 'miracl-v1.0-' + lang) + ' not found!')
+ continue
+ if not os.path.isdir(os.path.join(file_path, 'miracl-v1.0-' + lang, 'qrels')):
+ print('Directory: ' + os.path.join(file_path, 'miracl-v1.0-' + lang, 'qrels') + 'not found!')
+ continue
+ if not os.path.isdir(os.path.join(file_path, 'miracl-v1.0-' + lang, 'topics')):
+ print('Directory: ' + os.path.join(file_path, 'miracl-v1.0-' + lang, 'topics') + 'not found!')
+ continue
+ if not os.path.isdir(os.path.join(miracl_corpus, 'miracl-corpus-v1.0-' + lang)):
+ print('Directory: ' + os.path.join(miracl_corpus, 'miracl-corpus-v1.0-' + lang) + ' not found!')
+ continue
+ self.tenant_id = "benchmark_miracl_" + lang
+ self.index_name = search.index_name(self.tenant_id)
+ self.initialized_index = False
+ qrels, texts = self.miracl_index(os.path.join(file_path, 'miracl-v1.0-' + lang),
+ os.path.join(miracl_corpus, 'miracl-corpus-v1.0-' + lang),
+ "benchmark_miracl_" + lang)
+ run = self._get_retrieval(qrels)
+ print(dataset, evaluate(qrels, run, ["ndcg@10", "map@5", "mrr"]))
+ self.save_results(qrels, run, texts, dataset, file_path)
+
+
+if __name__ == '__main__':
+ print('*****************RAGFlow Benchmark*****************')
+    parser = argparse.ArgumentParser(usage="benchmark.py <max_docs> <kb_id> <dataset> <dataset_path> [<miracl_corpus_path>]", description='RAGFlow Benchmark')
+ parser.add_argument('max_docs', metavar='max_docs', type=int, help='max docs to evaluate')
+ parser.add_argument('kb_id', metavar='kb_id', help='knowledgebase id')
+    parser.add_argument('dataset', metavar='dataset', help='dataset name, shall be one of ms_marco_v1.1(https://huggingface.co/datasets/microsoft/ms_marco), trivia_qa(https://huggingface.co/datasets/mandarjoshi/trivia_qa), miracl(https://huggingface.co/datasets/miracl/miracl)')
+ parser.add_argument('dataset_path', metavar='dataset_path', help='dataset path')
+ parser.add_argument('miracl_corpus_path', metavar='miracl_corpus_path', nargs='?', default="", help='miracl corpus path. Only needed when dataset is miracl')
+
+ args = parser.parse_args()
+ max_docs = args.max_docs
+ kb_id = args.kb_id
+ ex = Benchmark(kb_id)
+
+ dataset = args.dataset
+ dataset_path = args.dataset_path
+
+ if dataset == "ms_marco_v1.1" or dataset == "trivia_qa":
+ ex(dataset, dataset_path)
+ elif dataset == "miracl":
+        if not args.miracl_corpus_path:
+            print('Please input the correct parameters!')
+            exit(1)
+        ex(dataset, dataset_path, miracl_corpus=args.miracl_corpus_path)
+ else:
+ print("Dataset: ", dataset, "not supported!")
diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py
index 7c7b6cec629..03fca45f11a 100644
--- a/rag/nlp/__init__.py
+++ b/rag/nlp/__init__.py
@@ -25,6 +25,7 @@
from word2number import w2n
from cn2an import cn2an
from PIL import Image
+import json
all_codecs = [
'utf-8', 'gb2312', 'gbk', 'utf_16', 'ascii', 'big5', 'big5hkscs',
@@ -51,12 +52,12 @@ def find_codec(blob):
try:
blob[:1024].decode(c)
return c
- except Exception as e:
+ except Exception:
pass
try:
blob.decode(c)
return c
- except Exception as e:
+ except Exception:
pass
return "utf-8"
@@ -241,7 +242,7 @@ def tokenize_chunks(chunks, doc, eng, pdf_parser=None):
d["image"], poss = pdf_parser.crop(ck, need_position=True)
add_positions(d, poss)
ck = pdf_parser.remove_tag(ck)
- except NotImplementedError as e:
+ except NotImplementedError:
pass
tokenize(d, ck, eng)
res.append(d)
@@ -289,13 +290,16 @@ def tokenize_table(tbls, doc, eng, batch_size=10):
def add_positions(d, poss):
if not poss:
return
- d["page_num_int"] = []
- d["position_int"] = []
- d["top_int"] = []
+ page_num_list = []
+ position_list = []
+ top_list = []
for pn, left, right, top, bottom in poss:
- d["page_num_int"].append(int(pn + 1))
- d["top_int"].append(int(top))
- d["position_int"].append((int(pn + 1), int(left), int(right), int(top), int(bottom)))
+ page_num_list.append(int(pn + 1))
+ top_list.append(int(top))
+ position_list.append((int(pn + 1), int(left), int(right), int(top), int(bottom)))
+ d["page_num_list"] = json.dumps(page_num_list)
+ d["position_list"] = json.dumps(position_list)
+ d["top_list"] = json.dumps(top_list)
def remove_contents_table(sections, eng=False):
diff --git a/rag/nlp/query.py b/rag/nlp/query.py
index 30abbc8bd21..e5b55401ffe 100644
--- a/rag/nlp/query.py
+++ b/rag/nlp/query.py
@@ -15,20 +15,25 @@
#
import json
-import math
import re
import logging
-import copy
-from elasticsearch_dsl import Q
+from rag.utils.doc_store_conn import MatchTextExpr
from rag.nlp import rag_tokenizer, term_weight, synonym
-class EsQueryer:
- def __init__(self, es):
+
+class FulltextQueryer:
+ def __init__(self):
self.tw = term_weight.Dealer()
- self.es = es
self.syn = synonym.Dealer()
- self.flds = ["ask_tks^10", "ask_small_tks"]
+ self.query_fields = [
+ "title_tks^10",
+ "title_sm_tks^5",
+ "important_kwd^30",
+ "important_tks^20",
+ "content_ltks^2",
+ "content_sm_ltks",
+ ]
@staticmethod
def subSpecialChar(line):
@@ -43,12 +48,15 @@ def isChinese(line):
for t in arr:
if not re.match(r"[a-zA-Z]+$", t):
e += 1
- return e * 1. / len(arr) >= 0.7
+ return e * 1.0 / len(arr) >= 0.7
@staticmethod
def rmWWW(txt):
patts = [
- (r"是*(什么样的|哪家|一下|那家|请问|啥样|咋样了|什么时候|何时|何地|何人|是否|是不是|多少|哪里|怎么|哪儿|怎么样|如何|哪些|是啥|啥是|啊|吗|呢|吧|咋|什么|有没有|呀)是*", ""),
+ (
+ r"是*(什么样的|哪家|一下|那家|请问|啥样|咋样了|什么时候|何时|何地|何人|是否|是不是|多少|哪里|怎么|哪儿|怎么样|如何|哪些|是啥|啥是|啊|吗|呢|吧|咋|什么|有没有|呀)是*",
+ "",
+ ),
(r"(^| )(what|who|how|which|where|why)('re|'s)? ", " "),
(r"(^| )('s|'re|is|are|were|was|do|does|did|don't|doesn't|didn't|has|have|be|there|you|me|your|my|mine|just|please|may|i|should|would|wouldn't|will|won't|done|go|for|with|so|the|a|an|by|i'm|it's|he's|she's|they|they're|you're|as|by|on|in|at|up|out|down|of) ", " ")
]
@@ -56,16 +64,16 @@ def rmWWW(txt):
txt = re.sub(r, p, txt, flags=re.IGNORECASE)
return txt
- def question(self, txt, tbl="qa", min_match="60%"):
+ def question(self, txt, tbl="qa", min_match:float=0.6):
txt = re.sub(
r"[ :\r\n\t,,。??/`!!&\^%%]+",
" ",
- rag_tokenizer.tradi2simp(
- rag_tokenizer.strQ2B(
- txt.lower()))).strip()
+ rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(txt.lower())),
+ ).strip()
+ txt = FulltextQueryer.rmWWW(txt)
if not self.isChinese(txt):
- txt = EsQueryer.rmWWW(txt)
+ txt = FulltextQueryer.rmWWW(txt)
tks = rag_tokenizer.tokenize(txt).split(" ")
tks_w = self.tw.weights(tks)
tks_w = [(re.sub(r"[ \\\"'^]", "", tk), w) for tk, w in tks_w]
@@ -73,14 +81,20 @@ def question(self, txt, tbl="qa", min_match="60%"):
tks_w = [(re.sub(r"^[\+-]", "", tk), w) for tk, w in tks_w if tk]
q = ["{}^{:.4f}".format(tk, w) for tk, w in tks_w if tk]
for i in range(1, len(tks_w)):
- q.append("\"%s %s\"^%.4f" % (tks_w[i - 1][0], tks_w[i][0], max(tks_w[i - 1][1], tks_w[i][1])*2))
+ q.append(
+ '"%s %s"^%.4f'
+ % (
+ tks_w[i - 1][0],
+ tks_w[i][0],
+ max(tks_w[i - 1][1], tks_w[i][1]) * 2,
+ )
+ )
if not q:
q.append(txt)
- return Q("bool",
- must=Q("query_string", fields=self.flds,
- type="best_fields", query=" ".join(q),
- boost=1)#, minimum_should_match=min_match)
- ), list(set([t for t in txt.split(" ") if t]))
+ query = " ".join(q)
+ return MatchTextExpr(
+ self.query_fields, query, 100
+ ), tks
def need_fine_grained_tokenize(tk):
if len(tk) < 3:
@@ -89,7 +103,7 @@ def need_fine_grained_tokenize(tk):
return False
return True
- txt = EsQueryer.rmWWW(txt)
+ txt = FulltextQueryer.rmWWW(txt)
qs, keywords = [], []
for tt in self.tw.split(txt)[:256]: # .split(" "):
if not tt:
@@ -101,65 +115,71 @@ def need_fine_grained_tokenize(tk):
logging.info(json.dumps(twts, ensure_ascii=False))
tms = []
for tk, w in sorted(twts, key=lambda x: x[1] * -1):
- sm = rag_tokenizer.fine_grained_tokenize(tk).split(" ") if need_fine_grained_tokenize(tk) else []
+ sm = (
+ rag_tokenizer.fine_grained_tokenize(tk).split(" ")
+ if need_fine_grained_tokenize(tk)
+ else []
+ )
sm = [
re.sub(
r"[ ,\./;'\[\]\\`~!@#$%\^&\*\(\)=\+_<>\?:\"\{\}\|,。;‘’【】、!¥……()——《》?:“”-]+",
"",
- m) for m in sm]
- sm = [EsQueryer.subSpecialChar(m) for m in sm if len(m) > 1]
+ m,
+ )
+ for m in sm
+ ]
+ sm = [FulltextQueryer.subSpecialChar(m) for m in sm if len(m) > 1]
sm = [m for m in sm if len(m) > 1]
keywords.append(re.sub(r"[ \\\"']+", "", tk))
keywords.extend(sm)
- if len(keywords) >= 12: break
+ if len(keywords) >= 12:
+ break
tk_syns = self.syn.lookup(tk)
- tk = EsQueryer.subSpecialChar(tk)
+ tk = FulltextQueryer.subSpecialChar(tk)
if tk.find(" ") > 0:
- tk = "\"%s\"" % tk
+ tk = '"%s"' % tk
if tk_syns:
tk = f"({tk} %s)" % " ".join(tk_syns)
if sm:
- tk = f"{tk} OR \"%s\" OR (\"%s\"~2)^0.5" % (
- " ".join(sm), " ".join(sm))
+ tk = f'{tk} OR "%s" OR ("%s"~2)^0.5' % (" ".join(sm), " ".join(sm))
if tk.strip():
tms.append((tk, w))
tms = " ".join([f"({t})^{w}" for t, w in tms])
if len(twts) > 1:
- tms += f" (\"%s\"~4)^1.5" % (" ".join([t for t, _ in twts]))
+ tms += ' ("%s"~4)^1.5' % (" ".join([t for t, _ in twts]))
if re.match(r"[0-9a-z ]+$", tt):
- tms = f"(\"{tt}\" OR \"%s\")" % rag_tokenizer.tokenize(tt)
+ tms = f'("{tt}" OR "%s")' % rag_tokenizer.tokenize(tt)
syns = " OR ".join(
- ["\"%s\"^0.7" % EsQueryer.subSpecialChar(rag_tokenizer.tokenize(s)) for s in syns])
+ [
+ '"%s"^0.7'
+ % FulltextQueryer.subSpecialChar(rag_tokenizer.tokenize(s))
+ for s in syns
+ ]
+ )
if syns:
tms = f"({tms})^5 OR ({syns})^0.7"
qs.append(tms)
- flds = copy.deepcopy(self.flds)
- mst = []
if qs:
- mst.append(
- Q("query_string", fields=flds, type="best_fields",
- query=" OR ".join([f"({t})" for t in qs if t]), boost=1, minimum_should_match=min_match)
- )
-
- return Q("bool",
- must=mst,
- ), list(set(keywords))
+ query = " OR ".join([f"({t})" for t in qs if t])
+ return MatchTextExpr(
+ self.query_fields, query, 100, {"minimum_should_match": min_match}
+ ), keywords
+ return None, keywords
- def hybrid_similarity(self, avec, bvecs, atks, btkss, tkweight=0.3,
- vtweight=0.7):
+ def hybrid_similarity(self, avec, bvecs, atks, btkss, tkweight=0.3, vtweight=0.7):
from sklearn.metrics.pairwise import cosine_similarity as CosineSimilarity
import numpy as np
+
sims = CosineSimilarity([avec], bvecs)
tksim = self.token_similarity(atks, btkss)
- return np.array(sims[0]) * vtweight + \
- np.array(tksim) * tkweight, tksim, sims[0]
+ return np.array(sims[0]) * vtweight + np.array(tksim) * tkweight, tksim, sims[0]
def token_similarity(self, atks, btkss):
def toDict(tks):
diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index 8bdc96393e0..6737f22e295 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -14,34 +14,25 @@
# limitations under the License.
#
-import json
import re
-from copy import deepcopy
-
-from elasticsearch_dsl import Q, Search
+import json
from typing import List, Optional, Dict, Union
from dataclasses import dataclass
-from rag.settings import es_logger
+from rag.settings import doc_store_logger
from rag.utils import rmSpace
-from rag.nlp import rag_tokenizer, query, is_english
+from rag.nlp import rag_tokenizer, query
import numpy as np
+from rag.utils.doc_store_conn import DocStoreConnection, MatchDenseExpr, FusionExpr, OrderByExpr
def index_name(uid): return f"ragflow_{uid}"
class Dealer:
- def __init__(self, es):
- self.qryr = query.EsQueryer(es)
- self.qryr.flds = [
- "title_tks^10",
- "title_sm_tks^5",
- "important_kwd^30",
- "important_tks^20",
- "content_ltks^2",
- "content_sm_ltks"]
- self.es = es
+ def __init__(self, dataStore: DocStoreConnection):
+ self.qryr = query.FulltextQueryer()
+ self.dataStore = dataStore
@dataclass
class SearchResult:
@@ -54,170 +45,99 @@ class SearchResult:
keywords: Optional[List[str]] = None
group_docs: List[List] = None
- def _vector(self, txt, emb_mdl, sim=0.8, topk=10):
- qv, c = emb_mdl.encode_queries(txt)
- return {
- "field": "q_%d_vec" % len(qv),
- "k": topk,
- "similarity": sim,
- "num_candidates": topk * 2,
- "query_vector": [float(v) for v in qv]
- }
-
- def _add_filters(self, bqry, req):
- if req.get("kb_ids"):
- bqry.filter.append(Q("terms", kb_id=req["kb_ids"]))
- if req.get("doc_ids"):
- bqry.filter.append(Q("terms", doc_id=req["doc_ids"]))
- if req.get("knowledge_graph_kwd"):
- bqry.filter.append(Q("terms", knowledge_graph_kwd=req["knowledge_graph_kwd"]))
- if "available_int" in req:
- if req["available_int"] == 0:
- bqry.filter.append(Q("range", available_int={"lt": 1}))
- else:
- bqry.filter.append(
- Q("bool", must_not=Q("range", available_int={"lt": 1})))
- return bqry
-
- def search(self, req, idxnms, emb_mdl=None, highlight=False):
- qst = req.get("question", "")
- bqry, keywords = self.qryr.question(qst, min_match="30%")
- bqry = self._add_filters(bqry, req)
- bqry.boost = 0.05
+ def get_vector(self, txt, emb_mdl, topk=10, similarity=0.1):
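+        # Build a dense-vector match expression; the column name encodes the embedding dimension (e.g. q_1024_vec).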
+ qv, _ = emb_mdl.encode_queries(txt)
+ embedding_data = [float(v) for v in qv]
+ vector_column_name = f"q_{len(embedding_data)}_vec"
+ return MatchDenseExpr(vector_column_name, embedding_data, 'float', 'cosine', topk, {"similarity": similarity})
+
+ def get_filters(self, req):
+ condition = dict()
+ for key, field in {"kb_ids": "kb_id", "doc_ids": "doc_id"}.items():
+ if key in req and req[key] is not None:
+ condition[field] = req[key]
+ # TODO(yzc): `available_int` is nullable however infinity doesn't support nullable columns.
+ for key in ["knowledge_graph_kwd"]:
+ if key in req and req[key] is not None:
+ condition[key] = req[key]
+ return condition
+
+    def search(self, req, idx_names: list[str], kb_ids: list[str], emb_mdl=None, highlight=False):
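+        # Translate the request into doc-store-agnostic filter and match expressions handled by the DocStoreConnection.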
+ filters = self.get_filters(req)
+ orderBy = OrderByExpr()
- s = Search()
pg = int(req.get("page", 1)) - 1
topk = int(req.get("topk", 1024))
ps = int(req.get("size", topk))
+ offset, limit = pg * ps, (pg + 1) * ps
+
src = req.get("fields", ["docnm_kwd", "content_ltks", "kb_id", "img_id", "title_tks", "important_kwd",
- "image_id", "doc_id", "q_512_vec", "q_768_vec", "position_int", "knowledge_graph_kwd",
- "q_1024_vec", "q_1536_vec", "available_int", "content_with_weight"])
+ "doc_id", "position_list", "knowledge_graph_kwd",
+ "available_int", "content_with_weight"])
+ kwds = set([])
- s = s.query(bqry)[pg * ps:(pg + 1) * ps]
- s = s.highlight("content_ltks")
- s = s.highlight("title_ltks")
+ qst = req.get("question", "")
+ q_vec = []
if not qst:
- if not req.get("sort"):
- s = s.sort(
- #{"create_time": {"order": "desc", "unmapped_type": "date"}},
- {"create_timestamp_flt": {
- "order": "desc", "unmapped_type": "float"}}
- )
+ if req.get("sort"):
+ orderBy.desc("create_timestamp_flt")
+ res = self.dataStore.search(src, [], filters, [], orderBy, offset, limit, idx_names, kb_ids)
+            total = self.dataStore.getTotal(res)
+ doc_store_logger.info("Dealer.search TOTAL: {}".format(total))
+ else:
+ highlightFields = ["content_ltks", "title_tks"] if highlight else []
+ matchText, keywords = self.qryr.question(qst, min_match=0.3)
+ if emb_mdl is None:
+ matchExprs = [matchText]
+ res = self.dataStore.search(src, highlightFields, filters, matchExprs, orderBy, offset, limit, idx_names, kb_ids)
+                total = self.dataStore.getTotal(res)
+ doc_store_logger.info("Dealer.search TOTAL: {}".format(total))
else:
- s = s.sort(
- {"page_num_int": {"order": "asc", "unmapped_type": "float",
- "mode": "avg", "numeric_type": "double"}},
- {"top_int": {"order": "asc", "unmapped_type": "float",
- "mode": "avg", "numeric_type": "double"}},
- #{"create_time": {"order": "desc", "unmapped_type": "date"}},
- {"create_timestamp_flt": {
- "order": "desc", "unmapped_type": "float"}}
- )
-
- if qst:
- s = s.highlight_options(
- fragment_size=120,
- number_of_fragments=5,
- boundary_scanner_locale="zh-CN",
- boundary_scanner="SENTENCE",
- boundary_chars=",./;:\\!(),。?:!……()——、"
- )
- s = s.to_dict()
- q_vec = []
- if req.get("vector"):
- assert emb_mdl, "No embedding model selected"
- s["knn"] = self._vector(
- qst, emb_mdl, req.get(
- "similarity", 0.1), topk)
- s["knn"]["filter"] = bqry.to_dict()
- if not highlight and "highlight" in s:
- del s["highlight"]
- q_vec = s["knn"]["query_vector"]
- es_logger.info("【Q】: {}".format(json.dumps(s)))
- res = self.es.search(deepcopy(s), idxnms=idxnms, timeout="600s", src=src)
- es_logger.info("TOTAL: {}".format(self.es.getTotal(res)))
- if self.es.getTotal(res) == 0 and "knn" in s:
- bqry, _ = self.qryr.question(qst, min_match="10%")
- if req.get("doc_ids"):
- bqry = Q("bool", must=[])
- bqry = self._add_filters(bqry, req)
- s["query"] = bqry.to_dict()
- s["knn"]["filter"] = bqry.to_dict()
- s["knn"]["similarity"] = 0.17
- res = self.es.search(s, idxnms=idxnms, timeout="600s", src=src)
- es_logger.info("【Q】: {}".format(json.dumps(s)))
-
- kwds = set([])
- for k in keywords:
- kwds.add(k)
- for kk in rag_tokenizer.fine_grained_tokenize(k).split(" "):
- if len(kk) < 2:
- continue
- if kk in kwds:
- continue
- kwds.add(kk)
-
- aggs = self.getAggregation(res, "docnm_kwd")
-
+ matchDense = self.get_vector(qst, emb_mdl, topk, req.get("similarity", 0.1))
+ q_vec = matchDense.embedding_data
+ src.append(f"q_{len(q_vec)}_vec")
+
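+                # Fuse full-text and dense scores with a weighted sum; 0.05/0.95 presumably mirrors the 0.05 boost the old ES query gave the text match.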
+ fusionExpr = FusionExpr("weighted_sum", topk, {"weights": "0.05, 0.95"})
+ matchExprs = [matchText, matchDense, fusionExpr]
+
+ res = self.dataStore.search(src, highlightFields, filters, matchExprs, orderBy, offset, limit, idx_names, kb_ids)
+                total = self.dataStore.getTotal(res)
+ doc_store_logger.info("Dealer.search TOTAL: {}".format(total))
+
+ # If result is empty, try again with lower min_match
+ if total == 0:
+ matchText, _ = self.qryr.question(qst, min_match=0.1)
+ if "doc_ids" in filters:
+ del filters["doc_ids"]
+ matchDense.extra_options["similarity"] = 0.17
+ res = self.dataStore.search(src, highlightFields, filters, [matchText, matchDense, fusionExpr], orderBy, offset, limit, idx_names, kb_ids)
+                    total = self.dataStore.getTotal(res)
+ doc_store_logger.info("Dealer.search 2 TOTAL: {}".format(total))
+
+ for k in keywords:
+ kwds.add(k)
+ for kk in rag_tokenizer.fine_grained_tokenize(k).split(" "):
+ if len(kk) < 2:
+ continue
+ if kk in kwds:
+ continue
+ kwds.add(kk)
+
+ doc_store_logger.info(f"TOTAL: {total}")
+        ids = self.dataStore.getChunkIds(res)
+        keywords = list(kwds)
+ highlight = self.dataStore.getHighlight(res, keywords, "content_with_weight")
+ aggs = self.dataStore.getAggregation(res, "docnm_kwd")
return self.SearchResult(
- total=self.es.getTotal(res),
- ids=self.es.getDocIds(res),
+ total=total,
+ ids=ids,
query_vector=q_vec,
aggregation=aggs,
- highlight=self.getHighlight(res, keywords, "content_with_weight"),
- field=self.getFields(res, src),
- keywords=list(kwds)
+ highlight=highlight,
+ field=self.dataStore.getFields(res, src),
+ keywords=keywords
)
- def getAggregation(self, res, g):
- if not "aggregations" in res or "aggs_" + g not in res["aggregations"]:
- return
- bkts = res["aggregations"]["aggs_" + g]["buckets"]
- return [(b["key"], b["doc_count"]) for b in bkts]
-
- def getHighlight(self, res, keywords, fieldnm):
- ans = {}
- for d in res["hits"]["hits"]:
- hlts = d.get("highlight")
- if not hlts:
- continue
- txt = "...".join([a for a in list(hlts.items())[0][1]])
- if not is_english(txt.split(" ")):
- ans[d["_id"]] = txt
- continue
-
- txt = d["_source"][fieldnm]
- txt = re.sub(r"[\r\n]", " ", txt, flags=re.IGNORECASE|re.MULTILINE)
- txts = []
- for t in re.split(r"[.?!;\n]", txt):
- for w in keywords:
- t = re.sub(r"(^|[ .?/'\"\(\)!,:;-])(%s)([ .?/'\"\(\)!,:;-])"%re.escape(w), r"\1\2\3", t, flags=re.IGNORECASE|re.MULTILINE)
- if not re.search(r"[^<>]+", t, flags=re.IGNORECASE|re.MULTILINE): continue
- txts.append(t)
- ans[d["_id"]] = "...".join(txts) if txts else "...".join([a for a in list(hlts.items())[0][1]])
-
- return ans
-
- def getFields(self, sres, flds):
- res = {}
- if not flds:
- return {}
- for d in self.es.getSource(sres):
- m = {n: d.get(n) for n in flds if d.get(n) is not None}
- for n, v in m.items():
- if isinstance(v, type([])):
- m[n] = "\t".join([str(vv) if not isinstance(
- vv, list) else "\t".join([str(vvv) for vvv in vv]) for vv in v])
- continue
- if not isinstance(v, type("")):
- m[n] = str(m[n])
- #if n.find("tks") > 0:
- # m[n] = rmSpace(m[n])
-
- if m:
- res[d["id"]] = m
- return res
-
@staticmethod
def trans2floats(txt):
return [float(t) for t in txt.split("\t")]
@@ -260,7 +180,7 @@ def insert_citations(self, answer, chunks, chunk_v,
continue
idx.append(i)
pieces_.append(t)
- es_logger.info("{} => {}".format(answer, pieces_))
+ doc_store_logger.info("{} => {}".format(answer, pieces_))
if not pieces_:
return answer, set([])
@@ -281,7 +201,7 @@ def insert_citations(self, answer, chunks, chunk_v,
chunks_tks,
tkweight, vtweight)
mx = np.max(sim) * 0.99
- es_logger.info("{} SIM: {}".format(pieces_[i], mx))
+ doc_store_logger.info("{} SIM: {}".format(pieces_[i], mx))
if mx < thr:
continue
cites[idx[i]] = list(
@@ -309,9 +229,15 @@ def insert_citations(self, answer, chunks, chunk_v,
def rerank(self, sres, query, tkweight=0.3,
vtweight=0.7, cfield="content_ltks"):
_, keywords = self.qryr.question(query)
- ins_embd = [
- Dealer.trans2floats(
- sres.field[i].get("q_%d_vec" % len(sres.query_vector), "\t".join(["0"] * len(sres.query_vector)))) for i in sres.ids]
+ vector_size = len(sres.query_vector)
+ vector_column = f"q_{vector_size}_vec"
+ zero_vector = [0.0] * vector_size
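+        # Stored vectors may come back as tab-separated strings; fall back to a zero vector when a chunk has none.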
+ ins_embd = []
+ for chunk_id in sres.ids:
+ vector = sres.field[chunk_id].get(vector_column, zero_vector)
+ if isinstance(vector, str):
+ vector = [float(v) for v in vector.split("\t")]
+ ins_embd.append(vector)
if not ins_embd:
return [], [], []
@@ -377,7 +303,7 @@ def retrieval(self, question, embd_mdl, tenant_ids, kb_ids, page, page_size, sim
if isinstance(tenant_ids, str):
tenant_ids = tenant_ids.split(",")
- sres = self.search(req, [index_name(tid) for tid in tenant_ids], embd_mdl, highlight)
+ sres = self.search(req, [index_name(tid) for tid in tenant_ids], kb_ids, embd_mdl, highlight)
ranks["total"] = sres.total
if page <= RERANK_PAGE_LIMIT:
@@ -393,6 +319,8 @@ def retrieval(self, question, embd_mdl, tenant_ids, kb_ids, page, page_size, sim
idx = list(range(len(sres.ids)))
dim = len(sres.query_vector)
+ vector_column = f"q_{dim}_vec"
+ zero_vector = [0.0] * dim
for i in idx:
if sim[i] < similarity_threshold:
break
@@ -401,34 +329,32 @@ def retrieval(self, question, embd_mdl, tenant_ids, kb_ids, page, page_size, sim
continue
break
id = sres.ids[i]
- dnm = sres.field[id]["docnm_kwd"]
- did = sres.field[id]["doc_id"]
+ chunk = sres.field[id]
+ dnm = chunk["docnm_kwd"]
+ did = chunk["doc_id"]
+ position_list = chunk.get("position_list", "[]")
+ if not position_list:
+ position_list = "[]"
d = {
"chunk_id": id,
- "content_ltks": sres.field[id]["content_ltks"],
- "content_with_weight": sres.field[id]["content_with_weight"],
- "doc_id": sres.field[id]["doc_id"],
+ "content_ltks": chunk["content_ltks"],
+ "content_with_weight": chunk["content_with_weight"],
+ "doc_id": chunk["doc_id"],
"docnm_kwd": dnm,
- "kb_id": sres.field[id]["kb_id"],
- "important_kwd": sres.field[id].get("important_kwd", []),
- "img_id": sres.field[id].get("img_id", ""),
+ "kb_id": chunk["kb_id"],
+ "important_kwd": chunk.get("important_kwd", []),
+ "image_id": chunk.get("img_id", ""),
"similarity": sim[i],
"vector_similarity": vsim[i],
"term_similarity": tsim[i],
- "vector": self.trans2floats(sres.field[id].get("q_%d_vec" % dim, "\t".join(["0"] * dim))),
- "positions": sres.field[id].get("position_int", "").split("\t")
+ "vector": chunk.get(vector_column, zero_vector),
+ "positions": json.loads(position_list)
}
if highlight:
if id in sres.highlight:
d["highlight"] = rmSpace(sres.highlight[id])
else:
d["highlight"] = d["content_with_weight"]
- if len(d["positions"]) % 5 == 0:
- poss = []
- for i in range(0, len(d["positions"]), 5):
- poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
- float(d["positions"][i + 3]), float(d["positions"][i + 4])])
- d["positions"] = poss
ranks["chunks"].append(d)
if dnm not in ranks["doc_aggs"]:
ranks["doc_aggs"][dnm] = {"doc_id": did, "count": 0}
@@ -442,39 +368,11 @@ def retrieval(self, question, embd_mdl, tenant_ids, kb_ids, page, page_size, sim
return ranks
def sql_retrieval(self, sql, fetch_size=128, format="json"):
- from api.settings import chat_logger
- sql = re.sub(r"[ `]+", " ", sql)
- sql = sql.replace("%", "")
- es_logger.info(f"Get es sql: {sql}")
- replaces = []
- for r in re.finditer(r" ([a-z_]+_l?tks)( like | ?= ?)'([^']+)'", sql):
- fld, v = r.group(1), r.group(3)
- match = " MATCH({}, '{}', 'operator=OR;minimum_should_match=30%') ".format(
- fld, rag_tokenizer.fine_grained_tokenize(rag_tokenizer.tokenize(v)))
- replaces.append(
- ("{}{}'{}'".format(
- r.group(1),
- r.group(2),
- r.group(3)),
- match))
-
- for p, r in replaces:
- sql = sql.replace(p, r, 1)
- chat_logger.info(f"To es: {sql}")
-
- try:
- tbl = self.es.sql(sql, fetch_size, format)
- return tbl
- except Exception as e:
- chat_logger.error(f"SQL failure: {sql} =>" + str(e))
- return {"error": str(e)}
-
- def chunk_list(self, doc_id, tenant_id, max_count=1024, fields=["docnm_kwd", "content_with_weight", "img_id"]):
- s = Search()
- s = s.query(Q("match", doc_id=doc_id))[0:max_count]
- s = s.to_dict()
- es_res = self.es.search(s, idxnms=index_name(tenant_id), timeout="600s", src=fields)
- res = []
- for index, chunk in enumerate(es_res['hits']['hits']):
- res.append({fld: chunk['_source'].get(fld) for fld in fields})
- return res
+ tbl = self.dataStore.sql(sql, fetch_size, format)
+ return tbl
+
+ def chunk_list(self, doc_id: str, tenant_id: str, kb_ids: list[str], max_count=1024, fields=["docnm_kwd", "content_with_weight", "img_id"]):
+ condition = {"doc_id": doc_id}
+ res = self.dataStore.search(fields, [], condition, [], OrderByExpr(), 0, max_count, index_name(tenant_id), kb_ids)
+ dict_chunks = self.dataStore.getFields(res, fields)
+ return dict_chunks.values()
diff --git a/rag/settings.py b/rag/settings.py
index 8c88c4067b1..74165822fed 100644
--- a/rag/settings.py
+++ b/rag/settings.py
@@ -25,12 +25,13 @@
SUBPROCESS_STD_LOG_NAME = "std.log"
ES = get_base_config("es", {})
+INFINITY = get_base_config("infinity", {"uri": "infinity:23817"})
AZURE = get_base_config("azure", {})
S3 = get_base_config("s3", {})
MINIO = decrypt_database_config(name="minio")
try:
REDIS = decrypt_database_config(name="redis")
-except Exception as e:
+except Exception:
REDIS = {}
pass
DOC_MAXIMUM_SIZE = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024))
@@ -44,7 +45,7 @@
# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
LoggerFactory.LEVEL = 30
-es_logger = getLogger("es")
+doc_store_logger = getLogger("doc_store")
minio_logger = getLogger("minio")
s3_logger = getLogger("s3")
azure_logger = getLogger("azure")
@@ -53,7 +54,7 @@
database_logger = getLogger("database")
formatter = logging.Formatter("%(asctime)-15s %(levelname)-8s (%(process)d) %(message)s")
-for logger in [es_logger, minio_logger, s3_logger, azure_logger, cron_logger, chunk_logger, database_logger]:
+for logger in [doc_store_logger, minio_logger, s3_logger, azure_logger, cron_logger, chunk_logger, database_logger]:
logger.setLevel(logging.INFO)
for handler in logger.handlers:
handler.setFormatter(fmt=formatter)
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index 5e65f7c840f..15b5aaf33b7 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -31,7 +31,6 @@
import numpy as np
import pandas as pd
-from elasticsearch_dsl import Q
from api.db import LLMType, ParserType
from api.db.services.dialog_service import keyword_extraction, question_proposal
@@ -39,8 +38,7 @@
from api.db.services.llm_service import LLMBundle
from api.db.services.task_service import TaskService
from api.db.services.file2document_service import File2DocumentService
-from api.settings import retrievaler
-from api.utils.file_utils import get_project_base_directory
+from api.settings import retrievaler, docStoreConn
from api.db.db_models import close_connection
from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive, one, audio, knowledge_graph, email
from rag.nlp import search, rag_tokenizer
@@ -48,7 +46,6 @@
from rag.settings import database_logger, SVR_QUEUE_NAME
from rag.settings import cron_logger, DOC_MAXIMUM_SIZE
from rag.utils import rmSpace, num_tokens_from_string
-from rag.utils.es_conn import ELASTICSEARCH
from rag.utils.redis_conn import REDIS_CONN, Payload
from rag.utils.storage_factory import STORAGE_IMPL
@@ -126,7 +123,7 @@ def collect():
return pd.DataFrame()
tasks = TaskService.get_tasks(msg["id"])
if not tasks:
- cron_logger.warn("{} empty task!".format(msg["id"]))
+ cron_logger.warning("{} empty task!".format(msg["id"]))
return []
tasks = pd.DataFrame(tasks)
@@ -187,7 +184,7 @@ def build(row):
docs = []
doc = {
"doc_id": row["doc_id"],
- "kb_id": [str(row["kb_id"])]
+ "kb_id": str(row["kb_id"])
}
el = 0
for ck in cks:
@@ -196,10 +193,14 @@ def build(row):
md5 = hashlib.md5()
md5.update((ck["content_with_weight"] +
str(d["doc_id"])).encode("utf-8"))
- d["_id"] = md5.hexdigest()
+ d["id"] = md5.hexdigest()
d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
if not d.get("image"):
+ d["img_id"] = ""
+ d["page_num_list"] = json.dumps([])
+ d["position_list"] = json.dumps([])
+ d["top_list"] = json.dumps([])
docs.append(d)
continue
@@ -211,13 +212,13 @@ def build(row):
d["image"].save(output_buffer, format='JPEG')
st = timer()
- STORAGE_IMPL.put(row["kb_id"], d["_id"], output_buffer.getvalue())
+ STORAGE_IMPL.put(row["kb_id"], d["id"], output_buffer.getvalue())
el += timer() - st
except Exception as e:
cron_logger.error(str(e))
traceback.print_exc()
- d["img_id"] = "{}-{}".format(row["kb_id"], d["_id"])
+ d["img_id"] = "{}-{}".format(row["kb_id"], d["id"])
del d["image"]
docs.append(d)
cron_logger.info("MINIO PUT({}):{}".format(row["name"], el))
@@ -245,12 +246,9 @@ def build(row):
return docs
-def init_kb(row):
+def init_kb(row, vector_size: int):
idxnm = search.index_name(row["tenant_id"])
- if ELASTICSEARCH.indexExist(idxnm):
- return
- return ELASTICSEARCH.createIdx(idxnm, json.load(
- open(os.path.join(get_project_base_directory(), "conf", "mapping.json"), "r")))
+ return docStoreConn.createIdx(idxnm, row["kb_id"], vector_size)
def embedding(docs, mdl, parser_config=None, callback=None):
@@ -288,17 +286,20 @@ def embedding(docs, mdl, parser_config=None, callback=None):
cnts) if len(tts) == len(cnts) else cnts
assert len(vects) == len(docs)
+ vector_size = 0
for i, d in enumerate(docs):
v = vects[i].tolist()
+ vector_size = len(v)
d["q_%d_vec" % len(v)] = v
- return tk_count
+ return tk_count, vector_size
def run_raptor(row, chat_mdl, embd_mdl, callback=None):
vts, _ = embd_mdl.encode(["ok"])
- vctr_nm = "q_%d_vec" % len(vts[0])
+ vector_size = len(vts[0])
+ vctr_nm = "q_%d_vec" % vector_size
chunks = []
- for d in retrievaler.chunk_list(row["doc_id"], row["tenant_id"], fields=["content_with_weight", vctr_nm]):
+ for d in retrievaler.chunk_list(row["doc_id"], row["tenant_id"], [str(row["kb_id"])], fields=["content_with_weight", vctr_nm]):
chunks.append((d["content_with_weight"], np.array(d[vctr_nm])))
raptor = Raptor(
@@ -323,7 +324,7 @@ def run_raptor(row, chat_mdl, embd_mdl, callback=None):
d = copy.deepcopy(doc)
md5 = hashlib.md5()
md5.update((content + str(d["doc_id"])).encode("utf-8"))
- d["_id"] = md5.hexdigest()
+ d["id"] = md5.hexdigest()
d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
d[vctr_nm] = vctr.tolist()
@@ -332,7 +333,7 @@ def run_raptor(row, chat_mdl, embd_mdl, callback=None):
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
res.append(d)
tk_count += num_tokens_from_string(content)
- return res, tk_count
+ return res, tk_count, vector_size
def main():
@@ -352,7 +353,7 @@ def main():
if r.get("task_type", "") == "raptor":
try:
chat_mdl = LLMBundle(r["tenant_id"], LLMType.CHAT, llm_name=r["llm_id"], lang=r["language"])
- cks, tk_count = run_raptor(r, chat_mdl, embd_mdl, callback)
+ cks, tk_count, vector_size = run_raptor(r, chat_mdl, embd_mdl, callback)
except Exception as e:
callback(-1, msg=str(e))
cron_logger.error(str(e))
@@ -373,7 +374,7 @@ def main():
len(cks))
st = timer()
try:
- tk_count = embedding(cks, embd_mdl, r["parser_config"], callback)
+ tk_count, vector_size = embedding(cks, embd_mdl, r["parser_config"], callback)
except Exception as e:
callback(-1, "Embedding error:{}".format(str(e)))
cron_logger.error(str(e))
@@ -381,26 +382,25 @@ def main():
cron_logger.info("Embedding elapsed({}): {:.2f}".format(r["name"], timer() - st))
callback(msg="Finished embedding({:.2f})! Start to build index!".format(timer() - st))
- init_kb(r)
- chunk_count = len(set([c["_id"] for c in cks]))
+ # cron_logger.info(f"task_executor init_kb index {search.index_name(r["tenant_id"])} embd_mdl {embd_mdl.llm_name} vector length {vector_size}")
+ init_kb(r, vector_size)
+ chunk_count = len(set([c["id"] for c in cks]))
st = timer()
es_r = ""
es_bulk_size = 4
for b in range(0, len(cks), es_bulk_size):
- es_r = ELASTICSEARCH.bulk(cks[b:b + es_bulk_size], search.index_name(r["tenant_id"]))
+ es_r = docStoreConn.insert(cks[b:b + es_bulk_size], search.index_name(r["tenant_id"]), r["kb_id"])
if b % 128 == 0:
callback(prog=0.8 + 0.1 * (b + 1) / len(cks), msg="")
cron_logger.info("Indexing elapsed({}): {:.2f}".format(r["name"], timer() - st))
if es_r:
callback(-1, "Insert chunk error, detail info please check ragflow-logs/api/cron_logger.log. Please also check ES status!")
- ELASTICSEARCH.deleteByQuery(
- Q("match", doc_id=r["doc_id"]), idxnm=search.index_name(r["tenant_id"]))
- cron_logger.error(str(es_r))
+ docStoreConn.delete({"doc_id": r["doc_id"]}, search.index_name(r["tenant_id"]), r["kb_id"])
+ cron_logger.error('Insert chunk error: ' + str(es_r))
else:
if TaskService.do_cancel(r["id"]):
- ELASTICSEARCH.deleteByQuery(
- Q("match", doc_id=r["doc_id"]), idxnm=search.index_name(r["tenant_id"]))
+ docStoreConn.delete({"doc_id": r["doc_id"]}, search.index_name(r["tenant_id"]), r["kb_id"])
continue
callback(1., "Done!")
DocumentService.increment_chunk_num(
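
Condensed sketch (illustrative only, error handling and progress callbacks trimmed) of the rewritten indexing path in `main()`:

```python
# embedding() now also reports the embedding dimension; init_kb() forwards it so
# the doc store can create a correctly sized vector column, and bulk insertion
# plus rollback go through docStoreConn instead of ELASTICSEARCH.
tk_count, vector_size = embedding(cks, embd_mdl, r["parser_config"], callback)
init_kb(r, vector_size)              # docStoreConn.createIdx(index_name, kb_id, vector_size)

idxnm = search.index_name(r["tenant_id"])
for b in range(0, len(cks), 4):      # es_bulk_size
    err = docStoreConn.insert(cks[b:b + 4], idxnm, r["kb_id"])
    if err:                          # non-empty error list: drop this document's chunks
        docStoreConn.delete({"doc_id": r["doc_id"]}, idxnm, r["kb_id"])
        break
```
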
diff --git a/rag/utils/doc_store_conn.py b/rag/utils/doc_store_conn.py
new file mode 100644
index 00000000000..b550e5c1a9e
--- /dev/null
+++ b/rag/utils/doc_store_conn.py
@@ -0,0 +1,251 @@
+from abc import ABC, abstractmethod
+from typing import Optional, Union
+from dataclasses import dataclass
+import numpy as np
+import polars as pl
+from typing import List, Dict
+
+DEFAULT_MATCH_VECTOR_TOPN = 10
+DEFAULT_MATCH_SPARSE_TOPN = 10
+VEC = Union[list, np.ndarray]
+
+
+@dataclass
+class SparseVector:
+ indices: list[int]
+ values: Union[list[float], list[int], None] = None
+
+ def __post_init__(self):
+ assert (self.values is None) or (len(self.indices) == len(self.values))
+
+ def to_dict_old(self):
+ d = {"indices": self.indices}
+ if self.values is not None:
+ d["values"] = self.values
+ return d
+
+ def to_dict(self):
+ if self.values is None:
+ raise ValueError("SparseVector.values is None")
+ result = {}
+ for i, v in zip(self.indices, self.values):
+ result[str(i)] = v
+ return result
+
+ @staticmethod
+ def from_dict(d):
+ return SparseVector(d["indices"], d.get("values"))
+
+ def __str__(self):
+ return f"SparseVector(indices={self.indices}{'' if self.values is None else f', values={self.values}'})"
+
+ def __repr__(self):
+ return str(self)
+
+
+class MatchTextExpr(ABC):
+ def __init__(
+ self,
+ fields: str,
+ matching_text: str,
+ topn: int,
+ extra_options: Optional[dict] = None,
+ ):
+ self.fields = fields
+ self.matching_text = matching_text
+ self.topn = topn
+ self.extra_options = extra_options if extra_options is not None else {}
+
+
+class MatchDenseExpr(ABC):
+ def __init__(
+ self,
+ vector_column_name: str,
+ embedding_data: VEC,
+ embedding_data_type: str,
+ distance_type: str,
+ topn: int = DEFAULT_MATCH_VECTOR_TOPN,
+ extra_options: Optional[dict] = None,
+ ):
+ self.vector_column_name = vector_column_name
+ self.embedding_data = embedding_data
+ self.embedding_data_type = embedding_data_type
+ self.distance_type = distance_type
+ self.topn = topn
+ self.extra_options = extra_options if extra_options is not None else {}
+
+
+class MatchSparseExpr(ABC):
+ def __init__(
+ self,
+ vector_column_name: str,
+ sparse_data: SparseVector | dict,
+ distance_type: str,
+ topn: int,
+ opt_params: Optional[dict] = None,
+ ):
+ self.vector_column_name = vector_column_name
+ self.sparse_data = sparse_data
+ self.distance_type = distance_type
+ self.topn = topn
+ self.opt_params = opt_params
+
+
+class MatchTensorExpr(ABC):
+ def __init__(
+ self,
+ column_name: str,
+ query_data: VEC,
+ query_data_type: str,
+ topn: int,
+ extra_option: Optional[dict] = None,
+ ):
+ self.column_name = column_name
+ self.query_data = query_data
+ self.query_data_type = query_data_type
+ self.topn = topn
+ self.extra_option = extra_option
+
+
+class FusionExpr(ABC):
+ def __init__(self, method: str, topn: int, fusion_params: Optional[dict] = None):
+ self.method = method
+ self.topn = topn
+ self.fusion_params = fusion_params
+
+
+MatchExpr = Union[
+ MatchTextExpr, MatchDenseExpr, MatchSparseExpr, MatchTensorExpr, FusionExpr
+]
+
+
+class OrderByExpr(ABC):
+ def __init__(self):
+ self.fields = list()
+ def asc(self, field: str):
+ self.fields.append((field, 0))
+ return self
+ def desc(self, field: str):
+ self.fields.append((field, 1))
+ return self
+ def fields(self):
+ return self.fields
+
+class DocStoreConnection(ABC):
+ """
+ Database operations
+ """
+
+ @abstractmethod
+ def dbType(self) -> str:
+ """
+ Return the type of the database.
+ """
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def health(self) -> dict:
+ """
+ Return the health status of the database.
+ """
+ raise NotImplementedError("Not implemented")
+
+ """
+ Table operations
+ """
+
+ @abstractmethod
+ def createIdx(self, indexName: str, knowledgebaseId: str, vectorSize: int):
+ """
+ Create an index with given name
+ """
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def deleteIdx(self, indexName: str, knowledgebaseId: str):
+ """
+ Delete an index with given name
+ """
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def indexExist(self, indexName: str, knowledgebaseId: str) -> bool:
+ """
+ Check if an index with given name exists
+ """
+ raise NotImplementedError("Not implemented")
+
+ """
+ CRUD operations
+ """
+
+ @abstractmethod
+ def search(
+ self, selectFields: list[str], highlight: list[str], condition: dict, matchExprs: list[MatchExpr], orderBy: OrderByExpr, offset: int, limit: int, indexNames: str|list[str], knowledgebaseIds: list[str]
+ ) -> list[dict] | pl.DataFrame:
+ """
+ Search with given conjunctive equivalent filtering condition and return all fields of matched documents
+ """
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None:
+ """
+ Get single chunk with given id
+ """
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def insert(self, rows: list[dict], indexName: str, knowledgebaseId: str) -> list[str]:
+ """
+ Update or insert a bulk of rows
+ """
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str) -> bool:
+ """
+ Update rows with given conjunctive equivalent filtering condition
+ """
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def delete(self, condition: dict, indexName: str, knowledgebaseId: str) -> int:
+ """
+ Delete rows with given conjunctive equivalent filtering condition
+ """
+ raise NotImplementedError("Not implemented")
+
+ """
+ Helper functions for search result
+ """
+
+ @abstractmethod
+ def getTotal(self, res):
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def getChunkIds(self, res):
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def getFields(self, res, fields: List[str]) -> Dict[str, dict]:
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def getHighlight(self, res, keywords: List[str], fieldnm: str):
+ raise NotImplementedError("Not implemented")
+
+ @abstractmethod
+ def getAggregation(self, res, fieldnm: str):
+ raise NotImplementedError("Not implemented")
+
+ """
+ SQL
+ """
+ @abstractmethod
+ def sql(self, sql: str, fetch_size: int, format: str):
+ """
+ Run the sql generated by text-to-sql
+ """
+ raise NotImplementedError("Not implemented")
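
An illustrative hybrid query built against this interface (a sketch, not part of the patch): field names, weights and options are assumptions, but the expression ordering matches what `ESConnection.search()` expects for `weighted_sum` fusion.

```python
from rag.utils.doc_store_conn import (
    MatchTextExpr, MatchDenseExpr, FusionExpr, OrderByExpr,
)

def hybrid_search(store, index_name: str, kb_ids: list[str], question: str, q_vec: list[float]):
    # Full-text leg; field names are passed as a list, as both backends expect.
    text = MatchTextExpr(fields=["title_tks", "content_ltks"],
                         matching_text=question, topn=1024,
                         extra_options={"minimum_should_match": 0.3})
    # Dense leg; the vector column is named after the embedding size, e.g. q_1024_vec.
    dense = MatchDenseExpr(vector_column_name=f"q_{len(q_vec)}_vec",
                           embedding_data=q_vec, embedding_data_type="float",
                           distance_type="cosine", topn=1024,
                           extra_options={"similarity": 0.1})
    # Weighted fusion: "text_weight, vector_weight", parsed by ESConnection.search().
    fusion = FusionExpr("weighted_sum", 1024, {"weights": "0.05, 0.95"})
    return store.search(["id", "content_with_weight", "docnm_kwd"], [],
                        {"kb_id": kb_ids}, [text, dense, fusion],
                        OrderByExpr(), 0, 128, index_name, kb_ids)
```
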
diff --git a/rag/utils/es_conn.py b/rag/utils/es_conn.py
index d39e263f7f8..9b38d895e50 100644
--- a/rag/utils/es_conn.py
+++ b/rag/utils/es_conn.py
@@ -1,29 +1,29 @@
import re
import json
import time
-import copy
+import os
+from typing import List, Dict
import elasticsearch
-from elastic_transport import ConnectionTimeout
+import copy
from elasticsearch import Elasticsearch
-from elasticsearch_dsl import UpdateByQuery, Search, Index
-from rag.settings import es_logger
+from elasticsearch_dsl import UpdateByQuery, Q, Search, Index
+from elastic_transport import ConnectionTimeout
+from rag.settings import doc_store_logger
from rag import settings
from rag.utils import singleton
+from api.utils.file_utils import get_project_base_directory
+import polars as pl
+from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, FusionExpr
+from rag.nlp import is_english, rag_tokenizer
-es_logger.info("Elasticsearch version: "+str(elasticsearch.__version__))
+doc_store_logger.info("Elasticsearch sdk version: "+str(elasticsearch.__version__))
@singleton
-class ESConnection:
+class ESConnection(DocStoreConnection):
def __init__(self):
self.info = {}
- self.conn()
- self.idxnm = settings.ES.get("index_name", "")
- if not self.es.ping():
- raise Exception("Can't connect to ES cluster")
-
- def conn(self):
for _ in range(10):
try:
self.es = Elasticsearch(
@@ -34,390 +34,317 @@ def conn(self):
)
if self.es:
self.info = self.es.info()
- es_logger.info("Connect to es.")
+ doc_store_logger.info("Connect to es.")
break
except Exception as e:
- es_logger.error("Fail to connect to es: " + str(e))
+ doc_store_logger.error("Fail to connect to es: " + str(e))
time.sleep(1)
-
- def version(self):
+ if not self.es.ping():
+ raise Exception("Can't connect to ES cluster")
v = self.info.get("version", {"number": "5.6"})
v = v["number"].split(".")[0]
- return int(v) >= 7
-
- def health(self):
- return dict(self.es.cluster.health())
-
- def upsert(self, df, idxnm=""):
- res = []
- for d in df:
- id = d["id"]
- del d["id"]
- d = {"doc": d, "doc_as_upsert": "true"}
- T = False
- for _ in range(10):
- try:
- if not self.version():
- r = self.es.update(
- index=(
- self.idxnm if not idxnm else idxnm),
- body=d,
- id=id,
- doc_type="doc",
- refresh=True,
- retry_on_conflict=100)
- else:
- r = self.es.update(
- index=(
- self.idxnm if not idxnm else idxnm),
- body=d,
- id=id,
- refresh=True,
- retry_on_conflict=100)
- es_logger.info("Successfully upsert: %s" % id)
- T = True
- break
- except Exception as e:
- es_logger.warning("Fail to index: " +
- json.dumps(d, ensure_ascii=False) + str(e))
- if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE):
- time.sleep(3)
- continue
- self.conn()
- T = False
-
- if not T:
- res.append(d)
- es_logger.error(
- "Fail to index: " +
- re.sub(
- "[\r\n]",
- "",
- json.dumps(
- d,
- ensure_ascii=False)))
- d["id"] = id
- d["_index"] = self.idxnm
-
- if not res:
+ if int(v) < 8:
+ raise Exception(f"ES version must be greater than or equal to 8, current version: {v}")
+ fp_mapping = os.path.join(get_project_base_directory(), "conf", "mapping.json")
+ if not os.path.exists(fp_mapping):
+ raise Exception(f"Mapping file not found at {fp_mapping}")
+ self.mapping = json.load(open(fp_mapping, "r"))
+
+ """
+ Database operations
+ """
+ def dbType(self) -> str:
+ return "elasticsearch"
+
+ def health(self) -> dict:
+ health_dict = dict(self.es.cluster.health())
+ health_dict["type"] = "elasticsearch"
+ return health_dict
+
+ """
+ Table operations
+ """
+ def createIdx(self, indexName: str, knowledgebaseId: str, vectorSize: int):
+ if self.indexExist(indexName, knowledgebaseId):
return True
- return False
-
- def bulk(self, df, idx_nm=None):
- ids, acts = {}, []
- for d in df:
- id = d["id"] if "id" in d else d["_id"]
- ids[id] = copy.deepcopy(d)
- ids[id]["_index"] = self.idxnm if not idx_nm else idx_nm
- if "id" in d:
- del d["id"]
- if "_id" in d:
- del d["_id"]
- acts.append(
- {"update": {"_id": id, "_index": ids[id]["_index"]}, "retry_on_conflict": 100})
- acts.append({"doc": d, "doc_as_upsert": "true"})
-
- res = []
- for _ in range(100):
- try:
- if elasticsearch.__version__[0] < 8:
- r = self.es.bulk(
- index=(
- self.idxnm if not idx_nm else idx_nm),
- body=acts,
- refresh=False,
- timeout="600s")
- else:
- r = self.es.bulk(index=(self.idxnm if not idx_nm else
- idx_nm), operations=acts,
- refresh=False, timeout="600s")
- if re.search(r"False", str(r["errors"]), re.IGNORECASE):
- return res
-
- for it in r["items"]:
- if "error" in it["update"]:
- res.append(str(it["update"]["_id"]) +
- ":" + str(it["update"]["error"]))
-
- return res
- except Exception as e:
- es_logger.warn("Fail to bulk: " + str(e))
- if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE):
- time.sleep(3)
- continue
- self.conn()
-
- return res
-
- def bulk4script(self, df):
- ids, acts = {}, []
- for d in df:
- id = d["id"]
- ids[id] = copy.deepcopy(d["raw"])
- acts.append({"update": {"_id": id, "_index": self.idxnm}})
- acts.append(d["script"])
- es_logger.info("bulk upsert: %s" % id)
-
- res = []
- for _ in range(10):
- try:
- if not self.version():
- r = self.es.bulk(
- index=self.idxnm,
- body=acts,
- refresh=False,
- timeout="600s",
- doc_type="doc")
- else:
- r = self.es.bulk(
- index=self.idxnm,
- body=acts,
- refresh=False,
- timeout="600s")
- if re.search(r"False", str(r["errors"]), re.IGNORECASE):
- return res
-
- for it in r["items"]:
- if "error" in it["update"]:
- res.append(str(it["update"]["_id"]))
-
- return res
- except Exception as e:
- es_logger.warning("Fail to bulk: " + str(e))
- if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE):
- time.sleep(3)
- continue
- self.conn()
+ try:
+ from elasticsearch.client import IndicesClient
+ return IndicesClient(self.es).create(index=indexName,
+ settings=self.mapping["settings"],
+ mappings=self.mapping["mappings"])
+ except Exception as e:
+ doc_store_logger.error("ES create index error %s ----%s" % (indexName, str(e)))
- return res
+ def deleteIdx(self, indexName: str, knowledgebaseId: str):
+ try:
+ return self.es.indices.delete(index=indexName, allow_no_indices=True)
+ except Exception as e:
+ doc_store_logger.error("ES delete index error %s ----%s" % (indexName, str(e)))
- def rm(self, d):
- for _ in range(10):
+ def indexExist(self, indexName: str, knowledgebaseId: str) -> bool:
+ s = Index(indexName, self.es)
+ for i in range(3):
try:
- if not self.version():
- r = self.es.delete(
- index=self.idxnm,
- id=d["id"],
- doc_type="doc",
- refresh=True)
- else:
- r = self.es.delete(
- index=self.idxnm,
- id=d["id"],
- refresh=True,
- doc_type="_doc")
- es_logger.info("Remove %s" % d["id"])
- return True
+ return s.exists()
except Exception as e:
- es_logger.warn("Fail to delete: " + str(d) + str(e))
- if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE):
- time.sleep(3)
+ doc_store_logger.error("ES indexExist: " + str(e))
+ if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
continue
- if re.search(r"(not_found)", str(e), re.IGNORECASE):
- return True
- self.conn()
-
- es_logger.error("Fail to delete: " + str(d))
-
return False
- def search(self, q, idxnms=None, src=False, timeout="2s"):
- if not isinstance(q, dict):
- q = Search().query(q).to_dict()
- if isinstance(idxnms, str):
- idxnms = idxnms.split(",")
+ """
+ CRUD operations
+ """
+ def search(self, selectFields: list[str], highlightFields: list[str], condition: dict, matchExprs: list[MatchExpr], orderBy: OrderByExpr, offset: int, limit: int, indexNames: str|list[str], knowledgebaseIds: list[str]) -> list[dict] | pl.DataFrame:
+ """
+ Refers to https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
+ """
+ if isinstance(indexNames, str):
+ indexNames = indexNames.split(",")
+ assert isinstance(indexNames, list) and len(indexNames) > 0
+ assert "_id" not in condition
+ s = Search()
+ bqry = None
+ vector_similarity_weight = 0.5
+ for m in matchExprs:
+ if isinstance(m, FusionExpr) and m.method=="weighted_sum" and "weights" in m.fusion_params:
+ assert len(matchExprs)==3 and isinstance(matchExprs[0], MatchTextExpr) and isinstance(matchExprs[1], MatchDenseExpr) and isinstance(matchExprs[2], FusionExpr)
+ weights = m.fusion_params["weights"]
+ vector_similarity_weight = float(weights.split(",")[1])
+ for m in matchExprs:
+ if isinstance(m, MatchTextExpr):
+ minimum_should_match = "0%"
+ if "minimum_should_match" in m.extra_options:
+ minimum_should_match = str(int(m.extra_options["minimum_should_match"] * 100)) + "%"
+ bqry = Q("bool",
+ must=Q("query_string", fields=m.fields,
+ type="best_fields", query=m.matching_text,
+ minimum_should_match = minimum_should_match,
+ boost=1),
+ boost = 1.0 - vector_similarity_weight,
+ )
+ if condition:
+ for k, v in condition.items():
+ if not isinstance(k, str) or not v:
+ continue
+ if isinstance(v, list):
+ bqry.filter.append(Q("terms", **{k: v}))
+ elif isinstance(v, str) or isinstance(v, int):
+ bqry.filter.append(Q("term", **{k: v}))
+ else:
+ raise Exception(f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
+ elif isinstance(m, MatchDenseExpr):
+ assert(bqry is not None)
+ similarity = 0.0
+ if "similarity" in m.extra_options:
+ similarity = m.extra_options["similarity"]
+ s = s.knn(m.vector_column_name,
+ m.topn,
+ m.topn * 2,
+ query_vector = list(m.embedding_data),
+ filter = bqry.to_dict(),
+ similarity = similarity,
+ )
+ if matchExprs:
+ s.query = bqry
+ for field in highlightFields:
+ s = s.highlight(field)
+
+ if orderBy:
+ orders = list()
+ for field, order in orderBy.fields:
+ order = "asc" if order == 0 else "desc"
+ orders.append({field: {"order": order, "unmapped_type": "float",
+ "mode": "avg", "numeric_type": "double"}})
+ s = s.sort(*orders)
+
+ if limit > 0:
+ s = s[offset:limit]
+ q = s.to_dict()
+ doc_store_logger.info("ESConnection.search [Q]: " + json.dumps(q))
+
for i in range(3):
try:
- res = self.es.search(index=(self.idxnm if not idxnms else idxnms),
+ res = self.es.search(index=indexNames,
body=q,
- timeout=timeout,
+ timeout="600s",
# search_type="dfs_query_then_fetch",
track_total_hits=True,
- _source=src)
+ _source=True)
if str(res.get("timed_out", "")).lower() == "true":
raise Exception("Es Timeout.")
+ doc_store_logger.info("ESConnection.search res: " + str(res))
return res
except Exception as e:
- es_logger.error(
+ doc_store_logger.error(
"ES search exception: " +
str(e) +
- "【Q】:" +
+ "\n[Q]: " +
str(q))
if str(e).find("Timeout") > 0:
continue
raise e
- es_logger.error("ES search timeout for 3 times!")
+ doc_store_logger.error("ES search timeout for 3 times!")
raise Exception("ES search timeout.")
- def sql(self, sql, fetch_size=128, format="json", timeout="2s"):
- for i in range(3):
- try:
- res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format, request_timeout=timeout)
- return res
- except ConnectionTimeout as e:
- es_logger.error("Timeout【Q】:" + sql)
- continue
- except Exception as e:
- raise e
- es_logger.error("ES search timeout for 3 times!")
- raise ConnectionTimeout()
-
-
- def get(self, doc_id, idxnm=None):
+ def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None:
for i in range(3):
try:
- res = self.es.get(index=(self.idxnm if not idxnm else idxnm),
- id=doc_id)
+ res = self.es.get(index=(indexName),
+ id=chunkId, source=True,)
if str(res.get("timed_out", "")).lower() == "true":
raise Exception("Es Timeout.")
- return res
+ if not res.get("found"):
+ return None
+ chunk = res["_source"]
+ chunk["id"] = chunkId
+ return chunk
except Exception as e:
- es_logger.error(
+ doc_store_logger.error(
"ES get exception: " +
str(e) +
- "【Q】:" +
- doc_id)
+ "[Q]: " +
+ chunkId)
if str(e).find("Timeout") > 0:
continue
raise e
- es_logger.error("ES search timeout for 3 times!")
+ doc_store_logger.error("ES search timeout for 3 times!")
raise Exception("ES search timeout.")
- def updateByQuery(self, q, d):
- ubq = UpdateByQuery(index=self.idxnm).using(self.es).query(q)
- scripts = ""
- for k, v in d.items():
- scripts += "ctx._source.%s = params.%s;" % (str(k), str(k))
- ubq = ubq.script(source=scripts, params=d)
- ubq = ubq.params(refresh=False)
- ubq = ubq.params(slices=5)
- ubq = ubq.params(conflicts="proceed")
- for i in range(3):
- try:
- r = ubq.execute()
- return True
- except Exception as e:
- es_logger.error("ES updateByQuery exception: " +
- str(e) + "【Q】:" + str(q.to_dict()))
- if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
- continue
- self.conn()
-
- return False
+ def insert(self, documents: list[dict], indexName: str, knowledgebaseId: str) -> list[str]:
+ # Refers to https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
+ operations = []
+ for d in documents:
+ assert "_id" not in d
+ assert "id" in d
+ d_copy = copy.deepcopy(d)
+ meta_id = d_copy["id"]
+ del d_copy["id"]
+ operations.append(
+ {"index": {"_index": indexName, "_id": meta_id}})
+ operations.append(d_copy)
- def updateScriptByQuery(self, q, scripts, idxnm=None):
- ubq = UpdateByQuery(
- index=self.idxnm if not idxnm else idxnm).using(
- self.es).query(q)
- ubq = ubq.script(source=scripts)
- ubq = ubq.params(refresh=True)
- ubq = ubq.params(slices=5)
- ubq = ubq.params(conflicts="proceed")
- for i in range(3):
+ res = []
+ for _ in range(100):
try:
- r = ubq.execute()
- return True
- except Exception as e:
- es_logger.error("ES updateByQuery exception: " +
- str(e) + "【Q】:" + str(q.to_dict()))
- if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
- continue
- self.conn()
-
- return False
+ r = self.es.bulk(index=(indexName), operations=operations,
+ refresh=False, timeout="600s")
+ if re.search(r"False", str(r["errors"]), re.IGNORECASE):
+ return res
- def deleteByQuery(self, query, idxnm=""):
- for i in range(3):
- try:
- r = self.es.delete_by_query(
- index=idxnm if idxnm else self.idxnm,
- refresh = True,
- body=Search().query(query).to_dict())
- return True
+ for item in r["items"]:
+ for action in ["create", "delete", "index", "update"]:
+ if action in item and "error" in item[action]:
+ res.append(str(item[action]["_id"]) + ":" + str(item[action]["error"]))
+ return res
except Exception as e:
- es_logger.error("ES updateByQuery deleteByQuery: " +
- str(e) + "【Q】:" + str(query.to_dict()))
- if str(e).find("NotFoundError") > 0: return True
- if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
+ doc_store_logger.warning("Fail to bulk: " + str(e))
+ if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE):
+ time.sleep(3)
continue
+ return res
- return False
-
- def update(self, id, script, routing=None):
- for i in range(3):
- try:
- if not self.version():
- r = self.es.update(
- index=self.idxnm,
- id=id,
- body=json.dumps(
- script,
- ensure_ascii=False),
- doc_type="doc",
- routing=routing,
- refresh=False)
- else:
- r = self.es.update(index=self.idxnm, id=id, body=json.dumps(script, ensure_ascii=False),
- routing=routing, refresh=False) # , doc_type="_doc")
- return True
- except Exception as e:
- es_logger.error(
- "ES update exception: " + str(e) + " id:" + str(id) + ", version:" + str(self.version()) +
- json.dumps(script, ensure_ascii=False))
- if str(e).find("Timeout") > 0:
+ def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str) -> bool:
+ doc = copy.deepcopy(newValue)
+ del doc['id']
+ if "id" in condition and isinstance(condition["id"], str):
+ # update specific single document
+ chunkId = condition["id"]
+ for i in range(3):
+ try:
+ self.es.update(index=indexName, id=chunkId, doc=doc)
+ return True
+ except Exception as e:
+ doc_store_logger.error(
+ "ES update exception: " + str(e) + " id:" + str(id) +
+ json.dumps(newValue, ensure_ascii=False))
+ if str(e).find("Timeout") > 0:
+ continue
+ else:
+ # update unspecific maybe-multiple documents
+ bqry = Q("bool")
+ for k, v in condition.items():
+ if not isinstance(k, str) or not v:
continue
-
- return False
-
- def indexExist(self, idxnm):
- s = Index(idxnm if idxnm else self.idxnm, self.es)
- for i in range(3):
- try:
- return s.exists()
- except Exception as e:
- es_logger.error("ES updateByQuery indexExist: " + str(e))
- if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
+ if isinstance(v, list):
+ bqry.filter.append(Q("terms", **{k: v}))
+ elif isinstance(v, str) or isinstance(v, int):
+ bqry.filter.append(Q("term", **{k: v}))
+ else:
+ raise Exception(f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
+ scripts = []
+ for k, v in newValue.items():
+ if not isinstance(k, str) or not v:
continue
-
+ if isinstance(v, str):
+ scripts.append(f"ctx._source.{k} = '{v}'")
+ elif isinstance(v, int):
+ scripts.append(f"ctx._source.{k} = {v}")
+ else:
+ raise Exception(f"newValue `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str.")
+ ubq = UpdateByQuery(
+ index=indexName).using(
+ self.es).query(bqry)
+ ubq = ubq.script(source="; ".join(scripts))
+ ubq = ubq.params(refresh=True)
+ ubq = ubq.params(slices=5)
+ ubq = ubq.params(conflicts="proceed")
+ for i in range(3):
+ try:
+ _ = ubq.execute()
+ return True
+ except Exception as e:
+ doc_store_logger.error("ES update exception: " +
+ str(e) + "[Q]:" + str(bqry.to_dict()))
+ if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
+ continue
return False
- def docExist(self, docid, idxnm=None):
- for i in range(3):
+ def delete(self, condition: dict, indexName: str, knowledgebaseId: str) -> int:
+ qry = None
+ assert "_id" not in condition
+ if "id" in condition:
+ chunk_ids = condition["id"]
+ if not isinstance(chunk_ids, list):
+ chunk_ids = [chunk_ids]
+ qry = Q("ids", values=chunk_ids)
+ else:
+ qry = Q("bool")
+ for k, v in condition.items():
+ if isinstance(v, list):
+ qry.must.append(Q("terms", **{k: v}))
+ elif isinstance(v, str) or isinstance(v, int):
+ qry.must.append(Q("term", **{k: v}))
+ else:
+ raise Exception("Condition value must be int, str or list.")
+ doc_store_logger.info("ESConnection.delete [Q]: " + json.dumps(qry.to_dict()))
+ for _ in range(10):
try:
- return self.es.exists(index=(idxnm if idxnm else self.idxnm),
- id=docid)
+ res = self.es.delete_by_query(
+ index=indexName,
+ body = Search().query(qry).to_dict(),
+ refresh=True)
+ return res["deleted"]
except Exception as e:
- es_logger.error("ES Doc Exist: " + str(e))
- if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
+ doc_store_logger.warning("Fail to delete: " + str(filter) + str(e))
+ if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE):
+ time.sleep(3)
continue
- return False
-
- def createIdx(self, idxnm, mapping):
- try:
- if elasticsearch.__version__[0] < 8:
- return self.es.indices.create(idxnm, body=mapping)
- from elasticsearch.client import IndicesClient
- return IndicesClient(self.es).create(index=idxnm,
- settings=mapping["settings"],
- mappings=mapping["mappings"])
- except Exception as e:
- es_logger.error("ES create index error %s ----%s" % (idxnm, str(e)))
+ if re.search(r"(not_found)", str(e), re.IGNORECASE):
+ return 0
+ return 0
- def deleteIdx(self, idxnm):
- try:
- return self.es.indices.delete(idxnm, allow_no_indices=True)
- except Exception as e:
- es_logger.error("ES delete index error %s ----%s" % (idxnm, str(e)))
+ """
+ Helper functions for search result
+ """
def getTotal(self, res):
if isinstance(res["hits"]["total"], type({})):
return res["hits"]["total"]["value"]
return res["hits"]["total"]
- def getDocIds(self, res):
+ def getChunkIds(self, res):
return [d["_id"] for d in res["hits"]["hits"]]
- def getSource(self, res):
+ def __getSource(self, res):
rr = []
for d in res["hits"]["hits"]:
d["_source"]["id"] = d["_id"]
@@ -425,40 +352,89 @@ def getSource(self, res):
rr.append(d["_source"])
return rr
- def scrollIter(self, pagesize=100, scroll_time='2m', q={
- "query": {"match_all": {}}, "sort": [{"updated_at": {"order": "desc"}}]}):
- for _ in range(100):
- try:
- page = self.es.search(
- index=self.idxnm,
- scroll=scroll_time,
- size=pagesize,
- body=q,
- _source=None
- )
- break
- except Exception as e:
- es_logger.error("ES scrolling fail. " + str(e))
- time.sleep(3)
-
- sid = page['_scroll_id']
- scroll_size = page['hits']['total']["value"]
- es_logger.info("[TOTAL]%d" % scroll_size)
- # Start scrolling
- while scroll_size > 0:
- yield page["hits"]["hits"]
- for _ in range(100):
- try:
- page = self.es.scroll(scroll_id=sid, scroll=scroll_time)
- break
- except Exception as e:
- es_logger.error("ES scrolling fail. " + str(e))
- time.sleep(3)
+ def getFields(self, res, fields: List[str]) -> Dict[str, dict]:
+ res_fields = {}
+ if not fields:
+ return {}
+ for d in self.__getSource(res):
+ m = {n: d.get(n) for n in fields if d.get(n) is not None}
+ for n, v in m.items():
+ if isinstance(v, list):
+ m[n] = v
+ continue
+ if not isinstance(v, str):
+ m[n] = str(m[n])
+ # if n.find("tks") > 0:
+ # m[n] = rmSpace(m[n])
- # Update the scroll ID
- sid = page['_scroll_id']
- # Get the number of results that we returned in the last scroll
- scroll_size = len(page['hits']['hits'])
+ if m:
+ res_fields[d["id"]] = m
+ return res_fields
+ def getHighlight(self, res, keywords: List[str], fieldnm: str):
+ ans = {}
+ for d in res["hits"]["hits"]:
+ hlts = d.get("highlight")
+ if not hlts:
+ continue
+ txt = "...".join([a for a in list(hlts.items())[0][1]])
+ if not is_english(txt.split(" ")):
+ ans[d["_id"]] = txt
+ continue
+
+ txt = d["_source"][fieldnm]
+ txt = re.sub(r"[\r\n]", " ", txt, flags=re.IGNORECASE|re.MULTILINE)
+ txts = []
+ for t in re.split(r"[.?!;\n]", txt):
+ for w in keywords:
+ t = re.sub(r"(^|[ .?/'\"\(\)!,:;-])(%s)([ .?/'\"\(\)!,:;-])"%re.escape(w), r"\1\2\3", t, flags=re.IGNORECASE|re.MULTILINE)
+ if not re.search(r"[^<>]+", t, flags=re.IGNORECASE|re.MULTILINE):
+ continue
+ txts.append(t)
+ ans[d["_id"]] = "...".join(txts) if txts else "...".join([a for a in list(hlts.items())[0][1]])
+
+ return ans
+
+ def getAggregation(self, res, fieldnm: str):
+ agg_field = "aggs_" + fieldnm
+ if "aggregations" not in res or agg_field not in res["aggregations"]:
+ return list()
+ bkts = res["aggregations"][agg_field]["buckets"]
+ return [(b["key"], b["doc_count"]) for b in bkts]
+
+
+ """
+ SQL
+ """
+ def sql(self, sql: str, fetch_size: int, format: str):
+ doc_store_logger.info(f"ESConnection.sql get sql: {sql}")
+ sql = re.sub(r"[ `]+", " ", sql)
+ sql = sql.replace("%", "")
+ replaces = []
+ for r in re.finditer(r" ([a-z_]+_l?tks)( like | ?= ?)'([^']+)'", sql):
+ fld, v = r.group(1), r.group(3)
+ match = " MATCH({}, '{}', 'operator=OR;minimum_should_match=30%') ".format(
+ fld, rag_tokenizer.fine_grained_tokenize(rag_tokenizer.tokenize(v)))
+ replaces.append(
+ ("{}{}'{}'".format(
+ r.group(1),
+ r.group(2),
+ r.group(3)),
+ match))
+
+ for p, r in replaces:
+ sql = sql.replace(p, r, 1)
+ doc_store_logger.info(f"ESConnection.sql to es: {sql}")
-ELASTICSEARCH = ESConnection()
+ for i in range(3):
+ try:
+ res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format, request_timeout="2s")
+ return res
+ except ConnectionTimeout:
+ doc_store_logger.error("ESConnection.sql timeout [Q]: " + sql)
+ continue
+ except Exception as e:
+ doc_store_logger.error(f"ESConnection.sql failure: {sql} => " + str(e))
+ return None
+ doc_store_logger.error("ESConnection.sql timeout for 3 times!")
+ return None
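
For reference, a small sketch of how the generic condition dict used by `update()`/`delete()` maps onto an Elasticsearch bool query: list values become `terms` filters, scalars become `term` filters (the values below are made up, and `update()` uses `filter` clauses where `delete()` uses `must`).

```python
from elasticsearch_dsl import Q, Search

condition = {"doc_id": "doc0", "kb_id": ["kb1", "kb2"]}
qry = Q("bool")
for k, v in condition.items():
    if isinstance(v, list):
        qry.must.append(Q("terms", **{k: v}))   # list -> terms
    else:
        qry.must.append(Q("term", **{k: v}))    # scalar -> term
print(Search().query(qry).to_dict())            # body sent to delete_by_query
```
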
diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py
new file mode 100644
index 00000000000..5be47918639
--- /dev/null
+++ b/rag/utils/infinity_conn.py
@@ -0,0 +1,436 @@
+import os
+import re
+import json
+from typing import List, Dict
+import infinity
+from infinity.common import ConflictType, InfinityException
+from infinity.index import IndexInfo, IndexType
+from infinity.connection_pool import ConnectionPool
+from rag import settings
+from rag.settings import doc_store_logger
+from rag.utils import singleton
+import polars as pl
+from polars.series.series import Series
+from api.utils.file_utils import get_project_base_directory
+
+from rag.utils.doc_store_conn import (
+ DocStoreConnection,
+ MatchExpr,
+ MatchTextExpr,
+ MatchDenseExpr,
+ FusionExpr,
+ OrderByExpr,
+)
+
+
+def equivalent_condition_to_str(condition: dict) -> str:
+ assert "_id" not in condition
+ cond = list()
+ for k, v in condition.items():
+ if not isinstance(k, str) or not v:
+ continue
+ if isinstance(v, list):
+ inCond = list()
+ for item in v:
+ if isinstance(item, str):
+ inCond.append(f"'{item}'")
+ else:
+ inCond.append(str(item))
+ if inCond:
+ strInCond = ", ".join(inCond)
+ strInCond = f"{k} IN ({strInCond})"
+ cond.append(strInCond)
+ elif isinstance(v, str):
+ cond.append(f"{k}='{v}'")
+ else:
+ cond.append(f"{k}={str(v)}")
+ return " AND ".join(cond)
+
+
+@singleton
+class InfinityConnection(DocStoreConnection):
+ def __init__(self):
+ self.dbName = settings.INFINITY.get("db_name", "default_db")
+ infinity_uri = settings.INFINITY["uri"]
+ if ":" in infinity_uri:
+ host, port = infinity_uri.split(":")
+ infinity_uri = infinity.common.NetworkAddress(host, int(port))
+ self.connPool = ConnectionPool(infinity_uri)
+ doc_store_logger.info(f"Connected to infinity {infinity_uri}.")
+
+ """
+ Database operations
+ """
+
+ def dbType(self) -> str:
+ return "infinity"
+
+ def health(self) -> dict:
+ """
+ Return the health status of the database.
+ TODO: Infinity-sdk provides health() to wrap `show global variables` and `show tables`
+ """
+ inf_conn = self.connPool.get_conn()
+ res = inf_conn.show_current_node()
+ self.connPool.release_conn(inf_conn)
+ color = "green" if res.error_code == 0 else "red"
+ res2 = {
+ "type": "infinity",
+ "status": f"{res.role} {color}",
+ "error": res.error_msg,
+ }
+ return res2
+
+ """
+ Table operations
+ """
+
+ def createIdx(self, indexName: str, knowledgebaseId: str, vectorSize: int):
+ table_name = f"{indexName}_{knowledgebaseId}"
+ inf_conn = self.connPool.get_conn()
+ inf_db = inf_conn.create_database(self.dbName, ConflictType.Ignore)
+
+ fp_mapping = os.path.join(
+ get_project_base_directory(), "conf", "infinity_mapping.json"
+ )
+ if not os.path.exists(fp_mapping):
+ raise Exception(f"Mapping file not found at {fp_mapping}")
+ schema = json.load(open(fp_mapping))
+ vector_name = f"q_{vectorSize}_vec"
+ schema[vector_name] = {"type": f"vector,{vectorSize},float"}
+ inf_table = inf_db.create_table(
+ table_name,
+ schema,
+ ConflictType.Ignore,
+ )
+ inf_table.create_index(
+ "q_vec_idx",
+ IndexInfo(
+ vector_name,
+ IndexType.Hnsw,
+ {
+ "M": "16",
+ "ef_construction": "50",
+ "metric": "cosine",
+ "encode": "lvq",
+ },
+ ),
+ ConflictType.Ignore,
+ )
+ text_suffix = ["_tks", "_ltks", "_kwd"]
+ for field_name, field_info in schema.items():
+ if field_info["type"] != "varchar":
+ continue
+ for suffix in text_suffix:
+ if field_name.endswith(suffix):
+ inf_table.create_index(
+ f"text_idx_{field_name}",
+ IndexInfo(
+ field_name, IndexType.FullText, {"ANALYZER": "standard"}
+ ),
+ ConflictType.Ignore,
+ )
+ break
+ self.connPool.release_conn(inf_conn)
+ doc_store_logger.info(
+ f"INFINITY created table {table_name}, vector size {vectorSize}"
+ )
+
+ def deleteIdx(self, indexName: str, knowledgebaseId: str):
+ table_name = f"{indexName}_{knowledgebaseId}"
+ inf_conn = self.connPool.get_conn()
+ db_instance = inf_conn.get_database(self.dbName)
+ db_instance.drop_table(table_name, ConflictType.Ignore)
+ self.connPool.release_conn(inf_conn)
+ doc_store_logger.info(f"INFINITY dropped table {table_name}")
+
+ def indexExist(self, indexName: str, knowledgebaseId: str) -> bool:
+ table_name = f"{indexName}_{knowledgebaseId}"
+ try:
+ inf_conn = self.connPool.get_conn()
+ db_instance = inf_conn.get_database(self.dbName)
+ _ = db_instance.get_table(table_name)
+ self.connPool.release_conn(inf_conn)
+ return True
+ except Exception as e:
+ doc_store_logger.error("INFINITY indexExist: " + str(e))
+ return False
+
+ """
+ CRUD operations
+ """
+
+ def search(
+ self,
+ selectFields: list[str],
+ highlightFields: list[str],
+ condition: dict,
+ matchExprs: list[MatchExpr],
+ orderBy: OrderByExpr,
+ offset: int,
+ limit: int,
+ indexNames: str|list[str],
+ knowledgebaseIds: list[str],
+ ) -> list[dict] | pl.DataFrame:
+ """
+ TODO: Infinity doesn't provide highlight
+ """
+ if isinstance(indexNames, str):
+ indexNames = indexNames.split(",")
+ assert isinstance(indexNames, list) and len(indexNames) > 0
+ inf_conn = self.connPool.get_conn()
+ db_instance = inf_conn.get_database(self.dbName)
+ df_list = list()
+ table_list = list()
+ if "id" not in selectFields:
+ selectFields.append("id")
+
+ # Prepare expressions common to all tables
+ filter_cond = ""
+ filter_fulltext = ""
+ if condition:
+ filter_cond = equivalent_condition_to_str(condition)
+ for matchExpr in matchExprs:
+ if isinstance(matchExpr, MatchTextExpr):
+ if len(filter_cond) != 0 and "filter" not in matchExpr.extra_options:
+ matchExpr.extra_options.update({"filter": filter_cond})
+ fields = ",".join(matchExpr.fields)
+ filter_fulltext = (
+ f"filter_fulltext('{fields}', '{matchExpr.matching_text}')"
+ )
+ if len(filter_cond) != 0:
+ filter_fulltext = f"({filter_cond}) AND {filter_fulltext}"
+ # doc_store_logger.info(f"filter_fulltext: {filter_fulltext}")
+ minimum_should_match = "0%"
+ if "minimum_should_match" in matchExpr.extra_options:
+ minimum_should_match = (
+ str(int(matchExpr.extra_options["minimum_should_match"] * 100))
+ + "%"
+ )
+ matchExpr.extra_options.update(
+ {"minimum_should_match": minimum_should_match}
+ )
+ for k, v in matchExpr.extra_options.items():
+ if not isinstance(v, str):
+ matchExpr.extra_options[k] = str(v)
+ elif isinstance(matchExpr, MatchDenseExpr):
+ if len(filter_cond) != 0 and "filter" not in matchExpr.extra_options:
+ matchExpr.extra_options.update({"filter": filter_fulltext})
+ for k, v in matchExpr.extra_options.items():
+ if not isinstance(v, str):
+ matchExpr.extra_options[k] = str(v)
+ if orderBy.fields:
+ order_by_expr_list = list()
+ for order_field in orderBy.fields:
+ order_by_expr_list.append((order_field[0], order_field[1] == 0))
+
+ # Scatter search tables and gather the results
+ for indexName in indexNames:
+ for knowledgebaseId in knowledgebaseIds:
+ table_name = f"{indexName}_{knowledgebaseId}"
+ try:
+ table_instance = db_instance.get_table(table_name)
+ except Exception:
+ continue
+ table_list.append(table_name)
+ builder = table_instance.output(selectFields)
+ for matchExpr in matchExprs:
+ if isinstance(matchExpr, MatchTextExpr):
+ fields = ",".join(matchExpr.fields)
+ builder = builder.match_text(
+ fields,
+ matchExpr.matching_text,
+ matchExpr.topn,
+ matchExpr.extra_options,
+ )
+ elif isinstance(matchExpr, MatchDenseExpr):
+ builder = builder.match_dense(
+ matchExpr.vector_column_name,
+ matchExpr.embedding_data,
+ matchExpr.embedding_data_type,
+ matchExpr.distance_type,
+ matchExpr.topn,
+ matchExpr.extra_options,
+ )
+ elif isinstance(matchExpr, FusionExpr):
+ builder = builder.fusion(
+ matchExpr.method, matchExpr.topn, matchExpr.fusion_params
+ )
+ if orderBy.fields:
+ builder.sort(order_by_expr_list)
+ builder.offset(offset).limit(limit)
+ kb_res = builder.to_pl()
+ df_list.append(kb_res)
+ self.connPool.release_conn(inf_conn)
+ res = pl.concat(df_list)
+ doc_store_logger.info("INFINITY search tables: " + str(table_list))
+ return res
+
+ def get(
+ self, chunkId: str, indexName: str, knowledgebaseIds: list[str]
+ ) -> dict | None:
+ inf_conn = self.connPool.get_conn()
+ db_instance = inf_conn.get_database(self.dbName)
+ df_list = list()
+ assert isinstance(knowledgebaseIds, list)
+ for knowledgebaseId in knowledgebaseIds:
+ table_name = f"{indexName}_{knowledgebaseId}"
+ table_instance = db_instance.get_table(table_name)
+ kb_res = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_pl()
+ df_list.append(kb_res)
+ self.connPool.release_conn(inf_conn)
+ res = pl.concat(df_list)
+ res_fields = self.getFields(res, res.columns)
+ return res_fields.get(chunkId, None)
+
+ def insert(
+ self, documents: list[dict], indexName: str, knowledgebaseId: str
+ ) -> list[str]:
+ inf_conn = self.connPool.get_conn()
+ db_instance = inf_conn.get_database(self.dbName)
+ table_name = f"{indexName}_{knowledgebaseId}"
+ try:
+ table_instance = db_instance.get_table(table_name)
+ except InfinityException as e:
+ # src/common/status.cppm, kTableNotExist = 3022
+ if e.error_code != 3022:
+ raise
+ vector_size = 0
+ patt = re.compile(r"q_(?P\d+)_vec")
+ for k in documents[0].keys():
+ m = patt.match(k)
+ if m:
+ vector_size = int(m.group("vector_size"))
+ break
+ if vector_size == 0:
+ raise ValueError("Cannot infer vector size from documents")
+ self.createIdx(indexName, knowledgebaseId, vector_size)
+ table_instance = db_instance.get_table(table_name)
+
+ for d in documents:
+ assert "_id" not in d
+ assert "id" in d
+ for k, v in d.items():
+ if k.endswith("_kwd") and isinstance(v, list):
+ d[k] = " ".join(v)
+ ids = [f"'{d["id"]}'" for d in documents]
+ str_ids = ", ".join(ids)
+ str_filter = f"id IN ({str_ids})"
+ table_instance.delete(str_filter)
+ # for doc in documents:
+ # doc_store_logger.info(f"insert position_list: {doc['position_list']}")
+ # doc_store_logger.info(f"InfinityConnection.insert {json.dumps(documents)}")
+ table_instance.insert(documents)
+ self.connPool.release_conn(inf_conn)
+ doc_store_logger.info(f"inserted into {table_name} {str_ids}.")
+ return []
+
+ def update(
+ self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str
+ ) -> bool:
+ # if 'position_list' in newValue:
+ # doc_store_logger.info(f"update position_list: {newValue['position_list']}")
+ inf_conn = self.connPool.get_conn()
+ db_instance = inf_conn.get_database(self.dbName)
+ table_name = f"{indexName}_{knowledgebaseId}"
+ table_instance = db_instance.get_table(table_name)
+ filter = equivalent_condition_to_str(condition)
+ for k, v in newValue.items():
+ if k.endswith("_kwd") and isinstance(v, list):
+ newValue[k] = " ".join(v)
+ table_instance.update(filter, newValue)
+ self.connPool.release_conn(inf_conn)
+ return True
+
+ def delete(self, condition: dict, indexName: str, knowledgebaseId: str) -> int:
+ inf_conn = self.connPool.get_conn()
+ db_instance = inf_conn.get_database(self.dbName)
+ table_name = f"{indexName}_{knowledgebaseId}"
+ filter = equivalent_condition_to_str(condition)
+ try:
+ table_instance = db_instance.get_table(table_name)
+ except Exception:
+ doc_store_logger.warning(
+ f"Skipped deleting `{filter}` from table {table_name} since the table doesn't exist."
+ )
+ return 0
+ res = table_instance.delete(filter)
+ self.connPool.release_conn(inf_conn)
+ return res.deleted_rows
+
+ """
+ Helper functions for search result
+ """
+
+ def getTotal(self, res):
+ return len(res)
+
+ def getChunkIds(self, res):
+ return list(res["id"])
+
+ def getFields(self, res, fields: List[str]) -> Dict[str, dict]:
+ res_fields = {}
+ if not fields:
+ return {}
+ num_rows = len(res)
+ column_id = res["id"]
+ for i in range(num_rows):
+ id = column_id[i]
+ m = {"id": id}
+ for fieldnm in fields:
+ if fieldnm not in res:
+ m[fieldnm] = None
+ continue
+ v = res[fieldnm][i]
+ if isinstance(v, Series):
+ v = list(v)
+ elif fieldnm == "important_kwd":
+ assert isinstance(v, str)
+ v = v.split(" ")
+ else:
+ if not isinstance(v, str):
+ v = str(v)
+ # if fieldnm.endswith("_tks"):
+ # v = rmSpace(v)
+ m[fieldnm] = v
+ res_fields[id] = m
+ return res_fields
+
+ def getHighlight(self, res, keywords: List[str], fieldnm: str):
+ ans = {}
+ num_rows = len(res)
+ column_id = res["id"]
+ for i in range(num_rows):
+ id = column_id[i]
+ txt = res[fieldnm][i]
+ txt = re.sub(r"[\r\n]", " ", txt, flags=re.IGNORECASE | re.MULTILINE)
+ txts = []
+ for t in re.split(r"[.?!;\n]", txt):
+ for w in keywords:
+ t = re.sub(
+ r"(^|[ .?/'\"\(\)!,:;-])(%s)([ .?/'\"\(\)!,:;-])"
+ % re.escape(w),
+ r"\1\2\3",
+ t,
+ flags=re.IGNORECASE | re.MULTILINE,
+ )
+ if not re.search(
+ r"[^<>]+", t, flags=re.IGNORECASE | re.MULTILINE
+ ):
+ continue
+ txts.append(t)
+ ans[id] = "...".join(txts)
+ return ans
+
+ def getAggregation(self, res, fieldnm: str):
+ """
+ TODO: Infinity doesn't provide aggregation
+ """
+ return list()
+
+ """
+ SQL
+ """
+
+ def sql(self, sql: str, fetch_size: int, format: str):
+ raise NotImplementedError("Not implemented")
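
Since Infinity keeps one table per (index, knowledge base) pair named `{indexName}_{knowledgebaseId}`, filtering conditions are compiled into a SQL-like string. A quick illustration with made-up values:

```python
from rag.utils.infinity_conn import equivalent_condition_to_str

print(equivalent_condition_to_str({"doc_id": "doc0", "kb_id": ["kb1", "kb2"]}))
# doc_id='doc0' AND kb_id IN ('kb1', 'kb2')
```
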
diff --git a/sdk/python/ragflow_sdk/modules/document.py b/sdk/python/ragflow_sdk/modules/document.py
index d2363116882..052f9ccf1cd 100644
--- a/sdk/python/ragflow_sdk/modules/document.py
+++ b/sdk/python/ragflow_sdk/modules/document.py
@@ -50,8 +50,8 @@ def download(self):
return res.content
- def list_chunks(self,page=1, page_size=30, keywords="", id:str=None):
- data={"keywords": keywords,"page":page,"page_size":page_size,"id":id}
+ def list_chunks(self,page=1, page_size=30, keywords=""):
+ data={"keywords": keywords,"page":page,"page_size":page_size}
res = self.get(f'/datasets/{self.dataset_id}/documents/{self.id}/chunks', data)
res = res.json()
if res.get("code") == 0:
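
Illustrative SDK usage after the change: chunks are listed by keywords and pagination only, since the per-id lookup parameter is gone. Host, API key and dataset name are placeholders, and the surrounding calls assume the usual `ragflow_sdk` entry points.

```python
from ragflow_sdk import RAGFlow

rag = RAGFlow(api_key="<YOUR_API_KEY>", base_url="http://127.0.0.1:9380")
dataset = rag.list_datasets(name="my_dataset")[0]
doc = dataset.list_documents()[0]
for chunk in doc.list_chunks(page=1, page_size=30, keywords=""):
    print(chunk.id, chunk.content)
```
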
diff --git a/sdk/python/test/t_chunk.py b/sdk/python/test/t_chunk.py
index c2f83463339..ba7dd89ba26 100644
--- a/sdk/python/test/t_chunk.py
+++ b/sdk/python/test/t_chunk.py
@@ -126,6 +126,7 @@ def test_delete_chunk_with_success(get_api_key_fixture):
docs = ds.upload_documents(documents)
doc = docs[0]
chunk = doc.add_chunk(content="This is a chunk addition test")
+ sleep(5)
doc.delete_chunks([chunk.id])
@@ -146,6 +147,8 @@ def test_update_chunk_content(get_api_key_fixture):
docs = ds.upload_documents(documents)
doc = docs[0]
chunk = doc.add_chunk(content="This is a chunk addition test")
+ # For Elasticsearch, the chunk is not searchable within a short time (~2s) after insertion.
+ sleep(3)
chunk.update({"content":"This is a updated content"})
def test_update_chunk_available(get_api_key_fixture):
@@ -165,7 +168,9 @@ def test_update_chunk_available(get_api_key_fixture):
docs = ds.upload_documents(documents)
doc = docs[0]
chunk = doc.add_chunk(content="This is a chunk addition test")
- chunk.update({"available":False})
+ # For Elasticsearch, the chunk is not searchable within a short time (~2s) after insertion.
+ sleep(3)
+ chunk.update({"available":0})
def test_retrieve_chunks(get_api_key_fixture):