diff --git a/README.md b/README.md index 86c812faa3f..7da4bce00b1 100644 --- a/README.md +++ b/README.md @@ -285,7 +285,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev . git clone https://github.com/infiniflow/ragflow.git cd ragflow/ export POETRY_VIRTUALENVS_CREATE=true POETRY_VIRTUALENVS_IN_PROJECT=true - ~/.local/bin/poetry install --sync --no-root # install RAGFlow dependent python modules + ~/.local/bin/poetry install --sync --no-root --with=full # install RAGFlow dependent python modules ``` 3. Launch the dependent services (MinIO, Elasticsearch, Redis, and MySQL) using Docker Compose: @@ -295,7 +295,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev . Add the following line to `/etc/hosts` to resolve all hosts specified in **docker/service_conf.yaml** to `127.0.0.1`: ``` - 127.0.0.1 es01 mysql minio redis + 127.0.0.1 es01 infinity mysql minio redis ``` In **docker/service_conf.yaml**, update mysql port to `5455` and es port to `1200`, as specified in **docker/.env**. diff --git a/README_ja.md b/README_ja.md index a5c9e150272..6b31b76fe6f 100644 --- a/README_ja.md +++ b/README_ja.md @@ -249,7 +249,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev . `/etc/hosts` に以下の行を追加して、**docker/service_conf.yaml** に指定されたすべてのホストを `127.0.0.1` に解決します: ``` - 127.0.0.1 es01 mysql minio redis + 127.0.0.1 es01 infinity mysql minio redis ``` **docker/service_conf.yaml** で mysql のポートを `5455` に、es のポートを `1200` に更新します(**docker/.env** に指定された通り). diff --git a/README_ko.md b/README_ko.md index 7d52669125f..c302fa9db1d 100644 --- a/README_ko.md +++ b/README_ko.md @@ -253,7 +253,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev . `/etc/hosts` 에 다음 줄을 추가하여 **docker/service_conf.yaml** 에 지정된 모든 호스트를 `127.0.0.1` 로 해결합니다: ``` - 127.0.0.1 es01 mysql minio redis + 127.0.0.1 es01 infinity mysql minio redis ``` **docker/service_conf.yaml** 에서 mysql 포트를 `5455` 로, es 포트를 `1200` 으로 업데이트합니다( **docker/.env** 에 지정된 대로). diff --git a/README_zh.md b/README_zh.md index 2806d0b4ac4..99be96e4497 100644 --- a/README_zh.md +++ b/README_zh.md @@ -251,7 +251,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev . 在 `/etc/hosts` 中添加以下代码,将 **docker/service_conf.yaml** 文件中的所有 host 地址都解析为 `127.0.0.1`: ``` - 127.0.0.1 es01 mysql minio redis + 127.0.0.1 es01 infinity mysql minio redis ``` 在文件 **docker/service_conf.yaml** 中,对照 **docker/.env** 的配置将 mysql 端口更新为 `5455`,es 端口更新为 `1200`。 diff --git a/api/apps/api_app.py b/api/apps/api_app.py index 61e5cc25b03..30d8defc7e9 100644 --- a/api/apps/api_app.py +++ b/api/apps/api_app.py @@ -528,8 +528,9 @@ def list_chunks(): return get_json_result( data=False, retmsg="Can't find doc_name or doc_id" ) + kb_ids = KnowledgebaseService.get_kb_ids(tenant_id) - res = retrievaler.chunk_list(doc_id=doc_id, tenant_id=tenant_id) + res = retrievaler.chunk_list(doc_id, tenant_id, kb_ids) res = [ { "content": res_item["content_with_weight"], @@ -840,6 +841,6 @@ def retrieval(): return get_json_result(data=ranks) except Exception as e: if str(e).find("not_found") > 0: - return get_json_result(data=False, retmsg=f'No chunk found! Check the chunk status please!', + return get_json_result(data=False, retmsg='No chunk found! 
Check the chunk status please!',
                                retcode=RetCode.DATA_ERROR)
         return server_error_response(e)
diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py
index f79a3673eed..acee2c61901 100644
--- a/api/apps/chunk_app.py
+++ b/api/apps/chunk_app.py
@@ -15,16 +15,13 @@
 #
 import datetime
 import json
-import traceback
 
 from flask import request
 from flask_login import login_required, current_user
-from elasticsearch_dsl import Q
 
 from api.db.services.dialog_service import keyword_extraction
 from rag.app.qa import rmPrefix, beAdoc
 from rag.nlp import search, rag_tokenizer
-from rag.utils.es_conn import ELASTICSEARCH
 from rag.utils import rmSpace
 from api.db import LLMType, ParserType
 from api.db.services.knowledgebase_service import KnowledgebaseService
@@ -32,7 +29,7 @@
 from api.db.services.user_service import UserTenantService
 from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
 from api.db.services.document_service import DocumentService
-from api.settings import RetCode, retrievaler, kg_retrievaler
+from api.settings import RetCode, retrievaler, kg_retrievaler, docStoreConn
 from api.utils.api_utils import get_json_result
 import hashlib
 import re
@@ -72,19 +69,15 @@ def list_chunk():
                 "important_kwd": sres.field[id].get("important_kwd", []),
                 "img_id": sres.field[id].get("img_id", ""),
                 "available_int": sres.field[id].get("available_int", 1),
-                "positions": sres.field[id].get("position_int", "").split("\t")
+                "positions": json.loads(sres.field[id].get("position_list", "[]")),
             }
-            if len(d["positions"]) % 5 == 0:
-                poss = []
-                for i in range(0, len(d["positions"]), 5):
-                    poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
-                                 float(d["positions"][i + 3]), float(d["positions"][i + 4])])
-                d["positions"] = poss
+            assert isinstance(d["positions"], list)
+            assert len(d["positions"]) == 0 or (isinstance(d["positions"][0], list) and len(d["positions"][0]) == 5)
             res["chunks"].append(d)
         return get_json_result(data=res)
     except Exception as e:
         if str(e).find("not_found") > 0:
-            return get_json_result(data=False, retmsg=f'No chunk found!',
+            return get_json_result(data=False, retmsg='No chunk found!',
                                    retcode=RetCode.DATA_ERROR)
         return server_error_response(e)
@@ -97,9 +90,11 @@ def get():
         tenants = UserTenantService.query(user_id=current_user.id)
         if not tenants:
             return get_data_error_result(retmsg="Tenant not found!")
-        res = ELASTICSEARCH.get(
+
+        kb_ids = KnowledgebaseService.get_kb_ids(tenants[0].tenant_id)
+        res = docStoreConn.get(
             chunk_id, search.index_name(
-                tenants[0].tenant_id))
+                tenants[0].tenant_id), kb_ids)
         if not res.get("found"):
             return server_error_response("Chunk not found")
         id = res["_id"]
@@ -115,7 +110,7 @@ def get():
         return get_json_result(data=res)
     except Exception as e:
         if str(e).find("NotFoundError") >= 0:
-            return get_json_result(data=False, retmsg=f'Chunk not found!',
+            return get_json_result(data=False, retmsg='Chunk not found!',
                                    retcode=RetCode.DATA_ERROR)
         return server_error_response(e)
@@ -163,7 +158,7 @@ def set():
         v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
         v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
         d["q_%d_vec" % len(v)] = v.tolist()
-        ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
+        docStoreConn.upsert([d], search.index_name(tenant_id), doc.kb_id)
         return get_json_result(data=True)
     except Exception as e:
         return server_error_response(e)
@@ -175,11 +170,11 @@ def set():
 def switch():
     req = request.json
     try:
-        tenant_id = 
DocumentService.get_tenant_id(req["doc_id"]) - if not tenant_id: - return get_data_error_result(retmsg="Tenant not found!") - if not ELASTICSEARCH.upsert([{"id": i, "available_int": int(req["available_int"])} for i in req["chunk_ids"]], - search.index_name(tenant_id)): + e, doc = DocumentService.get_by_id(req["doc_id"]) + if not e: + return get_data_error_result(retmsg="Document not found!") + if not docStoreConn.upsert([{"id": i, "available_int": int(req["available_int"])} for i in req["chunk_ids"]], + search.index_name(doc.tenant_id), doc.kb_id): return get_data_error_result(retmsg="Index updating failure") return get_json_result(data=True) except Exception as e: @@ -192,12 +187,11 @@ def switch(): def rm(): req = request.json try: - if not ELASTICSEARCH.deleteByQuery( - Q("ids", values=req["chunk_ids"]), search.index_name(current_user.id)): - return get_data_error_result(retmsg="Index updating failure") e, doc = DocumentService.get_by_id(req["doc_id"]) if not e: return get_data_error_result(retmsg="Document not found!") + if not docStoreConn.delete({"_id": req["chunk_ids"]}, search.index_name(current_user.id), doc.kb_id): + return get_data_error_result(retmsg="Index updating failure") deleted_chunk_ids = req["chunk_ids"] chunk_number = len(deleted_chunk_ids) DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0) @@ -240,7 +234,7 @@ def create(): v, c = embd_mdl.encode([doc.name, req["content_with_weight"]]) v = 0.1 * v[0] + 0.9 * v[1] d["q_%d_vec" % len(v)] = v.tolist() - ELASTICSEARCH.upsert([d], search.index_name(tenant_id)) + docStoreConn.upsert([d], search.index_name(tenant_id), doc.kb_id) DocumentService.increment_chunk_num( doc.id, doc.kb_id, c, 1, 0) @@ -257,8 +251,9 @@ def retrieval_test(): page = int(req.get("page", 1)) size = int(req.get("size", 30)) question = req["question"] - kb_id = req["kb_id"] - if isinstance(kb_id, str): kb_id = [kb_id] + kb_ids = req["kb_id"] + if isinstance(kb_ids, str): + kb_ids = [kb_ids] doc_ids = req.get("doc_ids", []) similarity_threshold = float(req.get("similarity_threshold", 0.0)) vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3)) @@ -266,17 +261,17 @@ def retrieval_test(): try: tenants = UserTenantService.query(user_id=current_user.id) - for kid in kb_id: + for kb_id in kb_ids: for tenant in tenants: if KnowledgebaseService.query( - tenant_id=tenant.tenant_id, id=kid): + tenant_id=tenant.tenant_id, id=kb_id): break else: return get_json_result( - data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', + data=False, retmsg='Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR) - e, kb = KnowledgebaseService.get_by_id(kb_id[0]) + e, kb = KnowledgebaseService.get_by_id(kb_ids[0]) if not e: return get_data_error_result(retmsg="Knowledgebase not found!") @@ -291,7 +286,7 @@ def retrieval_test(): question += keyword_extraction(chat_mdl, question) retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler - ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, kb_id, page, size, + ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, kb_ids, page, size, similarity_threshold, vector_similarity_weight, top, doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight")) for c in ranks["chunks"]: @@ -301,7 +296,7 @@ def retrieval_test(): return get_json_result(data=ranks) except Exception as e: if str(e).find("not_found") > 0: - return get_json_result(data=False, retmsg=f'No chunk found! 
Check the chunk status please!', + return get_json_result(data=False, retmsg='No chunk found! Check the chunk status please!', retcode=RetCode.DATA_ERROR) return server_error_response(e) @@ -315,13 +310,16 @@ def knowledge_graph(): "knowledge_graph_kwd": ["graph", "mind_map"] } tenant_id = DocumentService.get_tenant_id(doc_id) - sres = retrievaler.search(req, search.index_name(tenant_id)) + e, doc = DocumentService.get_by_id(req["doc_id"]) + if not e: + return get_data_error_result(retmsg="Document not found!") + sres = retrievaler.search(req, search.index_name(tenant_id), doc.kb_id) obj = {"graph": {}, "mind_map": {}} for id in sres.ids[:2]: ty = sres.field[id]["knowledge_graph_kwd"] try: content_json = json.loads(sres.field[id]["content_with_weight"]) - except Exception as e: + except Exception: continue if ty == 'mind_map': diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 0e4af144786..a41e7129a66 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -17,7 +17,6 @@ import re import flask -from elasticsearch_dsl import Q from flask import request from flask_login import login_required, current_user @@ -27,14 +26,13 @@ from api.db.services.task_service import TaskService, queue_tasks from api.db.services.user_service import UserTenantService from rag.nlp import search -from rag.utils.es_conn import ELASTICSEARCH from api.db.services import duplicate_name from api.db.services.knowledgebase_service import KnowledgebaseService from api.utils.api_utils import server_error_response, get_data_error_result, validate_request from api.utils import get_uuid from api.db import FileType, TaskStatus, ParserType, FileSource from api.db.services.document_service import DocumentService, doc_upload_and_parse -from api.settings import RetCode +from api.settings import RetCode, docStoreConn from api.utils.api_utils import get_json_result from rag.utils.storage_factory import STORAGE_IMPL from api.utils.file_utils import filename_type, thumbnail @@ -187,7 +185,7 @@ def list_docs(): break else: return get_json_result( - data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', + data=False, retmsg='Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR) keywords = request.args.get("keywords", "") @@ -275,18 +273,8 @@ def change_status(): return get_data_error_result( retmsg="Database error (Document update)!") - if str(req["status"]) == "0": - ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]), - scripts="ctx._source.available_int=0;", - idxnm=search.index_name( - kb.tenant_id) - ) - else: - ELASTICSEARCH.updateScriptByQuery(Q("term", doc_id=req["doc_id"]), - scripts="ctx._source.available_int=1;", - idxnm=search.index_name( - kb.tenant_id) - ) + status = int(req["status"]) + docStoreConn.update({"doc_id": req["doc_id"]}, {"available_int": status}, search.index_name(kb.tenant_id), doc.kb_id) return get_json_result(data=True) except Exception as e: return server_error_response(e) @@ -365,8 +353,11 @@ def run(): tenant_id = DocumentService.get_tenant_id(id) if not tenant_id: return get_data_error_result(retmsg="Tenant not found!") - ELASTICSEARCH.deleteByQuery( - Q("match", doc_id=id), idxnm=search.index_name(tenant_id)) + e, doc = DocumentService.get_by_id(id) + if not e: + return get_data_error_result(retmsg="Document not found!") + if docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id): + docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), doc.kb_id) if 
str(req["run"]) == TaskStatus.RUNNING.value:
             TaskService.filter_delete([Task.doc_id == id])
@@ -490,19 +481,19 @@ def change_parser():
             tenant_id = DocumentService.get_tenant_id(req["doc_id"])
             if not tenant_id:
                 return get_data_error_result(retmsg="Tenant not found!")
-            ELASTICSEARCH.deleteByQuery(
-                Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
+            if docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
+                docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id)
 
         return get_json_result(data=True)
     except Exception as e:
         return server_error_response(e)
 
 
-@manager.route('/image/<image_id>', methods=['GET'])
+@manager.route('/image/<img_id>', methods=['GET'])
 # @login_required
-def get_image(image_id):
+def get_image(img_id):
     try:
-        bkt, nm = image_id.split("-")
+        bkt, nm = img_id.split("-")
         response = flask.make_response(STORAGE_IMPL.get(bkt, nm))
         response.headers.set('Content-Type', 'image/JPEG')
         return response
diff --git a/api/apps/file2document_app.py b/api/apps/file2document_app.py
index 1e4b2c9ad3f..173e7209fe3 100644
--- a/api/apps/file2document_app.py
+++ b/api/apps/file2document_app.py
@@ -13,9 +13,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License
 #
-from elasticsearch_dsl import Q
 
-from api.db.db_models import File2Document
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 
@@ -28,8 +26,6 @@
 from api.db.services.document_service import DocumentService
 from api.settings import RetCode
 from api.utils.api_utils import get_json_result
-from rag.nlp import search
-from rag.utils.es_conn import ELASTICSEARCH
 
 
 @manager.route('/convert', methods=['POST'])
diff --git a/api/apps/file_app.py b/api/apps/file_app.py
index 4f4b44af98f..c34cff51cf7 100644
--- a/api/apps/file_app.py
+++ b/api/apps/file_app.py
@@ -18,7 +18,6 @@
 import re
 
 import flask
-from elasticsearch_dsl import Q
 from flask import request
 from flask_login import login_required, current_user
 
@@ -32,8 +31,6 @@
 from api.settings import RetCode
 from api.utils.api_utils import get_json_result
 from api.utils.file_utils import filename_type
-from rag.nlp import search
-from rag.utils.es_conn import ELASTICSEARCH
 from rag.utils.storage_factory import STORAGE_IMPL
 
 
diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py
index 551e7867ed6..ffa6cbb1db4 100644
--- a/api/apps/kb_app.py
+++ b/api/apps/kb_app.py
@@ -72,7 +72,7 @@ def update():
         if not KnowledgebaseService.query(
                 created_by=current_user.id, id=req["kb_id"]):
             return get_json_result(
-                data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR)
+                data=False, retmsg='Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR)
 
         e, kb = KnowledgebaseService.get_by_id(req["kb_id"])
         if not e:
@@ -110,7 +110,7 @@ def detail():
                 break
         else:
             return get_json_result(
-                data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.',
+                data=False, retmsg='Only owner of knowledgebase authorized for this operation.',
                 retcode=RetCode.OPERATING_ERROR)
     kb = KnowledgebaseService.get_detail(kb_id)
     if not kb:
@@ -153,7 +153,7 @@ def rm():
             created_by=current_user.id, id=req["kb_id"])
         if not kbs:
             return get_json_result(
-                data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR)
+                data=False, retmsg='Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR)
for doc in DocumentService.query(kb_id=req["kb_id"]): if not DocumentService.remove_document(doc, kbs[0].tenant_id): diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 41a690855d8..8c5f7bd2bb3 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -16,35 +16,38 @@ import pathlib import datetime +from flask import request + from api.db.services.dialog_service import keyword_extraction from rag.app.qa import rmPrefix, beAdoc -from rag.nlp import rag_tokenizer +from rag.nlp import search, rag_tokenizer +from rag.utils import rmSpace from api.db import LLMType, ParserType from api.db.services.llm_service import TenantLLMService -from api.settings import kg_retrievaler +from api.utils.api_utils import server_error_response, get_error_data_result +from api.db.services.document_service import DocumentService +from api.settings import RetCode, retrievaler, kg_retrievaler, docStoreConn +from api.utils.api_utils import get_result import hashlib -import re from api.utils.api_utils import token_required -from api.db.db_models import Task + +from api.db.db_models import Task, File + from api.db.services.task_service import TaskService, queue_tasks -from api.utils.api_utils import server_error_response -from api.utils.api_utils import get_result, get_error_data_result + + + from io import BytesIO -from elasticsearch_dsl import Q -from flask import request, send_file + +from flask import send_file + from api.db import FileSource, TaskStatus, FileType -from api.db.db_models import File -from api.db.services.document_service import DocumentService from api.db.services.file2document_service import File2DocumentService from api.db.services.file_service import FileService from api.db.services.knowledgebase_service import KnowledgebaseService -from api.settings import RetCode, retrievaler from api.utils.api_utils import construct_json_result,get_parser_config -from rag.nlp import search -from rag.utils import rmSpace -from rag.utils.es_conn import ELASTICSEARCH from rag.utils.storage_factory import STORAGE_IMPL -import os +import json MAXIMUM_OF_UPLOADING_FILES = 256 @@ -159,8 +162,7 @@ def update_doc(tenant_id, dataset_id, document_id): doc.process_duation * -1) if not e: return get_error_data_result(retmsg="Document not found!") - ELASTICSEARCH.deleteByQuery( - Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)) + docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), dataset_id) return get_result() @@ -299,8 +301,7 @@ def parse(tenant_id,dataset_id): info["chunk_num"] = 0 info["token_num"] = 0 DocumentService.update_by_id(id, info) - ELASTICSEARCH.deleteByQuery( - Q("match", doc_id=id), idxnm=search.index_name(tenant_id)) + docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), dataset_id) TaskService.filter_delete([Task.doc_id == id]) e, doc = DocumentService.get_by_id(id) doc = doc.to_dict() @@ -325,8 +326,7 @@ def stop_parsing(tenant_id,dataset_id): return get_error_data_result("Can't stop parsing document with progress at 0 or 100") info = {"run": "2", "progress": 0,"chunk_num":0} DocumentService.update_by_id(id, info) - ELASTICSEARCH.deleteByQuery( - Q("match", doc_id=id), idxnm=search.index_name(tenant_id)) + docStoreConn.delete({"doc_id": id}, search.index_name(tenant_id), dataset_id) return get_result() @@ -347,7 +347,7 @@ def list_chunks(tenant_id,dataset_id,document_id): query = { "doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True } - sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True) 
+    sres = retrievaler.search(query, search.index_name(tenant_id), dataset_id, highlight=True)
     key_mapping = {
         "chunk_num": "chunk_count",
         "kb_id": "dataset_id",
@@ -382,14 +382,10 @@
             "important_kwd": sres.field[id].get("important_kwd", []),
             "img_id": sres.field[id].get("img_id", ""),
             "available_int": sres.field[id].get("available_int", 1),
-            "positions": sres.field[id].get("position_int", "").split("\t")
+            "positions": json.loads(sres.field[id].get("position_list", "[]")),
         }
-        if len(d["positions"]) % 5 == 0:
-            poss = []
-            for i in range(0, len(d["positions"]), 5):
-                poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
-                             float(d["positions"][i + 3]), float(d["positions"][i + 4])])
-            d["positions"] = poss
+        assert isinstance(d["positions"], list)
+        assert len(d["positions"]) == 0 or (isinstance(d["positions"][0], list) and len(d["positions"][0]) == 5)
         origin_chunks.append(d)
     if req.get("id"):
@@ -407,7 +403,7 @@
             "content_with_weight": "content",
             "doc_id": "document_id",
             "important_kwd": "important_keywords",
-            "img_id": "image_id",
+            "img_id": "img_id",
             "available_int":"available"
         }
         renamed_chunk = {}
@@ -459,7 +455,7 @@ def add_chunk(tenant_id,dataset_id,document_id):
         v, c = embd_mdl.encode([doc.name, req["content"]])
         v = 0.1 * v[0] + 0.9 * v[1]
         d["q_%d_vec" % len(v)] = v.tolist()
-        ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
+        docStoreConn.upsert([d], search.index_name(tenant_id), dataset_id)
         DocumentService.increment_chunk_num(
             doc.id, doc.kb_id, c, 1, 0)
@@ -508,8 +504,7 @@ def rm_chunk(tenant_id,dataset_id,document_id):
     for chunk_id in chunk_list:
         if chunk_id not in sres.ids:
             return get_error_data_result(f"Chunk {chunk_id} not found")
-    if not ELASTICSEARCH.deleteByQuery(
-            Q("ids", values=chunk_list), search.index_name(tenant_id)):
+    if not docStoreConn.delete({"_id": chunk_list}, search.index_name(tenant_id), dataset_id):
         return get_error_data_result(retmsg="Index updating failure")
     deleted_chunk_ids = chunk_list
     chunk_number = len(deleted_chunk_ids)
@@ -522,10 +517,8 @@ def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
     try:
-        res = ELASTICSEARCH.get(
-            chunk_id, search.index_name(
-                tenant_id))
-    except Exception as e:
+        res = docStoreConn.get(chunk_id, search.index_name(tenant_id), dataset_id)
+    except Exception:
         return get_error_data_result(f"Can't find this chunk {chunk_id}")
     if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
         return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
@@ -571,7 +564,7 @@ def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
         v, c = embd_mdl.encode([doc.name, d["content_with_weight"]])
         v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
         d["q_%d_vec" % len(v)] = v.tolist()
-        ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
+        docStoreConn.upsert([d], search.index_name(tenant_id), dataset_id)
     return get_result()
@@ -656,6 +649,6 @@ def retrieval_test(tenant_id):
         return get_result(data=ranks)
     except Exception as e:
         if str(e).find("not_found") > 0:
-            return get_result(retmsg=f'No chunk found! Check the chunk status please!',
+            return get_result(retmsg='No chunk found! 
Check the chunk status please!', retcode=RetCode.DATA_ERROR) return server_error_response(e) \ No newline at end of file diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py index 1f44345259e..e670d17de50 100644 --- a/api/apps/sdk/session.py +++ b/api/apps/sdk/session.py @@ -198,7 +198,7 @@ def list(chat_id,tenant_id): "document_id": chunk["doc_id"], "document_name": chunk["docnm_kwd"], "dataset_id": chunk["kb_id"], - "image_id": chunk["img_id"], + "img_id": chunk["img_id"], "similarity": chunk["similarity"], "vector_similarity": chunk["vector_similarity"], "term_similarity": chunk["term_similarity"], diff --git a/api/apps/system_app.py b/api/apps/system_app.py index 28df3d688d6..337a747abb3 100644 --- a/api/apps/system_app.py +++ b/api/apps/system_app.py @@ -22,12 +22,11 @@ from api.db.services.api_service import APITokenService from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.user_service import UserTenantService -from api.settings import DATABASE_TYPE +from api.settings import DATABASE_TYPE, docStoreConn from api.utils import current_timestamp, datetime_format from api.utils.api_utils import get_json_result, get_data_error_result, server_error_response, \ - generate_confirmation_token, request, validate_request + generate_confirmation_token from api.versions import get_rag_version -from rag.utils.es_conn import ELASTICSEARCH from rag.utils.storage_factory import STORAGE_IMPL, STORAGE_IMPL_TYPE from timeit import default_timer as timer @@ -46,10 +45,10 @@ def status(): res = {} st = timer() try: - res["es"] = ELASTICSEARCH.health() - res["es"]["elapsed"] = "{:.1f}".format((timer() - st)*1000.) + res["doc_store"] = docStoreConn.health() + res["doc_store"]["elapsed"] = "{:.1f}".format((timer() - st)*1000.) 
except Exception as e: - res["es"] = {"status": "red", "elapsed": "{:.1f}".format((timer() - st)*1000.), "error": str(e)} + res["doc_store"] = {"status": "red", "elapsed": "{:.1f}".format((timer() - st)*1000.), "error": str(e)} st = timer() try: diff --git a/api/db/db_models.py b/api/db/db_models.py index fbf0f3cde10..cd1881af323 100644 --- a/api/db/db_models.py +++ b/api/db/db_models.py @@ -470,7 +470,7 @@ class User(DataBaseModel, UserMixin): status = CharField( max_length=1, null=True, - help_text="is it validate(0: wasted,1: validate)", + help_text="is it validate(0: wasted, 1: validate)", default="1", index=True) is_superuser = BooleanField(null=True, help_text="is root", default=False, index=True) @@ -525,7 +525,7 @@ class Tenant(DataBaseModel): status = CharField( max_length=1, null=True, - help_text="is it validate(0: wasted,1: validate)", + help_text="is it validate(0: wasted, 1: validate)", default="1", index=True) @@ -542,7 +542,7 @@ class UserTenant(DataBaseModel): status = CharField( max_length=1, null=True, - help_text="is it validate(0: wasted,1: validate)", + help_text="is it validate(0: wasted, 1: validate)", default="1", index=True) @@ -559,7 +559,7 @@ class InvitationCode(DataBaseModel): status = CharField( max_length=1, null=True, - help_text="is it validate(0: wasted,1: validate)", + help_text="is it validate(0: wasted, 1: validate)", default="1", index=True) @@ -582,7 +582,7 @@ class LLMFactories(DataBaseModel): status = CharField( max_length=1, null=True, - help_text="is it validate(0: wasted,1: validate)", + help_text="is it validate(0: wasted, 1: validate)", default="1", index=True) @@ -616,7 +616,7 @@ class LLM(DataBaseModel): status = CharField( max_length=1, null=True, - help_text="is it validate(0: wasted,1: validate)", + help_text="is it validate(0: wasted, 1: validate)", default="1", index=True) @@ -703,7 +703,7 @@ class Knowledgebase(DataBaseModel): status = CharField( max_length=1, null=True, - help_text="is it validate(0: wasted,1: validate)", + help_text="is it validate(0: wasted, 1: validate)", default="1", index=True) @@ -767,7 +767,7 @@ class Document(DataBaseModel): status = CharField( max_length=1, null=True, - help_text="is it validate(0: wasted,1: validate)", + help_text="is it validate(0: wasted, 1: validate)", default="1", index=True) @@ -904,7 +904,7 @@ class Dialog(DataBaseModel): status = CharField( max_length=1, null=True, - help_text="is it validate(0: wasted,1: validate)", + help_text="is it validate(0: wasted, 1: validate)", default="1", index=True) @@ -987,7 +987,7 @@ def migrate_db(): help_text="where dose this document come from", index=True)) ) - except Exception as e: + except Exception: pass try: migrate( @@ -996,7 +996,7 @@ def migrate_db(): help_text="default rerank model ID")) ) - except Exception as e: + except Exception: pass try: migrate( @@ -1004,59 +1004,59 @@ def migrate_db(): help_text="default rerank model ID")) ) - except Exception as e: + except Exception: pass try: migrate( migrator.add_column('dialog', 'top_k', IntegerField(default=1024)) ) - except Exception as e: + except Exception: pass try: migrate( migrator.alter_column_type('tenant_llm', 'api_key', CharField(max_length=1024, null=True, help_text="API KEY", index=True)) ) - except Exception as e: + except Exception: pass try: migrate( migrator.add_column('api_token', 'source', CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True)) ) - except Exception as e: + except Exception: pass try: migrate( migrator.add_column("tenant","tts_id", 
CharField(max_length=256,null=True,help_text="default tts model ID",index=True)) ) - except Exception as e: + except Exception: pass try: migrate( migrator.add_column('api_4_conversation', 'source', CharField(max_length=16, null=True, help_text="none|agent|dialog", index=True)) ) - except Exception as e: + except Exception: pass try: DB.execute_sql('ALTER TABLE llm DROP PRIMARY KEY;') DB.execute_sql('ALTER TABLE llm ADD PRIMARY KEY (llm_name,fid);') - except Exception as e: + except Exception: pass try: migrate( migrator.add_column('task', 'retry_count', IntegerField(default=0)) ) - except Exception as e: + except Exception: pass try: migrate( migrator.alter_column_type('api_token', 'dialog_id', CharField(max_length=32, null=True, index=True)) ) - except Exception as e: + except Exception: pass diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py index 5e1f5b0e0fa..d70acf98fb7 100644 --- a/api/db/services/document_service.py +++ b/api/db/services/document_service.py @@ -15,7 +15,6 @@ # import hashlib import json -import os import random import re import traceback @@ -24,16 +23,13 @@ from datetime import datetime from io import BytesIO -from elasticsearch_dsl import Q from peewee import fn from api.db.db_utils import bulk_insert_into_db -from api.settings import stat_logger +from api.settings import stat_logger, docStoreConn from api.utils import current_timestamp, get_format_time, get_uuid -from api.utils.file_utils import get_project_base_directory from graphrag.mind_map_extractor import MindMapExtractor from rag.settings import SVR_QUEUE_NAME -from rag.utils.es_conn import ELASTICSEARCH from rag.utils.storage_factory import STORAGE_IMPL from rag.nlp import search, rag_tokenizer @@ -138,8 +134,7 @@ def insert(cls, doc): @classmethod @DB.connection_context() def remove_document(cls, doc, tenant_id): - ELASTICSEARCH.deleteByQuery( - Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id)) + docStoreConn.delete({"doc_id": doc.id}, search.index_name(tenant_id), doc.kb_id) cls.clear_chunk_num(doc.id) return cls.delete_by_id(doc.id) @@ -250,6 +245,15 @@ def get_tenant_id(cls, doc_id): return return docs[0]["tenant_id"] + @classmethod + @DB.connection_context() + def get_knowledgebase_id(cls, doc_id): + docs = cls.model.select(cls.model.kb_id).where(cls.model.id == doc_id) + docs = docs.dicts() + if not docs: + return + return docs[0]["kb_id"] + @classmethod @DB.connection_context() def get_tenant_id_by_name(cls, name): @@ -421,7 +425,7 @@ def do_cancel(cls, doc_id): try: _, doc = DocumentService.get_by_id(doc_id) return doc.run == TaskStatus.CANCEL.value or doc.progress < 0 - except Exception as e: + except Exception: pass return False @@ -462,11 +466,6 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id): if not e: raise LookupError("Can't find this knowledgebase!") - idxnm = search.index_name(kb.tenant_id) - if not ELASTICSEARCH.indexExist(idxnm): - ELASTICSEARCH.createIdx(idxnm, json.load( - open(os.path.join(get_project_base_directory(), "conf", "mapping.json"), "r"))) - embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING, llm_name=kb.embd_id, lang=kb.language) err, files = FileService.upload_document(kb, file_objs, user_id) @@ -544,6 +543,9 @@ def embedding(doc_id, cnts, batch_size=16): token_counts[doc_id] += c return vects + idxnm = search.index_name(kb.tenant_id) + try_create_idx = True + _, tenant = TenantService.get_by_id(kb.tenant_id) llm_bdl = LLMBundle(kb.tenant_id, LLMType.CHAT, tenant.llm_id) for doc_id in docids: @@ -574,7 
+576,11 @@ def embedding(doc_id, cnts, batch_size=16): v = vects[i] d["q_%d_vec" % len(v)] = v for b in range(0, len(cks), es_bulk_size): - ELASTICSEARCH.bulk(cks[b:b + es_bulk_size], idxnm) + if try_create_idx: + if not docStoreConn.indexExist(idxnm, kb_id): + docStoreConn.createIdx(idxnm, kb_id, len(vects[0])) + try_create_idx = False + docStoreConn.insert(cks[b:b + es_bulk_size], idxnm, kb_id) DocumentService.increment_chunk_num( doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0) diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index 2baba4eaf4e..88d5d653e3c 100644 --- a/api/db/services/knowledgebase_service.py +++ b/api/db/services/knowledgebase_service.py @@ -66,6 +66,16 @@ def get_by_tenant_ids(cls, joined_tenant_ids, user_id, return list(kbs.dicts()) + @classmethod + @DB.connection_context() + def get_kb_ids(cls, tenant_id): + fields = [ + cls.model.id, + ] + kbs = cls.model.select(*fields).where(cls.model.tenant_id == tenant_id) + kb_ids = [kb["id"] for kb in kbs] + return kb_ids + @classmethod @DB.connection_context() def get_by_tenant_ids_by_offset(cls, joined_tenant_ids, user_id, offset, count, orderby, desc): diff --git a/api/settings.py b/api/settings.py index f48a5fe7a59..4e2010162db 100644 --- a/api/settings.py +++ b/api/settings.py @@ -18,6 +18,8 @@ from enum import IntEnum, Enum from api.utils.file_utils import get_project_base_directory from api.utils.log_utils import LoggerFactory, getLogger +import rag.utils.es_conn +import rag.utils.infinity_conn # Logger LoggerFactory.set_directory( @@ -33,7 +35,7 @@ database_logger = getLogger("database") chat_logger = getLogger("chat") -from rag.utils.es_conn import ELASTICSEARCH +import rag.utils from rag.nlp import search from graphrag import search as kg_search from api.utils import get_base_config, decrypt_database_config @@ -205,8 +207,10 @@ PRIVILEGE_COMMAND_WHITELIST = [] CHECK_NODES_IDENTITY = False -retrievaler = search.Dealer(ELASTICSEARCH) -kg_retrievaler = kg_search.KGSearch(ELASTICSEARCH) +docStoreConn = rag.utils.es_conn.ESConnection() +#docStoreConn = rag.utils.infinity_conn.InfinityConnection() +retrievaler = search.Dealer(docStoreConn) +kg_retrievaler = kg_search.KGSearch(docStoreConn) class CustomEnum(Enum): diff --git a/conf/mapping.json b/conf/mapping.json index c8831346aaf..b4331f2be19 100644 --- a/conf/mapping.json +++ b/conf/mapping.json @@ -1,200 +1,203 @@ - { +{ "settings": { "index": { "number_of_shards": 2, "number_of_replicas": 0, - "refresh_interval" : "1000ms" + "refresh_interval": "1000ms" }, "similarity": { - "scripted_sim": { - "type": "scripted", - "script": { - "source": "double idf = Math.log(1+(field.docCount-term.docFreq+0.5)/(term.docFreq + 0.5))/Math.log(1+((field.docCount-0.5)/1.5)); return query.boost * idf * Math.min(doc.freq, 1);" - } + "scripted_sim": { + "type": "scripted", + "script": { + "source": "double idf = Math.log(1+(field.docCount-term.docFreq+0.5)/(term.docFreq + 0.5))/Math.log(1+((field.docCount-0.5)/1.5)); return query.boost * idf * Math.min(doc.freq, 1);" } + } } }, "mappings": { - "properties": { - "lat_lon": {"type": "geo_point", "store":"true"} - }, - "date_detection": "true", - "dynamic_templates": [ - { - "int": { - "match": "*_int", - "mapping": { - "type": "integer", - "store": "true" - } - } - }, - { - "ulong": { - "match": "*_ulong", - "mapping": { - "type": "unsigned_long", - "store": "true" - } - } - }, - { - "long": { - "match": "*_long", - "mapping": { - "type": "long", - "store": "true" - } - } - }, - { 
- "short": { - "match": "*_short", - "mapping": { - "type": "short", - "store": "true" - } - } - }, - { - "numeric": { - "match": "*_flt", - "mapping": { - "type": "float", - "store": true - } - } - }, - { - "tks": { - "match": "*_tks", - "mapping": { - "type": "text", - "similarity": "scripted_sim", - "analyzer": "whitespace", - "store": true - } - } - }, - { - "ltks":{ - "match": "*_ltks", - "mapping": { - "type": "text", - "analyzer": "whitespace", - "store": true - } - } - }, - { - "kwd": { - "match_pattern": "regex", - "match": "^(.*_(kwd|id|ids|uid|uids)|uid)$", - "mapping": { - "type": "keyword", - "similarity": "boolean", - "store": true - } - } - }, - { - "dt": { - "match_pattern": "regex", - "match": "^.*(_dt|_time|_at)$", - "mapping": { - "type": "date", - "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM-dd_HH:mm:ss", - "store": true - } - } - }, - { - "nested": { - "match": "*_nst", - "mapping": { - "type": "nested" - } - } - }, - { - "object": { - "match": "*_obj", - "mapping": { - "type": "object", - "dynamic": "true" - } - } - }, - { - "string": { - "match": "*_with_weight", - "mapping": { - "type": "text", - "index": "false", - "store": true - } - } - }, - { - "string": { - "match": "*_fea", - "mapping": { - "type": "rank_feature" - } - } - }, - { - "dense_vector": { - "match": "*_512_vec", - "mapping": { - "type": "dense_vector", - "index": true, - "similarity": "cosine", - "dims": 512 - } - } - }, - { - "dense_vector": { - "match": "*_768_vec", - "mapping": { - "type": "dense_vector", - "index": true, - "similarity": "cosine", - "dims": 768 - } - } - }, - { - "dense_vector": { - "match": "*_1024_vec", - "mapping": { - "type": "dense_vector", - "index": true, - "similarity": "cosine", - "dims": 1024 - } - } - }, - { - "dense_vector": { - "match": "*_1536_vec", - "mapping": { - "type": "dense_vector", - "index": true, - "similarity": "cosine", - "dims": 1536 - } - } - }, - { - "binary": { - "match": "*_bin", - "mapping": { - "type": "binary" - } - } - } - ] - } -} + "properties": { + "lat_lon": { + "type": "geo_point", + "store": "true" + } + }, + "date_detection": "true", + "dynamic_templates": [ + { + "int": { + "match": "*_int", + "mapping": { + "type": "integer", + "store": "true" + } + } + }, + { + "ulong": { + "match": "*_ulong", + "mapping": { + "type": "unsigned_long", + "store": "true" + } + } + }, + { + "long": { + "match": "*_long", + "mapping": { + "type": "long", + "store": "true" + } + } + }, + { + "short": { + "match": "*_short", + "mapping": { + "type": "short", + "store": "true" + } + } + }, + { + "numeric": { + "match": "*_flt", + "mapping": { + "type": "float", + "store": true + } + } + }, + { + "tks": { + "match": "*_tks", + "mapping": { + "type": "text", + "similarity": "scripted_sim", + "analyzer": "whitespace", + "store": true + } + } + }, + { + "ltks": { + "match": "*_ltks", + "mapping": { + "type": "text", + "analyzer": "whitespace", + "store": true + } + } + }, + { + "kwd": { + "match_pattern": "regex", + "match": "^(.*_(kwd|id|ids|uid|uids)|uid)$", + "mapping": { + "type": "keyword", + "similarity": "boolean", + "store": true + } + } + }, + { + "dt": { + "match_pattern": "regex", + "match": "^.*(_dt|_time|_at)$", + "mapping": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyy-MM-dd_HH:mm:ss", + "store": true + } + } + }, + { + "nested": { + "match": "*_nst", + "mapping": { + "type": "nested" + } + } + }, + { + "object": { + "match": "*_obj", + "mapping": { + "type": "object", + "dynamic": "true" + } + } + }, + { + "string": 
{
+        "match_pattern": "regex",
+        "match": "^.*_(with_weight|list)$",
+        "mapping": {
+          "type": "text",
+          "index": "false",
+          "store": true
+        }
+      }
+    },
+    {
+      "string": {
+        "match": "*_fea",
+        "mapping": {
+          "type": "rank_feature"
+        }
+      }
+    },
+    {
+      "dense_vector": {
+        "match": "*_512_vec",
+        "mapping": {
+          "type": "dense_vector",
+          "index": true,
+          "similarity": "cosine",
+          "dims": 512
+        }
+      }
+    },
+    {
+      "dense_vector": {
+        "match": "*_768_vec",
+        "mapping": {
+          "type": "dense_vector",
+          "index": true,
+          "similarity": "cosine",
+          "dims": 768
+        }
+      }
+    },
+    {
+      "dense_vector": {
+        "match": "*_1024_vec",
+        "mapping": {
+          "type": "dense_vector",
+          "index": true,
+          "similarity": "cosine",
+          "dims": 1024
+        }
+      }
+    },
+    {
+      "dense_vector": {
+        "match": "*_1536_vec",
+        "mapping": {
+          "type": "dense_vector",
+          "index": true,
+          "similarity": "cosine",
+          "dims": 1536
+        }
+      }
+    },
+    {
+      "binary": {
+        "match": "*_bin",
+        "mapping": {
+          "type": "binary"
+        }
+      }
+    }
+  ]
+  }
+}
\ No newline at end of file
diff --git a/docker/.env b/docker/.env
index 8cb40201508..6955e65fe9f 100644
--- a/docker/.env
+++ b/docker/.env
@@ -13,9 +13,13 @@ KIBANA_USER=rag_flow
 KIBANA_PASSWORD=infini_rag_flow
 
 # Update according to the available host memory (in bytes)
-
 MEM_LIMIT=8073741824
 
+# Port to expose Infinity Thrift API to the host
+INFINITY_THRIFT_PORT=23817
+INFINITY_HTTP_PORT=23820
+INFINITY_PSQL_PORT=5432
+
 MYSQL_PASSWORD=infini_rag_flow
 MYSQL_PORT=5455
 
diff --git a/docker/docker-compose-base.yml b/docker/docker-compose-base.yml
index 8ba265bbc81..00a781a667d 100644
--- a/docker/docker-compose-base.yml
+++ b/docker/docker-compose-base.yml
@@ -6,6 +6,7 @@ services:
       - esdata01:/usr/share/elasticsearch/data
     ports:
       - ${ES_PORT}:9200
+    env_file: .env
     environment:
       - node.name=es01
       - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
@@ -27,12 +28,34 @@ services:
       retries: 120
     networks:
       - ragflow
-    restart: always
+    restart: on-failure
+
+  # infinity:
+  #   container_name: ragflow-infinity
+  #   image: infiniflow/infinity:nightly
+  #   volumes:
+  #     - infinity_data:/var/infinity
+  #   ports:
+  #     - ${INFINITY_THRIFT_PORT}:23817
+  #     - ${INFINITY_HTTP_PORT}:23820
+  #     - ${INFINITY_PSQL_PORT}:5432
+  #   env_file: .env
+  #   environment:
+  #     - TZ=${TIMEZONE}
+  #   mem_limit: ${MEM_LIMIT}
+  #   ulimits:
+  #     nofile:
+  #       soft: 500000
+  #       hard: 500000
+  #   networks:
+  #     - ragflow
+  #   restart: on-failure
 
   mysql:
     # mysql:5.7 linux/arm64 image is unavailable.
image: mysql:8.0.39 container_name: ragflow-mysql + env_file: .env environment: - MYSQL_ROOT_PASSWORD=${MYSQL_PASSWORD} - TZ=${TIMEZONE} @@ -55,7 +78,7 @@ services: interval: 10s timeout: 10s retries: 3 - restart: always + restart: on-failure minio: image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z @@ -64,6 +87,7 @@ services: ports: - ${MINIO_PORT}:9000 - ${MINIO_CONSOLE_PORT}:9001 + env_file: .env environment: - MINIO_ROOT_USER=${MINIO_USER} - MINIO_ROOT_PASSWORD=${MINIO_PASSWORD} @@ -72,25 +96,28 @@ services: - minio_data:/data networks: - ragflow - restart: always + restart: on-failure redis: image: redis:7.2.4 container_name: ragflow-redis command: redis-server --requirepass ${REDIS_PASSWORD} --maxmemory 128mb --maxmemory-policy allkeys-lru + env_file: .env ports: - ${REDIS_PORT}:6379 volumes: - redis_data:/data networks: - ragflow - restart: always + restart: on-failure volumes: esdata01: driver: local + infinity_data: + driver: local mysql_data: driver: local minio_data: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 24592c3f4b8..b8707935c8a 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,6 +1,5 @@ include: - - path: ./docker-compose-base.yml - env_file: ./.env + - ./docker-compose-base.yml services: ragflow: @@ -15,19 +14,21 @@ services: - ${SVR_HTTP_PORT}:9380 - 80:80 - 443:443 - - 5678:5678 volumes: - ./service_conf.yaml:/ragflow/conf/service_conf.yaml - ./ragflow-logs:/ragflow/logs - ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf - ./nginx/proxy.conf:/etc/nginx/proxy.conf - ./nginx/nginx.conf:/etc/nginx/nginx.conf + env_file: .env environment: - TZ=${TIMEZONE} - HF_ENDPOINT=${HF_ENDPOINT} - MACOS=${MACOS} networks: - ragflow - restart: always + restart: on-failure + # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration + # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container. extra_hosts: - "host.docker.internal:host-gateway" diff --git a/docs/guides/develop/launch_ragflow_from_source.md b/docs/guides/develop/launch_ragflow_from_source.md index 0584aa14a50..160c704abe1 100644 --- a/docs/guides/develop/launch_ragflow_from_source.md +++ b/docs/guides/develop/launch_ragflow_from_source.md @@ -67,7 +67,7 @@ docker compose -f docker/docker-compose-base.yml up -d 1. Add the following line to `/etc/hosts` to resolve all hosts specified in **docker/service_conf.yaml** to `127.0.0.1`: ``` - 127.0.0.1 es01 mysql minio redis + 127.0.0.1 es01 infinity mysql minio redis ``` 2. In **docker/service_conf.yaml**, update mysql port to `5455` and es port to `1200`, as specified in **docker/.env**. diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index 7ee0314e136..8de1af0d4d2 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -1005,7 +1005,7 @@ Success: "docnm_kwd": "1.txt", "document_id": "b330ec2e91ec11efbc510242ac120004", "id": "b48c170e90f70af998485c1065490726", - "image_id": "", + "img_id": "", "important_keywords": "", "positions": [ "" diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md index 3a1b7093b4e..c2e1abd283e 100644 --- a/docs/references/python_api_reference.md +++ b/docs/references/python_api_reference.md @@ -1347,7 +1347,7 @@ A list of `Chunk` objects representing references to the message, each containin The chunk ID. 
- `content` `str`
  The content of the chunk.
-- `image_id` `str`
+- `img_id` `str`
  The ID of the snapshot of the chunk. Applicable only when the source of the chunk is an image, PPT, PPTX, or PDF file.
- `document_id` `str`
  The ID of the referenced document.
diff --git a/graphrag/claim_extractor.py b/graphrag/claim_extractor.py
index d3c68642967..d986b24546a 100644
--- a/graphrag/claim_extractor.py
+++ b/graphrag/claim_extractor.py
@@ -254,9 +254,12 @@ def pull_field(index: int, fields: list[str]) -> str | None:
     from api.db import LLMType
     from api.db.services.llm_service import LLMBundle
     from api.settings import retrievaler
+    from api.db.services.knowledgebase_service import KnowledgebaseService
+
+    kb_ids = KnowledgebaseService.get_kb_ids(args.tenant_id)
 
     ex = ClaimExtractor(LLMBundle(args.tenant_id, LLMType.CHAT))
-    docs = [d["content_with_weight"] for d in retrievaler.chunk_list(args.doc_id, args.tenant_id, max_count=12, fields=["content_with_weight"])]
+    docs = [d["content_with_weight"] for d in retrievaler.chunk_list(args.doc_id, args.tenant_id, kb_ids, max_count=12, fields=["content_with_weight"])]
     info = {
         "input_text": docs,
         "entity_specs": "organization, person",
diff --git a/graphrag/search.py b/graphrag/search.py
index 85ba0698a3b..f19cf26b5b7 100644
--- a/graphrag/search.py
+++ b/graphrag/search.py
@@ -15,95 +15,86 @@
 #
 import json
 from copy import deepcopy
+from typing import Dict
 
 import pandas as pd
-from elasticsearch_dsl import Q, Search
+from rag.utils.doc_store_conn import OrderByExpr, FusionExpr
 
 from rag.nlp.search import Dealer
 
 
 class KGSearch(Dealer):
-    def search(self, req, idxnm, emb_mdl=None, highlight=False):
-        def merge_into_first(sres, title=""):
-            df,texts = [],[]
-            for d in sres["hits"]["hits"]:
+    def search(self, req, idxnm, emb_mdl, highlight=False):
+        def merge_into_first(sres, title="") -> Dict[str, str]:
+            if not sres:
+                return {}
+            content_with_weight = ""
+            df, texts = [],[]
+            for d in sres.values():
                 try:
-                    df.append(json.loads(d["_source"]["content_with_weight"]))
-                except Exception as e:
-                    texts.append(d["_source"]["content_with_weight"])
-                    pass
-            if not df and not texts: return False
+                    df.append(json.loads(d["content_with_weight"]))
+                except Exception:
+                    texts.append(d["content_with_weight"])
             if df:
-                try:
-                    sres["hits"]["hits"][0]["_source"]["content_with_weight"] = title + "\n" + pd.DataFrame(df).to_csv()
-                except Exception as e:
-                    pass
+                content_with_weight = title + "\n" + pd.DataFrame(df).to_csv()
             else:
-                sres["hits"]["hits"][0]["_source"]["content_with_weight"] = title + "\n" + "\n".join(texts)
-            return True
+                content_with_weight = title + "\n" + "\n".join(texts)
+            first_id = next(iter(sres))
+            first_source = deepcopy(sres[first_id])
+            first_source["content_with_weight"] = content_with_weight
+            return {first_id: first_source}
+
+        kb_ids = req['kb_ids']
+        qst = req.get("question", "")
+        matchText, keywords = self.qryr.question(qst, min_match=0.05)
+        condition = self.get_filters(req)
+        ## Entity retrieval
+        condition.update({"knowledge_graph_kwd": ["entity"]})
+        assert emb_mdl, "No embedding model selected"
+        matchDense = self.get_vector(qst, emb_mdl, 1024, req.get("similarity", 0.1))
+        q_vec = matchDense.embedding_data
         src = req.get("fields", ["docnm_kwd", "content_ltks", "kb_id", "img_id", "title_tks", "important_kwd",
-                                 "image_id", "doc_id", "q_512_vec", "q_768_vec", "position_int", "name_kwd",
+                                 "doc_id", f"q_{len(q_vec)}_vec", "position_list", "name_kwd",
                                  "q_1024_vec", 
"q_1536_vec", "available_int", "content_with_weight", "weight_int", "weight_flt", "rank_int" ]) - qst = req.get("question", "") - binary_query, keywords = self.qryr.question(qst, min_match="5%") - binary_query = self._add_filters(binary_query, req) + fusionExpr = FusionExpr("weighted_sum", 32, {"weights": "0.5, 0.5"}) - ## Entity retrieval - bqry = deepcopy(binary_query) - bqry.filter.append(Q("terms", knowledge_graph_kwd=["entity"])) - s = Search() - s = s.query(bqry)[0: 32] - - s = s.to_dict() - q_vec = [] - if req.get("vector"): - assert emb_mdl, "No embedding model selected" - s["knn"] = self._vector( - qst, emb_mdl, req.get( - "similarity", 0.1), 1024) - s["knn"]["filter"] = bqry.to_dict() - q_vec = s["knn"]["query_vector"] - - ent_res = self.es.search(deepcopy(s), idxnm=idxnm, timeout="600s", src=src) - entities = [d["name_kwd"] for d in self.es.getSource(ent_res)] - ent_ids = self.es.getDocIds(ent_res) - if merge_into_first(ent_res, "-Entities-"): - ent_ids = ent_ids[0:1] + ent_res = self.dataStore.search(src, list(), condition, [matchText, matchDense, fusionExpr], OrderByExpr(), 0, 32, idxnm, kb_ids) + ent_res_fields = self.dataStore.getFields(ent_res, src) + entities = [d["name_kwd"] for d in ent_res_fields.values()] + ent_ids = self.dataStore.getChunkIds(ent_res) + ent_content = merge_into_first(ent_res_fields, "-Entities-") + if ent_content: + ent_ids = list(ent_content.keys()) ## Community retrieval - bqry = deepcopy(binary_query) - bqry.filter.append(Q("terms", entities_kwd=entities)) - bqry.filter.append(Q("terms", knowledge_graph_kwd=["community_report"])) - s = Search() - s = s.query(bqry)[0: 32] - s = s.to_dict() - comm_res = self.es.search(deepcopy(s), idxnm=idxnm, timeout="600s", src=src) - comm_ids = self.es.getDocIds(comm_res) - if merge_into_first(comm_res, "-Community Report-"): - comm_ids = comm_ids[0:1] + condition = self.get_filters(req) + condition.update({"entities_kwd": entities, "knowledge_graph_kwd": ["community_report"]}) + comm_res = self.dataStore.search(src, list(), condition, [matchText, matchDense, fusionExpr], OrderByExpr(), 0, 32, idxnm, kb_ids) + comm_res_fields = self.dataStore.getFields(comm_res, src) + comm_ids = self.dataStore.getChunkIds(comm_res) + comm_content = merge_into_first(comm_res_fields, "-Community Report-") + if comm_content: + comm_ids = list(comm_content.keys()) ## Text content retrieval - bqry = deepcopy(binary_query) - bqry.filter.append(Q("terms", knowledge_graph_kwd=["text"])) - s = Search() - s = s.query(bqry)[0: 6] - s = s.to_dict() - txt_res = self.es.search(deepcopy(s), idxnm=idxnm, timeout="600s", src=src) - txt_ids = self.es.getDocIds(txt_res) - if merge_into_first(txt_res, "-Original Content-"): - txt_ids = txt_ids[0:1] + condition = self.get_filters(req) + condition.update({"knowledge_graph_kwd": ["text"]}) + txt_res = self.dataStore.search(src, list(), condition, [matchText, matchDense, fusionExpr], OrderByExpr(), 0, 6, idxnm, kb_ids) + txt_res_fields = self.dataStore.getFields(txt_res, src) + txt_ids = self.dataStore.getChunkIds(txt_res) + txt_content = merge_into_first(txt_res_fields, "-Original Content-") + if txt_content: + txt_ids = list(txt_content.keys()) return self.SearchResult( total=len(ent_ids) + len(comm_ids) + len(txt_ids), ids=[*ent_ids, *comm_ids, *txt_ids], query_vector=q_vec, - aggregation=None, highlight=None, - field={**self.getFields(ent_res, src), **self.getFields(comm_res, src), **self.getFields(txt_res, src)}, + field={**ent_content, **comm_content, **txt_content}, keywords=[] ) - diff --git 
a/graphrag/smoke.py b/graphrag/smoke.py index b2efbc91fd2..3d0ae370a23 100644 --- a/graphrag/smoke.py +++ b/graphrag/smoke.py @@ -31,10 +31,13 @@ from api.db import LLMType from api.db.services.llm_service import LLMBundle from api.settings import retrievaler + from api.db.services.knowledgebase_service import KnowledgebaseService + + kb_ids = KnowledgebaseService.get_kb_ids(args.tenant_id) ex = GraphExtractor(LLMBundle(args.tenant_id, LLMType.CHAT)) docs = [d["content_with_weight"] for d in - retrievaler.chunk_list(args.doc_id, args.tenant_id, max_count=6, fields=["content_with_weight"])] + retrievaler.chunk_list(args.doc_id, args.tenant_id, kb_ids, max_count=6, fields=["content_with_weight"])] graph = ex(docs) er = EntityResolution(LLMBundle(args.tenant_id, LLMType.CHAT)) diff --git a/poetry.lock b/poetry.lock index 0f4a8d20391..98f7813b09a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -209,13 +209,13 @@ files = [ [[package]] name = "akshare" -version = "1.15.2" +version = "1.15.9" description = "AKShare is an elegant and simple financial data interface library for Python, built for human beings!" optional = false python-versions = ">=3.8" files = [ - {file = "akshare-1.15.2-py3-none-any.whl", hash = "sha256:699d137b92a664455c497dec65e95622174a69a7c87999d68210be3114ad3dca"}, - {file = "akshare-1.15.2.tar.gz", hash = "sha256:17a34459cb2fde120d088ef357c76410623e00764bd990af3b8011487ad72f3a"}, + {file = "akshare-1.15.9-py3-none-any.whl", hash = "sha256:a4764c97d95b605939033d4a284be775e8bca8990ff26d713f621c535516f999"}, + {file = "akshare-1.15.9.tar.gz", hash = "sha256:27e897262a2ffea5e33eac09b74bd1e514da17781d31878d6023d82e4fedce2c"}, ] [package.dependencies] @@ -427,7 +427,7 @@ name = "aspose-slides" version = "24.10.0" description = "Aspose.Slides for Python via .NET is a presentation file formats processing library for working with Microsoft PowerPoint files without using Microsoft PowerPoint." 
 optional = false
-python-versions = "<3.13,>=3.5"
+python-versions = ">=3.5,<3.13"
 files = [
     {file = "Aspose.Slides-24.10.0-py3-none-macosx_10_14_x86_64.whl", hash = "sha256:a9d810ee3277b286c8218fbfa776f9ab46f1de285cb120954ff5031c1a59d959"},
     {file = "Aspose.Slides-24.10.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0ff7ee9a8c00855dc315a7e6cd14022547e5015828b182124ceda0de3d6e3a94"},
@@ -568,7 +568,7 @@ name = "bce-python-sdk"
 version = "0.9.23"
 description = "BCE SDK for python"
 optional = false
-python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,<4,>=2.7"
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, <4"
 files = [
     {file = "bce_python_sdk-0.9.23-py3-none-any.whl", hash = "sha256:8debe21a040e00060f6044877d594765ed7b18bc765c6bf16b878bca864140a3"},
     {file = "bce_python_sdk-0.9.23.tar.gz", hash = "sha256:19739fed5cd0725356fc5ffa2acbdd8fb23f2a81edb91db21a03174551d0cf41"},
@@ -1469,22 +1469,22 @@ tokenizer = ["tiktoken"]
 
 [[package]]
 name = "datasets"
-version = "3.0.1"
+version = "3.0.2"
 description = "HuggingFace community-driven open-source library of datasets"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "datasets-3.0.1-py3-none-any.whl", hash = "sha256:db080aab41c8cc68645117a0f172e5c6789cbc672f066de0aa5a08fc3eebc686"},
-    {file = "datasets-3.0.1.tar.gz", hash = "sha256:40d63b09e76a3066c32e746d6fdc36fd3f29ed2acd49bf5b1a2100da32936511"},
+    {file = "datasets-3.0.2-py3-none-any.whl", hash = "sha256:220bfbea0be9bf81d121bd2ac76fe4ef3f7defe0e8586ce1e7f66dcaaf69f88d"},
+    {file = "datasets-3.0.2.tar.gz", hash = "sha256:07204c389ce0491ef3ad50dd79966d3fd40422a12b831cf84a117323ac74fbc1"},
 ]
 
 [package.dependencies]
 aiohttp = "*"
 dill = ">=0.3.0,<0.3.9"
 filelock = "*"
-fsspec = {version = ">=2023.1.0,<=2024.6.1", extras = ["http"]}
-huggingface-hub = ">=0.22.0"
-multiprocess = "*"
+fsspec = {version = ">=2023.1.0,<=2024.9.0", extras = ["http"]}
+huggingface-hub = ">=0.23.0"
+multiprocess = "<0.70.17"
 numpy = ">=1.17"
 packaging = "*"
 pandas = "*"
@@ -1832,13 +1832,13 @@ develop = ["coverage", "pytest", "pytest-cov", "pytest-mock", "pytz", "sphinx (>
 
 [[package]]
 name = "et-xmlfile"
-version = "1.1.0"
+version = "2.0.0"
 description = "An implementation of lxml.xmlfile for the standard library"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
 files = [
-    {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"},
-    {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"},
+    {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"},
+    {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"},
 ]
 
 [[package]]
@@ -1903,7 +1903,7 @@ name = "fastembed"
 version = "0.3.6"
 description = "Fast, light, accurate library built for retrieval embedding generation"
 optional = false
-python-versions = "<3.13,>=3.8.0"
+python-versions = ">=3.8.0,<3.13"
 files = [
     {file = "fastembed-0.3.6-py3-none-any.whl", hash = "sha256:2bf70edae28bb4ccd9e01617098c2075b0ba35b88025a3d22b0e1e85b2c488ce"},
     {file = "fastembed-0.3.6.tar.gz", hash = "sha256:c93c8ec99b8c008c2d192d6297866b8d70ec7ac8f5696b34eb5ea91f85efd15f"},
@@ -2211,99 +2211,114 @@ requests = "*"
 
 [[package]]
 name = "frozenlist"
-version = "1.4.1"
+version = "1.5.0"
 description = "A list-like structure which implements collections.abc.MutableSequence"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"},
-    {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"},
-    {file = "frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776"},
-    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a"},
-    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad"},
-    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c"},
-    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe"},
-    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a"},
-    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98"},
-    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75"},
-    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5"},
-    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950"},
-    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc"},
-    {file = "frozenlist-1.4.1-cp310-cp310-win32.whl", hash = "sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1"},
-    {file = "frozenlist-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439"},
-    {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0"},
-    {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49"},
-    {file = "frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced"},
-    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0"},
-    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106"},
-    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068"},
-    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2"},
-    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19"},
-    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82"},
-    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec"},
-    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a"},
-    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74"},
-    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2"},
-    {file = "frozenlist-1.4.1-cp311-cp311-win32.whl", hash = "sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17"},
-    {file = "frozenlist-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825"},
-    {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae"},
-    {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb"},
-    {file = "frozenlist-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b"},
-    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86"},
-    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480"},
-    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09"},
-    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a"},
-    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd"},
-    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6"},
-    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1"},
-    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b"},
-    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e"},
-    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8"},
-    {file = "frozenlist-1.4.1-cp312-cp312-win32.whl", hash = "sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89"},
-    {file = "frozenlist-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5"},
-    {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d"},
-    {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826"},
-    {file = "frozenlist-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb"},
-    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6"},
-    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d"},
-    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887"},
-    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a"},
-    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b"},
-    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701"},
-    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0"},
-    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11"},
-    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09"},
-    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7"},
-    {file = "frozenlist-1.4.1-cp38-cp38-win32.whl", hash = "sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497"},
-    {file = "frozenlist-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09"},
-    {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e"},
-    {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d"},
-    {file = "frozenlist-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8"},
-    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0"},
-    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b"},
-    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0"},
-    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897"},
-    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7"},
-    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742"},
-    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea"},
-    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5"},
-    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9"},
-    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6"},
-    {file = "frozenlist-1.4.1-cp39-cp39-win32.whl", hash = "sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932"},
-    {file = "frozenlist-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0"},
-    {file = "frozenlist-1.4.1-py3-none-any.whl", hash = "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7"},
-    {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"},
+    {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"},
+    {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"},
+    {file = "frozenlist-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15538c0cbf0e4fa11d1e3a71f823524b0c46299aed6e10ebb4c2089abd8c3bec"},
+    {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e79225373c317ff1e35f210dd5f1344ff31066ba8067c307ab60254cd3a78ad5"},
+    {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9272fa73ca71266702c4c3e2d4a28553ea03418e591e377a03b8e3659d94fa76"},
+    {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:498524025a5b8ba81695761d78c8dd7382ac0b052f34e66939c42df860b8ff17"},
+    {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92b5278ed9d50fe610185ecd23c55d8b307d75ca18e94c0e7de328089ac5dcba"},
+    {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f3c8c1dacd037df16e85227bac13cca58c30da836c6f936ba1df0c05d046d8d"},
+    {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2ac49a9bedb996086057b75bf93538240538c6d9b38e57c82d51f75a73409d2"},
+    {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e66cc454f97053b79c2ab09c17fbe3c825ea6b4de20baf1be28919460dd7877f"},
+    {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:5a3ba5f9a0dfed20337d3e966dc359784c9f96503674c2faf015f7fe8e96798c"},
+    {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6321899477db90bdeb9299ac3627a6a53c7399c8cd58d25da094007402b039ab"},
+    {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:76e4753701248476e6286f2ef492af900ea67d9706a0155335a40ea21bf3b2f5"},
+    {file = "frozenlist-1.5.0-cp310-cp310-win32.whl", hash = "sha256:977701c081c0241d0955c9586ffdd9ce44f7a7795df39b9151cd9a6fd0ce4cfb"},
+    {file = "frozenlist-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:189f03b53e64144f90990d29a27ec4f7997d91ed3d01b51fa39d2dbe77540fd4"},
+    {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30"},
+    {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5"},
+    {file = "frozenlist-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778"},
+    {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a"},
+    {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869"},
+    {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d"},
+    {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45"},
+    {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d"},
+    {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3"},
+    {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a"},
+    {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9"},
+    {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2"},
+    {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf"},
+    {file = "frozenlist-1.5.0-cp311-cp311-win32.whl", hash = "sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942"},
+    {file = "frozenlist-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d"},
+    {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21"},
+    {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d"},
+    {file = "frozenlist-1.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e"},
+    {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:feeb64bc9bcc6b45c6311c9e9b99406660a9c05ca8a5b30d14a78555088b0b3a"},
+    {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:683173d371daad49cffb8309779e886e59c2f369430ad28fe715f66d08d4ab1a"},
+    {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7d57d8f702221405a9d9b40f9da8ac2e4a1a8b5285aac6100f3393675f0a85ee"},
+    {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30c72000fbcc35b129cb09956836c7d7abf78ab5416595e4857d1cae8d6251a6"},
+    {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000a77d6034fbad9b6bb880f7ec073027908f1b40254b5d6f26210d2dab1240e"},
+    {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d7f5a50342475962eb18b740f3beecc685a15b52c91f7d975257e13e029eca9"},
+    {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:87f724d055eb4785d9be84e9ebf0f24e392ddfad00b3fe036e43f489fafc9039"},
+    {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:6e9080bb2fb195a046e5177f10d9d82b8a204c0736a97a153c2466127de87784"},
+    {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b93d7aaa36c966fa42efcaf716e6b3900438632a626fb09c049f6a2f09fc631"},
+    {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:52ef692a4bc60a6dd57f507429636c2af8b6046db8b31b18dac02cbc8f507f7f"},
+    {file = "frozenlist-1.5.0-cp312-cp312-win32.whl", hash = "sha256:29d94c256679247b33a3dc96cce0f93cbc69c23bf75ff715919332fdbb6a32b8"},
+    {file = "frozenlist-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:8969190d709e7c48ea386db202d708eb94bdb29207a1f269bab1196ce0dcca1f"},
+    {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a1a048f9215c90973402e26c01d1cff8a209e1f1b53f72b95c13db61b00f953"},
+    {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dd47a5181ce5fcb463b5d9e17ecfdb02b678cca31280639255ce9d0e5aa67af0"},
+    {file = "frozenlist-1.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1431d60b36d15cda188ea222033eec8e0eab488f39a272461f2e6d9e1a8e63c2"},
+    {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6482a5851f5d72767fbd0e507e80737f9c8646ae7fd303def99bfe813f76cf7f"},
+    {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44c49271a937625619e862baacbd037a7ef86dd1ee215afc298a417ff3270608"},
+    {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12f78f98c2f1c2429d42e6a485f433722b0061d5c0b0139efa64f396efb5886b"},
+    {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce3aa154c452d2467487765e3adc730a8c153af77ad84096bc19ce19a2400840"},
+    {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b7dc0c4338e6b8b091e8faf0db3168a37101943e687f373dce00959583f7439"},
+    {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45e0896250900b5aa25180f9aec243e84e92ac84bd4a74d9ad4138ef3f5c97de"},
+    {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:561eb1c9579d495fddb6da8959fd2a1fca2c6d060d4113f5844b433fc02f2641"},
+    {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:df6e2f325bfee1f49f81aaac97d2aa757c7646534a06f8f577ce184afe2f0a9e"},
+    {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:140228863501b44b809fb39ec56b5d4071f4d0aa6d216c19cbb08b8c5a7eadb9"},
+    {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7707a25d6a77f5d27ea7dc7d1fc608aa0a478193823f88511ef5e6b8a48f9d03"},
+    {file = "frozenlist-1.5.0-cp313-cp313-win32.whl", hash = "sha256:31a9ac2b38ab9b5a8933b693db4939764ad3f299fcaa931a3e605bc3460e693c"},
+    {file = "frozenlist-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:11aabdd62b8b9c4b84081a3c246506d1cddd2dd93ff0ad53ede5defec7886b28"},
+    {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dd94994fc91a6177bfaafd7d9fd951bc8689b0a98168aa26b5f543868548d3ca"},
+    {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0da8bbec082bf6bf18345b180958775363588678f64998c2b7609e34719b10"},
+    {file = "frozenlist-1.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73f2e31ea8dd7df61a359b731716018c2be196e5bb3b74ddba107f694fbd7604"},
+    {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:828afae9f17e6de596825cf4228ff28fbdf6065974e5ac1410cecc22f699d2b3"},
+    {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1577515d35ed5649d52ab4319db757bb881ce3b2b796d7283e6634d99ace307"},
+    {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2150cc6305a2c2ab33299453e2968611dacb970d2283a14955923062c8d00b10"},
+    {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a72b7a6e3cd2725eff67cd64c8f13335ee18fc3c7befc05aed043d24c7b9ccb9"},
+    {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c16d2fa63e0800723139137d667e1056bee1a1cf7965153d2d104b62855e9b99"},
+    {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:17dcc32fc7bda7ce5875435003220a457bcfa34ab7924a49a1c19f55b6ee185c"},
+    {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:97160e245ea33d8609cd2b8fd997c850b56db147a304a262abc2b3be021a9171"},
+    {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f1e6540b7fa044eee0bb5111ada694cf3dc15f2b0347ca125ee9ca984d5e9e6e"},
+    {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:91d6c171862df0a6c61479d9724f22efb6109111017c87567cfeb7b5d1449fdf"},
+    {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c1fac3e2ace2eb1052e9f7c7db480818371134410e1f5c55d65e8f3ac6d1407e"},
+    {file = "frozenlist-1.5.0-cp38-cp38-win32.whl", hash = "sha256:b97f7b575ab4a8af9b7bc1d2ef7f29d3afee2226bd03ca3875c16451ad5a7723"},
+    {file = "frozenlist-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:374ca2dabdccad8e2a76d40b1d037f5bd16824933bf7bcea3e59c891fd4a0923"},
+    {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9bbcdfaf4af7ce002694a4e10a0159d5a8d20056a12b05b45cea944a4953f972"},
+    {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1893f948bf6681733aaccf36c5232c231e3b5166d607c5fa77773611df6dc336"},
+    {file = "frozenlist-1.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b5e23253bb709ef57a8e95e6ae48daa9ac5f265637529e4ce6b003a37b2621f"},
+    {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f253985bb515ecd89629db13cb58d702035ecd8cfbca7d7a7e29a0e6d39af5f"},
+    {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04a5c6babd5e8fb7d3c871dc8b321166b80e41b637c31a995ed844a6139942b6"},
+    {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fe0f1c29ba24ba6ff6abf688cb0b7cf1efab6b6aa6adc55441773c252f7411"},
+    {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:226d72559fa19babe2ccd920273e767c96a49b9d3d38badd7c91a0fdeda8ea08"},
+    {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b731db116ab3aedec558573c1a5eec78822b32292fe4f2f0345b7f697745c2"},
+    {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:366d8f93e3edfe5a918c874702f78faac300209a4d5bf38352b2c1bdc07a766d"},
+    {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1b96af8c582b94d381a1c1f51ffaedeb77c821c690ea5f01da3d70a487dd0a9b"},
+    {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c03eff4a41bd4e38415cbed054bbaff4a075b093e2394b6915dca34a40d1e38b"},
+    {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:50cf5e7ee9b98f22bdecbabf3800ae78ddcc26e4a435515fc72d97903e8488e0"},
+    {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1e76bfbc72353269c44e0bc2cfe171900fbf7f722ad74c9a7b638052afe6a00c"},
+    {file = "frozenlist-1.5.0-cp39-cp39-win32.whl", hash = "sha256:666534d15ba8f0fda3f53969117383d5dc021266b3c1a42c9ec4855e4b58b9d3"},
+    {file = "frozenlist-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:5c28f4b5dbef8a0d8aad0d4de24d1e9e981728628afaf4ea0792f5d0939372f0"},
+    {file = "frozenlist-1.5.0-py3-none-any.whl", hash = "sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3"},
+    {file = "frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817"},
 ]
 
 [[package]]
 name = "fsspec"
-version = "2024.6.1"
+version = "2024.9.0"
 description = "File-system specification"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e"},
-    {file = "fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49"},
+    {file = "fsspec-2024.9.0-py3-none-any.whl", hash = "sha256:a0947d552d8a6efa72cc2c730b12c41d043509156966cca4fb157b0f2a0c574b"},
+    {file = "fsspec-2024.9.0.tar.gz", hash = "sha256:4b0afb90c2f21832df142f292649035d80b421f60a9e1c027802e5a0da2b04e8"},
 ]
 
 [package.dependencies]
@@ -2427,13 +2442,13 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4
 
 [[package]]
 name = "google-api-core"
-version = "2.21.0"
+version = "2.22.0"
 description = "Google API client core library"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "google_api_core-2.21.0-py3-none-any.whl", hash = "sha256:6869eacb2a37720380ba5898312af79a4d30b8bca1548fb4093e0697dc4bdf5d"},
-    {file = "google_api_core-2.21.0.tar.gz", hash = "sha256:4a152fd11a9f774ea606388d423b68aa7e6d6a0ffe4c8266f74979613ec09f81"},
+    {file = "google_api_core-2.22.0-py3-none-any.whl", hash = "sha256:a6652b6bd51303902494998626653671703c420f6f4c88cfd3f50ed723e9d021"},
+    {file = "google_api_core-2.22.0.tar.gz", hash = "sha256:26f8d76b96477db42b55fd02a33aae4a42ec8b86b98b94969b7333a2c828bf35"},
 ]
 
 [package.dependencies]
@@ -2606,13 +2621,13 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"]
 
 [[package]]
 name = "google-cloud-resource-manager"
-version = "1.12.5"
+version = "1.13.0"
 description = "Google Cloud Resource Manager API client library"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "google_cloud_resource_manager-1.12.5-py2.py3-none-any.whl", hash = "sha256:2708a718b45c79464b7b21559c701b5c92e6b0b1ab2146d0a256277a623dc175"},
-    {file = "google_cloud_resource_manager-1.12.5.tar.gz", hash = "sha256:b7af4254401ed4efa3aba3a929cb3ddb803fa6baf91a78485e45583597de5891"},
+    {file = "google_cloud_resource_manager-1.13.0-py2.py3-none-any.whl", hash = "sha256:33beb4528c2b7aee7a97ed843710581a7b4a27f3dd1fa41a0bf3359b3d68853f"},
+    {file = "google_cloud_resource_manager-1.13.0.tar.gz", hash = "sha256:ae4bf69443f14b37007d4d84150115b0942e8b01650fd7a1fc6ff4dc1760e5c4"},
 ]
 
 [package.dependencies]
@@ -2775,7 +2790,7 @@ name = "graspologic"
 version = "3.4.1"
 description = "A set of Python modules for graph statistics"
 optional = false
-python-versions = "<3.13,>=3.9"
+python-versions = ">=3.9,<3.13"
 files = [
     {file = "graspologic-3.4.1-py3-none-any.whl", hash = "sha256:c6563e087eda599bad1de831d4b7321c0daa7a82f4e85a7d7737ff67e07cdda2"},
     {file = "graspologic-3.4.1.tar.gz", hash = "sha256:7561f0b852a2bccd351bff77e8db07d9892f9dfa35a420fdec01690e4fdc8075"},
@@ -3420,13 +3435,13 @@ type = ["pytest-mypy"]
 
 [[package]]
 name = "infinity-emb"
-version = "0.0.51"
-description = "Infinity is a high-throughput, low-latency REST API for serving vector embeddings, supporting a wide range of sentence-transformer models and frameworks."
+version = "0.0.66"
+description = "Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip."
 optional = false
-python-versions = "<4,>=3.9"
+python-versions = ">=3.9,<4"
 files = [
-    {file = "infinity_emb-0.0.51-py3-none-any.whl", hash = "sha256:d4384e398189b619699c300dfc144160344e5de2c7b57da5831881688d6c2842"},
-    {file = "infinity_emb-0.0.51.tar.gz", hash = "sha256:11b09959c15f9456cbfc9f3d8516ad97485ef54dc5c68f5d9d6279083b8fba9d"},
+    {file = "infinity_emb-0.0.66-py3-none-any.whl", hash = "sha256:1dc6ed9fa48e6cbe83650a7583dbbb4bc393900c39c326bb0aff2ddc090ac018"},
+    {file = "infinity_emb-0.0.66.tar.gz", hash = "sha256:9c9a361ccebf8e8f626c1f685286518d03d0c35e7d14179ae7c2500b4fc68b98"},
 ]
 
 [package.dependencies]
@@ -3435,17 +3450,18 @@ huggingface_hub = "*"
 numpy = ">=1.20.0,<2"
 
 [package.extras]
-all = ["ctranslate2 (>=4.0.0,<5.0.0)", "diskcache", "einops", "fastapi (>=0.103.2)", "optimum[onnxruntime] (>=1.16.2)", "orjson (>=3.9.8,!=3.10.0)", "pillow", "prometheus-fastapi-instrumentator (>=6.1.0)", "pydantic (>=2.4.0,<3)", "rich (>=13,<14)", "sentence-transformers (>=3.0.1,<4.0.0)", "timm", "torch (>=2.2.1)", "typer[all] (>=0.9.0,<0.10.0)", "uvicorn[standard] (>=0.23.2,<0.24.0)"]
+all = ["colpali-engine (>=0.3.1,<0.4.0)", "ctranslate2 (>=4.0.0,<5.0.0)", "diskcache", "einops", "fastapi (>=0.103.2)", "optimum[onnxruntime] (>=1.23.1)", "orjson (>=3.9.8,!=3.10.0)", "pillow", "posthog", "prometheus-fastapi-instrumentator (>=6.1.0)", "pydantic (>=2.4.0,<3)", "rich (>=13,<14)", "sentence-transformers (>=3.0.1,<4.0.0)", "soundfile (>=0.12.1,<0.13.0)", "timm", "torch (>=2.2.1)", "torchvision", "typer[all] (>=0.12.5,<0.13.0)", "uvicorn[standard] (>=0.32.0,<0.33.0)"]
+audio = ["soundfile (>=0.12.1,<0.13.0)"]
 cache = ["diskcache"]
 ct2 = ["ctranslate2 (>=4.0.0,<5.0.0)", "sentence-transformers (>=3.0.1,<4.0.0)", "torch (>=2.2.1)", "transformers (>4.34.0,<=5.0)"]
 einops = ["einops"]
 logging = ["rich (>=13,<14)"]
 onnxruntime-gpu = ["onnxruntime-gpu"]
-optimum = ["optimum[onnxruntime] (>=1.16.2)"]
-server = ["fastapi (>=0.103.2)", "orjson (>=3.9.8,!=3.10.0)", "prometheus-fastapi-instrumentator (>=6.1.0)", "pydantic (>=2.4.0,<3)", "rich (>=13,<14)", "typer[all] (>=0.9.0,<0.10.0)", "uvicorn[standard] (>=0.23.2,<0.24.0)"]
+optimum = ["optimum[onnxruntime] (>=1.23.1)"]
+server = ["fastapi (>=0.103.2)", "orjson (>=3.9.8,!=3.10.0)", "posthog", "prometheus-fastapi-instrumentator (>=6.1.0)", "pydantic (>=2.4.0,<3)", "rich (>=13,<14)", "typer[all] (>=0.12.5,<0.13.0)", "uvicorn[standard] (>=0.32.0,<0.33.0)"]
 tensorrt = ["tensorrt (>=8.6.1,<9.0.0)"]
 torch = ["sentence-transformers (>=3.0.1,<4.0.0)", "torch (>=2.2.1)"]
-vision = ["pillow", "timm"]
+vision = ["colpali-engine (>=0.3.1,<0.4.0)", "pillow", "timm", "torchvision"]
 
 [[package]]
 name = "iniconfig"
@@ -3833,7 +3849,7 @@ name = "litellm"
 version = "1.48.0"
 description = "Library to easily interface with LLM API providers"
 optional = false
-python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
+python-versions = ">=3.8, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*"
 files = [
     {file = "litellm-1.48.0-py3-none-any.whl", hash = "sha256:7765e8a92069778f5fc66aacfabd0e2f8ec8d74fb117f5e475567d89b0d376b9"},
     {file = "litellm-1.48.0.tar.gz", hash = "sha256:31a9b8a25a9daf44c24ddc08bf74298da920f2c5cea44135e5061278d0aa6fc9"},
@@ -4732,13 +4748,13 @@ biothings-client = ">=0.2.6"
 
 [[package]]
 name = "networkx"
-version = "3.4.1"
+version = "3.4.2"
 description = "Python package for creating and manipulating graphs and networks"
 optional = false
 python-versions = ">=3.10"
 files = [
-    {file = "networkx-3.4.1-py3-none-any.whl", hash = "sha256:e30a87b48c9a6a7cc220e732bffefaee585bdb166d13377734446ce1a0620eed"},
-    {file = "networkx-3.4.1.tar.gz", hash = "sha256:f9df45e85b78f5bd010993e897b4f1fdb242c11e015b101bd951e5c0e29982d8"},
+    {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"},
+    {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"},
 ]
 
 [package.extras]
@@ -5158,13 +5174,13 @@ numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""}
 
 [[package]]
 name = "openpyxl"
-version = "3.1.2"
+version = "3.1.5"
 description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
 files = [
-    {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"},
-    {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"},
+    {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"},
+    {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"},
 ]
 
 [package.dependencies]
@@ -5172,68 +5188,69 @@ et-xmlfile = "*"
 
 [[package]]
 name = "orjson"
-version = "3.10.9"
+version = "3.10.10"
 description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "orjson-3.10.9-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a377186a11b48c55969e34f0aa414c2826a234f212d6f2b312ba512e3cdb2c6f"},
-    {file = "orjson-3.10.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bf37bf0ca538065c34efe1803378b2dadd7e05b06610a086c2857f15ee59e12"},
-    {file = "orjson-3.10.9-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7d9d83a91168aa48309acba804e393b7d9216b66f15e38f339b9fbb00db8986d"},
-    {file = "orjson-3.10.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0014038a17a1fe273da0a5489787677ef5a64566ab383ad6d929e44ed5683f4"},
-    {file = "orjson-3.10.9-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6ae1b1733e4528e45675ed09a732b6ac37d716bce2facaf467f84ce774adecd"},
-    {file = "orjson-3.10.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe91c2259c4a859356b6db1c6e649b40577492f66d483da8b8af6da0f87c00e3"},
-    {file = "orjson-3.10.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a04f912c32463386ba117591c99a3d9e40b3b69bed9c5123d89dff06f0f5a4b0"},
-    {file = "orjson-3.10.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ae82ca347829ca47431767b079f96bb977f592189250ccdede676339a80c8982"},
-    {file = "orjson-3.10.9-cp310-none-win32.whl", hash = "sha256:fd5083906825d7f5d23089425ce5424d783d6294020bcabb8518a3e1f97833e5"},
-    {file = "orjson-3.10.9-cp310-none-win_amd64.whl", hash = "sha256:e9ff9521b5be0340c8e686bcfe2619777fd7583f71e7b494601cc91ad3919d2e"},
-    {file = "orjson-3.10.9-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:f3bd9df47385b8fabb3b2ee1e83f9960b8accc1905be971a1c257f16c32b491e"},
-    {file = "orjson-3.10.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4948961b6bce1e2086b2cf0b56cc454cdab589d40c7f85be71fb5a5556c51d3"},
-    {file = "orjson-3.10.9-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a9fc7a6cf2b229ddc323e136df13b3fb4466c50d84ed600cd0898223dd2fea3"},
-    {file = "orjson-3.10.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2314846e1029a2d2b899140f350eaaf3a73281df43ba84ac44d94ca861b5b269"},
-    {file = "orjson-3.10.9-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f52d993504827503411df2d60e60acf52885561458d6273f99ecd172f31c4352"},
-    {file = "orjson-3.10.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e29bbf08d907756c145a3a3a1f7ce2f11f15e3edbd3342842589d6030981b76f"},
-    {file = "orjson-3.10.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7ae82992c00b480c3cc7dac6739324554be8c5d8e858a90044928506a3333ef4"},
-    {file = "orjson-3.10.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6fdf8d32b6d94019dc15163542d345e9ce4c4661f56b318608aa3088a1a3a23b"},
-    {file = "orjson-3.10.9-cp311-none-win32.whl", hash = "sha256:01f5fef452b4d7615f2e94153479370a4b59e0c964efb32dd902978f807a45cd"},
-    {file = "orjson-3.10.9-cp311-none-win_amd64.whl", hash = "sha256:95361c4197c7ce9afdf56255de6f4e2474c39d16a277cce31d1b99a2520486d8"},
-    {file = "orjson-3.10.9-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:43ad5560db54331c007dc38be5ba7706cb72974a29ae8227019d89305d750a6f"},
-    {file = "orjson-3.10.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1471c3274b1a4a9b8f4b9ed6effaea9ad885796373797515c44b365b375c256d"},
-    {file = "orjson-3.10.9-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:41d8cac575acd15918903d74cfaabb5dbe57b357b93341332f647d1013928dcc"},
-    {file = "orjson-3.10.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2920c8754f1aedc98bd357ec172af18ce48f5f1017a92244c85fe41d16d3c6e0"},
-    {file = "orjson-3.10.9-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c7fa3ff6a0d9d15a0d0d2254cca16cd919156a18423654ce5574591392fe9914"},
-    {file = "orjson-3.10.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1e91b90c0c26bd79593967c1adef421bcff88c9e723d49c93bb7ad8af80bc6b"},
-    {file = "orjson-3.10.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f11949024f785ace1a516db32fa6255f6227226b2c988abf66f5aee61d43d8f7"},
-    {file = "orjson-3.10.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:060e020d85d0ec145bc1b536b1fd9c10a0519c91991ead9724d6f759ebe26b9a"},
-    {file = "orjson-3.10.9-cp312-none-win32.whl", hash = "sha256:71f73439999fe662843da3607cdf6e75b1551c330f487e5801d463d969091c63"},
-    {file = "orjson-3.10.9-cp312-none-win_amd64.whl", hash = "sha256:12e2efe81356b8448f1cd130f8d75d3718de583112d71f2e2f8baa81bd835bb9"},
-    {file = "orjson-3.10.9-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:0ab6e3ad10e964392f0e838751bcce2ef9c8fa8be7deddffff83088e5791566d"},
-    {file = "orjson-3.10.9-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68ef65223baab00f469c8698f771ab3e6ccf6af2a987e77de5b566b4ec651150"},
-    {file = "orjson-3.10.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6f130848205fea90a2cb9fa2b11cafff9a9f31f4efad225800bc8b9e4a702f24"},
-    {file = "orjson-3.10.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2ea7a98f3295ed8adb6730a5788cc78dafea28300d19932a1d2143457f7db802"},
-    {file = "orjson-3.10.9-cp313-none-win32.whl", hash = "sha256:bdce39f96149a74fddeb2674c54f1da5e57724d32952eb6df2ac719b66d453cc"},
-    {file = "orjson-3.10.9-cp313-none-win_amd64.whl", hash = "sha256:d11383701d4b58e795039b662ada46987744293d57bfa2719e7379b8d67bc796"},
-    {file = "orjson-3.10.9-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:1c3a1e845916a3739ab4162bb48dee66e0e727a19faf397176a7db0d9826cc3c"},
-    {file = "orjson-3.10.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:063ca59d93d93d1387f0c4bb766c6d4f5b0e423fe7c366d0bd4401a56d1669d1"},
-    {file = "orjson-3.10.9-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:938b7fcd79cf06fe348fb24b6163fbaa2fdc9fbed8b1f06318f24467f1487e63"},
-    {file = "orjson-3.10.9-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cc32a9e43c7693011ccde6f8eff8cba75ca0d2a55de11092faa4a716101e67f5"},
-    {file = "orjson-3.10.9-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1b3069b7e2f57f3eef2282029b9c2ba21f08a55f1018e483663a3356f046af4c"},
-    {file = "orjson-3.10.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4289b5d1f88fd05dcafdd7a1f3b17bb722e77712b7618f98e86bdda560e0a1a"},
-    {file = "orjson-3.10.9-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:74f5a7a7f282d326be71b722b0c350da7af6f5f15b9378da177e0e4a09bd91a3"},
-    {file = "orjson-3.10.9-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:80e0c013e50cf7198319d8137931684eb9f32daa067e8276d9dbdd4010bb4add"},
-    {file = "orjson-3.10.9-cp38-none-win32.whl", hash = "sha256:9d989152df8f60a76867354e0e08d896292ab9fb96a7ef89a5b3838de174522c"},
-    {file = "orjson-3.10.9-cp38-none-win_amd64.whl", hash = "sha256:485358fe9892d6bfd88e5885b66bf88496e1842c8f35f61682ff9928b12a6cf0"},
-    {file = "orjson-3.10.9-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ca54e6f320e33c8a6e471c424ee16576361d905c15d69e134c2906d3fcb31795"},
-    {file = "orjson-3.10.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9a9eb03a29c9b30b6c8bb35e5fa20d96589a76e0042005be59b7c3af10a7e43"},
-    {file = "orjson-3.10.9-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:731e8859fc99b398c286320726906404091141e9223dd5e9e6917f7e32e1cc68"},
-    {file = "orjson-3.10.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75b061c11f5aab979a95927a76394b4a85e3e4d63d0a2a16b56a4f7c6503afab"},
-    {file = "orjson-3.10.9-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b61b08f6397f004570fd6a840f4a58946b63b4c7029408cdedb45fe85c7d17f7"},
-    {file = "orjson-3.10.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c4f5e0360b7f0aba91dafe12469108109a0e8973956d4a9865ca262a6881406"},
-    {file = "orjson-3.10.9-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e403429e2947a059545e305d97e4b0eb90d3bb44b396d6f327d7ae2018391e13"},
-    {file = "orjson-3.10.9-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0e492b93e122264c2dc78700859122631a4715bda88fabf57d9226954cfe7ec5"},
-    {file = "orjson-3.10.9-cp39-none-win32.whl", hash = "sha256:bfba9605e85bfd19b83a21c2c25c2bed2000d5f097f3fa3ad5b5f8a7263a3148"},
-    {file = "orjson-3.10.9-cp39-none-win_amd64.whl", hash = "sha256:77d277fa138d4bf145e8b24042004891c188c52ac8492724a183f42b0031cf0c"},
-    {file = "orjson-3.10.9.tar.gz", hash = "sha256:c378074e0c46035dc66e57006993233ec66bf8487d501bab41649b4b7289ed4d"},
+    {file = "orjson-3.10.10-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:b788a579b113acf1c57e0a68e558be71d5d09aa67f62ca1f68e01117e550a998"},
+    {file = "orjson-3.10.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:804b18e2b88022c8905bb79bd2cbe59c0cd014b9328f43da8d3b28441995cda4"},
+    {file = "orjson-3.10.10-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9972572a1d042ec9ee421b6da69f7cc823da5962237563fa548ab17f152f0b9b"},
+    {file = "orjson-3.10.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc6993ab1c2ae7dd0711161e303f1db69062955ac2668181bfdf2dd410e65258"},
+    {file = "orjson-3.10.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d78e4cacced5781b01d9bc0f0cd8b70b906a0e109825cb41c1b03f9c41e4ce86"},
+    {file = "orjson-3.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6eb2598df518281ba0cbc30d24c5b06124ccf7e19169e883c14e0831217a0bc"},
+    {file = "orjson-3.10.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23776265c5215ec532de6238a52707048401a568f0fa0d938008e92a147fe2c7"},
+    {file = "orjson-3.10.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8cc2a654c08755cef90b468ff17c102e2def0edd62898b2486767204a7f5cc9c"},
+    {file = "orjson-3.10.10-cp310-none-win32.whl", hash = "sha256:081b3fc6a86d72efeb67c13d0ea7c030017bd95f9868b1e329a376edc456153b"},
+    {file = "orjson-3.10.10-cp310-none-win_amd64.whl", hash = "sha256:ff38c5fb749347768a603be1fb8a31856458af839f31f064c5aa74aca5be9efe"},
+    {file = "orjson-3.10.10-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:879e99486c0fbb256266c7c6a67ff84f46035e4f8749ac6317cc83dacd7f993a"},
+    {file = "orjson-3.10.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:019481fa9ea5ff13b5d5d95e6fd5ab25ded0810c80b150c2c7b1cc8660b662a7"},
+    {file = "orjson-3.10.10-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0dd57eff09894938b4c86d4b871a479260f9e156fa7f12f8cad4b39ea8028bb5"},
+    {file = "orjson-3.10.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dbde6d70cd95ab4d11ea8ac5e738e30764e510fc54d777336eec09bb93b8576c"},
+    {file = "orjson-3.10.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2625cb37b8fb42e2147404e5ff7ef08712099197a9cd38895006d7053e69d6"},
+    {file = "orjson-3.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbf3c20c6a7db69df58672a0d5815647ecf78c8e62a4d9bd284e8621c1fe5ccb"},
+    {file = "orjson-3.10.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:75c38f5647e02d423807d252ce4528bf6a95bd776af999cb1fb48867ed01d1f6"},
+    {file = "orjson-3.10.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:23458d31fa50ec18e0ec4b0b4343730928296b11111df5f547c75913714116b2"},
+    {file = "orjson-3.10.10-cp311-none-win32.whl", hash = "sha256:2787cd9dedc591c989f3facd7e3e86508eafdc9536a26ec277699c0aa63c685b"},
+    {file = "orjson-3.10.10-cp311-none-win_amd64.whl", hash = "sha256:6514449d2c202a75183f807bc755167713297c69f1db57a89a1ef4a0170ee269"},
+    {file = "orjson-3.10.10-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8564f48f3620861f5ef1e080ce7cd122ee89d7d6dacf25fcae675ff63b4d6e05"},
+    {file = "orjson-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5bf161a32b479034098c5b81f2608f09167ad2fa1c06abd4e527ea6bf4837a9"},
+    {file = "orjson-3.10.10-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:68b65c93617bcafa7f04b74ae8bc2cc214bd5cb45168a953256ff83015c6747d"},
+    {file = "orjson-3.10.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e8e28406f97fc2ea0c6150f4c1b6e8261453318930b334abc419214c82314f85"},
+    {file = "orjson-3.10.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4d0d9fe174cc7a5bdce2e6c378bcdb4c49b2bf522a8f996aa586020e1b96cee"},
+    {file = "orjson-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3be81c42f1242cbed03cbb3973501fcaa2675a0af638f8be494eaf37143d999"},
+    {file = "orjson-3.10.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:65f9886d3bae65be026219c0a5f32dbbe91a9e6272f56d092ab22561ad0ea33b"},
+    {file = "orjson-3.10.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:730ed5350147db7beb23ddaf072f490329e90a1d059711d364b49fe352ec987b"},
+    {file = "orjson-3.10.10-cp312-none-win32.whl", hash = "sha256:a8f4bf5f1c85bea2170800020d53a8877812892697f9c2de73d576c9307a8a5f"},
+    {file = "orjson-3.10.10-cp312-none-win_amd64.whl", hash = "sha256:384cd13579a1b4cd689d218e329f459eb9ddc504fa48c5a83ef4889db7fd7a4f"},
+    {file = "orjson-3.10.10-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:44bffae68c291f94ff5a9b4149fe9d1bdd4cd0ff0fb575bcea8351d48db629a1"},
+    {file = "orjson-3.10.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e27b4c6437315df3024f0835887127dac2a0a3ff643500ec27088d2588fa5ae1"},
+    {file = "orjson-3.10.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca84df16d6b49325a4084fd8b2fe2229cb415e15c46c529f868c3387bb1339d"},
+    {file = "orjson-3.10.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c14ce70e8f39bd71f9f80423801b5d10bf93d1dceffdecd04df0f64d2c69bc01"},
+    {file = "orjson-3.10.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:24ac62336da9bda1bd93c0491eff0613003b48d3cb5d01470842e7b52a40d5b4"},
+    {file = "orjson-3.10.10-cp313-none-win32.whl", hash = "sha256:eb0a42831372ec2b05acc9ee45af77bcaccbd91257345f93780a8e654efc75db"},
+    {file = "orjson-3.10.10-cp313-none-win_amd64.whl", hash = "sha256:f0c4f37f8bf3f1075c6cc8dd8a9f843689a4b618628f8812d0a71e6968b95ffd"},
+    {file = "orjson-3.10.10-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:829700cc18503efc0cf502d630f612884258020d98a317679cd2054af0259568"},
+    {file = "orjson-3.10.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0ceb5e0e8c4f010ac787d29ae6299846935044686509e2f0f06ed441c1ca949"},
+    {file = "orjson-3.10.10-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0c25908eb86968613216f3db4d3003f1c45d78eb9046b71056ca327ff92bdbd4"},
+    {file = "orjson-3.10.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:218cb0bc03340144b6328a9ff78f0932e642199ac184dd74b01ad691f42f93ff"},
+    {file = "orjson-3.10.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e2277ec2cea3775640dc81ab5195bb5b2ada2fe0ea6eee4677474edc75ea6785"},
+    {file = "orjson-3.10.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:848ea3b55ab5ccc9d7bbd420d69432628b691fba3ca8ae3148c35156cbd282aa"},
+    {file = "orjson-3.10.10-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:e3e67b537ac0c835b25b5f7d40d83816abd2d3f4c0b0866ee981a045287a54f3"},
+    {file = "orjson-3.10.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:7948cfb909353fce2135dcdbe4521a5e7e1159484e0bb024c1722f272488f2b8"},
+    {file = "orjson-3.10.10-cp38-none-win32.whl", hash = "sha256:78bee66a988f1a333dc0b6257503d63553b1957889c17b2c4ed72385cd1b96ae"},
+    {file = "orjson-3.10.10-cp38-none-win_amd64.whl", hash = "sha256:f1d647ca8d62afeb774340a343c7fc023efacfd3a39f70c798991063f0c681dd"},
+    {file = "orjson-3.10.10-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:5a059afddbaa6dd733b5a2d76a90dbc8af790b993b1b5cb97a1176ca713b5df8"},
+    {file = "orjson-3.10.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f9b5c59f7e2a1a410f971c5ebc68f1995822837cd10905ee255f96074537ee6"},
+    {file = "orjson-3.10.10-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d5ef198bafdef4aa9d49a4165ba53ffdc0a9e1c7b6f76178572ab33118afea25"},
+    {file = "orjson-3.10.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aaf29ce0bb5d3320824ec3d1508652421000ba466abd63bdd52c64bcce9eb1fa"},
+    {file = "orjson-3.10.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dddd5516bcc93e723d029c1633ae79c4417477b4f57dad9bfeeb6bc0315e654a"},
+    {file = "orjson-3.10.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12f2003695b10817f0fa8b8fca982ed7f5761dcb0d93cff4f2f9f6709903fd7"},
+    {file = "orjson-3.10.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:672f9874a8a8fb9bb1b771331d31ba27f57702c8106cdbadad8bda5d10bc1019"},
+    {file = "orjson-3.10.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1dcbb0ca5fafb2b378b2c74419480ab2486326974826bbf6588f4dc62137570a"},
+    {file = "orjson-3.10.10-cp39-none-win32.whl", hash = "sha256:d9bbd3a4b92256875cb058c3381b782649b9a3c68a4aa9a2fff020c2f9cfc1be"},
+    {file = "orjson-3.10.10-cp39-none-win_amd64.whl", hash = "sha256:766f21487a53aee8524b97ca9582d5c6541b03ab6210fbaf10142ae2f3ced2aa"},
+    {file = "orjson-3.10.10.tar.gz", hash = "sha256:37949383c4df7b4337ce82ee35b6d7471e55195efa7dcb45ab8226ceadb0fe3b"},
 ]
 
 [[package]]
@@ -5298,40 +5315,53 @@ files = [
 
 [[package]]
 name = "pandas"
-version = "2.2.2"
+version = "2.2.3"
 description = "Powerful data structures for data analysis, time series, and statistics"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"},
-    {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"},
-    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"},
-    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"},
-    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"},
-    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"},
-    {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"},
-    {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"},
-    {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"},
-    {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"},
-    {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"},
-    {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"},
-    {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"},
-    {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"},
-    {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"},
-    {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"},
-    {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"},
-    {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"},
-    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"},
-    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"},
-    {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"},
-    {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"},
-    {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"},
-    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"},
-    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"},
-    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"},
-    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"},
-    {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"},
-    {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"},
+    {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"},
+    {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"},
+    {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"},
+    {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"},
+    {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"},
+    {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"},
+    {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"},
+    {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"},
+    {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"},
+    {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"},
+    {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"},
+    {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"},
+    {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"},
+    {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"},
+    {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"},
+    {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"},
+    {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"},
+    {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"},
+    {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"},
+    {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"},
+    {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"},
+    {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"},
+    {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"},
+    {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"},
+    {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"},
+    {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"},
+    {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"},
+    {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"},
+    {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"},
+    {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"},
+    {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"},
+    {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"},
+    {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"},
+    {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"},
+    {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"},
+    {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"},
+    {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"},
+    {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"},
+    {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"},
+    {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"},
+    {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"},
+    {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"},
 ]
 
 [package.dependencies]
@@ -5608,6 +5638,47 @@ files = [
 dev = ["pre-commit", "tox"]
 testing = ["pytest", "pytest-benchmark"]
 
+[[package]]
+name = "polars"
+version = "1.12.0"
+description = "Blazingly fast DataFrame library"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "polars-1.12.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8f3c4e4e423c373dda07b4c8a7ff12aa02094b524767d0ca306b1eba67f2d99e"},
+    {file = "polars-1.12.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:aa6f9862f0cec6353243920d9b8d858c21ec8f25f91af203dea6ff91980e140d"},
+    {file = "polars-1.12.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afb03647b5160737d2119532ee8ffe825de1d19d87f81bbbb005131786f7d59b"},
+    {file = "polars-1.12.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:ea96aba5eb3dab8f0e6abf05ab3fc2136b329261860ef8661d20f5456a2d78e0"},
+    {file = "polars-1.12.0-cp39-abi3-win_amd64.whl", hash =
"sha256:a228a4b320a36d03a9ec9dfe7241b6d80a2f119b2dceb1da953166655e4cf43c"}, + {file = "polars-1.12.0.tar.gz", hash = "sha256:fb5c92de1a8f7d0a3f923fe48ea89eb518bdf55315ae917012350fa072bd64f4"}, +] + +[package.extras] +adbc = ["adbc-driver-manager[dbapi]", "adbc-driver-sqlite[dbapi]"] +all = ["polars[async,cloudpickle,database,deltalake,excel,fsspec,graph,iceberg,numpy,pandas,plot,pyarrow,pydantic,style,timezone]"] +async = ["gevent"] +calamine = ["fastexcel (>=0.9)"] +cloudpickle = ["cloudpickle"] +connectorx = ["connectorx (>=0.3.2)"] +database = ["nest-asyncio", "polars[adbc,connectorx,sqlalchemy]"] +deltalake = ["deltalake (>=0.15.0)"] +excel = ["polars[calamine,openpyxl,xlsx2csv,xlsxwriter]"] +fsspec = ["fsspec"] +gpu = ["cudf-polars-cu12"] +graph = ["matplotlib"] +iceberg = ["pyiceberg (>=0.5.0)"] +numpy = ["numpy (>=1.16.0)"] +openpyxl = ["openpyxl (>=3.0.0)"] +pandas = ["pandas", "polars[pyarrow]"] +plot = ["altair (>=5.4.0)"] +pyarrow = ["pyarrow (>=7.0.0)"] +pydantic = ["pydantic"] +sqlalchemy = ["polars[pandas]", "sqlalchemy"] +style = ["great-tables (>=0.8.0)"] +timezone = ["backports-zoneinfo", "tzdata"] +xlsx2csv = ["xlsx2csv (>=0.8.0)"] +xlsxwriter = ["xlsxwriter"] + [[package]] name = "pooch" version = "1.8.2" @@ -5846,13 +5917,13 @@ files = [ [[package]] name = "proto-plus" -version = "1.24.0" +version = "1.25.0" description = "Beautiful, Pythonic protocol buffers." optional = false python-versions = ">=3.7" files = [ - {file = "proto-plus-1.24.0.tar.gz", hash = "sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445"}, - {file = "proto_plus-1.24.0-py3-none-any.whl", hash = "sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12"}, + {file = "proto_plus-1.25.0-py3-none-any.whl", hash = "sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961"}, + {file = "proto_plus-1.25.0.tar.gz", hash = "sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91"}, ] [package.dependencies] @@ -5886,7 +5957,7 @@ name = "psutil" version = "6.1.0" description = "Cross-platform lib for process and system monitoring in Python." 
optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ {file = "psutil-6.1.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0"}, {file = "psutil-6.1.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:c0e0c00aa18ca2d3b2b991643b799a15fc8f0563d2ebb6040f64ce8dc027b942"}, @@ -5908,8 +5979,8 @@ files = [ ] [package.extras] -dev = ["black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "wheel"] -test = ["pytest", "pytest-xdist", "setuptools"] +dev = ["black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx-rtd-theme", "toml-sort", "twine", "virtualenv", "wheel"] +test = ["enum34", "futures", "ipaddress", "mock (==1.0.1)", "pytest (==4.6.11)", "pytest-xdist", "setuptools", "unittest2"] [[package]] name = "psycopg2-binary" @@ -6018,51 +6089,54 @@ files = [ [[package]] name = "pyarrow" -version = "17.0.0" +version = "18.0.0" description = "Python library for Apache Arrow" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07"}, - {file = "pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655"}, - {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545"}, - {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2"}, - {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8"}, - {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047"}, - {file = "pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087"}, - {file = "pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977"}, - {file = "pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3"}, - {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15"}, - {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597"}, - {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420"}, - {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4"}, - {file = "pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03"}, - {file = 
"pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22"}, - {file = "pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053"}, - {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a"}, - {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc"}, - {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a"}, - {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b"}, - {file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"}, - {file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"}, - {file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"}, - {file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"}, - {file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"}, - {file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"}, - {file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"}, - {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"}, -] - -[package.dependencies] -numpy = ">=1.16.6" + {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2333f93260674e185cfbf208d2da3007132572e56871f451ba1a556b45dae6e2"}, + {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = 
"sha256:4c381857754da44326f3a49b8b199f7f87a51c2faacd5114352fc78de30d3aba"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:603cd8ad4976568954598ef0a6d4ed3dfb78aff3d57fa8d6271f470f0ce7d34f"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58a62549a3e0bc9e03df32f350e10e1efb94ec6cf63e3920c3385b26663948ce"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bc97316840a349485fbb137eb8d0f4d7057e1b2c1272b1a20eebbbe1848f5122"}, + {file = "pyarrow-18.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:2e549a748fa8b8715e734919923f69318c953e077e9c02140ada13e59d043310"}, + {file = "pyarrow-18.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:606e9a3dcb0f52307c5040698ea962685fb1c852d72379ee9412be7de9c5f9e2"}, + {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d5795e37c0a33baa618c5e054cd61f586cf76850a251e2b21355e4085def6280"}, + {file = "pyarrow-18.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:5f0510608ccd6e7f02ca8596962afb8c6cc84c453e7be0da4d85f5f4f7b0328a"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616ea2826c03c16e87f517c46296621a7c51e30400f6d0a61be645f203aa2b93"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1824f5b029ddd289919f354bc285992cb4e32da518758c136271cf66046ef22"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6dd1b52d0d58dd8f685ced9971eb49f697d753aa7912f0a8f50833c7a7426319"}, + {file = "pyarrow-18.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:320ae9bd45ad7ecc12ec858b3e8e462578de060832b98fc4d671dee9f10d9954"}, + {file = "pyarrow-18.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:2c992716cffb1088414f2b478f7af0175fd0a76fea80841b1706baa8fb0ebaad"}, + {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:e7ab04f272f98ebffd2a0661e4e126036f6936391ba2889ed2d44c5006237802"}, + {file = "pyarrow-18.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:03f40b65a43be159d2f97fd64dc998f769d0995a50c00f07aab58b0b3da87e1f"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be08af84808dff63a76860847c48ec0416928a7b3a17c2f49a072cac7c45efbd"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c70c1965cde991b711a98448ccda3486f2a336457cf4ec4dca257a926e149c9"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:00178509f379415a3fcf855af020e3340254f990a8534294ec3cf674d6e255fd"}, + {file = "pyarrow-18.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a71ab0589a63a3e987beb2bc172e05f000a5c5be2636b4b263c44034e215b5d7"}, + {file = "pyarrow-18.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:fe92efcdbfa0bcf2fa602e466d7f2905500f33f09eb90bf0bcf2e6ca41b574c8"}, + {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:907ee0aa8ca576f5e0cdc20b5aeb2ad4d3953a3b4769fc4b499e00ef0266f02f"}, + {file = "pyarrow-18.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:66dcc216ebae2eb4c37b223feaf82f15b69d502821dde2da138ec5a3716e7463"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc1daf7c425f58527900876354390ee41b0ae962a73ad0959b9d829def583bb1"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:871b292d4b696b09120ed5bde894f79ee2a5f109cb84470546471df264cae136"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:082ba62bdcb939824ba1ce10b8acef5ab621da1f4c4805e07bfd153617ac19d4"}, + {file = "pyarrow-18.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:2c664ab88b9766413197733c1720d3dcd4190e8fa3bbdc3710384630a0a7207b"}, + {file = "pyarrow-18.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc892be34dbd058e8d189b47db1e33a227d965ea8805a235c8a7286f7fd17d3a"}, + {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:28f9c39a56d2c78bf6b87dcc699d520ab850919d4a8c7418cd20eda49874a2ea"}, + {file = "pyarrow-18.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:f1a198a50c409ab2d009fbf20956ace84567d67f2c5701511d4dd561fae6f32e"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5bd7fd32e3ace012d43925ea4fc8bd1b02cc6cc1e9813b518302950e89b5a22"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:336addb8b6f5208be1b2398442c703a710b6b937b1a046065ee4db65e782ff5a"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:45476490dd4adec5472c92b4d253e245258745d0ccaabe706f8d03288ed60a79"}, + {file = "pyarrow-18.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420"}, + {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb7e3abcda7e1e6b83c2dc2909c8d045881017270a119cc6ee7fdcfe71d02df8"}, + {file = "pyarrow-18.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:09f30690b99ce34e0da64d20dab372ee54431745e4efb78ac938234a282d15f9"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5ca5d707e158540312e09fd907f9f49bacbe779ab5236d9699ced14d2293b8"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6331f280c6e4521c69b201a42dd978f60f7e129511a55da9e0bfe426b4ebb8d"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3ac24b2be732e78a5a3ac0b3aa870d73766dd00beba6e015ea2ea7394f8b4e55"}, + {file = "pyarrow-18.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b30a927c6dff89ee702686596f27c25160dd6c99be5bcc1513a763ae5b1bfc03"}, + {file = "pyarrow-18.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:8f40ec677e942374e3d7f2fad6a67a4c2811a8b975e8703c6fd26d3b168a90e2"}, + {file = "pyarrow-18.0.0.tar.gz", hash = "sha256:a6aa027b1a9d2970cf328ccd6dbe4a996bc13c39fd427f502782f5bdb9ca20f5"}, +] [package.extras] test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] @@ -6443,12 +6517,12 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pyicu" -version = "2.13.1" +version = "2.14" description = "Python extension wrapping the ICU C++ API" optional = false python-versions = "*" files = [ - {file = "PyICU-2.13.1.tar.gz", hash = "sha256:d4919085eaa07da12bade8ee721e7bbf7ade0151ca0f82946a26c8f4b98cdceb"}, + {file = "PyICU-2.14.tar.gz", hash = "sha256:acc7eb92bd5c554ed577249c6978450a4feda0aa6f01470152b3a7b382a02132"}, ] [[package]] @@ -6520,20 +6594,21 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pypdf" -version = "5.0.1" +version = "5.1.0" description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" optional = false python-versions = ">=3.8" files = [ - {file = "pypdf-5.0.1-py3-none-any.whl", hash = 
"sha256:ff8a32da6c7a63fea9c32fa4dd837cdd0db7966adf6c14f043e3f12592e992db"}, - {file = "pypdf-5.0.1.tar.gz", hash = "sha256:a361c3c372b4a659f9c8dd438d5ce29a753c79c620dc6e1fd66977651f5547ea"}, + {file = "pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc"}, + {file = "pypdf-5.1.0.tar.gz", hash = "sha256:425a129abb1614183fd1aca6982f650b47f8026867c0ce7c4b9f281c443d2740"}, ] [package.extras] -crypto = ["PyCryptodome", "cryptography"] +crypto = ["cryptography"] +cryptodome = ["PyCryptodome"] dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "pytest-socket", "pytest-timeout", "pytest-xdist", "wheel"] docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] -full = ["Pillow (>=8.0.0)", "PyCryptodome", "cryptography"] +full = ["Pillow (>=8.0.0)", "cryptography"] image = ["Pillow (>=8.0.0)"] [[package]] @@ -6715,20 +6790,20 @@ files = [ [[package]] name = "pytest" -version = "8.2.2" +version = "8.3.3" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"}, - {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"}, + {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, + {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, ] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} iniconfig = "*" packaging = "*" -pluggy = ">=1.5,<2.0" +pluggy = ">=1.5,<2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] @@ -7179,13 +7254,13 @@ py = ">=1.4.26,<2.0.0" [[package]] name = "rich" -version = "13.9.2" +version = "13.9.3" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = false python-versions = ">=3.8.0" files = [ - {file = "rich-13.9.2-py3-none-any.whl", hash = "sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1"}, - {file = "rich-13.9.2.tar.gz", hash = "sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c"}, + {file = "rich-13.9.3-py3-none-any.whl", hash = "sha256:9836f5096eb2172c9e77df411c1b009bace4193d6a481d534fea75ebba758283"}, + {file = "rich-13.9.3.tar.gz", hash = "sha256:bc1e01b899537598cf02579d2b9f4a415104d3fc439313a7a2c165d76557a08e"}, ] [package.dependencies] @@ -7373,40 +7448,30 @@ files = [ {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a606ef75a60ecf3d924613892cc603b154178ee25abb3055db5062da811fd969"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd5415dded15c3822597455bc02bcd66e81ef8b7a48cb71a33628fc9fdde39df"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f66efbc1caa63c088dead1c4170d148eabc9b80d95fb75b6c92ac0aad2437d76"}, - {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:22353049ba4181685023b25b5b51a574bce33e7f51c759371a7422dcae5402a6"}, - {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:932205970b9f9991b34f55136be327501903f7c66830e9760a8ffb15b07f05cd"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win32.whl", hash = "sha256:3eac5a91891ceb88138c113f9db04f3cebdae277f5d44eaa3651a4f573e6a5da"}, {file = "ruamel.yaml.clib-0.2.12-cp310-cp310-win_amd64.whl", hash = "sha256:ab007f2f5a87bd08ab1499bdf96f3d5c6ad4dcfa364884cb4549aa0154b13a28"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:4a6679521a58256a90b0d89e03992c15144c5f3858f40d7c18886023d7943db6"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d84318609196d6bd6da0edfa25cedfbabd8dbde5140a0a23af29ad4b8f91fb1e"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb43a269eb827806502c7c8efb7ae7e9e9d0573257a46e8e952f4d4caba4f31e"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:811ea1594b8a0fb466172c384267a4e5e367298af6b228931f273b111f17ef52"}, - {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cf12567a7b565cbf65d438dec6cfbe2917d3c1bdddfce84a9930b7d35ea59642"}, - {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7dd5adc8b930b12c8fc5b99e2d535a09889941aa0d0bd06f4749e9a9397c71d2"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win32.whl", hash = "sha256:bd0a08f0bab19093c54e18a14a10b4322e1eacc5217056f3c063bd2f59853ce4"}, {file = "ruamel.yaml.clib-0.2.12-cp311-cp311-win_amd64.whl", hash = "sha256:a274fb2cb086c7a3dea4322ec27f4cb5cc4b6298adb583ab0e211a4682f241eb"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:943f32bc9dedb3abff9879edc134901df92cfce2c3d5c9348f172f62eb2d771d"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c3829bb364fdb8e0332c9931ecf57d9be3519241323c5274bd82f709cebc0c"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd"}, - {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31"}, - {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5"}, {file = "ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:e7e3736715fbf53e9be2a79eb4db68e4ed857017344d697e8b9749444ae57475"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b7e75b4965e1d4690e93021adfcecccbca7d61c7bddd8e22406ef2ff20d74ef"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6"}, - {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf"}, - {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6"}, {file = "ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fc4b630cd3fa2cf7fce38afa91d7cfe844a9f75d7f0f36393fa98815e911d987"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:bc5f1e1c28e966d61d2519f2a3d451ba989f9ea0f2307de7bc45baa526de9e45"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a0e060aace4c24dcaf71023bbd7d42674e3b230f7e7b97317baf1e953e5b519"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2f1c3765db32be59d18ab3953f43ab62a761327aafc1594a2a1fbe038b8b8a7"}, - {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d85252669dc32f98ebcd5d36768f5d4faeaeaa2d655ac0473be490ecdae3c285"}, - {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e143ada795c341b56de9418c58d028989093ee611aa27ffb9b7f609c00d813ed"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win32.whl", hash = "sha256:beffaed67936fbbeffd10966a4eb53c402fafd3d6833770516bf7314bc6ffa12"}, {file = "ruamel.yaml.clib-0.2.12-cp39-cp39-win_amd64.whl", hash = "sha256:040ae85536960525ea62868b642bdb0c2cc6021c9f9d507810c0c604e66f5a7b"}, {file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"}, @@ -7417,7 +7482,7 @@ name = "s3transfer" version = "0.10.3" description = "An Amazon S3 Transfer Manager" optional = false -python-versions = ">=3.8" +python-versions = ">= 3.8" files = [ {file = "s3transfer-0.10.3-py3-none-any.whl", hash = "sha256:263ed587a5803c6c708d3ce44dc4dfedaab4c1a32e8329bab818933d79ddcf5d"}, {file = "s3transfer-0.10.3.tar.gz", hash = "sha256:4f50ed74ab84d474ce614475e0b8d5047ff080810aac5d01ea25231cfc944b0c"}, @@ -7742,18 +7807,23 @@ train = ["accelerate (>=0.20.3)", "datasets"] [[package]] name = "setuptools" -version = "70.0.0" +version = "75.2.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"}, - {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"}, + {file = "setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8"}, + {file = "setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", 
"sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] [[package]] name = "sgmllib3k" @@ -7844,7 +7914,7 @@ name = "smart-open" version = "7.0.5" description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" optional = false -python-versions = "<4.0,>=3.7" +python-versions = ">=3.7,<4.0" files = [ {file = "smart_open-7.0.5-py3-none-any.whl", hash = "sha256:8523ed805c12dff3eaa50e9c903a6cb0ae78800626631c5fe7ea073439847b89"}, {file = "smart_open-7.0.5.tar.gz", hash = "sha256:d3672003b1dbc85e2013e4983b88eb9a5ccfd389b0d4e5015f39a9ee5620ec18"}, @@ -8472,13 +8542,13 @@ optree = ["optree (>=0.9.1)"] [[package]] name = "tqdm" -version = "4.66.5" +version = "4.66.6" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"}, - {file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"}, + {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"}, + {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"}, ] [package.dependencies] @@ -9238,93 +9308,93 @@ files = [ [[package]] name = "yarl" -version = "1.15.5" +version = "1.17.0" description = "Yet another URL library" optional = false python-versions = ">=3.9" files = [ - {file = "yarl-1.15.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b6c57972a406ea0f61e3f28f2b3a780fb71fbe1d82d267afe5a2f889a83ee7e7"}, - {file = 
"yarl-1.15.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5c3ac5bdcc1375c8ee52784adf94edbce37c471dd2100a117cfef56fe8dbc2b4"}, - {file = "yarl-1.15.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:68d21d0563d82aaf46163eac529adac301b20be3181b8a2811f7bd5615466055"}, - {file = "yarl-1.15.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7d317fb80bc17ed4b34a9aad8b80cef34bea0993654f3e8566daf323def7ef9"}, - {file = "yarl-1.15.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed9c72d5361cfd5af5ccadffa8f8077f4929640e1f938aa0f4b92c5a24996ac5"}, - {file = "yarl-1.15.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bb707859218e8335447b210f41a755e7b1367c33e87add884128bba144694a7f"}, - {file = "yarl-1.15.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6563394492c96cb57f4dff0c69c63d2b28b5469c59c66f35a1e6451583cd0ab4"}, - {file = "yarl-1.15.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c2d1109c8d92059314cc34dd8f0a31f74b720dc140744923ed7ca228bf9b491"}, - {file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8fc727f0fb388debc771eaa7091c092bd2e8b6b4741b73354b8efadcf96d6031"}, - {file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:94189746c5ad62e1014a16298130e696fe593d031d442ef135fb7787b7a1f820"}, - {file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b06d8b05d0fafef204d635a4711283ddbf19c7c0facdc61b4b775f6e47e2d4be"}, - {file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:de6917946dc6bc237d4b354e38aa13a232e0c7948fdbdb160edee3862e9d735f"}, - {file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:34816f1d833433a16c4832562a050b0a60eac53dcb71b2032e6ebff82d74b6a7"}, - {file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:19e2a4b2935f95fad0949f420514c5d862f5f18058fbbfd8854f496a97d9fd87"}, - {file = "yarl-1.15.5-cp310-cp310-win32.whl", hash = "sha256:30ca64521f1a96b72886dd9e8652f16eab11891b4572dcfcfc1ad6d6ccb27abd"}, - {file = "yarl-1.15.5-cp310-cp310-win_amd64.whl", hash = "sha256:86648c53b10c53db8b967a75fb41e0c89dbec7398f6525e34af2b6c456bb0ac0"}, - {file = "yarl-1.15.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e652aa9f8dfa808bc5b2da4d1f4e286cf1d640570fdfa72ffc0c1d16ba114651"}, - {file = "yarl-1.15.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:21050b6cd569980fe20ceeab4baeb900d3f7247270475e42bafe117416a5496c"}, - {file = "yarl-1.15.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:18940191ec9a83bbfe63eea61c3e9d12474bb910d5613bce8fa46e84a80b75b2"}, - {file = "yarl-1.15.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a082dc948045606f62dca0228ab24f13737180b253378d6443f5b2b9ef8beefe"}, - {file = "yarl-1.15.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0a843e692f9d5402b3455653f4607dc521de2385f01c5cad7ba4a87c46e2ea8d"}, - {file = "yarl-1.15.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5093a453176a4fad4f9c3006f507cf300546190bb3e27944275a37cfd6323a65"}, - {file = "yarl-1.15.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2597a589859b94d0a5e2f5d30fee95081867926e57cb751f8b44a7dd92da4e79"}, - {file = "yarl-1.15.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1f5a1ca6eaabfe62718b87eac06d9a47b30cf92ffa065fee9196d3ecd24a3cf1"}, - {file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4ac83b307cc4b8907345b52994055c6c3c2601ceb6fcb94c5ed6a93c6b4e8257"}, - {file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:325e2beb2cd8654b276e7686a3cd203628dd3fe32d5c616e632bc35a2901fb16"}, - {file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:75d04ba8ed335042328086e643e01165e0c24598216f72da709b375930ae3bdb"}, - {file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7abd7d15aedb3961a967cc65f8144dbbca42e3626a21c5f4f29919cf43eeafb9"}, - {file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:294c742a273f44511f14b03a9e06b66094dcdf4bbb75a5e23fead548fd5310ae"}, - {file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:63d46606b20f80a6476f1044bab78e1a69c2e0747f174583e2f12fc70bad2170"}, - {file = "yarl-1.15.5-cp311-cp311-win32.whl", hash = "sha256:b1217102a455e3ac9ac293081093f21f0183e978c7692171ff669fee5296fa28"}, - {file = "yarl-1.15.5-cp311-cp311-win_amd64.whl", hash = "sha256:5848500b6a01497560969e8c3a7eb1b2570853c74a0ca6f67ebaf6064106c49b"}, - {file = "yarl-1.15.5-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d3309ee667f2d9c7ac9ecf44620d6b274bfdd8065b8c5019ff6795dd887b8fed"}, - {file = "yarl-1.15.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:96ce879799fee124d241ea3b84448378f638e290c49493d00b706f3fd57ec22b"}, - {file = "yarl-1.15.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c884dfa56b050f718ea3cbbfd972e29a6f07f63a7449b10d9a20d64f7eec92e2"}, - {file = "yarl-1.15.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0327081978fe186c3390dd4f73f95f825d0bb9c74967e22c2a1a87735974d8f5"}, - {file = "yarl-1.15.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:524b3bb7dff320e305bc979c65eddc0342548c56ea9241502f907853fe53c408"}, - {file = "yarl-1.15.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd56de8b645421ff09c993fdb0ee9c5a3b50d290a8f55793b500d99b34d0c1ce"}, - {file = "yarl-1.15.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c166ad987265bb343be58cdf4fbc4478cc1d81f2246d2be9a15f94393b269faa"}, - {file = "yarl-1.15.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d56980374a10c74255fcea6ebcfb0aeca7166d212ee9fd7e823ddef35fb62ad0"}, - {file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cbf36099a9b407e1456dbf55844743a98603fcba32d2a46fb3a698d926facf1b"}, - {file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:d7fa4b033e2f267e37aabcc36949fa89f9f1716a723395912147f9cf3fb437c7"}, - {file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:bb129f77ddaea2d8e6e00417b8d907448de3407af4eddacca0a515574ad71493"}, - {file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:68e837b3edfcd037f9706157e7cb8efda832de6248c7d9e893e2638356dfae5d"}, - {file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5b8af4165e097ff84d9bbb97bb4f4d7f71b9c1c9565a2d0e27d93e5f92dae220"}, - {file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:70d074d5a96e0954fe6db81ff356f4361397da1cda3f7c127fc0902f671a087e"}, - {file = "yarl-1.15.5-cp312-cp312-win32.whl", hash = "sha256:362da97ad4360e4ef1dd24ccdd3bceb18332da7f40026a42f49b7edd686e31c3"}, - {file = 
"yarl-1.15.5-cp312-cp312-win_amd64.whl", hash = "sha256:9aa054d97033beac9cb9b19b7c0b8784b85b12cd17879087ca6bffba57884e02"}, - {file = "yarl-1.15.5-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5fadcf532fd9f6cbad71485ef8c2462dd9a91d3efc72ca01eb0970792c92552a"}, - {file = "yarl-1.15.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8b7dd6983c81523f9de0ae6334c3b7a3cb33283936e0525f80c4f713f54a9bb6"}, - {file = "yarl-1.15.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fcfd663dc88465ebe41c7c938bdc91c4b01cda96a0d64bf38fd66c1877323771"}, - {file = "yarl-1.15.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd529e637cd23204bd82072f6637cff7af2516ad2c132e8f3342cbc84871f7d1"}, - {file = "yarl-1.15.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b30f13fac56598474071a4f1ecd66c78fdaf2f8619042d7ca135f72dbb348cf"}, - {file = "yarl-1.15.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:44088ec0be82fba118ed29b6b429f80bf295297727adae4c257ac297e01e8bcd"}, - {file = "yarl-1.15.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:607683991bab8607e5158cd290dd8fdaa613442aeab802fe1c237d3a3eee7358"}, - {file = "yarl-1.15.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da48cdff56b01ea4282a6d04b83b07a2088351a4a3ff7aacc1e7e9b6b04b90b9"}, - {file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9162ea117ce8bad8ebc95b7376b4135988acd888d2cf4702f8281e3c11f8b81f"}, - {file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:e8aa19c39cb20bfb16f0266df175a6004943122cf20707fbf0cacc21f6468a25"}, - {file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5d6be369488d503c8edc14e2f63d71ab2a607041ad216a8ad444fa18e8dea792"}, - {file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6e2c674cfe4c03ad7a4d536b1f808221f0d11a360486b4b032d2557c0bd633ad"}, - {file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:041bafaa82b77fd4ec2826d42a55461ec86d999adf7ed9644eef7e8a9febb366"}, - {file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2eeb9ba53c055740cd282ae9d34eb7970d65e73a46f15adec4b0c1b0f2e55cc2"}, - {file = "yarl-1.15.5-cp313-cp313-win32.whl", hash = "sha256:73143dd279e641543da52c55652ad7b4c7c5f79e797f124f58f04cc060f14271"}, - {file = "yarl-1.15.5-cp313-cp313-win_amd64.whl", hash = "sha256:94ab1185900f43760d5487c8e49f5f1a66f864e36092f282f1813597479b9dfa"}, - {file = "yarl-1.15.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6b3d2767bd64c62909ea33525b954ba05c8f9726bfdf2141d175da4e344f19ae"}, - {file = "yarl-1.15.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:44359c52af9c383e5107f3b6301446fc8269599721fa42fafb2afb5f31a42dcb"}, - {file = "yarl-1.15.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6493da9ba5c551978c679ab04856c2cf8f79c316e8ec8c503460a135705edc3b"}, - {file = "yarl-1.15.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a6b6e95bc621c11cf9ff21012173337e789f2461ebc3b4e5bf65c74ef69adb8"}, - {file = "yarl-1.15.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7983290ede3aaa2c9620879530849532529b4dcbf5b12a0b6a91163a773eadb9"}, - {file = "yarl-1.15.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07a4b53abe85813c538b9cdbb02909ebe3734e3af466a587df516e960d500cc8"}, - {file = 
"yarl-1.15.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5882faa2a6e684f65ee44f18c701768749a950cbd5e72db452fc07805f6bdec0"}, - {file = "yarl-1.15.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e27861251d9c094f641d39a8a78dd2371fb9a252ea2f689d1ad353a31d46a0bc"}, - {file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8669a110f655c9eb22f16fb68a7d4942020aeaa09f1def584a80183e3e89953c"}, - {file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:10bfe0bef4cf5ea0383886beda004071faadedf2647048b9f876664284c5b60d"}, - {file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f7de0d4b6b4d8a77e422eb54d765255c0ec6883ee03b8fd537101633948619d7"}, - {file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:00bb3a559d7bd006a5302ecd7e409916939106a8cdbe31f4eb5e5b9ffcca57ea"}, - {file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:06ec070a2d71415f90dbe9d70af3158e7da97a128519dba2d1581156ee27fb92"}, - {file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b997a806846c00d1f41d6a251803732837771b2091bead7566f68820e317bfe7"}, - {file = "yarl-1.15.5-cp39-cp39-win32.whl", hash = "sha256:7825506fbee4055265528ec3532a8197ff26fc53d4978917a4c8ddbb4c1667d7"}, - {file = "yarl-1.15.5-cp39-cp39-win_amd64.whl", hash = "sha256:71730658be0b5de7c570a9795d7404c577b2313c1db370407092c66f70e04ccb"}, - {file = "yarl-1.15.5-py3-none-any.whl", hash = "sha256:625f31d6650829fba4030b4e7bdb2d69e41510dddfa29a1da27076c199521757"}, - {file = "yarl-1.15.5.tar.gz", hash = "sha256:8249147ee81c1cf4d1dc6f26ba28a1b9d92751529f83c308ad02164bb93abd0d"}, + {file = "yarl-1.17.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2d8715edfe12eee6f27f32a3655f38d6c7410deb482158c0b7d4b7fad5d07628"}, + {file = "yarl-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1803bf2a7a782e02db746d8bd18f2384801bc1d108723840b25e065b116ad726"}, + {file = "yarl-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e66589110e20c2951221a938fa200c7aa134a8bdf4e4dc97e6b21539ff026d4"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7069d411cfccf868e812497e0ec4acb7c7bf8d684e93caa6c872f1e6f5d1664d"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cbf70ba16118db3e4b0da69dcde9d4d4095d383c32a15530564c283fa38a7c52"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0bc53cc349675b32ead83339a8de79eaf13b88f2669c09d4962322bb0f064cbc"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6aa18a402d1c80193ce97c8729871f17fd3e822037fbd7d9b719864018df746"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d89c5bc701861cfab357aa0cd039bc905fe919997b8c312b4b0c358619c38d4d"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b728bdf38ca58f2da1d583e4af4ba7d4cd1a58b31a363a3137a8159395e7ecc7"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:5542e57dc15d5473da5a39fbde14684b0cc4301412ee53cbab677925e8497c11"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e564b57e5009fb150cb513804d7e9e9912fee2e48835638f4f47977f88b4a39c"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:eb3c4cff524b4c1c1dba3a6da905edb1dfd2baf6f55f18a58914bbb2d26b59e1"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:05e13f389038842da930d439fbed63bdce3f7644902714cb68cf527c971af804"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:153c38ee2b4abba136385af4467459c62d50f2a3f4bde38c7b99d43a20c143ef"}, + {file = "yarl-1.17.0-cp310-cp310-win32.whl", hash = "sha256:4065b4259d1ae6f70fd9708ffd61e1c9c27516f5b4fae273c41028afcbe3a094"}, + {file = "yarl-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:abf366391a02a8335c5c26163b5fe6f514cc1d79e74d8bf3ffab13572282368e"}, + {file = "yarl-1.17.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:19a4fe0279626c6295c5b0c8c2bb7228319d2e985883621a6e87b344062d8135"}, + {file = "yarl-1.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cadd0113f4db3c6b56868d6a19ca6286f5ccfa7bc08c27982cf92e5ed31b489a"}, + {file = "yarl-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:60d6693eef43215b1ccfb1df3f6eae8db30a9ff1e7989fb6b2a6f0b468930ee8"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb8bf3843e1fa8cf3fe77813c512818e57368afab7ebe9ef02446fe1a10b492"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d2a5b35fd1d8d90443e061d0c8669ac7600eec5c14c4a51f619e9e105b136715"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5bf17b32f392df20ab5c3a69d37b26d10efaa018b4f4e5643c7520d8eee7ac7"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48f51b529b958cd06e78158ff297a8bf57b4021243c179ee03695b5dbf9cb6e1"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5fcaa06bf788e19f913d315d9c99a69e196a40277dc2c23741a1d08c93f4d430"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:32f3ee19ff0f18a7a522d44e869e1ebc8218ad3ae4ebb7020445f59b4bbe5897"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a4fb69a81ae2ec2b609574ae35420cf5647d227e4d0475c16aa861dd24e840b0"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7bacc8b77670322132a1b2522c50a1f62991e2f95591977455fd9a398b4e678d"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:437bf6eb47a2d20baaf7f6739895cb049e56896a5ffdea61a4b25da781966e8b"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:30534a03c87484092080e3b6e789140bd277e40f453358900ad1f0f2e61fc8ec"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b30df4ff98703649915144be6f0df3b16fd4870ac38a09c56d5d9e54ff2d5f96"}, + {file = "yarl-1.17.0-cp311-cp311-win32.whl", hash = "sha256:263b487246858e874ab53e148e2a9a0de8465341b607678106829a81d81418c6"}, + {file = "yarl-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:07055a9e8b647a362e7d4810fe99d8f98421575e7d2eede32e008c89a65a17bd"}, + {file = "yarl-1.17.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:84095ab25ba69a8fa3fb4936e14df631b8a71193fe18bd38be7ecbe34d0f5512"}, + {file = "yarl-1.17.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02608fb3f6df87039212fc746017455ccc2a5fc96555ee247c45d1e9f21f1d7b"}, + {file = "yarl-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13468d291fe8c12162b7cf2cdb406fe85881c53c9e03053ecb8c5d3523822cd9"}, + {file = 
"yarl-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8da3f8f368fb7e2f052fded06d5672260c50b5472c956a5f1bd7bf474ae504ab"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec0507ab6523980bed050137007c76883d941b519aca0e26d4c1ec1f297dd646"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08fc76df7fd8360e9ff30e6ccc3ee85b8dbd6ed5d3a295e6ec62bcae7601b932"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d522f390686acb6bab2b917dd9ca06740c5080cd2eaa5aef8827b97e967319d"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:147c527a80bb45b3dcd6e63401af8ac574125d8d120e6afe9901049286ff64ef"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:24cf43bcd17a0a1f72284e47774f9c60e0bf0d2484d5851f4ddf24ded49f33c6"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c28a44b9e0fba49c3857360e7ad1473fc18bc7f6659ca08ed4f4f2b9a52c75fa"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:350cacb2d589bc07d230eb995d88fcc646caad50a71ed2d86df533a465a4e6e1"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:fd1ab1373274dea1c6448aee420d7b38af163b5c4732057cd7ee9f5454efc8b1"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4934e0f96dadc567edc76d9c08181633c89c908ab5a3b8f698560124167d9488"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8d0a278170d75c88e435a1ce76557af6758bfebc338435b2eba959df2552163e"}, + {file = "yarl-1.17.0-cp312-cp312-win32.whl", hash = "sha256:61584f33196575a08785bb56db6b453682c88f009cd9c6f338a10f6737ce419f"}, + {file = "yarl-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:9987a439ad33a7712bd5bbd073f09ad10d38640425fa498ecc99d8aa064f8fc4"}, + {file = "yarl-1.17.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8deda7b8eb15a52db94c2014acdc7bdd14cb59ec4b82ac65d2ad16dc234a109e"}, + {file = "yarl-1.17.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56294218b348dcbd3d7fce0ffd79dd0b6c356cb2a813a1181af730b7c40de9e7"}, + {file = "yarl-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1fab91292f51c884b290ebec0b309a64a5318860ccda0c4940e740425a67b6b7"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cf93fa61ff4d9c7d40482ce1a2c9916ca435e34a1b8451e17f295781ccc034f"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:261be774a0d71908c8830c33bacc89eef15c198433a8cc73767c10eeeb35a7d0"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deec9693b67f6af856a733b8a3e465553ef09e5e8ead792f52c25b699b8f9e6e"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c804b07622ba50a765ca7fb8145512836ab65956de01307541def869e4a456c9"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d013a7c9574e98c14831a8f22d27277688ec3b2741d0188ac01a910b009987a"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e2cfcba719bd494c7413dcf0caafb51772dec168c7c946e094f710d6aa70494e"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = 
"sha256:c068aba9fc5b94dfae8ea1cedcbf3041cd4c64644021362ffb750f79837e881f"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:3616df510ffac0df3c9fa851a40b76087c6c89cbcea2de33a835fc80f9faac24"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:755d6176b442fba9928a4df787591a6a3d62d4969f05c406cad83d296c5d4e05"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c18f6e708d1cf9ff5b1af026e697ac73bea9cb70ee26a2b045b112548579bed2"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5b937c216b6dee8b858c6afea958de03c5ff28406257d22b55c24962a2baf6fd"}, + {file = "yarl-1.17.0-cp313-cp313-win32.whl", hash = "sha256:d0131b14cb545c1a7bd98f4565a3e9bdf25a1bd65c83fc156ee5d8a8499ec4a3"}, + {file = "yarl-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:01c96efa4313c01329e88b7e9e9e1b2fc671580270ddefdd41129fa8d0db7696"}, + {file = "yarl-1.17.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0d44f67e193f0a7acdf552ecb4d1956a3a276c68e7952471add9f93093d1c30d"}, + {file = "yarl-1.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:16ea0aa5f890cdcb7ae700dffa0397ed6c280840f637cd07bffcbe4b8d68b985"}, + {file = "yarl-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cf5469dc7dcfa65edf5cc3a6add9f84c5529c6b556729b098e81a09a92e60e51"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e662bf2f6e90b73cf2095f844e2bc1fda39826472a2aa1959258c3f2a8500a2f"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8260e88f1446904ba20b558fa8ce5d0ab9102747238e82343e46d056d7304d7e"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5dc16477a4a2c71e64c5d3d15d7ae3d3a6bb1e8b955288a9f73c60d2a391282f"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46027e326cecd55e5950184ec9d86c803f4f6fe4ba6af9944a0e537d643cdbe0"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc95e46c92a2b6f22e70afe07e34dbc03a4acd07d820204a6938798b16f4014f"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:16ca76c7ac9515320cd09d6cc083d8d13d1803f6ebe212b06ea2505fd66ecff8"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:eb1a5b97388f2613f9305d78a3473cdf8d80c7034e554d8199d96dcf80c62ac4"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:41fd5498975418cdc34944060b8fbeec0d48b2741068077222564bea68daf5a6"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:146ca582ed04a5664ad04b0e0603934281eaab5c0115a5a46cce0b3c061a56a1"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:6abb8c06107dbec97481b2392dafc41aac091a5d162edf6ed7d624fe7da0587a"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4d14be4613dd4f96c25feb4bd8c0d8ce0f529ab0ae555a17df5789e69d8ec0c5"}, + {file = "yarl-1.17.0-cp39-cp39-win32.whl", hash = "sha256:174d6a6cad1068f7850702aad0c7b1bca03bcac199ca6026f84531335dfc2646"}, + {file = "yarl-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:6af417ca2c7349b101d3fd557ad96b4cd439fdb6ab0d288e3f64a068eea394d0"}, + {file = "yarl-1.17.0-py3-none-any.whl", hash = "sha256:62dd42bb0e49423f4dd58836a04fcf09c80237836796025211bbe913f1524993"}, + {file = "yarl-1.17.0.tar.gz", hash = 
"sha256:d3f13583f378930377e02002b4085a3d025b00402d5a80911726d43a67911cd9"}, ] [package.dependencies] @@ -9428,4 +9498,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.12,<3.13" -content-hash = "b7c00775e090d3b78298d3028a5e99da2152ff3fbdf43763546050ac0c1ed6cd" +content-hash = "ebf868a8177724644553470bc2e4f4fcf8eed1189836d55dbd328b833227340c" diff --git a/pyproject.toml b/pyproject.toml index 4cc47f9ec0c..cb6a3cf577d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,22 +46,22 @@ hanziconv = "0.3.2" html-text = "0.6.2" httpx = "0.27.0" huggingface-hub = "^0.25.0" -infinity-emb = "0.0.51" +infinity-emb = "^0.0.66" itsdangerous = "2.1.2" markdown = "3.6" markdown-to-json = "2.1.1" minio = "7.2.4" mistralai = "0.4.2" nltk = "3.9.1" -numpy = "1.26.4" +numpy = "^1.26.0" ollama = "0.2.1" onnxruntime = "1.19.2" openai = "1.45.0" opencv-python = "4.10.0.84" opencv-python-headless = "4.10.0.84" -openpyxl = "3.1.2" +openpyxl = "^3.1.0" ormsgpack = "1.5.0" -pandas = "2.2.2" +pandas = "^2.2.0" pdfplumber = "0.10.4" peewee = "3.17.1" pillow = "10.4.0" @@ -70,7 +70,7 @@ psycopg2-binary = "2.9.9" pyclipper = "1.3.0.post5" pycryptodomex = "3.20.0" pypdf = "^5.0.0" -pytest = "8.2.2" +pytest = "^8.3.0" python-dotenv = "1.0.1" python-dateutil = "2.8.2" python-pptx = "^1.0.2" @@ -86,7 +86,7 @@ ruamel-base = "1.0.0" scholarly = "1.7.11" scikit-learn = "1.5.0" selenium = "4.22.0" -setuptools = "70.0.0" +setuptools = "^75.2.0" shapely = "2.0.5" six = "1.16.0" strenum = "0.4.15" @@ -114,6 +114,7 @@ graspologic = "^3.4.1" pymysql = "^1.1.1" mini-racer = "^0.12.4" pyicu = "^2.13.1" +polars = "^1.9.0" [tool.poetry.group.full] diff --git a/rag/app/presentation.py b/rag/app/presentation.py index af8c59387c6..54d897616f9 100644 --- a/rag/app/presentation.py +++ b/rag/app/presentation.py @@ -20,6 +20,7 @@ from rag.nlp import rag_tokenizer from deepdoc.parser import PdfParser, PptParser, PlainParser from PyPDF2 import PdfReader as pdf2_read +import json class Ppt(PptParser): @@ -107,9 +108,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, d = copy.deepcopy(doc) pn += from_page d["image"] = img - d["page_num_int"] = [pn + 1] - d["top_int"] = [0] - d["position_int"] = [(pn + 1, 0, img.size[0], 0, img.size[1])] + d["page_num_list"] = json.dumps([pn + 1]) + d["top_list"] = json.dumps([0]) + d["position_list"] = json.dumps([(pn + 1, 0, img.size[0], 0, img.size[1])]) tokenize(d, txt, eng) res.append(d) return res @@ -123,10 +124,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, pn += from_page if img: d["image"] = img - d["page_num_int"] = [pn + 1] - d["top_int"] = [0] - d["position_int"] = [ - (pn + 1, 0, img.size[0] if img else 0, 0, img.size[1] if img else 0)] + d["page_num_list"] = json.dumps([pn + 1]) + d["top_list"] = json.dumps([0]) + d["position_list"] = json.dumps([ + (pn + 1, 0, img.size[0] if img else 0, 0, img.size[1] if img else 0)]) tokenize(d, txt, eng) res.append(d) return res diff --git a/rag/benchmark.py b/rag/benchmark.py index 490c031f97c..75c004791f7 100644 --- a/rag/benchmark.py +++ b/rag/benchmark.py @@ -13,16 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import json import os from collections import defaultdict from api.db import LLMType from api.db.services.llm_service import LLMBundle from api.db.services.knowledgebase_service import KnowledgebaseService -from api.settings import retrievaler +from api.settings import retrievaler, docStoreConn from api.utils import get_uuid from rag.nlp import tokenize, search -from rag.utils.es_conn import ELASTICSEARCH from ranx import evaluate import pandas as pd from tqdm import tqdm @@ -31,6 +29,7 @@ class Benchmark: def __init__(self, kb_id): e, kb = KnowledgebaseService.get_by_id(kb_id) + self.kb_id = kb_id self.similarity_threshold = kb.similarity_threshold self.vector_similarity_weight = kb.vector_similarity_weight self.embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING, llm_name=kb.embd_id, lang=kb.language) @@ -75,19 +74,19 @@ def ms_marco_index(self, file_path, index_name): query = data.iloc[i]['query'] for rel, text in zip(data.iloc[i]['passages']['is_selected'], data.iloc[i]['passages']['passage_text']): d = { - "id": get_uuid() + "_id": get_uuid() } tokenize(d, text, "english") docs.append(d) - texts[d["id"]] = text - qrels[query][d["id"]] = int(rel) + texts[d["_id"]] = text + qrels[query][d["_id"]] = int(rel) if len(docs) >= 32: docs = self.embedding(docs) - ELASTICSEARCH.bulk(docs, search.index_name(index_name)) + docStoreConn.insert(docs, search.index_name(index_name), self.kb_id) docs = [] docs = self.embedding(docs) - ELASTICSEARCH.bulk(docs, search.index_name(index_name)) + docStoreConn.insert(docs, search.index_name(index_name), self.kb_id) return qrels, texts def trivia_qa_index(self, file_path, index_name): @@ -102,19 +101,20 @@ def trivia_qa_index(self, file_path, index_name): for rel, text in zip(data.iloc[i]["search_results"]['rank'], data.iloc[i]["search_results"]['search_context']): d = { - "id": get_uuid() + "_id": get_uuid() } tokenize(d, text, "english") docs.append(d) - texts[d["id"]] = text - qrels[query][d["id"]] = int(rel) + texts[d["_id"]] = text + qrels[query][d["_id"]] = int(rel) if len(docs) >= 32: docs = self.embedding(docs) - ELASTICSEARCH.bulk(docs, search.index_name(index_name)) + docStoreConn.insert(docs, search.index_name(index_name), self.kb_id) + docs = [] docs = self.embedding(docs) - ELASTICSEARCH.bulk(docs, search.index_name(index_name)) + docStoreConn.insert(docs, search.index_name(index_name), self.kb_id) return qrels, texts def miracl_index(self, file_path, corpus_path, index_name): @@ -147,19 +147,20 @@ def miracl_index(self, file_path, corpus_path, index_name): text = corpus_total[tmp_data.iloc[i]['docid']] rel = tmp_data.iloc[i]['relevance'] d = { - "id": get_uuid() + "_id": get_uuid() } tokenize(d, text, 'english') docs.append(d) - texts[d["id"]] = text - qrels[query][d["id"]] = int(rel) + texts[d["_id"]] = text + qrels[query][d["_id"]] = int(rel) if len(docs) >= 32: docs = self.embedding(docs) - ELASTICSEARCH.bulk(docs, search.index_name(index_name)) + docStoreConn.insert(docs, search.index_name(index_name), self.kb_id) + docs = [] docs = self.embedding(docs) - ELASTICSEARCH.bulk(docs, search.index_name(index_name)) + docStoreConn.insert(docs, search.index_name(index_name), self.kb_id) return qrels, texts diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index 7c7b6cec629..03fca45f11a 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -25,6 +25,7 @@ from word2number import w2n from cn2an import cn2an from PIL import Image +import json all_codecs = [ 'utf-8', 'gb2312', 'gbk', 'utf_16', 'ascii', 'big5', 'big5hkscs', @@ -51,12 
+52,12 @@ def find_codec(blob): try: blob[:1024].decode(c) return c - except Exception as e: + except Exception: pass try: blob.decode(c) return c - except Exception as e: + except Exception: pass return "utf-8" @@ -241,7 +242,7 @@ def tokenize_chunks(chunks, doc, eng, pdf_parser=None): d["image"], poss = pdf_parser.crop(ck, need_position=True) add_positions(d, poss) ck = pdf_parser.remove_tag(ck) - except NotImplementedError as e: + except NotImplementedError: pass tokenize(d, ck, eng) res.append(d) @@ -289,13 +290,16 @@ def tokenize_table(tbls, doc, eng, batch_size=10): def add_positions(d, poss): if not poss: return - d["page_num_int"] = [] - d["position_int"] = [] - d["top_int"] = [] + page_num_list = [] + position_list = [] + top_list = [] for pn, left, right, top, bottom in poss: - d["page_num_int"].append(int(pn + 1)) - d["top_int"].append(int(top)) - d["position_int"].append((int(pn + 1), int(left), int(right), int(top), int(bottom))) + page_num_list.append(int(pn + 1)) + top_list.append(int(top)) + position_list.append((int(pn + 1), int(left), int(right), int(top), int(bottom))) + d["page_num_list"] = json.dumps(page_num_list) + d["position_list"] = json.dumps(position_list) + d["top_list"] = json.dumps(top_list) def remove_contents_table(sections, eng=False): diff --git a/rag/nlp/query.py b/rag/nlp/query.py index c58c99c4cfc..77263a228b8 100644 --- a/rag/nlp/query.py +++ b/rag/nlp/query.py @@ -15,20 +15,25 @@ # import json -import math import re import logging -import copy -from elasticsearch_dsl import Q +from rag.utils.doc_store_conn import MatchTextExpr from rag.nlp import rag_tokenizer, term_weight, synonym -class EsQueryer: - def __init__(self, es): + +class FulltextQueryer: + def __init__(self): self.tw = term_weight.Dealer() - self.es = es self.syn = synonym.Dealer() - self.flds = ["ask_tks^10", "ask_small_tks"] + self.query_fields = [ + "title_tks^10", + "title_sm_tks^5", + "important_kwd^30", + "important_tks^20", + "content_ltks^2", + "content_sm_ltks", + ] @staticmethod def subSpecialChar(line): @@ -43,12 +48,15 @@ def isChinese(line): for t in arr: if not re.match(r"[a-zA-Z]+$", t): e += 1 - return e * 1. / len(arr) >= 0.7 + return e * 1.0 / len(arr) >= 0.7 @staticmethod def rmWWW(txt): patts = [ - (r"是*(什么样的|哪家|一下|那家|请问|啥样|咋样了|什么时候|何时|何地|何人|是否|是不是|多少|哪里|怎么|哪儿|怎么样|如何|哪些|是啥|啥是|啊|吗|呢|吧|咋|什么|有没有|呀)是*", ""), + ( + r"是*(什么样的|哪家|一下|那家|请问|啥样|咋样了|什么时候|何时|何地|何人|是否|是不是|多少|哪里|怎么|哪儿|怎么样|如何|哪些|是啥|啥是|啊|吗|呢|吧|咋|什么|有没有|呀)是*", + "", + ), (r"(^| )(what|who|how|which|where|why)('re|'s)? 
", " "), (r"(^| )('s|'re|is|are|were|was|do|does|did|don't|doesn't|didn't|has|have|be|there|you|me|your|my|mine|just|please|may|i|should|would|wouldn't|will|won't|done|go|for|with|so|the|a|an|by|i'm|it's|he's|she's|they|they're|you're|as|by|on|in|at|up|out|down|of) ", " ") ] @@ -56,14 +64,13 @@ def rmWWW(txt): txt = re.sub(r, p, txt, flags=re.IGNORECASE) return txt - def question(self, txt, tbl="qa", min_match="60%"): + def question(self, txt, tbl="qa", min_match:float=0.6): txt = re.sub( r"[ :\r\n\t,,。??/`!!&\^%%]+", " ", - rag_tokenizer.tradi2simp( - rag_tokenizer.strQ2B( - txt.lower()))).strip() - txt = EsQueryer.rmWWW(txt) + rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(txt.lower())), + ).strip() + txt = FulltextQueryer.rmWWW(txt) if not self.isChinese(txt): tks = rag_tokenizer.tokenize(txt).split(" ") @@ -73,14 +80,20 @@ def question(self, txt, tbl="qa", min_match="60%"): tks_w = [(re.sub(r"^[\+-]", "", tk), w) for tk, w in tks_w if tk] q = ["{}^{:.4f}".format(tk, w) for tk, w in tks_w if tk] for i in range(1, len(tks_w)): - q.append("\"%s %s\"^%.4f" % (tks_w[i - 1][0], tks_w[i][0], max(tks_w[i - 1][1], tks_w[i][1])*2)) + q.append( + '"%s %s"^%.4f' + % ( + tks_w[i - 1][0], + tks_w[i][0], + max(tks_w[i - 1][1], tks_w[i][1]) * 2, + ) + ) if not q: q.append(txt) - return Q("bool", - must=Q("query_string", fields=self.flds, - type="best_fields", query=" ".join(q), - boost=1)#, minimum_should_match=min_match) - ), list(set([t for t in txt.split(" ") if t])) + query = " ".join(q) + return MatchTextExpr( + self.query_fields, query, 100, {"minimum_should_match": min_match} + ), tks def need_fine_grained_tokenize(tk): if len(tk) < 3: @@ -100,65 +113,71 @@ def need_fine_grained_tokenize(tk): logging.info(json.dumps(twts, ensure_ascii=False)) tms = [] for tk, w in sorted(twts, key=lambda x: x[1] * -1): - sm = rag_tokenizer.fine_grained_tokenize(tk).split(" ") if need_fine_grained_tokenize(tk) else [] + sm = ( + rag_tokenizer.fine_grained_tokenize(tk).split(" ") + if need_fine_grained_tokenize(tk) + else [] + ) sm = [ re.sub( r"[ ,\./;'\[\]\\`~!@#$%\^&\*\(\)=\+_<>\?:\"\{\}\|,。;‘’【】、!¥……()——《》?:“”-]+", "", - m) for m in sm] - sm = [EsQueryer.subSpecialChar(m) for m in sm if len(m) > 1] + m, + ) + for m in sm + ] + sm = [FulltextQueryer.subSpecialChar(m) for m in sm if len(m) > 1] sm = [m for m in sm if len(m) > 1] keywords.append(re.sub(r"[ \\\"']+", "", tk)) keywords.extend(sm) - if len(keywords) >= 12: break + if len(keywords) >= 12: + break tk_syns = self.syn.lookup(tk) - tk = EsQueryer.subSpecialChar(tk) + tk = FulltextQueryer.subSpecialChar(tk) if tk.find(" ") > 0: - tk = "\"%s\"" % tk + tk = '"%s"' % tk if tk_syns: tk = f"({tk} %s)" % " ".join(tk_syns) if sm: - tk = f"{tk} OR \"%s\" OR (\"%s\"~2)^0.5" % ( - " ".join(sm), " ".join(sm)) + tk = f'{tk} OR "%s" OR ("%s"~2)^0.5' % (" ".join(sm), " ".join(sm)) if tk.strip(): tms.append((tk, w)) tms = " ".join([f"({t})^{w}" for t, w in tms]) if len(twts) > 1: - tms += f" (\"%s\"~4)^1.5" % (" ".join([t for t, _ in twts])) + tms += ' ("%s"~4)^1.5' % (" ".join([t for t, _ in twts])) if re.match(r"[0-9a-z ]+$", tt): - tms = f"(\"{tt}\" OR \"%s\")" % rag_tokenizer.tokenize(tt) + tms = f'("{tt}" OR "%s")' % rag_tokenizer.tokenize(tt) syns = " OR ".join( - ["\"%s\"^0.7" % EsQueryer.subSpecialChar(rag_tokenizer.tokenize(s)) for s in syns]) + [ + '"%s"^0.7' + % FulltextQueryer.subSpecialChar(rag_tokenizer.tokenize(s)) + for s in syns + ] + ) if syns: tms = f"({tms})^5 OR ({syns})^0.7" qs.append(tms) - flds = copy.deepcopy(self.flds) - mst = [] if qs: - 
mst.append( - Q("query_string", fields=flds, type="best_fields", - query=" OR ".join([f"({t})" for t in qs if t]), boost=1, minimum_should_match=min_match) - ) - - return Q("bool", - must=mst, - ), list(set(keywords)) + query = " OR ".join([f"({t})" for t in qs if t]) + return MatchTextExpr( + self.query_fields, query, 100, {"minimum_should_match": min_match} + ), keywords + return None, keywords - def hybrid_similarity(self, avec, bvecs, atks, btkss, tkweight=0.3, - vtweight=0.7): + def hybrid_similarity(self, avec, bvecs, atks, btkss, tkweight=0.3, vtweight=0.7): from sklearn.metrics.pairwise import cosine_similarity as CosineSimilarity import numpy as np + sims = CosineSimilarity([avec], bvecs) tksim = self.token_similarity(atks, btkss) - return np.array(sims[0]) * vtweight + \ - np.array(tksim) * tkweight, tksim, sims[0] + return np.array(sims[0]) * vtweight + np.array(tksim) * tkweight, tksim, sims[0] def token_similarity(self, atks, btkss): def toDict(tks): diff --git a/rag/nlp/search.py b/rag/nlp/search.py index a7740fafca1..f5c18696cee 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -14,34 +14,25 @@ # limitations under the License. # -import json import re -from copy import deepcopy - -from elasticsearch_dsl import Q, Search +import json from typing import List, Optional, Dict, Union from dataclasses import dataclass -from rag.settings import es_logger +from rag.settings import doc_store_logger from rag.utils import rmSpace -from rag.nlp import rag_tokenizer, query, is_english +from rag.nlp import rag_tokenizer, query import numpy as np +from rag.utils.doc_store_conn import DocStoreConnection, MatchDenseExpr, FusionExpr, OrderByExpr def index_name(uid): return f"ragflow_{uid}" class Dealer: - def __init__(self, es): - self.qryr = query.EsQueryer(es) - self.qryr.flds = [ - "title_tks^10", - "title_sm_tks^5", - "important_kwd^30", - "important_tks^20", - "content_ltks^2", - "content_sm_ltks"] - self.es = es + def __init__(self, dataStore: DocStoreConnection): + self.qryr = query.FulltextQueryer() + self.dataStore = dataStore @dataclass class SearchResult: @@ -54,98 +45,65 @@ class SearchResult: keywords: Optional[List[str]] = None group_docs: List[List] = None - def _vector(self, txt, emb_mdl, sim=0.8, topk=10): - qv, c = emb_mdl.encode_queries(txt) - return { - "field": "q_%d_vec" % len(qv), - "k": topk, - "similarity": sim, - "num_candidates": topk * 2, - "query_vector": [float(v) for v in qv] - } - - def _add_filters(self, bqry, req): - if req.get("kb_ids"): - bqry.filter.append(Q("terms", kb_id=req["kb_ids"])) - if req.get("doc_ids"): - bqry.filter.append(Q("terms", doc_id=req["doc_ids"])) - if req.get("knowledge_graph_kwd"): - bqry.filter.append(Q("terms", knowledge_graph_kwd=req["knowledge_graph_kwd"])) - if "available_int" in req: - if req["available_int"] == 0: - bqry.filter.append(Q("range", available_int={"lt": 1})) - else: - bqry.filter.append( - Q("bool", must_not=Q("range", available_int={"lt": 1}))) - return bqry - - def search(self, req, idxnms, emb_mdl=None, highlight=False): + def get_vector(self, txt, emb_mdl, topk=10, similarity=0.1): + qv, _ = emb_mdl.encode_queries(txt) + embedding_data = [float(v) for v in qv] + vector_column_name = f"q_{len(embedding_data)}_vec" + return MatchDenseExpr(vector_column_name, embedding_data, 'float', 'cosine', topk, {"similarity": similarity}) + + def get_filters(self, req): + condition = dict() + for key, field in {"kb_ids": "kb_id", "doc_ids": "doc_id"}.items(): + if key in req and req[key] is not None: + 
condition[field] = req[key] + # TODO(yzc): `available_int` is nullable however infinity doesn't support nullable columns. + for key in ["knowledge_graph_kwd"]: + if key in req and req[key] is not None: + condition[key] = req[key] + return condition + + def search(self, req, idxnm: str, emb_mdl, highlight = False): + kb_ids = req['kb_ids'] qst = req.get("question", "") - bqry, keywords = self.qryr.question(qst, min_match="30%") - bqry = self._add_filters(bqry, req) - bqry.boost = 0.05 + matchText, keywords = self.qryr.question(qst, min_match=0.3) + filters = self.get_filters(req) + orderBy = OrderByExpr() - s = Search() pg = int(req.get("page", 1)) - 1 topk = int(req.get("topk", 1024)) ps = int(req.get("size", topk)) + offset, limit = pg * ps, (pg + 1) * ps + + q_vec = [] + assert emb_mdl, "No embedding model selected" + matchDense = self.get_vector(qst, emb_mdl, topk, req.get("similarity", 0.1)) + q_vec = matchDense.embedding_data src = req.get("fields", ["docnm_kwd", "content_ltks", "kb_id", "img_id", "title_tks", "important_kwd", - "image_id", "doc_id", "q_512_vec", "q_768_vec", "position_int", "knowledge_graph_kwd", - "q_1024_vec", "q_1536_vec", "available_int", "content_with_weight"]) + "doc_id", f"q_{len(q_vec)}_vec", "position_list", "knowledge_graph_kwd", + "available_int", "content_with_weight"]) + # doc_store_logger.info(f"Dealer.search index {idxnm} emb_mdl {str(emb_mdl.llm_name)} vector length {len(q_vec)}") - s = s.query(bqry)[pg * ps:(pg + 1) * ps] - s = s.highlight("content_ltks") - s = s.highlight("title_ltks") + fusionExpr = FusionExpr("weighted_sum", topk, {"weights": "0.05, 0.95"}) if not qst: - if not req.get("sort"): - s = s.sort( - #{"create_time": {"order": "desc", "unmapped_type": "date"}}, - {"create_timestamp_flt": { - "order": "desc", "unmapped_type": "float"}} - ) + if req.get("sort"): + orderBy.asc("page_num_list").asc("top_list").desc("create_timestamp_flt") else: - s = s.sort( - {"page_num_int": {"order": "asc", "unmapped_type": "float", - "mode": "avg", "numeric_type": "double"}}, - {"top_int": {"order": "asc", "unmapped_type": "float", - "mode": "avg", "numeric_type": "double"}}, - #{"create_time": {"order": "desc", "unmapped_type": "date"}}, - {"create_timestamp_flt": { - "order": "desc", "unmapped_type": "float"}} - ) - - if qst: - s = s.highlight_options( - fragment_size=120, - number_of_fragments=5, - boundary_scanner_locale="zh-CN", - boundary_scanner="SENTENCE", - boundary_chars=",./;:\\!(),。?:!……()——、" - ) - s = s.to_dict() - q_vec = [] - if req.get("vector"): - assert emb_mdl, "No embedding model selected" - s["knn"] = self._vector( - qst, emb_mdl, req.get( - "similarity", 0.1), topk) - s["knn"]["filter"] = bqry.to_dict() - if not highlight and "highlight" in s: - del s["highlight"] - q_vec = s["knn"]["query_vector"] - es_logger.info("【Q】: {}".format(json.dumps(s))) - res = self.es.search(deepcopy(s), idxnms=idxnms, timeout="600s", src=src) - es_logger.info("TOTAL: {}".format(self.es.getTotal(res))) - if self.es.getTotal(res) == 0 and "knn" in s: - bqry, _ = self.qryr.question(qst, min_match="10%") - if req.get("doc_ids"): - bqry = Q("bool", must=[]) - bqry = self._add_filters(bqry, req) - s["query"] = bqry.to_dict() - s["knn"]["filter"] = bqry.to_dict() - s["knn"]["similarity"] = 0.17 - res = self.es.search(s, idxnms=idxnms, timeout="600s", src=src) - es_logger.info("【Q】: {}".format(json.dumps(s))) + orderBy.desc("create_timestamp_flt") + + highlightFields = ["content_ltks", "title_tks"] if highlight else [] + + res = self.dataStore.search(src, 
highlightFields, filters, [matchText, matchDense, fusionExpr], orderBy, offset, limit, idxnm, kb_ids) + total = self.dataStore.getTotal(res) + + doc_store_logger.info(f"TOTAL: {total}") + + # If the result is empty, try again with a lower min_match + if total == 0: + matchText, _ = self.qryr.question(qst, min_match=0.1) + if "doc_ids" in filters: + del filters["doc_ids"] + matchDense.extra_options["similarity"] = 0.17 + res = self.dataStore.search(src, highlightFields, filters, [matchText, matchDense, fusionExpr], orderBy, offset, limit, idxnm, kb_ids) + total = self.dataStore.getTotal(res) kwds = set([]) for k in keywords: @@ -157,67 +115,19 @@ def search(self, req, idxnms, emb_mdl=None, highlight=False): continue kwds.add(kk) - aggs = self.getAggregation(res, "docnm_kwd") - + ids = self.dataStore.getChunkIds(res) + highlight = self.dataStore.getHighlight(res, keywords, "content_with_weight") + aggs = self.dataStore.getAggregation(res, "docnm_kwd") return self.SearchResult( - total=self.es.getTotal(res), - ids=self.es.getDocIds(res), + total=total, + ids=ids, query_vector=q_vec, aggregation=aggs, - highlight=self.getHighlight(res, keywords, "content_with_weight"), - field=self.getFields(res, src), + highlight=highlight, + field=self.dataStore.getFields(res, src), keywords=list(kwds) ) - def getAggregation(self, res, g): - if not "aggregations" in res or "aggs_" + g not in res["aggregations"]: - return - bkts = res["aggregations"]["aggs_" + g]["buckets"] - return [(b["key"], b["doc_count"]) for b in bkts] - - def getHighlight(self, res, keywords, fieldnm): - ans = {} - for d in res["hits"]["hits"]: - hlts = d.get("highlight") - if not hlts: - continue - txt = "...".join([a for a in list(hlts.items())[0][1]]) - if not is_english(txt.split(" ")): - ans[d["_id"]] = txt - continue - - txt = d["_source"][fieldnm] - txt = re.sub(r"[\r\n]", " ", txt, flags=re.IGNORECASE|re.MULTILINE) - txts = [] - for t in re.split(r"[.?!;\n]", txt): - for w in keywords: - t = re.sub(r"(^|[ .?/'\"\(\)!,:;-])(%s)([ .?/'\"\(\)!,:;-])"%re.escape(w), r"\1\2\3", t, flags=re.IGNORECASE|re.MULTILINE) - if not re.search(r"[^<>]+", t, flags=re.IGNORECASE|re.MULTILINE): continue - txts.append(t) - ans[d["_id"]] = "...".join(txts) if txts else "...".join([a for a in list(hlts.items())[0][1]]) - - return ans - - def getFields(self, sres, flds): - res = {} - if not flds: - return {} - for d in self.es.getSource(sres): - m = {n: d.get(n) for n in flds if d.get(n) is not None} - for n, v in m.items(): - if isinstance(v, type([])): - m[n] = "\t".join([str(vv) if not isinstance( - vv, list) else "\t".join([str(vvv) for vvv in vv]) for vv in v]) - continue - if not isinstance(v, type("")): - m[n] = str(m[n]) - if n.find("tks") > 0: - m[n] = rmSpace(m[n]) - - if m: - res[d["id"]] = m - return res - @staticmethod def trans2floats(txt): return [float(t) for t in txt.split("\t")] @@ -260,7 +170,7 @@ def insert_citations(self, answer, chunks, chunk_v, continue idx.append(i) pieces_.append(t) - es_logger.info("{} => {}".format(answer, pieces_)) + doc_store_logger.info("{} => {}".format(answer, pieces_)) if not pieces_: return answer, set([]) @@ -281,7 +191,7 @@ def insert_citations(self, answer, chunks, chunk_v, chunks_tks, tkweight, vtweight) mx = np.max(sim) * 0.99 - es_logger.info("{} SIM: {}".format(pieces_[i], mx)) + doc_store_logger.info("{} SIM: {}".format(pieces_[i], mx)) if mx < thr: continue cites[idx[i]] = list( @@ -309,9 +219,15 @@ def insert_citations(self, answer, chunks, chunk_v, def rerank(self, sres, query, tkweight=0.3, vtweight=0.7, cfield="content_ltks"): 
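The rewritten `Dealer.search()` above no longer assembles an Elasticsearch DSL body by hand; it composes backend-neutral expression objects and hands them to `DocStoreConnection.search()`. A condensed sketch of that composition (illustrative only; `hybrid_exprs` is a hypothetical helper, the field weights, fusion weights, and class signatures are taken from this patch, and `q_vec` is assumed to come from `emb_mdl.encode_queries()`):

```python
from rag.utils.doc_store_conn import MatchTextExpr, MatchDenseExpr, FusionExpr

def hybrid_exprs(question: str, q_vec: list[float], topk: int = 1024):
    # Full-text leg: a weighted query-string match over the tokenized
    # fields, mirroring FulltextQueryer.query_fields.
    match_text = MatchTextExpr(
        ["title_tks^10", "title_sm_tks^5", "important_kwd^30",
         "important_tks^20", "content_ltks^2", "content_sm_ltks"],
        question, 100, {"minimum_should_match": 0.3})
    # Dense leg: cosine KNN against the vector column named after its
    # dimension, e.g. q_1024_vec.
    match_dense = MatchDenseExpr(
        f"q_{len(q_vec)}_vec", q_vec, "float", "cosine",
        topk, {"similarity": 0.1})
    # Fusion: 5% full-text score, 95% vector score, as in Dealer.search().
    fusion = FusionExpr("weighted_sum", topk, {"weights": "0.05, 0.95"})
    return [match_text, match_dense, fusion]
```

`ESConnection.search()` later in this patch translates these same objects back into a `bool` query plus a `knn` clause, so the Elasticsearch backend keeps its existing behavior while other document stores can interpret the expressions directly.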
_, keywords = self.qryr.question(query) - ins_embd = [ - Dealer.trans2floats( - sres.field[i].get("q_%d_vec" % len(sres.query_vector), "\t".join(["0"] * len(sres.query_vector)))) for i in sres.ids] + vector_size = len(sres.query_vector) + vector_column = f"q_{vector_size}_vec" + zero_vector = [0.0] * vector_size + ins_embd = [] + for chunk_id in sres.ids: + vector = sres.field[chunk_id].get(vector_column, zero_vector) + if isinstance(vector, str): + vector = [float(v) for v in vector.split("\t")] + ins_embd.append(vector) if not ins_embd: return [], [], [] @@ -393,6 +309,8 @@ def retrieval(self, question, embd_mdl, tenant_ids, kb_ids, page, page_size, sim idx = list(range(len(sres.ids))) dim = len(sres.query_vector) + vector_column = f"q_{dim}_vec" + zero_vector = [0.0] * dim for i in idx: if sim[i] < similarity_threshold: break @@ -401,34 +319,29 @@ def retrieval(self, question, embd_mdl, tenant_ids, kb_ids, page, page_size, sim continue break id = sres.ids[i] - dnm = sres.field[id]["docnm_kwd"] - did = sres.field[id]["doc_id"] + chunk = sres.field[id] + dnm = chunk["docnm_kwd"] + did = chunk["doc_id"] d = { "chunk_id": id, - "content_ltks": sres.field[id]["content_ltks"], - "content_with_weight": sres.field[id]["content_with_weight"], - "doc_id": sres.field[id]["doc_id"], + "content_ltks": chunk["content_ltks"], + "content_with_weight": chunk["content_with_weight"], + "doc_id": chunk["doc_id"], "docnm_kwd": dnm, - "kb_id": sres.field[id]["kb_id"], - "important_kwd": sres.field[id].get("important_kwd", []), - "img_id": sres.field[id].get("img_id", ""), + "kb_id": chunk["kb_id"], + "important_kwd": chunk.get("important_kwd", []), + "img_id": chunk.get("img_id", ""), "similarity": sim[i], "vector_similarity": vsim[i], "term_similarity": tsim[i], - "vector": self.trans2floats(sres.field[id].get("q_%d_vec" % dim, "\t".join(["0"] * dim))), - "positions": sres.field[id].get("position_int", "").split("\t") + "vector": chunk.get(vector_column, zero_vector), + "positions": json.loads(chunk.get("position_list", "[]")) } if highlight: if id in sres.highlight: d["highlight"] = rmSpace(sres.highlight[id]) else: d["highlight"] = d["content_with_weight"] - if len(d["positions"]) % 5 == 0: - poss = [] - for i in range(0, len(d["positions"]), 5): - poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]), - float(d["positions"][i + 3]), float(d["positions"][i + 4])]) - d["positions"] = poss ranks["chunks"].append(d) if dnm not in ranks["doc_aggs"]: ranks["doc_aggs"][dnm] = {"doc_id": did, "count": 0} @@ -442,39 +355,11 @@ def retrieval(self, question, embd_mdl, tenant_ids, kb_ids, page, page_size, sim return ranks def sql_retrieval(self, sql, fetch_size=128, format="json"): - from api.settings import chat_logger - sql = re.sub(r"[ `]+", " ", sql) - sql = sql.replace("%", "") - es_logger.info(f"Get es sql: {sql}") - replaces = [] - for r in re.finditer(r" ([a-z_]+_l?tks)( like | ?= ?)'([^']+)'", sql): - fld, v = r.group(1), r.group(3) - match = " MATCH({}, '{}', 'operator=OR;minimum_should_match=30%') ".format( - fld, rag_tokenizer.fine_grained_tokenize(rag_tokenizer.tokenize(v))) - replaces.append( - ("{}{}'{}'".format( - r.group(1), - r.group(2), - r.group(3)), - match)) - - for p, r in replaces: - sql = sql.replace(p, r, 1) - chat_logger.info(f"To es: {sql}") - - try: - tbl = self.es.sql(sql, fetch_size, format) - return tbl - except Exception as e: - chat_logger.error(f"SQL failure: {sql} =>" + str(e)) - return {"error": str(e)} - - def chunk_list(self, doc_id, 
tenant_id, max_count=1024, fields=["docnm_kwd", "content_with_weight", "img_id"]): - s = Search() - s = s.query(Q("match", doc_id=doc_id))[0:max_count] - s = s.to_dict() - es_res = self.es.search(s, idxnms=index_name(tenant_id), timeout="600s", src=fields) - res = [] - for index, chunk in enumerate(es_res['hits']['hits']): - res.append({fld: chunk['_source'].get(fld) for fld in fields}) - return res + tbl = self.dataStore.sql(sql, fetch_size, format) + return tbl + + def chunk_list(self, doc_id: str, tenant_id: str, kb_ids: list[str], max_count=1024, fields=["docnm_kwd", "content_with_weight", "img_id"]): + condition = {"doc_id": doc_id} + res = self.dataStore.search(fields, [], condition, [], OrderByExpr(), 0, max_count, index_name(tenant_id), kb_ids) + dict_chunks = self.dataStore.getFields(res, fields) + return dict_chunks.values() diff --git a/rag/settings.py b/rag/settings.py index 8c88c4067b1..74165822fed 100644 --- a/rag/settings.py +++ b/rag/settings.py @@ -25,12 +25,13 @@ SUBPROCESS_STD_LOG_NAME = "std.log" ES = get_base_config("es", {}) +INFINITY = get_base_config("infinity", {"uri": "infinity:23817"}) AZURE = get_base_config("azure", {}) S3 = get_base_config("s3", {}) MINIO = decrypt_database_config(name="minio") try: REDIS = decrypt_database_config(name="redis") -except Exception as e: +except Exception: REDIS = {} pass DOC_MAXIMUM_SIZE = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024)) @@ -44,7 +45,7 @@ # {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0} LoggerFactory.LEVEL = 30 -es_logger = getLogger("es") +doc_store_logger = getLogger("doc_store") minio_logger = getLogger("minio") s3_logger = getLogger("s3") azure_logger = getLogger("azure") @@ -53,7 +54,7 @@ database_logger = getLogger("database") formatter = logging.Formatter("%(asctime)-15s %(levelname)-8s (%(process)d) %(message)s") -for logger in [es_logger, minio_logger, s3_logger, azure_logger, cron_logger, chunk_logger, database_logger]: +for logger in [doc_store_logger, minio_logger, s3_logger, azure_logger, cron_logger, chunk_logger, database_logger]: logger.setLevel(logging.INFO) for handler in logger.handlers: handler.setFormatter(fmt=formatter) diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index 5e65f7c840f..cb6dd033e75 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -31,7 +31,6 @@ import numpy as np import pandas as pd -from elasticsearch_dsl import Q from api.db import LLMType, ParserType from api.db.services.dialog_service import keyword_extraction, question_proposal @@ -39,8 +38,7 @@ from api.db.services.llm_service import LLMBundle from api.db.services.task_service import TaskService from api.db.services.file2document_service import File2DocumentService -from api.settings import retrievaler -from api.utils.file_utils import get_project_base_directory +from api.settings import retrievaler, docStoreConn from api.db.db_models import close_connection from rag.app import laws, paper, presentation, manual, qa, table, book, resume, picture, naive, one, audio, knowledge_graph, email from rag.nlp import search, rag_tokenizer @@ -48,7 +46,6 @@ from rag.settings import database_logger, SVR_QUEUE_NAME from rag.settings import cron_logger, DOC_MAXIMUM_SIZE from rag.utils import rmSpace, num_tokens_from_string -from rag.utils.es_conn import ELASTICSEARCH from rag.utils.redis_conn import REDIS_CONN, Payload from rag.utils.storage_factory import STORAGE_IMPL @@ -126,7 +123,7 @@ def collect(): return pd.DataFrame() tasks = 
TaskService.get_tasks(msg["id"]) if not tasks: - cron_logger.warn("{} empty task!".format(msg["id"])) + cron_logger.warning("{} empty task!".format(msg["id"])) return [] tasks = pd.DataFrame(tasks) @@ -187,7 +184,7 @@ def build(row): docs = [] doc = { "doc_id": row["doc_id"], - "kb_id": [str(row["kb_id"])] + "kb_id": str(row["kb_id"]) } el = 0 for ck in cks: @@ -200,6 +197,11 @@ def build(row): d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19] d["create_timestamp_flt"] = datetime.datetime.now().timestamp() if not d.get("image"): + d["img_id"] = "" + d.pop("image", None) + d["page_num_list"] = json.dumps([]) + d["position_list"] = json.dumps([]) + d["top_list"] = json.dumps([]) docs.append(d) continue @@ -245,12 +247,9 @@ def build(row): return docs -def init_kb(row): +def init_kb(row, vector_size: int): idxnm = search.index_name(row["tenant_id"]) - if ELASTICSEARCH.indexExist(idxnm): - return - return ELASTICSEARCH.createIdx(idxnm, json.load( - open(os.path.join(get_project_base_directory(), "conf", "mapping.json"), "r"))) + return docStoreConn.createIdx(idxnm, row["kb_id"], vector_size) def embedding(docs, mdl, parser_config=None, callback=None): @@ -288,17 +287,20 @@ def embedding(docs, mdl, parser_config=None, callback=None): cnts) if len(tts) == len(cnts) else cnts assert len(vects) == len(docs) + vector_size = 0 for i, d in enumerate(docs): v = vects[i].tolist() + vector_size = len(v) d["q_%d_vec" % len(v)] = v - return tk_count + return tk_count, vector_size def run_raptor(row, chat_mdl, embd_mdl, callback=None): vts, _ = embd_mdl.encode(["ok"]) - vctr_nm = "q_%d_vec" % len(vts[0]) + vector_size = len(vts[0]) + vctr_nm = "q_%d_vec" % vector_size chunks = [] - for d in retrievaler.chunk_list(row["doc_id"], row["tenant_id"], fields=["content_with_weight", vctr_nm]): + for d in retrievaler.chunk_list(row["doc_id"], row["tenant_id"], [str(row["kb_id"])], fields=["content_with_weight", vctr_nm]): chunks.append((d["content_with_weight"], np.array(d[vctr_nm]))) raptor = Raptor( @@ -332,7 +334,7 @@ def run_raptor(row, chat_mdl, embd_mdl, callback=None): d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"]) res.append(d) tk_count += num_tokens_from_string(content) - return res, tk_count + return res, tk_count, vector_size def main(): @@ -352,7 +354,7 @@ def main(): if r.get("task_type", "") == "raptor": try: chat_mdl = LLMBundle(r["tenant_id"], LLMType.CHAT, llm_name=r["llm_id"], lang=r["language"]) - cks, tk_count = run_raptor(r, chat_mdl, embd_mdl, callback) + cks, tk_count, vector_size = run_raptor(r, chat_mdl, embd_mdl, callback) except Exception as e: callback(-1, msg=str(e)) cron_logger.error(str(e)) @@ -373,7 +375,7 @@ def main(): len(cks)) st = timer() try: - tk_count = embedding(cks, embd_mdl, r["parser_config"], callback) + tk_count, vector_size = embedding(cks, embd_mdl, r["parser_config"], callback) except Exception as e: callback(-1, "Embedding error:{}".format(str(e))) cron_logger.error(str(e)) @@ -381,26 +383,25 @@ def main(): cron_logger.info("Embedding elapsed({}): {:.2f}".format(r["name"], timer() - st)) callback(msg="Finished embedding({:.2f})! 
Start to build index!".format(timer() - st)) - init_kb(r) + # cron_logger.info(f"task_executor init_kb index {search.index_name(r["tenant_id"])} embd_mdl {embd_mdl.llm_name} vector length {vector_size}") + init_kb(r, vector_size) chunk_count = len(set([c["_id"] for c in cks])) st = timer() es_r = "" es_bulk_size = 4 for b in range(0, len(cks), es_bulk_size): - es_r = ELASTICSEARCH.bulk(cks[b:b + es_bulk_size], search.index_name(r["tenant_id"])) + es_r = docStoreConn.insert(cks[b:b + es_bulk_size], search.index_name(r["tenant_id"]), r["kb_id"]) if b % 128 == 0: callback(prog=0.8 + 0.1 * (b + 1) / len(cks), msg="") cron_logger.info("Indexing elapsed({}): {:.2f}".format(r["name"], timer() - st)) if es_r: callback(-1, "Insert chunk error, detail info please check ragflow-logs/api/cron_logger.log. Please also check ES status!") - ELASTICSEARCH.deleteByQuery( - Q("match", doc_id=r["doc_id"]), idxnm=search.index_name(r["tenant_id"])) - cron_logger.error(str(es_r)) + docStoreConn.delete({"doc_id": r["doc_id"]}, search.index_name(r["tenant_id"]), r["kb_id"]) + cron_logger.error('Insert chunk error: ' + str(es_r)) else: if TaskService.do_cancel(r["id"]): - ELASTICSEARCH.deleteByQuery( - Q("match", doc_id=r["doc_id"]), idxnm=search.index_name(r["tenant_id"])) + docStoreConn.delete({"doc_id": r["doc_id"]}, search.index_name(r["tenant_id"]), r["kb_id"]) continue callback(1., "Done!") DocumentService.increment_chunk_num( diff --git a/rag/utils/doc_store_conn.py b/rag/utils/doc_store_conn.py new file mode 100644 index 00000000000..462a82424d2 --- /dev/null +++ b/rag/utils/doc_store_conn.py @@ -0,0 +1,244 @@ +from abc import ABC, abstractmethod +from typing import Optional, Union +from dataclasses import dataclass +import numpy as np +import polars as pl +from typing import List, Dict + +DEFAULT_MATCH_VECTOR_TOPN = 10 +DEFAULT_MATCH_SPARSE_TOPN = 10 +VEC = Union[list, np.ndarray] + + +@dataclass +class SparseVector: + indices: list[int] + values: Union[list[float], list[int], None] = None + + def __post_init__(self): + assert (self.values is None) or (len(self.indices) == len(self.values)) + + def to_dict_old(self): + d = {"indices": self.indices} + if self.values is not None: + d["values"] = self.values + return d + + def to_dict(self): + if self.values is None: + raise ValueError("SparseVector.values is None") + result = {} + for i, v in zip(self.indices, self.values): + result[str(i)] = v + return result + + @staticmethod + def from_dict(d): + return SparseVector(d["indices"], d.get("values")) + + def __str__(self): + return f"SparseVector(indices={self.indices}{'' if self.values is None else f', values={self.values}'})" + + def __repr__(self): + return str(self) + + +class MatchTextExpr(ABC): + def __init__( + self, + fields: str, + matching_text: str, + topn: int, + extra_options: dict = dict(), + ): + self.fields = fields + self.matching_text = matching_text + self.topn = topn + self.extra_options = extra_options + + +class MatchDenseExpr(ABC): + def __init__( + self, + vector_column_name: str, + embedding_data: VEC, + embedding_data_type: str, + distance_type: str, + topn: int = DEFAULT_MATCH_VECTOR_TOPN, + extra_options: dict = dict(), + ): + self.vector_column_name = vector_column_name + self.embedding_data = embedding_data + self.embedding_data_type = embedding_data_type + self.distance_type = distance_type + self.topn = topn + self.extra_options = extra_options + + +class MatchSparseExpr(ABC): + def __init__( + self, + vector_column_name: str, + sparse_data: SparseVector | dict, + 
distance_type: str, + topn: int, + opt_params: Optional[dict] = None, + ): + self.vector_column_name = vector_column_name + self.sparse_data = sparse_data + self.distance_type = distance_type + self.topn = topn + self.opt_params = opt_params + + +class MatchTensorExpr(ABC): + def __init__( + self, + column_name: str, + query_data: VEC, + query_data_type: str, + topn: int, + extra_option: Optional[dict] = None, + ): + self.column_name = column_name + self.query_data = query_data + self.query_data_type = query_data_type + self.topn = topn + self.extra_option = extra_option + + +class FusionExpr(ABC): + def __init__(self, method: str, topn: int, fusion_params: Optional[dict] = None): + self.method = method + self.topn = topn + self.fusion_params = fusion_params + + +MatchExpr = Union[ + MatchTextExpr, MatchDenseExpr, MatchSparseExpr, MatchTensorExpr, FusionExpr +] + + +class OrderByExpr(ABC): + def __init__(self): + self.fields = list() + def asc(self, field: str): + self.fields.append((field, 0)) + return self + def desc(self, field: str): + self.fields.append((field, 1)) + return self + +class DocStoreConnection(ABC): + """ + Database operations + """ + + @abstractmethod + def health(self) -> dict: + """ + Return the health status of the database. + """ + raise NotImplementedError("Not implemented") + + """ + Table operations + """ + + @abstractmethod + def createIdx(self, indexName: str, knowledgebaseId: str, vectorSize: int): + """ + Create an index with given name + """ + raise NotImplementedError("Not implemented") + + @abstractmethod + def deleteIdx(self, indexName: str, knowledgebaseId: str): + """ + Delete an index with given name + """ + raise NotImplementedError("Not implemented") + + @abstractmethod + def indexExist(self, indexName: str, knowledgebaseId: str) -> bool: + """ + Check if an index with given name exists + """ + raise NotImplementedError("Not implemented") + + """ + CRUD operations + """ + + @abstractmethod + def search( + self, selectFields: list[str], highlight: list[str], condition: dict, matchExprs: list[MatchExpr], orderBy: OrderByExpr, offset: int, limit: int, indexName: str, knowledgebaseIds: list[str] + ) -> list[dict] | pl.DataFrame: + """ + Search with given conjunctive equivalent filtering condition and return all fields of matched documents + """ + raise NotImplementedError("Not implemented") + + @abstractmethod + def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | pl.DataFrame: + """ + Get single chunk with given id + """ + raise NotImplementedError("Not implemented") + + @abstractmethod + def insert(self, rows: list[dict], indexName: str, knowledgebaseId: str): + """ + Update or insert a bulk of rows + """ + raise NotImplementedError("Not implemented") + + @abstractmethod + def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str): + """ + Update rows with given conjunctive equivalent filtering condition + """ + raise NotImplementedError("Not implemented") + + @abstractmethod + def delete(self, condition: dict, indexName: str, knowledgebaseId: str): + """ + Delete rows with given conjunctive equivalent filtering condition + """ + raise NotImplementedError("Not implemented") + + """ + Helper functions for search result + """ + + @abstractmethod + def getTotal(self, res): + raise NotImplementedError("Not implemented") + + @abstractmethod + def getChunkIds(self, res): + raise NotImplementedError("Not implemented") + + @abstractmethod + def getFields(self, 
res, fields: List[str]) -> Dict[str, dict]: + raise NotImplementedError("Not implemented") + + @abstractmethod + def getHighlight(self, res, keywords: List[str], fieldnm: str): + raise NotImplementedError("Not implemented") + + @abstractmethod + def getAggregation(self, res, fieldnm: str): + raise NotImplementedError("Not implemented") + + """ + SQL + """ + @abstractmethod + def sql(self, sql: str, fetch_size: int, format: str): + """ + Run the sql generated by text-to-sql + """ + raise NotImplementedError("Not implemented") diff --git a/rag/utils/es_conn.py b/rag/utils/es_conn.py index d39e263f7f8..d58e258eee2 100644 --- a/rag/utils/es_conn.py +++ b/rag/utils/es_conn.py @@ -1,29 +1,30 @@ import re import json import time -import copy +import os +from typing import List, Dict import elasticsearch -from elastic_transport import ConnectionTimeout +import copy from elasticsearch import Elasticsearch -from elasticsearch_dsl import UpdateByQuery, Search, Index -from rag.settings import es_logger +from elasticsearch_dsl import Q, Search, Index +from elastic_transport import ConnectionTimeout +from rag.settings import doc_store_logger from rag import settings from rag.utils import singleton +from api.utils.file_utils import get_project_base_directory +import polars as pl +from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr, MatchTextExpr, MatchDenseExpr, FusionExpr +from rag.nlp import is_english, rag_tokenizer +from . import rmSpace -es_logger.info("Elasticsearch version: "+str(elasticsearch.__version__)) +doc_store_logger.info("Elasticsearch sdk version: "+str(elasticsearch.__version__)) @singleton -class ESConnection: +class ESConnection(DocStoreConnection): def __init__(self): self.info = {} - self.conn() - self.idxnm = settings.ES.get("index_name", "") - if not self.es.ping(): - raise Exception("Can't connect to ES cluster") - - def conn(self): for _ in range(10): try: self.es = Elasticsearch( @@ -34,390 +35,267 @@ def conn(self): ) if self.es: self.info = self.es.info() - es_logger.info("Connect to es.") + doc_store_logger.info("Connect to es.") break except Exception as e: - es_logger.error("Fail to connect to es: " + str(e)) + doc_store_logger.error("Fail to connect to es: " + str(e)) time.sleep(1) - - def version(self): + if not self.es.ping(): + raise Exception("Can't connect to ES cluster") v = self.info.get("version", {"number": "5.6"}) v = v["number"].split(".")[0] - return int(v) >= 7 - - def health(self): + if int(v) < 8: + raise Exception(f"ES version must be greater than or equal to 8, current version: {v}") + fp_mapping = os.path.join(get_project_base_directory(), "conf", "mapping.json") + if not os.path.exists(fp_mapping): + raise Exception(f"Mapping file not found at {fp_mapping}") + self.mapping = json.load(open(fp_mapping, "r")) + + """ + Database operations + """ + def health(self) -> dict: return dict(self.es.cluster.health()) - def upsert(self, df, idxnm=""): - res = [] - for d in df: - id = d["id"] - del d["id"] - d = {"doc": d, "doc_as_upsert": "true"} - T = False - for _ in range(10): - try: - if not self.version(): - r = self.es.update( - index=( - self.idxnm if not idxnm else idxnm), - body=d, - id=id, - doc_type="doc", - refresh=True, - retry_on_conflict=100) - else: - r = self.es.update( - index=( - self.idxnm if not idxnm else idxnm), - body=d, - id=id, - refresh=True, - retry_on_conflict=100) - es_logger.info("Successfully upsert: %s" % id) - T = True - break - except Exception as e: - es_logger.warning("Fail to index: " + - json.dumps(d, 
ensure_ascii=False) + str(e)) - if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE): - time.sleep(3) - continue - self.conn() - T = False - - if not T: - res.append(d) - es_logger.error( - "Fail to index: " + - re.sub( - "[\r\n]", - "", - json.dumps( - d, - ensure_ascii=False))) - d["id"] = id - d["_index"] = self.idxnm - - if not res: + """ + Table operations + """ + def createIdx(self, indexName: str, knowledgebaseId: str, vectorSize: int): + if self.indexExist(indexName, knowledgebaseId): return True - return False - - def bulk(self, df, idx_nm=None): - ids, acts = {}, [] - for d in df: - id = d["id"] if "id" in d else d["_id"] - ids[id] = copy.deepcopy(d) - ids[id]["_index"] = self.idxnm if not idx_nm else idx_nm - if "id" in d: - del d["id"] - if "_id" in d: - del d["_id"] - acts.append( - {"update": {"_id": id, "_index": ids[id]["_index"]}, "retry_on_conflict": 100}) - acts.append({"doc": d, "doc_as_upsert": "true"}) - - res = [] - for _ in range(100): - try: - if elasticsearch.__version__[0] < 8: - r = self.es.bulk( - index=( - self.idxnm if not idx_nm else idx_nm), - body=acts, - refresh=False, - timeout="600s") - else: - r = self.es.bulk(index=(self.idxnm if not idx_nm else - idx_nm), operations=acts, - refresh=False, timeout="600s") - if re.search(r"False", str(r["errors"]), re.IGNORECASE): - return res - - for it in r["items"]: - if "error" in it["update"]: - res.append(str(it["update"]["_id"]) + - ":" + str(it["update"]["error"])) - - return res - except Exception as e: - es_logger.warn("Fail to bulk: " + str(e)) - if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE): - time.sleep(3) - continue - self.conn() - - return res + try: + from elasticsearch.client import IndicesClient + return IndicesClient(self.es).create(index=indexName, + settings=self.mapping["settings"], + mappings=self.mapping["mappings"]) + except Exception as e: + doc_store_logger.error("ES create index error %s ----%s" % (indexName, str(e))) - def bulk4script(self, df): - ids, acts = {}, [] - for d in df: - id = d["id"] - ids[id] = copy.deepcopy(d["raw"]) - acts.append({"update": {"_id": id, "_index": self.idxnm}}) - acts.append(d["script"]) - es_logger.info("bulk upsert: %s" % id) + def deleteIdx(self, indexName: str, knowledgebaseId: str): + try: + return self.es.indices.delete(indexName, allow_no_indices=True) + except Exception as e: + doc_store_logger.error("ES delete index error %s ----%s" % (indexName, str(e))) - res = [] - for _ in range(10): + def indexExist(self, indexName: str, knowledgebaseId: str) -> bool: + s = Index(indexName, self.es) + for i in range(3): try: - if not self.version(): - r = self.es.bulk( - index=self.idxnm, - body=acts, - refresh=False, - timeout="600s", - doc_type="doc") - else: - r = self.es.bulk( - index=self.idxnm, - body=acts, - refresh=False, - timeout="600s") - if re.search(r"False", str(r["errors"]), re.IGNORECASE): - return res - - for it in r["items"]: - if "error" in it["update"]: - res.append(str(it["update"]["_id"])) - - return res + return s.exists() except Exception as e: - es_logger.warning("Fail to bulk: " + str(e)) - if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE): - time.sleep(3) + doc_store_logger.error("ES updateByQuery indexExist: " + str(e)) + if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0: continue - self.conn() + return False - return res + """ + CRUD operations + """ + def search(self, selectFields: list[str], highlightFields: list[str], condition: dict, matchExprs: list[MatchExpr], orderBy: OrderByExpr, offset: 
int, limit: int, indexName: str, knowledgebaseIds: list[str]) -> list[dict] | pl.DataFrame:
+        """
+        Refers to https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
+        """
+        s = Search()
+        bqry = None
+        vector_similarity_weight = 0.5
+        for m in matchExprs:
+            if isinstance(m, FusionExpr) and m.method == "weighted_sum" and "weights" in m.fusion_params:
+                assert len(matchExprs) == 3 and isinstance(matchExprs[0], MatchTextExpr) and isinstance(matchExprs[1], MatchDenseExpr) and isinstance(matchExprs[2], FusionExpr)
+                weights = m.fusion_params["weights"]
+                vector_similarity_weight = float(weights.split(",")[1])
+        for m in matchExprs:
+            if isinstance(m, MatchTextExpr):
+                minimum_should_match = "0%"
+                if "minimum_should_match" in m.extra_options:
+                    minimum_should_match = str(int(m.extra_options["minimum_should_match"] * 100)) + "%"
+                bqry = Q("bool",
+                         must=Q("query_string", fields=m.fields,
+                                type="best_fields", query=m.matching_text,
+                                minimum_should_match=minimum_should_match,
+                                boost=1),
+                         boost=1.0 - vector_similarity_weight,
+                         )
+                if condition:
+                    for k, v in condition.items():
+                        if not isinstance(k, str) or not v:
+                            continue
+                        if isinstance(v, list):
+                            bqry.filter.append(Q("terms", **{k: v}))
+                        elif isinstance(v, str) or isinstance(v, int):
+                            bqry.filter.append(Q("term", **{k: v}))
+                        else:
+                            raise Exception(f"Condition `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str or list.")
+            elif isinstance(m, MatchDenseExpr):
+                assert bqry is not None
+                similarity = 0.0
+                if "similarity" in m.extra_options:
+                    similarity = m.extra_options["similarity"]
+                s = s.knn(m.vector_column_name,
+                          m.topn,
+                          m.topn * 2,
+                          query_vector=list(m.embedding_data),
+                          filter=bqry.to_dict(),
+                          similarity=similarity,
+                          )

-    def rm(self, d):
-        for _ in range(10):
-            try:
-                if not self.version():
-                    r = self.es.delete(
-                        index=self.idxnm,
-                        id=d["id"],
-                        doc_type="doc",
-                        refresh=True)
-                else:
-                    r = self.es.delete(
-                        index=self.idxnm,
-                        id=d["id"],
-                        refresh=True,
-                        doc_type="_doc")
-                es_logger.info("Remove %s" % d["id"])
-                return True
-            except Exception as e:
-                es_logger.warn("Fail to delete: " + str(d) + str(e))
-                if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE):
-                    time.sleep(3)
-                    continue
-                if re.search(r"(not_found)", str(e), re.IGNORECASE):
-                    return True
-                self.conn()
+        s.query = bqry
+        for field in highlightFields:
+            s = s.highlight(field)

-        es_logger.error("Fail to delete: " + str(d))
+        if orderBy:
+            orders = list()
+            for field, order in orderBy.fields:
+                order = "asc" if order == 0 else "desc"
+                orders.append({field: {"order": order, "unmapped_type": "float",
+                                       "mode": "avg", "numeric_type": "double"}})
+            s = s.sort(*orders)

-        return False
+        if limit != 0:
+            s = s[offset:limit]
+        q = s.to_dict()
+        # doc_store_logger.info("ESConnection.search [Q]: " + json.dumps(q))

-    def search(self, q, idxnms=None, src=False, timeout="2s"):
-        if not isinstance(q, dict):
-            q = Search().query(q).to_dict()
-        if isinstance(idxnms, str):
-            idxnms = idxnms.split(",")
         for i in range(3):
             try:
-                res = self.es.search(index=(self.idxnm if not idxnms else idxnms),
+                res = self.es.search(index=(indexName),
                                      body=q,
-                                     timeout=timeout,
+                                     timeout="600s",
                                      # search_type="dfs_query_then_fetch",
                                      track_total_hits=True,
-                                     _source=src)
+                                     _source=True)
                 if str(res.get("timed_out", "")).lower() == "true":
                     raise Exception("Es Timeout.")
+                # doc_store_logger.info("ESConnection.search res: " + str(res))
                 return res
             except Exception as e:
-                es_logger.error(
+                doc_store_logger.error(
                     "ES search exception: " +
                     str(e) +
-                    "【Q】:" +
+                    "[Q]: " +
                     str(q))
                 if str(e).find("Timeout") > 0:
                     continue
                 raise e
-        es_logger.error("ES search timeout for 3 times!")
+        doc_store_logger.error("ES search timeout for 3 times!")
         raise Exception("ES search timeout.")

-    def sql(self, sql, fetch_size=128, format="json", timeout="2s"):
+    def get(self, chunkId: str, indexName: str, knowledgebaseId: str) -> dict:
         for i in range(3):
             try:
-                res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format, request_timeout=timeout)
-                return res
-            except ConnectionTimeout as e:
-                es_logger.error("Timeout【Q】:" + sql)
-                continue
-            except Exception as e:
-                raise e
-        es_logger.error("ES search timeout for 3 times!")
-        raise ConnectionTimeout()
-
-    def get(self, doc_id, idxnm=None):
-        for i in range(3):
-            try:
-                res = self.es.get(index=(self.idxnm if not idxnm else idxnm),
-                                  id=doc_id)
+                res = self.es.get(index=(indexName),
+                                  id=chunkId)
                 if str(res.get("timed_out", "")).lower() == "true":
                     raise Exception("Es Timeout.")
                 return res
             except Exception as e:
-                es_logger.error(
+                doc_store_logger.error(
                     "ES get exception: " + str(e) +
-                    "【Q】:" +
-                    doc_id)
+                    "[Q]: " +
+                    chunkId)
                 if str(e).find("Timeout") > 0:
                     continue
                 raise e
-        es_logger.error("ES search timeout for 3 times!")
+        doc_store_logger.error("ES search timeout for 3 times!")
         raise Exception("ES search timeout.")

-    def updateByQuery(self, q, d):
-        ubq = UpdateByQuery(index=self.idxnm).using(self.es).query(q)
-        scripts = ""
-        for k, v in d.items():
-            scripts += "ctx._source.%s = params.%s;" % (str(k), str(k))
-        ubq = ubq.script(source=scripts, params=d)
-        ubq = ubq.params(refresh=False)
-        ubq = ubq.params(slices=5)
-        ubq = ubq.params(conflicts="proceed")
-        for i in range(3):
-            try:
-                r = ubq.execute()
-                return True
-            except Exception as e:
-                es_logger.error("ES updateByQuery exception: " +
-                                str(e) + "【Q】:" + str(q.to_dict()))
-                if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
-                    continue
-                self.conn()
-
-        return False
+    def insert(self, documents: list[dict], indexName: str, knowledgebaseId: str):
+        # Refers to https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
+        operations = []
+        for d in documents:
+            d_copy = copy.deepcopy(d)
+            meta_id = d_copy["_id"]
+            del d_copy["_id"]
+            operations.append(
+                {"index": {"_index": indexName, "_id": meta_id}})
+            operations.append(d_copy)

-    def updateScriptByQuery(self, q, scripts, idxnm=None):
-        ubq = UpdateByQuery(
-            index=self.idxnm if not idxnm else idxnm).using(
-            self.es).query(q)
-        ubq = ubq.script(source=scripts)
-        ubq = ubq.params(refresh=True)
-        ubq = ubq.params(slices=5)
-        ubq = ubq.params(conflicts="proceed")
-        for i in range(3):
+        res = []
+        for _ in range(100):
             try:
-                r = ubq.execute()
-                return True
-            except Exception as e:
-                es_logger.error("ES updateByQuery exception: " +
-                                str(e) + "【Q】:" + str(q.to_dict()))
-                if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
-                    continue
-                self.conn()
-
-        return False
+                r = self.es.bulk(index=(indexName), operations=operations,
+                                 refresh=False, timeout="600s")
+                if re.search(r"False", str(r["errors"]), re.IGNORECASE):
+                    return res

-    def deleteByQuery(self, query, idxnm=""):
-        for i in range(3):
-            try:
-                r = self.es.delete_by_query(
-                    index=idxnm if idxnm else self.idxnm,
-                    refresh=True,
-                    body=Search().query(query).to_dict())
-                return True
+                for item in r["items"]:
+                    for action in ["create", "delete", "index", "update"]:
+                        if action in item and "error" in item[action]:
+                            res.append(str(item[action]["_id"]) + ":" + str(item[action]["error"]))
+                return res
             except Exception as e:
-                es_logger.error("ES updateByQuery deleteByQuery: " +
-                                str(e) + "【Q】:" + str(query.to_dict()))
-                if str(e).find("NotFoundError") > 0:
-                    return True
-                if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
+                doc_store_logger.warning("Fail to bulk: " + str(e))
+                if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE):
+                    time.sleep(3)
                     continue
+                self.conn()
+        return res

-        return False
-
-    def update(self, id, script, routing=None):
+    def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str):
+        if 'id' not in condition:
+            raise Exception("Condition must contain id.")
+        doc = copy.deepcopy(condition)
+        doc.update(newValue)  # fold newValue in so the parameter is actually applied
+        id = doc['id']
+        del doc['id']
         for i in range(3):
             try:
-                if not self.version():
-                    r = self.es.update(
-                        index=self.idxnm,
-                        id=id,
-                        body=json.dumps(
-                            script,
-                            ensure_ascii=False),
-                        doc_type="doc",
-                        routing=routing,
-                        refresh=False)
-                else:
-                    r = self.es.update(index=self.idxnm, id=id, body=json.dumps(script, ensure_ascii=False),
-                                       routing=routing, refresh=False)  # , doc_type="_doc")
+                self.es.update(index=indexName, id=id, doc=doc)
                 return True
             except Exception as e:
-                es_logger.error(
-                    "ES update exception: " + str(e) + " id:" + str(id) + ", version:" + str(self.version()) +
-                    json.dumps(script, ensure_ascii=False))
+                doc_store_logger.error(
+                    "ES update exception: " + str(e) + " id:" + str(id) +
+                    json.dumps(condition, ensure_ascii=False))
                 if str(e).find("Timeout") > 0:
                     continue
-
-        return False
-
-    def indexExist(self, idxnm):
-        s = Index(idxnm if idxnm else self.idxnm, self.es)
-        for i in range(3):
-            try:
-                return s.exists()
-            except Exception as e:
-                es_logger.error("ES updateByQuery indexExist: " + str(e))
-                if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
-                    continue
         return False

-    def docExist(self, docid, idxnm=None):
-        for i in range(3):
+    def delete(self, condition: dict, indexName: str, knowledgebaseId: str):
+        qry = None
+        if "_id" in condition:
+            chunk_ids = condition["_id"]
+            if not isinstance(chunk_ids, list):
+                chunk_ids = [chunk_ids]
+            qry = Q("ids", values=chunk_ids)
+        else:
+            qry = Q("bool")
+            for k, v in condition.items():
+                if isinstance(v, list):
+                    qry.must.append(Q("terms", **{k: v}))
+                elif isinstance(v, str) or isinstance(v, int):
+                    qry.must.append(Q("term", **{k: v}))
+                else:
+                    raise Exception("Condition value must be int, str or list.")
+        doc_store_logger.info("ESConnection.delete [Q]: " + json.dumps(qry.to_dict()))
+        for _ in range(10):
             try:
-                return self.es.exists(index=(idxnm if idxnm else self.idxnm),
-                                      id=docid)
+                self.es.delete_by_query(
+                    index=indexName,
+                    body=Search().query(qry).to_dict(),
+                    refresh=True)
+                return True
             except Exception as e:
-                es_logger.error("ES Doc Exist: " + str(e))
-                if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
+                doc_store_logger.warning("Fail to delete: " + str(condition) + str(e))
+                if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE):
+                    time.sleep(3)
                     continue
+                if re.search(r"(not_found)", str(e), re.IGNORECASE):
+                    return True
+                self.conn()
         return False

-    def createIdx(self, idxnm, mapping):
-        try:
-            if elasticsearch.__version__[0] < 8:
-                return self.es.indices.create(idxnm, body=mapping)
-            from elasticsearch.client import IndicesClient
-            return IndicesClient(self.es).create(index=idxnm,
-                                                 settings=mapping["settings"],
-                                                 mappings=mapping["mappings"])
-        except Exception as e:
-            es_logger.error("ES create index error %s ----%s" % (idxnm, str(e)))
-
-    def deleteIdx(self, idxnm):
-        try:
-            return self.es.indices.delete(idxnm, allow_no_indices=True)
-        except Exception as e:
-            es_logger.error("ES delete index error %s ----%s" % (idxnm, str(e)))
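The reworked ESConnection methods above all take a plain condition dict in place of hand-built DSL queries. A minimal sketch of how a caller might drive them; the index and id values are placeholders for illustration, not part of this patch:

```python
# Hypothetical usage of the new condition-dict CRUD surface.
from rag.utils.es_conn import ESConnection

conn = ESConnection()
idx, kb_id = "ragflow_tnt_demo", "kb_demo"   # assumed names

# Delete by explicit chunk ids, or by field equality:
conn.delete({"_id": ["chunk_0", "chunk_1"]}, idx, kb_id)
conn.delete({"doc_id": "doc_42"}, idx, kb_id)

# Update a single chunk; the condition must carry its id:
conn.update({"id": "chunk_2"}, {"available_int": 0}, idx, kb_id)
```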
+    """
+    Helper functions for search result
+    """
     def getTotal(self, res):
         if isinstance(res["hits"]["total"], type({})):
             return res["hits"]["total"]["value"]
         return res["hits"]["total"]

-    def getDocIds(self, res):
+    def getChunkIds(self, res):
         return [d["_id"] for d in res["hits"]["hits"]]

-    def getSource(self, res):
+    def __getSource(self, res):
         rr = []
         for d in res["hits"]["hits"]:
             d["_source"]["id"] = d["_id"]
@@ -425,40 +303,89 @@ def getSource(self, res):
             rr.append(d["_source"])
         return rr

-    def scrollIter(self, pagesize=100, scroll_time='2m', q={
-        "query": {"match_all": {}}, "sort": [{"updated_at": {"order": "desc"}}]}):
-        for _ in range(100):
-            try:
-                page = self.es.search(
-                    index=self.idxnm,
-                    scroll=scroll_time,
-                    size=pagesize,
-                    body=q,
-                    _source=None
-                )
-                break
-            except Exception as e:
-                es_logger.error("ES scrolling fail. " + str(e))
-                time.sleep(3)
-
-        sid = page['_scroll_id']
-        scroll_size = page['hits']['total']["value"]
-        es_logger.info("[TOTAL]%d" % scroll_size)
-        # Start scrolling
-        while scroll_size > 0:
-            yield page["hits"]["hits"]
-            for _ in range(100):
-                try:
-                    page = self.es.scroll(scroll_id=sid, scroll=scroll_time)
-                    break
-                except Exception as e:
-                    es_logger.error("ES scrolling fail. " + str(e))
-                    time.sleep(3)
+    def getFields(self, res, fields: List[str]) -> Dict[str, dict]:
+        res_fields = {}
+        if not fields:
+            return {}
+        for d in self.__getSource(res):
+            m = {n: d.get(n) for n in fields if d.get(n) is not None}
+            for n, v in m.items():
+                if isinstance(v, list):
+                    m[n] = v
+                    continue
+                if not isinstance(v, str):
+                    m[n] = str(m[n])
+                if n.find("tks") > 0:
+                    m[n] = rmSpace(m[n])

-            # Update the scroll ID
-            sid = page['_scroll_id']
-            # Get the number of results that we returned in the last scroll
-            scroll_size = len(page['hits']['hits'])
+            if m:
+                res_fields[d["id"]] = m
+        return res_fields

+    def getHighlight(self, res, keywords: List[str], fieldnm: str):
+        ans = {}
+        for d in res["hits"]["hits"]:
+            hlts = d.get("highlight")
+            if not hlts:
+                continue
+            txt = "...".join([a for a in list(hlts.items())[0][1]])
+            if not is_english(txt.split(" ")):
+                ans[d["_id"]] = txt
+                continue

-ELASTICSEARCH = ESConnection()
+            txt = d["_source"][fieldnm]
+            txt = re.sub(r"[\r\n]", " ", txt, flags=re.IGNORECASE | re.MULTILINE)
+            txts = []
+            for t in re.split(r"[.?!;\n]", txt):
+                for w in keywords:
+                    t = re.sub(r"(^|[ .?/'\"\(\)!,:;-])(%s)([ .?/'\"\(\)!,:;-])" % re.escape(w), r"\1<em>\2</em>\3", t, flags=re.IGNORECASE | re.MULTILINE)
+                if not re.search(r"<em>[^<>]+</em>", t, flags=re.IGNORECASE | re.MULTILINE):
+                    continue
+                txts.append(t)
+            ans[d["_id"]] = "...".join(txts) if txts else "...".join([a for a in list(hlts.items())[0][1]])
+
+        return ans
+
+    def getAggregation(self, res, fieldnm: str):
+        agg_field = "aggs_" + fieldnm
+        if "aggregations" not in res or agg_field not in res["aggregations"]:
+            return list()
+        bkts = res["aggregations"][agg_field]["buckets"]
+        return [(b["key"], b["doc_count"]) for b in bkts]
+
+    """
+    SQL
+    """
+    def sql(self, sql: str, fetch_size: int, format: str):
+        doc_store_logger.info(f"ESConnection.sql get sql: {sql}")
+        sql = re.sub(r"[ `]+", " ", sql)
+        sql = sql.replace("%", "")
+        replaces = []
+        for r in re.finditer(r" ([a-z_]+_l?tks)( like | ?= ?)'([^']+)'", sql):
+            fld, v = r.group(1), r.group(3)
+            match = " MATCH({}, '{}', 'operator=OR;minimum_should_match=30%') ".format(
+                fld, rag_tokenizer.fine_grained_tokenize(rag_tokenizer.tokenize(v)))
+            replaces.append(
+                ("{}{}'{}'".format(
+                    r.group(1),
+                    r.group(2),
+                    r.group(3)),
+                    match))
+
+        for p, r in replaces:
+            sql = sql.replace(p, r, 1)
+        doc_store_logger.info(f"ESConnection.sql to es: {sql}")
+
+        for i in range(3):
+            try:
+                res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format, request_timeout="2s")
+                return res
+            except ConnectionTimeout:
+                doc_store_logger.error("ESConnection.sql timeout [Q]: " + sql)
+                continue
+            except Exception as e:
+                doc_store_logger.error(f"ESConnection.sql failure: {sql} => " + str(e))
+                return None
+        doc_store_logger.error("ESConnection.sql timeout for 3 times!")
+        return None
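With the Elasticsearch side rewritten, the same DocStoreConnection contract gets a second backend below. Application code now reaches either backend through the `docStoreConn` handle exposed by `api.settings` instead of the removed module-level `ELASTICSEARCH` singleton. A caller-side sketch; the tenant and KB ids are placeholders:

```python
# How callers are expected to resolve a chunk after this refactor.
from api.settings import docStoreConn
from rag.nlp import search

tenant_id = "tnt_demo"             # placeholder tenant
kb_ids = ["kb_a", "kb_b"]          # placeholder knowledge base ids
res = docStoreConn.get("chunk_123", search.index_name(tenant_id), kb_ids)
```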
diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py
new file mode 100644
index 00000000000..e398fc04437
--- /dev/null
+++ b/rag/utils/infinity_conn.py
@@ -0,0 +1,402 @@
+import re
+from typing import List, Dict
+import infinity
+from infinity.common import ConflictType
+from infinity.index import IndexInfo, IndexType
+from infinity.connection_pool import ConnectionPool
+from rag import settings
+from rag.settings import doc_store_logger
+from rag.utils import singleton
+import polars as pl
+from polars.series.series import Series
+from . import rmSpace
+
+from rag.utils.doc_store_conn import (
+    DocStoreConnection,
+    MatchExpr,
+    MatchTextExpr,
+    MatchDenseExpr,
+    FusionExpr,
+    OrderByExpr,
+)
+
+
+def equivalent_condition_to_str(condition: dict) -> str:
+    cond = list()
+    for k, v in condition.items():
+        if not isinstance(k, str) or not v:
+            continue
+        if isinstance(v, list):
+            inCond = list()
+            for item in v:
+                if isinstance(item, str):
+                    inCond.append(f"'{item}'")
+                else:
+                    inCond.append(str(item))
+            if inCond:
+                strInCond = ', '.join(inCond)
+                strInCond = f'{k} IN ({strInCond})'
+                cond.append(strInCond)
+        elif isinstance(v, str):
+            cond.append(f"{k}='{v}'")
+        else:
+            cond.append(f"{k}={str(v)}")
+    return " AND ".join(cond)
+
+
+@singleton
+class InfinityConnection(DocStoreConnection):
+    def __init__(self):
+        self.dbName = settings.INFINITY.get("db_name", "default_db")
+        infinity_uri = settings.INFINITY["uri"]
+        if ":" in infinity_uri:
+            host, port = infinity_uri.split(":")
+            infinity_uri = infinity.common.NetworkAddress(host, int(port))
+        self.connPool = ConnectionPool(infinity_uri)
+        doc_store_logger.info(f"Connected to infinity {infinity_uri}.")
+
+    """
+    Database operations
+    """
+
+    def health(self) -> dict:
+        """
+        Return the health status of the database.
+        TODO: the Infinity SDK should expose a health() API wrapping `show global variables` and `show tables`.
+        """
+        return dict()
+
+    """
+    Table operations
+    """
+
+    def createIdx(self, indexName: str, knowledgebaseId: str, vectorSize: int):
+        table_name = f'{indexName}_{knowledgebaseId}'
+        inf_conn = self.connPool.get_conn()
+        inf_db = inf_conn.create_database(self.dbName, ConflictType.Ignore)
+        vector_name = f'q_{vectorSize}_vec'
+        inf_table = inf_db.create_table(
+            table_name,
+            {
+                "chunk_id": {
+                    "type": "varchar",
+                    "default": "",
+                },  # ES `_id` for each ES document
+                "doc_id": {"type": "varchar", "default": ""},
+                "kb_id": {"type": "varchar", "default": ""},
+                "create_time": {"type": "varchar", "default": ""},
+                "create_timestamp_flt": {"type": "float", "default": 0.0},
+                "img_id": {"type": "varchar", "default": ""},
+                "docnm_kwd": {"type": "varchar", "default": ""},
+                "title_tks": {"type": "varchar", "default": ""},
+                "title_sm_tks": {"type": "varchar", "default": ""},
+                "name_kwd": {"type": "varchar", "default": ""},
+                "important_kwd": {"type": "varchar", "default": ""},
+                "important_tks": {"type": "varchar", "default": ""},
+                "content_with_weight": {
+                    "type": "varchar",
+                    "default": "",
+                },  # The raw chunk text
+                "content_ltks": {"type": "varchar", "default": ""},
+                "content_sm_ltks": {"type": "varchar", "default": ""},
+                vector_name: {"type": f"vector,{vectorSize},float"},
+                "page_num_list": {"type": "varchar", "default": ""},
+                "top_list": {"type": "varchar", "default": ""},
+                "position_list": {"type": "varchar", "default": ""},
+                "weight_int": {"type": "integer", "default": 0},
+                "weight_flt": {"type": "float", "default": 0.0},
+                "rank_int": {"type": "integer", "default": 0},
+                "available_int": {"type": "integer", "default": 0},
+                "knowledge_graph_kwd": {"type": "varchar", "default": ""},
+                "entities_kwd": {"type": "varchar", "default": ""},
+            },
+            ConflictType.Ignore,
+        )
+        inf_table.create_index(
+            "q_vec_idx",
+            IndexInfo(
+                vector_name,
+                IndexType.Hnsw,
+                {
+                    "M": "16",
+                    "ef_construction": "50",
+                    "metric": "cosine",
+                    "encode": "lvq",
+                },
+            ),
+            ConflictType.Ignore,
+        )
+        # Full-text indexes text_idx0..text_idx9, one per tokenized/keyword column.
+        text_fields = ["title_tks", "title_sm_tks", "important_kwd", "important_tks",
+                       "content_ltks", "content_sm_ltks", "docnm_kwd", "name_kwd",
+                       "knowledge_graph_kwd", "entities_kwd"]
+        for i, field in enumerate(text_fields):
+            inf_table.create_index(
+                f"text_idx{i}",
+                IndexInfo(field, IndexType.FullText, {"ANALYZER": "standard"}),
+                ConflictType.Ignore,
+            )
+        self.connPool.release_conn(inf_conn)
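Note that `createIdx` maps every (index, knowledge base) pair to its own Infinity table, so the table count grows with the number of KBs. Illustrative only, with placeholder ids:

```python
# Each KB gets its own table named {indexName}_{knowledgebaseId}.
conn = InfinityConnection()
for kb_id in ["kb_a", "kb_b"]:                 # placeholder KB ids
    conn.createIdx("ragflow_tnt_demo", kb_id, vectorSize=768)
# -> tables ragflow_tnt_demo_kb_a and ragflow_tnt_demo_kb_b, each with an
#    HNSW index on q_768_vec plus full-text indexes text_idx0..text_idx9
```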
+
+    def deleteIdx(self, indexName: str, knowledgebaseId: str):
+        table_name = f'{indexName}_{knowledgebaseId}'
+        inf_conn = self.connPool.get_conn()
+        db_instance = inf_conn.get_database(self.dbName)
+        db_instance.drop_table(table_name, ConflictType.Ignore)
+        self.connPool.release_conn(inf_conn)
+
+    def indexExist(self, indexName: str, knowledgebaseId: str) -> bool:
+        table_name = f'{indexName}_{knowledgebaseId}'
+        try:
+            inf_conn = self.connPool.get_conn()
+            db_instance = inf_conn.get_database(self.dbName)
+            _ = db_instance.get_table(table_name)
+            self.connPool.release_conn(inf_conn)
+            return True
+        except Exception as e:
+            doc_store_logger.error("INFINITY indexExist: " + str(e))
+        return False
+
+    """
+    CRUD operations
+    """
+
+    def search(
+        self, selectFields: list[str], highlightFields: list[str], condition: dict, matchExprs: list[MatchExpr], orderBy: OrderByExpr, offset: int, limit: int, indexName: str, knowledgebaseIds: list[str]
+    ) -> list[dict] | pl.DataFrame:
+        """
+        TODO: Infinity doesn't provide highlighting yet.
+        """
+        inf_conn = self.connPool.get_conn()
+        db_instance = inf_conn.get_database(self.dbName)
+        df_list = list()
+        for knowledgebaseId in knowledgebaseIds:
+            table_name = f'{indexName}_{knowledgebaseId}'
+            table_instance = db_instance.get_table(table_name)
+            if 'chunk_id' not in selectFields:
+                selectFields.append('chunk_id')
+            builder = table_instance.output(selectFields)
+            filter_cond = ''
+            filter_fulltext = ''
+            if condition:
+                filter_cond = equivalent_condition_to_str(condition)
+            for matchExpr in matchExprs:
+                if isinstance(matchExpr, MatchTextExpr):
+                    if len(filter_cond) != 0 and 'filter' not in matchExpr.extra_options:
+                        matchExpr.extra_options.update({'filter': filter_cond})
+                    fields = ','.join(matchExpr.fields)
+                    filter_fulltext = f"filter_fulltext('{fields}', '{matchExpr.matching_text}')"
+                    if len(filter_cond) != 0:
+                        filter_fulltext = f'({filter_cond}) AND {filter_fulltext}'
+                    # doc_store_logger.info(f"filter_fulltext: {filter_fulltext}")
+                    minimum_should_match = "0%"
+                    if "minimum_should_match" in matchExpr.extra_options:
+                        minimum_should_match = str(int(matchExpr.extra_options["minimum_should_match"] * 100)) + "%"
+                    matchExpr.extra_options.update({'minimum_should_match': minimum_should_match})
+                    for k, v in matchExpr.extra_options.items():
+                        if not isinstance(v, str):
+                            matchExpr.extra_options[k] = str(v)
+                    builder = builder.match_text(
+                        fields,
+                        matchExpr.matching_text,
+                        matchExpr.topn,
+                        matchExpr.extra_options,
+                    )
+                elif isinstance(matchExpr, MatchDenseExpr):
+                    if len(filter_fulltext) != 0 and 'filter' not in matchExpr.extra_options:
+                        matchExpr.extra_options.update({'filter': filter_fulltext})
+                    for k, v in matchExpr.extra_options.items():
+                        if not isinstance(v, str):
+                            matchExpr.extra_options[k] = str(v)
+                    builder = builder.match_dense(
+                        matchExpr.vector_column_name,
+                        matchExpr.embedding_data,
+                        matchExpr.embedding_data_type,
+                        matchExpr.distance_type,
+                        matchExpr.topn,
+                        matchExpr.extra_options,
+                    )
+                elif isinstance(matchExpr, FusionExpr):
+                    builder = builder.fusion(
+                        matchExpr.method, matchExpr.topn, matchExpr.fusion_params
+                    )
+            order_by_expr_list = list()
+            for order_field in orderBy.fields:
+                order_by_expr_list.append((order_field[0], order_field[1] == 0))
+            builder.sort(order_by_expr_list)
+            builder.offset(offset).limit(limit)
+            kb_res = builder.to_pl()
+            df_list.append(kb_res)
+        self.connPool.release_conn(inf_conn)
+        res = pl.concat(df_list)
+        return res
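For review purposes, a sketch of the hybrid query this `search` expects, mirroring the three-element weighted_sum pipeline the ES branch asserts on. The constructor argument order is inferred from the attributes used in this file and should be checked against `rag/utils/doc_store_conn.py`; ids and the embedding are stand-ins:

```python
from rag.utils.doc_store_conn import (
    MatchTextExpr, MatchDenseExpr, FusionExpr, OrderByExpr)

conn = InfinityConnection()
kb_ids = ["kb_a"]                              # placeholder KB ids
qv = [0.0] * 768                               # stand-in query embedding
match_exprs = [
    MatchTextExpr(["content_ltks", "title_tks"], "retrieval augmented generation",
                  1024, {"minimum_should_match": 0.3}),
    MatchDenseExpr("q_768_vec", qv, "float", "cosine", 1024, {"similarity": 0.1}),
    FusionExpr("weighted_sum", 1024, {"weights": "0.05,0.95"}),
]
res = conn.search(["chunk_id", "content_with_weight"], [], {"available_int": 1},
                  match_exprs, OrderByExpr(), 0, 30, "ragflow_tnt_demo", kb_ids)
```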
+
+    def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | pl.DataFrame:
+        inf_conn = self.connPool.get_conn()
+        db_instance = inf_conn.get_database(self.dbName)
+        df_list = list()
+        for knowledgebaseId in knowledgebaseIds:
+            table_name = f'{indexName}_{knowledgebaseId}'
+            table_instance = db_instance.get_table(table_name)
+            kb_res = table_instance.output(["*"]).filter(f"chunk_id = '{chunkId}'").to_pl()
+            df_list.append(kb_res)
+        self.connPool.release_conn(inf_conn)
+        res = pl.concat(df_list)
+        return res
+
+    def insert(self, documents: list[dict], indexName: str, knowledgebaseId: str):
+        inf_conn = self.connPool.get_conn()
+        db_instance = inf_conn.get_database(self.dbName)
+        table_name = f'{indexName}_{knowledgebaseId}'
+        table_instance = db_instance.get_table(table_name)
+        for d in documents:
+            if '_id' in d:
+                d["chunk_id"] = d["_id"]
+                del d["_id"]
+            for k, v in d.items():
+                if k.endswith("_kwd") and isinstance(v, list):
+                    d[k] = ' '.join(v)
+        ids = [f"'{d['chunk_id']}'" for d in documents]
+        str_ids = ', '.join(ids)
+        str_filter = f'chunk_id IN ({str_ids})'
+        table_instance.delete(str_filter)
+        # for doc in documents:
+        #     doc_store_logger.info(f"insert position_list: {doc['position_list']}")
+        # doc_store_logger.info(f"InfinityConnection.insert {json.dumps(documents)}")
+        table_instance.insert(documents)
+        self.connPool.release_conn(inf_conn)
+
+    def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str):
+        # if 'position_list' in newValue:
+        #     doc_store_logger.info(f"upsert position_list: {newValue['position_list']}")
+        inf_conn = self.connPool.get_conn()
+        db_instance = inf_conn.get_database(self.dbName)
+        table_name = f'{indexName}_{knowledgebaseId}'
+        table_instance = db_instance.get_table(table_name)
+        filter = equivalent_condition_to_str(condition)
+        for k, v in newValue.items():
+            if k.endswith("_kwd") and isinstance(v, list):
+                newValue[k] = ' '.join(v)
+        table_instance.update(filter, newValue)
+        self.connPool.release_conn(inf_conn)
+
+    def delete(self, condition: dict, indexName: str, knowledgebaseId: str):
+        inf_conn = self.connPool.get_conn()
+        db_instance = inf_conn.get_database(self.dbName)
+        table_name = f'{indexName}_{knowledgebaseId}'
+        filter = equivalent_condition_to_str(condition)
+        try:
+            table_instance = db_instance.get_table(table_name)
+        except Exception:
+            doc_store_logger.warning(f"Skipped deleting `{filter}` from table {table_name} since the table doesn't exist.")
+            return
+        table_instance.delete(filter)
+        self.connPool.release_conn(inf_conn)
+
+    """
+    Helper functions for search result
+    """
+
+    def getTotal(self, res):
+        return len(res)
+
+    def getChunkIds(self, res):
+        return list(res["chunk_id"])
+
+    def getFields(self, res, fields: List[str]) -> Dict[str, dict]:
+        res_fields = {}
+        if not fields:
+            return {}
+        num_rows = len(res)
+        column_id = res["chunk_id"]
+        for i in range(num_rows):
+            chunk_id = column_id[i]
+            m = {"id": chunk_id}
+            for fieldnm in fields:
+                if fieldnm not in res:
+                    m[fieldnm] = None
+                    continue
+                v = res[fieldnm][i]
+                if isinstance(v, Series):
+                    v = list(v)
+                elif fieldnm == "important_kwd":
+                    assert isinstance(v, str)
+                    v = v.split(" ")
+                else:
+                    if not isinstance(v, str):
+                        v = str(v)
+                    if fieldnm.endswith("_tks"):
+                        v = rmSpace(v)
+                m[fieldnm] = v
+            res_fields[chunk_id] = m
+        return res_fields
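Since Infinity returns results as a polars DataFrame, the helpers above normalize rows into the same per-chunk dicts the ES path produces. Continuing the earlier sketch (field names assumed):

```python
fields = conn.getFields(res, ["content_with_weight", "important_kwd"])
for chunk_id, doc in fields.items():
    # *_kwd columns are stored space-joined and come back as lists here
    print(chunk_id, doc["important_kwd"])
```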
+
+    def getHighlight(self, res, keywords: List[str], fieldnm: str):
+        ans = {}
+        num_rows = len(res)
+        column_id = res["chunk_id"]
+        for i in range(num_rows):
+            chunk_id = column_id[i]
+            txt = res[fieldnm][i]
+            txt = re.sub(r"[\r\n]", " ", txt, flags=re.IGNORECASE | re.MULTILINE)
+            txts = []
+            for t in re.split(r"[.?!;\n]", txt):
+                for w in keywords:
+                    t = re.sub(r"(^|[ .?/'\"\(\)!,:;-])(%s)([ .?/'\"\(\)!,:;-])" % re.escape(w), r"\1<em>\2</em>\3", t, flags=re.IGNORECASE | re.MULTILINE)
+                if not re.search(r"<em>[^<>]+</em>", t, flags=re.IGNORECASE | re.MULTILINE):
+                    continue
+                txts.append(t)
+            ans[chunk_id] = "...".join(txts)
+        return ans
+
+    def getAggregation(self, res, fieldnm: str):
+        """
+        TODO: Infinity doesn't provide aggregation yet.
+        """
+        return list()
+
+    """
+    SQL
+    """
+    def sql(self, sql: str, fetch_size: int, format: str):
+        raise NotImplementedError("Not implemented")
diff --git a/sdk/python/ragflow_sdk/modules/session.py b/sdk/python/ragflow_sdk/modules/session.py
index 1a0bd3e2af7..51dd191caca 100644
--- a/sdk/python/ragflow_sdk/modules/session.py
+++ b/sdk/python/ragflow_sdk/modules/session.py
@@ -41,7 +41,7 @@ def ask(self, question: str, stream: bool = False):
                 "document_id": chunk["doc_id"],
                 "document_name": chunk["docnm_kwd"],
                 "dataset_id": chunk["kb_id"],
-                "image_id": chunk["img_id"],
+                "img_id": chunk["img_id"],
                 "similarity": chunk["similarity"],
                 "vector_similarity": chunk["vector_similarity"],
                 "term_similarity": chunk["term_similarity"],
@@ -76,7 +76,7 @@ def __init__(self, rag, res_dict):
         self.document_id = ""
         self.document_name = ""
         self.dataset_id = ""
-        self.image_id = ""
+        self.img_id = ""
         self.similarity = None
         self.vector_similarity = None
         self.term_similarity = None
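One SDK-facing consequence of the session.py rename: client code must now read `img_id` on reference chunks where it previously read `image_id`. A hypothetical snippet, assuming `session` is an existing ragflow_sdk Session object:

```python
# Reference chunks expose img_id after this change.
for ans in session.ask("What is RAGFlow?", stream=True):
    for chunk in getattr(ans, "reference", []):
        print(chunk.img_id)   # was: chunk.image_id
```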