Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

When choosing retrieval_method="multi_qns", I get the error: "Expected document to be a str" #47

Open
linuaries opened this issue Dec 9, 2024 · 0 comments

Comments

@linuaries
Copy link

I have modified this repo by replacing OpenAI with AzureOpenAI, and it works with the "naive" and "HyDE" retrieval methods.

Here is the visualization with naive RAG:
image

But it fails with multi_qns. Is there any solution?

Desktop (please complete the following information):

  • OS: WSL2 with Ubuntu 20.04
  • Version v0.1.10

Additional context
Add any other context about the problem here.

Here is the error information:
"{
"name": "ValueError",
"message": "Expected document to be a str, got ['What is Castrol AWS 150?', 'What is the ASTM D892 test?', 'Has Castrol AWS 150 been tested using ASTM D892?', 'Where can I find the test results for Castrol AWS 150?', 'What are the typical results for the ASTM D892 test?'] in query.",
"stack": "---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File ~/.local/lib/python3.11/site-packages/chromadb/api/models/CollectionCommon.py:90, in validation_context..decorator..wrapper(self, *args, **kwargs)
89 try:
---> 90 return func(self, *args, **kwargs)
91 except Exception as e:

File ~/.local/lib/python3.11/site-packages/chromadb/api/models/CollectionCommon.py:293, in CollectionCommon._validate_and_prepare_query_request(self, query_embeddings, query_texts, query_images, query_uris, n_results, where, where_document, include)
292 # Validate
--> 293 validate_base_record_set(record_set=query_records)
294 validate_filter_set(filter_set=filters)

File ~/.local/lib/python3.11/site-packages/chromadb/api/types.py:204, in validate_base_record_set(record_set)
203 if record_set["documents"] is not None:
--> 204 validate_documents(
205 documents=record_set["documents"],
206 # If embeddings are present, some documents can be None
207 nullable=(record_set["embeddings"] is not None),
208 )
209 if record_set["images"] is not None:

File ~/.local/lib/python3.11/site-packages/chromadb/api/types.py:800, in validate_documents(documents, nullable)
799 if not is_document(document):
--> 800 raise ValueError(f"Expected document to be a str, got {document}")

ValueError: Expected document to be a str, got ['What is Castrol AWS 150?', 'What is the ASTM D892 test?', 'Has Castrol AWS 150 been tested using ASTM D892?', 'Where can I find the test results for Castrol AWS 150?', 'What are the typical results for the ASTM D892 test?']

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
Cell In[8], line 7
1 client_openai = RAGxplorer(embedding_model="text-embedding-3-small")
2 client_openai.load_pdf(
3 document_path=pdf_path,
4 chunk_size=200,
5 chunk_overlap=50,
6 verbose=True)
----> 7 client_openai.visualize_query(
8 query=question,
9 #"What measures are being taken to cultivate and attract top AI talent to Singapore?",
10 retrieval_method= "multi_qns",#"HyDE",#"naive", "HyDE", "multi_qns"
11 top_k=6,
12 query_shape_size=10
13 )

File ~/miniconda3/envs/env_rag/lib/python3.11/site-packages/ragxplorer-0.1.10-py3.11.egg/ragxplorer/ragxplorer.py:179, in RAGxplorer.visualize_query(self, query, retrieval_method, top_k, query_shape_size, import_projection_data)
175 raise OSError("AZURE_OPENAI_ENDPOINT is not set")
177 self._query.actual_search_queries = generate_sub_qn(query=self._query.original_query)
--> 179 self._query.retrieved_docs = query_chroma(chroma_collection=self._vectordb,
180 query=self._query.actual_search_queries,
181 top_k=top_k)
183 self._VizData.base_df.loc[self._VizData.base_df['id'].isin(self._query.retrieved_docs), "category"] = "Retrieved"
185 self._VizData.visualisation_df = pd.concat([self._VizData.base_df, self._VizData.query_df], axis = 0)

File ~/miniconda3/envs/env_rag/lib/python3.11/site-packages/ragxplorer-0.1.10-py3.11.egg/ragxplorer/rag.py:100, in query_chroma(chroma_collection, query, top_k)
88 def query_chroma(chroma_collection: chromadb.Collection, query: str, top_k: int) -> List[str]:
89 """
90 Queries the Chroma collection for the top_k most relevant chunks to the input query.
91
(...)
98 A list of retrieved chunk IDs.
99 """
--> 100 results = chroma_collection.query(query_texts=[query], n_results=top_k, include=['documents', 'embeddings'])
101 retrieved_id = results['ids'][0]
102 return retrieved_id

File ~/.local/lib/python3.11/site-packages/chromadb/api/models/Collection.py:210, in Collection.query(self, query_embeddings, query_texts, query_images, query_uris, n_results, where, where_document, include)
167 def query(
168 self,
169 query_embeddings: Optional[
(...)
185 ],
186 ) -> QueryResult:
187 """Get the n_results nearest neighbor embeddings for provided query_embeddings or query_texts.
188
189 Args:
(...)
207
208 """
--> 210 query_request = self._validate_and_prepare_query_request(
211 query_embeddings=query_embeddings,
212 query_texts=query_texts,
213 query_images=query_images,
214 query_uris=query_uris,
215 n_results=n_results,
216 where=where,
217 where_document=where_document,
218 include=include,
219 )
221 query_results = self._client._query(
222 collection_id=self.id,
223 query_embeddings=query_request["embeddings"],
(...)
229 database=self.database,
230 )
232 return self._transform_query_response(
233 response=query_results, include=query_request["include"]
234 )

File ~/.local/lib/python3.11/site-packages/chromadb/api/models/CollectionCommon.py:93, in validation_context..decorator..wrapper(self, *args, **kwargs)
91 except Exception as e:
92 msg = f"{str(e)} in {name}."
---> 93 raise type(e)(msg).with_traceback(e.__traceback__)

File ~/.local/lib/python3.11/site-packages/chromadb/api/models/CollectionCommon.py:90, in validation_context..decorator..wrapper(self, *args, **kwargs)
87 @functools.wraps(func)
88 def wrapper(self: Any, *args: Any, **kwargs: Any) -> T:
89 try:
---> 90 return func(self, *args, **kwargs)
91 except Exception as e:
92 msg = f"{str(e)} in {name}."

File ~/.local/lib/python3.11/site-packages/chromadb/api/models/CollectionCommon.py:293, in CollectionCommon._validate_and_prepare_query_request(self, query_embeddings, query_texts, query_images, query_uris, n_results, where, where_document, include)
287 filters = FilterSet(
288 where=where,
289 where_document=where_document,
290 )
292 # Validate
--> 293 validate_base_record_set(record_set=query_records)
294 validate_filter_set(filter_set=filters)
295 validate_include(include=include)

File ~/.local/lib/python3.11/site-packages/chromadb/api/types.py:204, in validate_base_record_set(record_set)
202 validate_embeddings(embeddings=record_set["embeddings"])
203 if record_set["documents"] is not None:
--> 204 validate_documents(
205 documents=record_set["documents"],
206 # If embeddings are present, some documents can be None
207 nullable=(record_set["embeddings"] is not None),
208 )
209 if record_set["images"] is not None:
210 validate_images(images=record_set["images"])

File ~/.local/lib/python3.11/site-packages/chromadb/api/types.py:800, in validate_documents(documents, nullable)
798 continue
799 if not is_document(document):
--> 800 raise ValueError(f"Expected document to be a str, got {document}")

ValueError: Expected document to be a str, got ['What is Castrol AWS 150?', 'What is the ASTM D892 test?', 'Has Castrol AWS 150 been tested using ASTM D892?', 'Where can I find the test results for Castrol AWS 150?', 'What are the typical results for the ASTM D892 test?'] in query."
}"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant