Skip to content

Commit

Permalink
[Update Success] data set now works for wikitext. Success!
Browse files (browse the repository at this point in the history)
  • Loading branch information
exowanderer committed Jan 31, 2024
1 parent af2dcf2 commit aea16e3
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
3 changes: 1 addition & 2 deletions gswikichat/api.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,9 +26,8 @@ async def api(q):

result = embedder.run([query])

# print(help(retriever))
results = retriever.run(
- query_embedding=result['documents'][0].embedding,
+ query_embedding=list(result['documents'][0].embedding),
filters=None,
top_k=None,
scale_score=None,
Expand Down
15 changes: 10 additions & 5 deletions gswikichat/vector_store_interface.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,11 +26,15 @@
  input_documents.append(Document(content=v, meta={"src": k}))
  elif isinstance(json_obj, list):
  for obj_ in json_obj:
- meta = obj_['meta']
+ url = obj_['meta']
  content = obj_['content']
- # print(f"Loading {meta}")
- # print(content)
- input_documents.append(Document(content=content, meta=meta))
+
+ input_documents.append(
+ Document(
+ content=content,
+ meta={'src': url}
+ )
+ )
  else:
  input_documents = [
  Document(
Expand All @@ -56,13 +60,14 @@
)
# document_store.write_documents(input_documents)


# TODO Introduce Jina.AI from HuggingFace. Establish env-variable for trust_...
embedder = SentenceTransformersDocumentEmbedder(
model="sentence-transformers/all-MiniLM-L6-v2"
)
embedder.warm_up()

documents_with_embeddings = embedder.run(input_documents)

document_store.write_documents(
documents=documents_with_embeddings['documents'],
policy=DuplicatePolicy.OVERWRITE
Expand Down

0 comments on commit aea16e3

Please sign in to comment.