Skip to content

Commit

Permalink
Add custom KG insertion
Browse files Browse the repository at this point in the history
  • Loading branch information
LarFii committed Nov 25, 2024
1 parent ac1587a commit abc56d6
Show file tree
Hide file tree
Showing 4 changed files with 255 additions and 1 deletion.
44 changes: 44 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
</div>

## 🎉 News
- [x] [2024.11.25]🎯📢LightRAG now supports [custom KG insertion](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#insert-custom-kg).
- [x] [2024.11.19]🎯📢A comprehensive guide to LightRAG is now available on [LearnOpenCV](https://learnopencv.com/lightrag). Many thanks to the blog author!
- [x] [2024.11.12]🎯📢LightRAG now supports [Oracle Database 23ai for all storage types (KV, vector, and graph)](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_oracle_demo.py).
- [x] [2024.11.11]🎯📢LightRAG now supports [deleting entities by their names](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#delete-entity).
Expand Down Expand Up @@ -327,6 +328,49 @@ with open("./newText.txt") as f:
rag.insert(f.read())
```

### Insert Custom KG

```python
rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=embedding_dimension,
max_token_size=8192,
func=embedding_func,
),
)

custom_kg = {
"entities": [
{
"entity_name": "CompanyA",
"entity_type": "Organization",
"description": "A major technology company",
"source_id": "Source1"
},
{
"entity_name": "ProductX",
"entity_type": "Product",
"description": "A popular product developed by CompanyA",
"source_id": "Source1"
}
],
"relationships": [
{
"src_id": "CompanyA",
"tgt_id": "ProductX",
"description": "CompanyA develops ProductX",
"keywords": "develop, produce",
"weight": 1.0,
"source_id": "Source1"
}
]
}

rag.insert_custom_kg(custom_kg)
```

### Delete Entity

```python
Expand Down
108 changes: 108 additions & 0 deletions examples/insert_custom_kg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import os
from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete
#########
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
# import nest_asyncio
# nest_asyncio.apply()
#########

WORKING_DIR = "./custom_kg"

if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)

rag = LightRAG(
working_dir=WORKING_DIR,
llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model
# llm_model_func=gpt_4o_complete # Optionally, use a stronger model
)

custom_kg = {
"entities": [
{
"entity_name": "CompanyA",
"entity_type": "Organization",
"description": "A major technology company",
"source_id": "Source1"
},
{
"entity_name": "ProductX",
"entity_type": "Product",
"description": "A popular product developed by CompanyA",
"source_id": "Source1"
},
{
"entity_name": "PersonA",
"entity_type": "Person",
"description": "A renowned researcher in AI",
"source_id": "Source2"
},
{
"entity_name": "UniversityB",
"entity_type": "Organization",
"description": "A leading university specializing in technology and sciences",
"source_id": "Source2"
},
{
"entity_name": "CityC",
"entity_type": "Location",
"description": "A large metropolitan city known for its culture and economy",
"source_id": "Source3"
},
{
"entity_name": "EventY",
"entity_type": "Event",
"description": "An annual technology conference held in CityC",
"source_id": "Source3"
},
{
"entity_name": "CompanyD",
"entity_type": "Organization",
"description": "A financial services company specializing in insurance",
"source_id": "Source4"
},
{
"entity_name": "ServiceZ",
"entity_type": "Service",
"description": "An insurance product offered by CompanyD",
"source_id": "Source4"
}
],
"relationships": [
{
"src_id": "CompanyA",
"tgt_id": "ProductX",
"description": "CompanyA develops ProductX",
"keywords": "develop, produce",
"weight": 1.0,
"source_id": "Source1"
},
{
"src_id": "PersonA",
"tgt_id": "UniversityB",
"description": "PersonA works at UniversityB",
"keywords": "employment, affiliation",
"weight": 0.9,
"source_id": "Source2"
},
{
"src_id": "CityC",
"tgt_id": "EventY",
"description": "EventY is hosted in CityC",
"keywords": "host, location",
"weight": 0.8,
"source_id": "Source3"
},
{
"src_id": "CompanyD",
"tgt_id": "ServiceZ",
"description": "CompanyD provides ServiceZ",
"keywords": "provide, offer",
"weight": 1.0,
"source_id": "Source4"
}
]
}

rag.insert_custom_kg(custom_kg)
2 changes: 1 addition & 1 deletion lightrag/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam

__version__ = "1.0.1"
__version__ = "1.0.2"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG"
102 changes: 102 additions & 0 deletions lightrag/lightrag.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,108 @@ async def _insert_done(self):
tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
await asyncio.gather(*tasks)

def insert_custom_kg(self, custom_kg: dict):
loop = always_get_an_event_loop()
return loop.run_until_complete(self.ainsert_custom_kg(custom_kg))

async def ainsert_custom_kg(self, custom_kg: dict):
update_storage = False
try:
# Insert entities into knowledge graph
all_entities_data = []
for entity_data in custom_kg.get("entities", []):
entity_name = f'"{entity_data["entity_name"].upper()}"'
entity_type = entity_data.get("entity_type", "UNKNOWN")
description = entity_data.get("description", "No description provided")
source_id = entity_data["source_id"]

# Prepare node data
node_data = {
"entity_type": entity_type,
"description": description,
"source_id": source_id,
}
# Insert node data into the knowledge graph
await self.chunk_entity_relation_graph.upsert_node(
entity_name, node_data=node_data
)
node_data["entity_name"] = entity_name
all_entities_data.append(node_data)
update_storage = True

# Insert relationships into knowledge graph
all_relationships_data = []
for relationship_data in custom_kg.get("relationships", []):
src_id = f'"{relationship_data["src_id"].upper()}"'
tgt_id = f'"{relationship_data["tgt_id"].upper()}"'
description = relationship_data["description"]
keywords = relationship_data["keywords"]
weight = relationship_data.get("weight", 1.0)
source_id = relationship_data["source_id"]

# Check if nodes exist in the knowledge graph
for need_insert_id in [src_id, tgt_id]:
if not (
await self.chunk_entity_relation_graph.has_node(need_insert_id)
):
await self.chunk_entity_relation_graph.upsert_node(
need_insert_id,
node_data={
"source_id": source_id,
"description": "UNKNOWN",
"entity_type": "UNKNOWN",
},
)

# Insert edge into the knowledge graph
await self.chunk_entity_relation_graph.upsert_edge(
src_id,
tgt_id,
edge_data={
"weight": weight,
"description": description,
"keywords": keywords,
"source_id": source_id,
},
)
edge_data = {
"src_id": src_id,
"tgt_id": tgt_id,
"description": description,
"keywords": keywords,
}
all_relationships_data.append(edge_data)
update_storage = True

# Insert entities into vector storage if needed
if self.entities_vdb is not None:
data_for_vdb = {
compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
"content": dp["entity_name"] + dp["description"],
"entity_name": dp["entity_name"],
}
for dp in all_entities_data
}
await self.entities_vdb.upsert(data_for_vdb)

# Insert relationships into vector storage if needed
if self.relationships_vdb is not None:
data_for_vdb = {
compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
"src_id": dp["src_id"],
"tgt_id": dp["tgt_id"],
"content": dp["keywords"]
+ dp["src_id"]
+ dp["tgt_id"]
+ dp["description"],
}
for dp in all_relationships_data
}
await self.relationships_vdb.upsert(data_for_vdb)
finally:
if update_storage:
await self._insert_done()

def query(self, query: str, param: QueryParam = QueryParam()):
loop = always_get_an_event_loop()
return loop.run_until_complete(self.aquery(query, param))
Expand Down

0 comments on commit abc56d6

Please sign in to comment.