
Commit

changes to main.py
Sathvik21S21Rao committed Aug 31, 2024
1 parent dd65ccd commit 80681cc
Showing 6 changed files with 25 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Community_Generation/communitySummary.py
@@ -139,7 +139,7 @@ def update_communities(self):
response=self.chain.invoke({"input_text":community_description})
if isinstance(response,AIMessage):
response=response.content
print(response)

response=response[response.find("{"):response.rfind("}")]+"}"

community_class=Community.model_validate(from_json(response,allow_partial=True))
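For context, the surrounding lines implement a trim-and-validate pattern: the raw LLM output is sliced down to its outermost JSON object before Pydantic validation, and the removed line was only a debug print of that output. A minimal sketch, assuming from_json comes from pydantic_core as in the file above and community_model stands in for the Community class (the helper name parse_community is hypothetical):

from pydantic_core import from_json

def parse_community(raw: str, community_model):
    # Keep only the JSON object inside the raw output; the slice stops just
    # before the final '}', so it is appended back explicitly.
    trimmed = raw[raw.find("{"):raw.rfind("}")] + "}"
    # allow_partial tolerates JSON that was cut off mid-object.
    return community_model.model_validate(from_json(trimmed, allow_partial=True))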
2 changes: 1 addition & 1 deletion Graph_Generation/graph_extraction.py
@@ -64,7 +64,7 @@ def _extract_relations(self,entities,input_text,retries=3)->Relation:

chain=self.templates[2] | self.llm
response=chain.invoke({"entities":entities,"input_text":input_text})
if isinstance(response,langchain_core.messages.ai.AIMessage):
if isinstance(response,langchain_core.messages.ai.AIMessage) or not isinstance(response,str):
response=response.content
response=response[response.find("{"):response.rfind("}")+1]

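The widened check means any response that is not already a plain string — not only an explicit AIMessage — is unwrapped via .content before the JSON object is sliced out; note that the +1 keeps the closing brace here. A minimal sketch of that normalisation, with the hypothetical helper name extract_json_payload:

def extract_json_payload(response):
    # Chat backends return an AIMessage-like object, plain-text backends a str;
    # anything that is not already a string is assumed to expose .content.
    if not isinstance(response, str):
        response = response.content
    # Keep the outermost JSON object, including its closing brace.
    return response[response.find("{"):response.rfind("}") + 1]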
2 changes: 1 addition & 1 deletion Graph_Retrieval/context_based_node_retrieval.py
@@ -26,7 +26,7 @@ def __init__(self, llm, graph,node2vec_model_path,data_dir="node_data",community
self.vectorstore=vectorstore

self.community=CommunitySummary(self.graph,self.llm,self.community_data,self.create)
print("Communities",os.listdir(self.community_data))


def setup(self):
if self.create:
11 changes: 8 additions & 3 deletions config.yaml
@@ -1,14 +1,18 @@
data_path: # folder or file containing the data
data_path: data
chunk_path: chunk
chunk_size: 512
chunk_overlap: 128
api_key: # openai api key or groq api key. Use key if provided else uses ollama model specified.
server: # Groq or OpenAI or Ollama
model: # specify the model to use.
server: # Groq or OpenAI or Ollama
model: # specify the model to use.
embedding_server: #ollama or huggingface or openai
embedding_model: # embedding model
temperature: 0.5
use_sentence_embeddings: False #True if you want to use sentence embeddings
node2vec_model_path: ./model/node2vec.model
sentence_model_path: ./model/sentence.model
node2vec_embeddings_path: ./embeddings/node2vec_embeddings.npy
vectorstore_path: ./model/vectorstore
graph_file_path: ./graph/graph.pkl
collection_name: node_data
node_data_dir: ./node_data
@@ -18,3 +22,4 @@ sentence_embeddings_path: embeddings/sentence_embeddings.npy
node_names_path: ./embeddings/node_names.npy
sentence_model_name: all-MiniLM-L6-v2
faiss_model_path: ./model/faiss.index

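The keys that matter to this commit are data_path (now pointing at the data folder), the new chunk_path, and vectorstore_path, which main.py reads in place of the old node_vectorstore_path variable. A rough sketch of how the file might be consumed, assuming it is loaded with PyYAML's safe_load (the loading call itself is not part of this diff):

import yaml

with open("config.yaml") as f:
    config = yaml.safe_load(f)

# Key added in this commit; main.py now passes it to every VectorStore call.
vectorstore_path = config.get("vectorstore_path")   # ./model/vectorstore
chunk_path = config.get("chunk_path")                # chunk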
7 changes: 4 additions & 3 deletions main.py
@@ -77,6 +77,7 @@ def initialize_embedding_model(config):
sentence_model_name = config.get("sentence_model_name")
use_sentence_embeddings = config.get("use_sentence_embeddings")
chunk_path=config.get("chunk_path")
vectorstore_path=config.get("vectorstore_path")



@@ -96,7 +97,7 @@ def main(create_graph):
data = DataLoader(path=config["data_path"], chunk_size=chunk_size, chunk_overlap=chunk_overlap).load()
NxData = PrepareDataForNX().execute(data, chain)
graph = nx.Graph()
vectorstore=VectorStore(embedding=initialize_embedding_model(config),persist_dir=node_vectorstore_path,collection_name=collection_name,create=True,documents=data,metadata=[{"chunk_id":i} for i in range(len(data))])
vectorstore=VectorStore(embedding=initialize_embedding_model(config),persist_dir=vectorstore_path,collection_name=collection_name,create=True,documents=data,metadata=[{"chunk_id":i} for i in range(len(data))])
graph.add_nodes_from(NxData[0])
graph.add_edges_from(NxData[1])

@@ -161,10 +162,10 @@ def main(create_graph):
updates="\n".join(updates)
updates=DataLoader(path=None,chunk_overlap=chunk_overlap,chunk_size=chunk_size).load_text(updates)

start_chunk=len(VectorStore(embedding=initialize_embedding_model(config),persist_dir=node_vectorstore_path,collection_name=collection_name,create=False,update=False).get_vectorstore().get()["documents"])
start_chunk=len(VectorStore(embedding=initialize_embedding_model(config),persist_dir=vectorstore_path,collection_name=collection_name,create=False,update=False).get_vectorstore().get()["documents"])


vectorstore=VectorStore(embedding=initialize_embedding_model(config),persist_dir=node_vectorstore_path,collection_name=collection_name,create=False,update=True,documents=updates,metadata=[{"chunk_id":i} for i in range(start_chunk,start_chunk+len(updates))])
vectorstore=VectorStore(embedding=initialize_embedding_model(config),persist_dir=vectorstore_path,collection_name=collection_name,create=False,update=True,documents=updates,metadata=[{"chunk_id":i} for i in range(start_chunk,start_chunk+len(updates))])

sync.syncTempFolder()
chain=GraphExtractionChain(llm=llm)
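The main.py change is essentially a rename: every VectorStore call now receives vectorstore_path read from the config instead of the old node_vectorstore_path. Condensed into a sketch, the update branch counts the documents already persisted so that new chunks get non-overlapping chunk_id metadata; everything is passed in as a parameter here because VectorStore and the embedding model are defined elsewhere in the repository, so treat this as an illustration of the logic rather than drop-in code:

def update_vectorstore(VectorStore, embedding, vectorstore_path, collection_name, updates):
    # The number of documents already stored determines where chunk_id numbering resumes.
    existing = VectorStore(embedding=embedding, persist_dir=vectorstore_path,
                           collection_name=collection_name, create=False, update=False)
    start_chunk = len(existing.get_vectorstore().get()["documents"])
    metadata = [{"chunk_id": i} for i in range(start_chunk, start_chunk + len(updates))]
    return VectorStore(embedding=embedding, persist_dir=vectorstore_path,
                       collection_name=collection_name, create=False, update=True,
                       documents=updates, metadata=metadata)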
10 changes: 10 additions & 0 deletions readme.md
@@ -1,4 +1,5 @@
[![Knowledge graph workflow](https://github.com/Sathvik21S21Rao/KnowledgeGraph/actions/workflows/main.yml/badge.svg)](https://github.com/Sathvik21S21Rao/KnowledgeGraph/actions/workflows/main.yml)

# GraphRAG

GraphRAG is a **Python project** that uses **graph-based methods** for information retrieval. It relies on **language models** and **embeddings** to create a **graph of data** and interact with it.
@@ -82,6 +83,15 @@ When running `main.py`, you will be asked whether you want to create a **new gra

Once the application is running, you can interact with it by typing **queries** into the console. The application will respond with information retrieved from the graph based on your query. To end the conversation, type **'exit'**.

## Future scope

1. An interactive UI for a seamless user experience
2. Integrating vector search alongside graph search
3. Allowing users to maintain multiple graphs
4. Shifting from in-memory graph computation to disk-based retrieval

## Tested to work with Groq (Llama 3.1 70B) and Ollama embeddings (nomic-embed-text)

## Visualisation of graph

- Built using the data in [testing_data.py](https://github.com/Sathvik21S21Rao/KnowledgeGraph/blob/main/testing_data.py)
