Skip to content

Commit

Permalink
Implementación de chatbot Laura
Browse files Browse the repository at this point in the history
  • Loading branch information
Blaister9 committed Oct 3, 2024
1 parent bfe0e58 commit fd7d63b
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 93 deletions.
156 changes: 82 additions & 74 deletions backend/chatbot_laura/chatbot_logic.py
Original file line number Diff line number Diff line change
@@ -1,88 +1,96 @@
from openai import OpenAI
import pandas as pd
import json
import faiss
import numpy as np
import os
import pickle
from django.conf import settings
import pandas as pd
import json
import faiss
import numpy as np
import os
import pickle
from django.conf import settings
from dotenv import load_dotenv

# NOTE(review): this span comes from a flattened diff view, so lines from the
# removed and the added version of the module appear together. `client` is
# assigned twice below; if both lines ran, the second assignment would win.
# Build the OpenAI client from the key stored in Django settings.
client = OpenAI(api_key=settings.OPENAI_API_KEY)
# Presumably loads variables from a .env file into the process environment
# (python-dotenv) so os.getenv below can see them; confirm the .env location.
load_dotenv()

# Cache file locations inside the chatbot_laura directory under BASE_DIR.
embedding_file = os.path.join(settings.BASE_DIR, "chatbot_laura", "embeddings.pkl")
index_file = os.path.join(settings.BASE_DIR, "chatbot_laura", "faiss_index.index")
# Read the OpenAI API key from the environment variables.
# NOTE(review): os.getenv returns None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)

def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector the OpenAI embeddings API produces for ``text``.

    Args:
        text: Input passed unchanged as the ``input`` of the request.
        model: Name of the embedding model to query.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    api_response = client.embeddings.create(model=model, input=text)
    first_item = api_response.data[0]
    return first_item.embedding
# Update the file paths: the cached embeddings, the FAISS index and the source
# JSON are all resolved inside the "data" directory under settings.BASE_DIR.
data_dir = os.path.join(settings.BASE_DIR, "data")
embedding_file = os.path.join(data_dir, "embeddings.pkl")
index_file = os.path.join(data_dir, "faiss_index.index")
json_file = os.path.join(data_dir, "preguntas_respuestas_procesadasV1.json")

def save_embeddings(embeddings, file_name):
    """Pickle ``embeddings`` into ``file_name``, replacing any existing file.

    Args:
        embeddings: Picklable object holding the embeddings.
        file_name: Path of the file to write (opened in binary write mode).
    """
    with open(file_name, mode="wb") as out_file:
        pickle.dump(embeddings, out_file)
def get_embedding(text, model="text-embedding-ada-002"):
    """Ask the OpenAI embeddings endpoint for the vector representing ``text``.

    Args:
        text: Input forwarded as the ``input`` field of the request.
        model: Embedding model name sent with the request.

    Returns:
        The ``embedding`` of the first entry in the response's ``data`` list.
    """
    reply = client.embeddings.create(model=model, input=text)
    leading_entry = reply.data[0]
    return leading_entry.embedding

def load_embeddings(file_name):
    """Return the object unpickled from ``file_name``.

    Args:
        file_name: Path of a pickle file (opened in binary read mode).

    Returns:
        Whatever object was stored in the file.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(file_name, mode="rb") as in_file:
        stored = pickle.load(in_file)
    return stored
def save_embeddings(embeddings, file_name):
    """Serialize ``embeddings`` with pickle and write them to ``file_name``.

    Args:
        embeddings: Picklable object to persist.
        file_name: Destination path; an existing file is overwritten.
    """
    with open(file_name, mode="wb") as sink:
        pickle.dump(embeddings, sink)

# NOTE(review): this span comes from a flattened diff view. Two copies of
# process_data are interleaved hunk by hunk, a copy of load_embeddings sits
# between them, and the original indentation is missing.
#
# process_data: load a JSON list of items and collect one record per item of
# type 'qa' or 'info'; items of any other type are skipped.
def process_data(json_file):
# NOTE(review): open() gets no encoding argument, so the platform default
# encoding is used to decode the JSON file.
with open(json_file, "r") as file:
data = json.load(file)
# load_embeddings: return the object unpickled from file_name.
def load_embeddings(file_name):
with open(file_name, 'rb') as f:
return pickle.load(f)

processed_data = []
for item in data:
if item['type'] == 'qa':
# Q&A items are embedded as "<pregunta> <respuesta>".
text_for_embedding = f"{item['content']['pregunta']} {item['content']['respuesta']}"
elif item['type'] == 'info':
# Info items are embedded as "<titulo> <descripcion>"; descripcion is optional.
text_for_embedding = f"{item['content']['titulo']} {item['content'].get('descripcion', '')}"
else:
continue
# Second copy of process_data starts here.
def process_data(json_file):
with open(json_file, "r") as file:
data = json.load(file)

# Each record keeps the embedding text plus the original content, type,
# url (default '') and metadata (default {}).
processed_data.append({
'text_for_embedding': text_for_embedding,
'full_content': item['content'],
'type': item['type'],
'url': item.get('url', ''),
'metadata': item.get('metadata', {})
})
processed_data = []
for item in data:
if item['type'] == 'qa':
text_for_embedding = f"{item['content']['pregunta']} {item['content']['respuesta']}"
elif item['type'] == 'info':
text_for_embedding = f"{item['content']['titulo']} {item['content'].get('descripcion', '')}"
else:
continue

# The collected records are returned as a pandas DataFrame.
return pd.DataFrame(processed_data)
processed_data.append({
'text_for_embedding': text_for_embedding,
'full_content': item['content'],
'type': item['type'],
'url': item.get('url', ''),
'metadata': item.get('metadata', {})
})

# NOTE(review): flattened diff view; indentation is missing and the
# `return pd.DataFrame(processed_data)` line below belongs to process_data.
#
# initialize_or_load_index: return (index, embeddings) for df. When both cache
# files exist they are loaded; otherwise embeddings are computed for every row,
# a FAISS inner-product index is built, and both are written to the cache files.
def initialize_or_load_index(df):
# NOTE(review): the cache is used whenever both files exist; nothing here
# checks that it still matches the rows of df.
if os.path.exists(embedding_file) and os.path.exists(index_file):
embeddings = load_embeddings(embedding_file)
index = faiss.read_index(index_file)
else:
# One get_embedding call per row of df.
df['embedding'] = df['text_for_embedding'].apply(lambda x: get_embedding(x))
embedding_matrix = np.array(df['embedding'].tolist()).astype('float32')
# Scale every row to unit length so inner product equals cosine similarity.
# NOTE(review): a row with zero norm would divide by zero here.
embedding_matrix /= np.linalg.norm(embedding_matrix, axis=1)[:, None]

index = faiss.IndexFlatIP(embedding_matrix.shape[1])
index.add(embedding_matrix)

# The embeddings are pickled as stored in df (before normalization); the
# index holds the normalized matrix.
save_embeddings(df['embedding'].tolist(), embedding_file)
faiss.write_index(index, index_file)
embeddings = df['embedding'].tolist()
return pd.DataFrame(processed_data)

return index, embeddings
# NOTE(review): flattened diff view; indentation is missing and a copy of
# search() sits between the body of initialize_or_load_index and its final
# `return index, embeddings` line.
#
# initialize_or_load_index: return (index, embeddings) for df, loading both from
# the cache files when they exist and building and saving them otherwise.
def initialize_or_load_index(df):
# NOTE(review): only the existence of the cache files is checked, not whether
# they match df.
if os.path.exists(embedding_file) and os.path.exists(index_file):
embeddings = load_embeddings(embedding_file)
index = faiss.read_index(index_file)
else:
df['embedding'] = df['text_for_embedding'].apply(lambda x: get_embedding(x))
embedding_matrix = np.array(df['embedding'].tolist()).astype('float32')
# Unit-normalize rows so the inner-product index ranks by cosine similarity.
embedding_matrix /= np.linalg.norm(embedding_matrix, axis=1)[:, None]

index = faiss.IndexFlatIP(embedding_matrix.shape[1])
index.add(embedding_matrix)

save_embeddings(df['embedding'].tolist(), embedding_file)
faiss.write_index(index, index_file)
embeddings = df['embedding'].tolist()

# search: embed the query, normalize it, and return the k best-scoring rows of
# df as dicts with content, url, type, metadata and similarity_score.
def search(query, df, index, k=3):
query_embedding = np.array(get_embedding(query)).astype('float32')
query_embedding /= np.linalg.norm(query_embedding)
# D holds the scores and I the row positions, one row per query vector.
D, I = index.search(np.array([query_embedding]), k)

results = []
# NOTE(review): always reads k entries; FAISS reports a missing neighbour as
# position -1, which df.iloc would resolve to the last row. Confirm the index
# always holds at least k vectors.
for i in range(k):
result = df.iloc[I[0][i]]
results.append({
'content': result['full_content'],
'url': result['url'],
'type': result['type'],
'metadata': result['metadata'],
'similarity_score': float(D[0][i])
})

return results
return index, embeddings

# Initialize data and index.
# These statements run when the module is imported; views.py imports `df` and
# `index` from this module. The JSON is read from the chatbot_laura directory
# under settings.BASE_DIR.
df = process_data(os.path.join(settings.BASE_DIR, "chatbot_laura", "preguntas_respuestas_procesadasV1.json"))
index, embeddings = initialize_or_load_index(df)
def search(query, df, index, k=3):
    """Return the entries of ``df`` most similar to ``query``.

    The query is embedded with ``get_embedding``, scaled to unit length and
    looked up in ``index``. Row positions returned by the index are resolved
    with ``df.iloc``, so ``df`` must hold its rows in the order in which their
    vectors were added to ``index``.

    Args:
        query: Text to search for.
        df: DataFrame with ``full_content``, ``url``, ``type`` and
            ``metadata`` columns.
        index: FAISS index to query.
        k: Maximum number of results to return.

    Returns:
        A list of at most ``k`` dicts with the keys ``content``, ``url``,
        ``type``, ``metadata`` and ``similarity_score``, best match first.
    """
    query_vector = np.asarray(get_embedding(query), dtype='float32')
    norm = np.linalg.norm(query_vector)
    # Skip the division for an all-zero vector; it would only produce NaNs.
    if norm > 0:
        query_vector /= norm
    scores, positions = index.search(query_vector.reshape(1, -1), k)

    results = []
    for score, position in zip(scores[0], positions[0]):
        # FAISS pads with -1 when the index holds fewer than k vectors;
        # df.iloc[-1] would silently return the last row as a bogus hit.
        if position < 0:
            continue
        row = df.iloc[int(position)]
        results.append({
            'content': row['full_content'],
            'url': row['url'],
            'type': row['type'],
            'metadata': row['metadata'],
            'similarity_score': float(score),
        })

    return results

# Initialize data and index.
# These statements run when the module is imported; views.py imports `df` and
# `index` from this module. The JSON path comes from the module-level
# `json_file` constant.
df = process_data(json_file)
index, embeddings = initialize_or_load_index(df)
38 changes: 19 additions & 19 deletions backend/chatbot_laura/views.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
from django.http import JsonResponse
from channels.generic.websocket import AsyncWebsocketConsumer
import json
from .chatbot_logic import search, df, index
from channels.generic.websocket import AsyncWebsocketConsumer
import json
from .chatbot_logic import search, df, index

class ChatConsumer(AsyncWebsocketConsumer):
    """WebSocket consumer that answers each incoming message with search results."""

    async def connect(self):
        """Accept every incoming WebSocket connection."""
        await self.accept()

    async def disconnect(self, close_code):
        """No per-connection state is kept, so there is nothing to release."""
        pass

    async def receive(self, text_data=None, bytes_data=None):
        """Run a search for the ``message`` field of a JSON text frame.

        Args:
            text_data: JSON text of the frame; must decode to an object with a
                ``message`` key holding the query.
            bytes_data: Payload of a binary frame. Binary frames are not
                supported and are answered with an error message.

        Sends ``{"message": results}`` on success and ``{"error": ...}`` when
        the frame cannot be interpreted.
        """
        import asyncio

        try:
            query = json.loads(text_data)['message']
        except (TypeError, ValueError, KeyError):
            # Covers binary frames (text_data is None), invalid JSON, JSON
            # that is not an object, and objects without a 'message' key.
            await self.send(text_data=json.dumps({
                'error': "Expected a JSON object with a 'message' field."
            }))
            return

        # search() performs a blocking HTTP request and a FAISS lookup; run it
        # in the default executor so the event loop stays responsive.
        loop = asyncio.get_running_loop()
        results = await loop.run_in_executor(None, search, query, df, index)

        await self.send(text_data=json.dumps({
            'message': results
        }))

def search_view(request):
    """Return the search results for the ``query`` GET parameter as JSON.

    A missing parameter is treated as an empty string. The response body has
    the form ``{"results": [...]}``.
    """
    user_query = request.GET.get('query', '')
    return JsonResponse({'results': search(user_query, df, index)})
def search_view(request):
    """Answer a GET request with the search hits for its ``query`` parameter.

    The parameter defaults to an empty string when absent; the hits are
    wrapped as ``{"results": [...]}`` in a JSON response.
    """
    search_text = request.GET.get('query', '')
    payload = {'results': search(search_text, df, index)}
    return JsonResponse(payload)

0 comments on commit fd7d63b

Please sign in to comment.