Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable IRIS to use FAQs #187

Merged
merged 24 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2fe490b
Initial setup of faq ingestion and deletion
cremertim Dec 19, 2024
19c0d81
Initial setup of faq retrieval
cremertim Dec 25, 2024
933c045
Further faq retrieval.
cremertim Dec 26, 2024
54f5167
Working FAQ retrival
cremertim Dec 27, 2024
3b12cd6
Removed logging
cremertim Jan 6, 2025
1017477
Removed logging, added Links for FAQ answer, updated prompts
cremertim Jan 8, 2025
e8f611c
Added language
cremertim Jan 13, 2025
cfb1240
Merge branch 'main' into feature/faq/basic-faq-pipe
cremertim Jan 13, 2025
edfbfa2
Increased faq limit
cremertim Jan 13, 2025
ce2904a
Merge remote-tracking branch 'origin/feature/faq/basic-faq-pipe' into…
cremertim Jan 13, 2025
27abf69
Reformat
cremertim Jan 18, 2025
82a55c3
Fix coderabit
cremertim Jan 24, 2025
f1621ae
Fix docs
cremertim Jan 24, 2025
dcb3e15
Fixed the linter checks
cremertim Jan 27, 2025
bf52342
Fixed the linter checks
cremertim Jan 27, 2025
6163e6f
Refactored FAQ retrival pipeline to reduce code duplication
cremertim Jan 29, 2025
2fcdf4e
Refactored FAQ retrival pipeline to reduce code duplication
cremertim Jan 30, 2025
430b777
Refactored FAQ retrival pipeline to reduce code duplication
cremertim Jan 30, 2025
1242ef0
Merge branch 'main' into feature/faq/basic-faq-pipe
cremertim Jan 30, 2025
cf2feac
Remove unused import
cremertim Jan 30, 2025
dd49dc0
fix typo
cremertim Jan 30, 2025
a36dfcc
Merge branch 'main' into feature/faq/basic-faq-pipe
cremertim Jan 30, 2025
0921e1a
linter
cremertim Jan 30, 2025
1ca23bd
Merge remote-tracking branch 'origin/feature/faq/basic-faq-pipe' into…
cremertim Jan 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions app/common/PipelineEnum.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,6 @@ class PipelineEnum(str, Enum):
IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE"
IRIS_LECTURE_RETRIEVAL_PIPELINE = "IRIS_LECTURE_RETRIEVAL_PIPELINE"
IRIS_LECTURE_INGESTION = "IRIS_LECTURE_INGESTION"
IRIS_FAQ_INGESTION = "IRIS_FAQ_INGESTION"
IRIS_FAQ_RETRIEVAL_PIPELINE = "IRIS_FAQ_RETRIEVAL_PIPELINE"
NOT_SET = "NOT_SET"
10 changes: 10 additions & 0 deletions app/domain/data/faq_dto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from pydantic import BaseModel, Field


class FaqDTO(BaseModel):
    """Data transfer object for a single FAQ entry.

    Every field is populated from its camelCase alias. The identifiers and
    the question title/answer are required; the course name and description
    fall back to an empty string when absent.
    """

    # Required fields.
    faq_id: int = Field(..., alias="faqId")
    course_id: int = Field(..., alias="courseId")
    question_title: str = Field(..., alias="questionTitle")
    question_answer: str = Field(..., alias="questionAnswer")

    # Optional course context; defaults to "".
    course_name: str = Field("", alias="courseName")
    course_description: str = Field("", alias="courseDescription")
9 changes: 9 additions & 0 deletions app/domain/ingestion/deletionPipelineExecutionDto.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pydantic import Field

from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO
from app.domain.data.faq_dto import FaqDTO
from app.domain.data.lecture_unit_dto import LectureUnitDTO
from app.domain.status.stage_dto import StageDTO

Expand All @@ -13,3 +14,11 @@ class LecturesDeletionExecutionDto(PipelineExecutionDTO):
initial_stages: Optional[List[StageDTO]] = Field(
default=None, alias="initialStages"
)


class FaqDeletionExecutionDto(PipelineExecutionDTO):
    """Execution DTO carrying one FAQ plus optional settings and stages.

    Presumably the request payload of the FAQ deletion pipeline (inferred
    from the class name -- confirm against the caller).
    """

    # The FAQ itself; required, read from the "pyrisFaqWebhookDTO" key.
    faq: FaqDTO = Field(..., alias="pyrisFaqWebhookDTO")
    settings: Optional[PipelineExecutionSettingsDTO]
    # Read from the "initialStages" key; None when not supplied.
    initial_stages: Optional[List[StageDTO]] = Field(None, alias="initialStages")
9 changes: 9 additions & 0 deletions app/domain/ingestion/ingestion_pipeline_execution_dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pydantic import Field

from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO
from app.domain.data.faq_dto import FaqDTO
from app.domain.data.lecture_unit_dto import LectureUnitDTO
from app.domain.status.stage_dto import StageDTO

Expand All @@ -13,3 +14,11 @@ class IngestionPipelineExecutionDto(PipelineExecutionDTO):
initial_stages: Optional[List[StageDTO]] = Field(
default=None, alias="initialStages"
)


class FaqIngestionPipelineExecutionDto(PipelineExecutionDTO):
    """Execution DTO carrying one FAQ plus optional settings and stages.

    Presumably the request payload of the FAQ ingestion pipeline (inferred
    from the class name -- confirm against the caller).
    """

    # The FAQ itself; required, read from the "pyrisFaqWebhookDTO" key.
    faq: FaqDTO = Field(..., alias="pyrisFaqWebhookDTO")
    settings: Optional[PipelineExecutionSettingsDTO]
    # Read from the "initialStages" key; None when not supplied.
    initial_stages: Optional[List[StageDTO]] = Field(None, alias="initialStages")
57 changes: 51 additions & 6 deletions app/pipeline/chat/course_chat_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
InteractionSuggestionPipeline,
)
from .lecture_chat_pipeline import LectureChatPipeline
from ..shared.citation_pipeline import CitationPipeline
from ..shared.citation_pipeline import CitationPipeline, InformationType
from ..shared.utils import generate_structured_tools_from_functions
from ...common.message_converters import convert_iris_message_to_langchain_message
from ...common.pyris_message import PyrisMessage
Expand All @@ -42,8 +42,11 @@
)
from ...domain import CourseChatPipelineExecutionDTO
from app.common.PipelineEnum import PipelineEnum
from ...retrieval.faq_retrieval import FaqRetrieval
from ...retrieval.faq_retrieval_utils import should_allow_faq_tool, format_faqs
from ...retrieval.lecture_retrieval import LectureRetrieval
from ...vector_database.database import VectorDatabase
from ...vector_database.faq_schema import FaqSchema
from ...vector_database.lecture_schema import LectureSchema
from ...web.status.status_update import (
CourseChatStatusCallback,
Expand Down Expand Up @@ -81,6 +84,7 @@ class CourseChatPipeline(Pipeline):
variant: str
event: str | None
retrieved_paragraphs: List[dict] = None
retrieved_faqs: List[dict] = None

def __init__(
self,
Expand All @@ -100,19 +104,22 @@ def __init__(
requirements=RequirementList(
gpt_version_equivalent=4.5,
)
), completion_args=completion_args
),
completion_args=completion_args,
)
self.llm_small = IrisLangchainChatModel(
request_handler=CapabilityRequestHandler(
requirements=RequirementList(
gpt_version_equivalent=4.25,
)
), completion_args=completion_args
),
completion_args=completion_args,
)
self.callback = callback

self.db = VectorDatabase()
self.retriever = LectureRetrieval(self.db.client)
self.lecture_retriever = LectureRetrieval(self.db.client)
self.faq_retriever = FaqRetrieval(self.db.client)
self.suggestion_pipeline = InteractionSuggestionPipeline(variant="course")
self.citation_pipeline = CitationPipeline()

Expand Down Expand Up @@ -280,7 +287,7 @@ def lecture_content_retrieval() -> str:
Only use this once.
"""
self.callback.in_progress("Retrieving lecture content ...")
self.retrieved_paragraphs = self.retriever(
self.retrieved_paragraphs = self.lecture_retriever(
chat_history=history,
student_query=query.contents[0].text_content,
result_limit=5,
Expand All @@ -300,6 +307,31 @@ def lecture_content_retrieval() -> str:
result += lct
return result

def faq_content_retrieval() -> str:
    """
    Use this tool to retrieve information from indexed FAQs.
    It is suitable when no other tool fits, it is a common question or the question is frequently asked,
    or the question could be effectively answered by an FAQ. Also use this if the question is explicitly
    organizational and course-related. An organizational question about the course might be
    "What is the course structure?" or "How do I enroll?" or exam related content like "When is the exam".
    The tool performs a RAG retrieval based on the chat history to find the most relevant FAQs.
    Each FAQ follows this format: FAQ ID, FAQ Question, FAQ Answer.
    Respond to the query concisely and solely using the answer from the relevant FAQs.
    This tool should only be used once per query.
    """
    # NOTE(review): the docstring above is kept word-for-word on purpose; it is
    # presumably handed to the agent as the tool description -- confirm before editing.
    self.callback.in_progress("Retrieving faq content ...")

    # The text of the first content item of the query is used as the search query.
    student_question = query.contents[0].text_content

    # Keep the raw hits on the pipeline instance as well as returning them formatted.
    self.retrieved_faqs = self.faq_retriever(
        chat_history=history,
        student_query=student_question,
        result_limit=10,
        course_name=dto.course.name,
        course_id=dto.course.id,
        base_url=dto.settings.artemis_base_url,
    )
    return format_faqs(self.retrieved_faqs)

if dto.user.id % 3 < 2:
iris_initial_system_prompt = tell_iris_initial_system_prompt
begin_agent_prompt = tell_begin_agent_prompt
Expand Down Expand Up @@ -398,6 +430,9 @@ def lecture_content_retrieval() -> str:
if self.should_allow_lecture_tool(dto.course.id):
tool_list.append(lecture_content_retrieval)

if should_allow_faq_tool(self.db, dto.course.id):
tool_list.append(faq_content_retrieval)

tools = generate_structured_tools_from_functions(tool_list)
# No idea why we need this extra contrary to exercise chat agent in this case, but solves the issue.
params.update({"tools": tools})
Expand All @@ -418,9 +453,19 @@ def lecture_content_retrieval() -> str:

if self.retrieved_paragraphs:
self.callback.in_progress("Augmenting response ...")
out = self.citation_pipeline(self.retrieved_paragraphs, out)
out = self.citation_pipeline(
self.retrieved_paragraphs, out, InformationType.PARAGRAPHS
)
self.tokens.extend(self.citation_pipeline.tokens)

if self.retrieved_faqs:
self.callback.in_progress("Augmenting response ...")
out = self.citation_pipeline(
self.retrieved_faqs,
out,
InformationType.FAQS,
base_url=dto.settings.artemis_base_url,
)
self.callback.done("Response created", final_result=out, tokens=self.tokens)

# try:
Expand Down
42 changes: 39 additions & 3 deletions app/pipeline/chat/exercise_chat_agent_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@
from ...llm import CapabilityRequestHandler, RequirementList
from ...llm import CompletionArguments
from ...llm.langchain import IrisLangchainChatModel
from ...retrieval.faq_retrieval import FaqRetrieval
from ...retrieval.faq_retrieval_utils import should_allow_faq_tool, format_faqs
from ...retrieval.lecture_retrieval import LectureRetrieval
from ...vector_database.database import VectorDatabase
from ...vector_database.faq_schema import FaqSchema
cremertim marked this conversation as resolved.
Show resolved Hide resolved
from ...vector_database.lecture_schema import LectureSchema
from weaviate.collections.classes.filters import Filter
from ...web.status.status_update import ExerciseChatStatusCallback
Expand Down Expand Up @@ -103,6 +106,7 @@ class ExerciseChatAgentPipeline(Pipeline):
prompt: ChatPromptTemplate
variant: str
event: str | None
retrieved_faqs: List[dict] = None

def __init__(
self,
Expand Down Expand Up @@ -136,7 +140,8 @@ def __init__(
# Create the pipelines
self.db = VectorDatabase()
self.suggestion_pipeline = InteractionSuggestionPipeline(variant="exercise")
self.retriever = LectureRetrieval(self.db.client)
self.lecture_retriever = LectureRetrieval(self.db.client)
self.faq_retriever = FaqRetrieval(self.db.client)
self.reranker_pipeline = RerankerPipeline()
self.code_feedback_pipeline = CodeFeedbackPipeline()
self.pipeline = self.llm_big | JsonOutputParser()
Expand Down Expand Up @@ -373,7 +378,7 @@ def lecture_content_retrieval() -> str:
Only use this once.
"""
self.callback.in_progress("Retrieving lecture content ...")
self.retrieved_paragraphs = self.retriever(
self.retrieved_paragraphs = self.lectureRetriever(
cremertim marked this conversation as resolved.
Show resolved Hide resolved
chat_history=chat_history,
student_query=query.contents[0].text_content,
result_limit=5,
Expand All @@ -393,6 +398,31 @@ def lecture_content_retrieval() -> str:
result += lct
return result

def faq_content_retrieval() -> str:
    """
    Use this tool to retrieve information from indexed FAQs.
    It is suitable when no other tool fits, it is a common question or the question is frequently asked,
    or the question could be effectively answered by an FAQ. Also use this if the question is explicitly
    organizational and course-related. An organizational question about the course might be
    "What is the course structure?" or "How do I enroll?" or exam related content like "When is the exam".
    The tool performs a RAG retrieval based on the chat history to find the most relevant FAQs.
    Each FAQ follows this format: FAQ ID, FAQ Question, FAQ Answer.
    Respond to the query concisely and solely using the answer from the relevant FAQs.
    This tool should only be used once per query.
    """
    # NOTE(review): the docstring above is kept word-for-word on purpose; it is
    # presumably handed to the agent as the tool description -- confirm before editing.
    self.callback.in_progress("Retrieving faq content ...")

    # The text of the first content item of the query is used as the search query.
    student_question = query.contents[0].text_content

    # Keep the raw hits on the pipeline instance as well as returning them formatted.
    self.retrieved_faqs = self.faq_retriever(
        chat_history=chat_history,
        student_query=student_question,
        result_limit=10,
        course_name=dto.course.name,
        course_id=dto.course.id,
        base_url=dto.settings.artemis_base_url,
    )
    return format_faqs(self.retrieved_faqs)

iris_initial_system_prompt = tell_iris_initial_system_prompt
chat_history_exists_prompt = tell_chat_history_exists_prompt
no_chat_history_prompt = tell_no_chat_history_prompt
Expand Down Expand Up @@ -511,6 +541,10 @@ def lecture_content_retrieval() -> str:
]
if self.should_allow_lecture_tool(dto.course.id):
tool_list.append(lecture_content_retrieval)

if should_allow_faq_tool(self.db, dto.course.id):
tool_list.append(faq_content_retrieval)

tools = generate_structured_tools_from_functions(tool_list)
agent = create_tool_calling_agent(
llm=self.llm_big, tools=tools, prompt=self.prompt
Expand All @@ -533,7 +567,9 @@ def lecture_content_retrieval() -> str:
]
)

guide_response = (self.prompt | self.llm_small | StrOutputParser()).invoke(
guide_response = (
self.prompt | self.llm_small | StrOutputParser()
).invoke(
{
"response": out,
}
Expand Down
Loading