From a133bd2e41b0a9306acc1ff9e1d1b0baf3c51214 Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Sat, 27 Apr 2024 14:48:18 -0400 Subject: [PATCH 01/18] Remove launch.json --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7303af34..63460ddb 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ hello_milvus.py temp/ *.trace examples/private +launch.json # Byte-compiled / optimized / DLL files __pycache__/ From 2f7ec70df83dffded665ef6e6c96c305fe6fa00e Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Wed, 15 May 2024 17:20:46 -0400 Subject: [PATCH 02/18] chore: Refactor SlackReceiver to handle channel events and join new channels --- examples/anthropic_bedrock.yaml | 2 +- .../inputs_outputs/slack_input.py | 105 +++++++++++++++++- 2 files changed, 103 insertions(+), 4 deletions(-) diff --git a/examples/anthropic_bedrock.yaml b/examples/anthropic_bedrock.yaml index cf76426a..ad1823dc 100644 --- a/examples/anthropic_bedrock.yaml +++ b/examples/anthropic_bedrock.yaml @@ -53,7 +53,7 @@ flows: payload_encoding: utf-8 payload_format: text - - component_name: azure_openai_llm + - component_name: llm component_module: langchain_chat_model # Anthropic Claude2 in AWS Bedrock component_config: diff --git a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py index 92accbd5..b403caea 100644 --- a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py +++ b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py @@ -49,6 +49,20 @@ "default": 20, "required": False, }, + { + "name": "listen_to_channels", + "type": "boolean", + "description": "Whether to listen to channels or not. Default: False", + "default": False, + "required": False, + }, + { + "name": "send_history_on_join", + "type": "boolean", + "description": "Send history on join. 
Default: False", + "default": False, + "required": False, + }, ], "output_schema": { "type": "object", @@ -142,6 +156,8 @@ def init_slack_receiver(self): stop_event=self.stop_receiver_event, max_file_size=self.get_config("max_file_size"), max_total_file_size=self.get_config("max_total_file_size"), + listen_to_channels=self.get_config("listen_to_channels"), + send_history_on_join=self.get_config("send_history_on_join"), ) self.slack_receiver.start() @@ -171,6 +187,8 @@ def __init__( stop_event, max_file_size=20, max_total_file_size=20, + listen_to_channels=False, + send_history_on_join=False, ): threading.Thread.__init__(self) self.app = app @@ -180,13 +198,24 @@ def __init__( self.stop_event = stop_event self.max_file_size = max_file_size self.max_total_file_size = max_total_file_size + self.listen_to_channels = listen_to_channels + self.send_history_on_join = send_history_on_join self.register_handlers() def run(self): SocketModeHandler(self.app, self.slack_app_token).connect() self.stop_event.wait() - def handle_event(self, event): + def handle_channel_event(self, event): + # For now, just do the normal handling + channel_name = (self.get_channel_name(event.get("channel")),) + + self.handle_event(event, channel_name) + + def handle_group_event(self, event): + log.info("Received a private group event. 
Ignoring.") + + def handle_event(self, event, channel_name=None): files = [] total_file_size = 0 if "files" in event: @@ -228,6 +257,7 @@ def handle_event(self, event): "client_msg_id": event.get("client_msg_id"), "ts": event.get("thread_ts") or event.get("ts"), "channel": event.get("channel"), + "channel_name": channel_name or "", "subtype": event.get("subtype"), "event_ts": event.get("event_ts"), "channel_type": event.get("channel_type"), @@ -256,7 +286,7 @@ def download_file_as_base64_string(self, file_url): def get_user_email(self, user_id): response = self.app.client.users_info(user=user_id) - return response["user"]["profile"]["email"] + return response["user"]["profile"].get("email", "") def process_text_for_mentions(self, text): mention_emails = [] @@ -278,11 +308,80 @@ def process_text_for_mentions(self, text): ) return text, mention_emails + def get_channel_name(self, channel_id): + response = self.app.client.conversations_info(channel=channel_id) + return response["channel"].get("name") + + def get_channel_history(self, channel_id): + response = self.app.client.conversations_history(channel=channel_id) + + # Go through the messages and remove any that have a sub_type + messages = [] + for message in response["messages"]: + if "subtype" not in message and "text" in message: + payload = { + "text": message.get("text"), + "user_email": self.get_user_email(message.get("user")), + "mentions": [], + "type": message.get("type"), + "client_msg_id": message.get("client_msg_id"), + "ts": message.get("ts"), + "channel": channel_id, + "subtype": message.get("subtype"), + "user_id": message.get("user"), + } + messages.append(message) + + return messages + + def handle_new_channel_join(self, event): + """We have been added to a new channel. 
This will get all the history and send it to the input queue.""" + history = self.get_channel_history(event.get("channel")) + payload = { + "text": "New channel joined", + "user_email": "", + "mentions": [], + "type": "channel_join", + "client_msg_id": "", + "ts": "", + "channel": event.get("channel"), + "subtype": "channel_join", + "event_ts": "", + "channel_type": "channel", + "channel_name": self.get_channel_name(event.get("channel")), + "user_id": "", + "history": history, + } + user_properties = { + "type": "channel_join", + "channel": event.get("channel"), + "subtype": "channel_join", + "channel_type": "channel", + } + message = Message(payload=payload, user_properties=user_properties) + message.set_previous(payload) + self.input_queue.put(message) + def register_handlers(self): @self.app.event("message") def handle_chat_message(event): - self.handle_event(event) + print("Got message event: ", event, event.get("channel_type")) + if event.get("channel_type") == "im": + self.handle_event(event) + elif event.get("channel_type") == "channel": + self.handle_channel_event(event) + elif event.get("channel_type") == "group": + self.handle_group_event(event) @self.app.event("app_mention") def handle_app_mention(event): + print("Got app_mention event: ", event) self.handle_event(event) + + @self.app.event("member_joined_channel") + def handle_member_joined_channel(event, say, context): + if ( + self.send_history_on_join + and event.get("user") == context["bot_user_id"] + ): + self.handle_new_channel_join(event) From d852259701cfb826d9383b40a607f3f5e5a7e4dc Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Thu, 23 May 2024 14:31:25 -0400 Subject: [PATCH 03/18] * Add ability for a component to send a message directly to a named flow * Add ability to stream partial LLM responses to a named flow * Add a configurable message that is sent to the slack channel that sent a message to slack_input * Add the ability to clear the ack message and stream partial results to 
that ack message --- src/solace_ai_event_connector/flow/flow.py | 9 +++ .../flow_components/component_base.py | 5 ++ .../general/langchain/langchain_chat_model.py | 4 +- .../langchain/langchain_chat_model_base.py | 6 +- .../langchain_chat_model_with_history.py | 64 ++++++++++++++- .../inputs_outputs/slack_input.py | 79 ++++++++++++++++--- .../inputs_outputs/slack_output.py | 69 +++++++++++----- .../solace_ai_event_connector.py | 12 +++ 8 files changed, 210 insertions(+), 38 deletions(-) diff --git a/src/solace_ai_event_connector/flow/flow.py b/src/solace_ai_event_connector/flow/flow.py index 1663e679..66c01307 100644 --- a/src/solace_ai_event_connector/flow/flow.py +++ b/src/solace_ai_event_connector/flow/flow.py @@ -15,6 +15,7 @@ def __init__( storage_manager=None, trace_queue=None, flow_instance_index=0, + connector=None, ): self.flow_config = flow_config self.flow_index = flow_index @@ -27,6 +28,8 @@ def __init__( self.storage_manager = storage_manager self.trace_queue = trace_queue self.flow_instance_index = flow_instance_index + self.connector = connector + self.flow_input_queue = None self.threads = [] self.create_components() @@ -47,6 +50,8 @@ def create_components(self): thread = component.create_thread_and_run() self.threads.append(thread) + self.flow_input_queue = self.component_groups[0][0].get_input_queue() + def create_component_group(self, component, index): component_module = component.get("component_module", "") base_path = component.get("component_base_path", None) @@ -82,6 +87,7 @@ def create_component_group(self, component, index): instance_name=self.instance_name, storage_manager=self.storage_manager, trace_queue=self.trace_queue, + connector=self.connector, ) sibling_component = component_instance @@ -91,6 +97,9 @@ def create_component_group(self, component, index): # Add the component to the list self.component_groups.append(component_group) + def get_flow_input_queue(self): + return self.flow_input_queue + def wait_for_threads(self): for 
thread in self.threads: thread.join() diff --git a/src/solace_ai_event_connector/flow_components/component_base.py b/src/solace_ai_event_connector/flow_components/component_base.py index 42056df0..4d1a95d5 100644 --- a/src/solace_ai_event_connector/flow_components/component_base.py +++ b/src/solace_ai_event_connector/flow_components/component_base.py @@ -30,6 +30,7 @@ def __init__(self, **kwargs): self.instance_name = kwargs.pop("instance_name", None) self.storage_manager = kwargs.pop("storage_manager", None) self.trace_queue = kwargs.pop("trace_queue", False) + self.connector = kwargs.pop("connector", None) self.component_config = self.config.get("component_config") or {} self.name = self.config.get("component_name", "") @@ -226,6 +227,10 @@ def send_message(self, message): return self.next_component.enqueue(message) + def send_to_flow(self, flow_name, message): + if self.connector: + self.connector.send_message_to_flow(flow_name, message) + def enqueue(self, message): # Add the message to the input queue do_loop = True diff --git a/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model.py b/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model.py index 5823ddb4..1c2ed63e 100644 --- a/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model.py +++ b/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model.py @@ -13,5 +13,7 @@ class LangChainChatModel(LangChainChatModelBase): - def invoke_model(self, messages, session_id=None, clear_history=False): + def invoke_model( + self, input_message, messages, session_id=None, clear_history=False + ): return self.component.invoke(messages) diff --git a/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_base.py b/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_base.py index 05fa5096..c3753d2d 100644 --- 
a/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_base.py +++ b/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_base.py @@ -112,7 +112,7 @@ def invoke(self, message, data): clear_history = data.get("clear_history", False) llm_res = self.invoke_model( - messages, session_id=session_id, clear_history=clear_history + message, messages, session_id=session_id, clear_history=clear_history ) res_format = self.get_config("llm_response_format", "text") @@ -134,5 +134,7 @@ def invoke(self, message, data): return llm_res.content @abstractmethod - def invoke_model(self, messages, session_id=None, clear_history=False): + def invoke_model( + self, input_message, messages, session_id=None, clear_history=False + ): pass diff --git a/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_with_history.py b/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_with_history.py index f29ffc6d..cc7afa5f 100644 --- a/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_with_history.py +++ b/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_with_history.py @@ -1,6 +1,7 @@ """A chat model based on LangChain that includes keeping per-session history of the conversation.""" import threading +from collections import namedtuple from langchain_core.chat_history import BaseChatMessageHistory from langchain_core.runnables.history import RunnableWithMessageHistory @@ -12,6 +13,7 @@ SystemMessage, ) +from solace_ai_event_connector.common.message import Message from solace_ai_event_connector.flow_components.general.langchain.langchain_chat_model_base import ( LangChainChatModelBase, info_base, @@ -60,6 +62,23 @@ "description": "The configuration for the history class.", "type": "object", }, + { + "name": "stream_to_flow", + "required": False, + "description": "Name the flow to stream the output to - 
this must be configured for llm_mode='stream'.", + "default": "", + }, + { + "name": "llm_mode", + "required": False, + "description": "The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response.", + }, + { + "name": "stream_batch_size", + "required": False, + "description": "The minimum number of words in a single streaming result. Default: 10.", + "default": 10, + }, ] ) info["input_schema"]["properties"]["session_id"] = { @@ -82,8 +101,13 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.history_max_turns = self.get_config("history_max_turns", 20) self.history_max_tokens = self.get_config("history_max_tokens", 8000) + self.stream_to_flow = self.get_config("stream_to_flow", "") + self.llm_mode = self.get_config("llm_mode", "none") + self.stream_batch_size = self.get_config("stream_batch_size", 10) - def invoke_model(self, messages, session_id=None, clear_history=False): + def invoke_model( + self, input_message, messages, session_id=None, clear_history=False + ): if clear_history: self.clear_history(session_id) @@ -117,12 +141,48 @@ def invoke_model(self, messages, session_id=None, clear_history=False): history_messages_key="chat_history", ) - return runnable.invoke( + if self.llm_mode == "none": + return runnable.invoke( + {"input": human_message}, + config={ + "configurable": {"session_id": session_id}, + }, + ) + + aggregate_result = "" + current_batch = "" + for chunk in runnable.stream( {"input": human_message}, config={ "configurable": {"session_id": session_id}, }, + ): + # print(f"Streaming chunk: {chunk.content}") + aggregate_result += chunk.content + current_batch += chunk.content + if len(current_batch.split()) >= self.stream_batch_size: + if self.stream_to_flow: + self.send_streaming_message( + input_message, current_batch, aggregate_result + ) + current_batch = "" + + if current_batch: + if self.stream_to_flow: + self.send_streaming_message( + 
input_message, current_batch, aggregate_result + ) + + result = namedtuple("Result", ["content"])(aggregate_result) + + return result + + def send_streaming_message(self, input_message, chunk, aggregate_result): + message = Message( + payload={"chunk": chunk, "aggregate_result": aggregate_result}, + user_properties=input_message.get_user_properties(), ) + self.send_to_flow(self.stream_to_flow, message) def create_history(self): diff --git a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py index b403caea..f520042e 100644 --- a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py +++ b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py @@ -63,6 +63,12 @@ "default": False, "required": False, }, + { + "name": "acknowledgement_message", + "type": "string", + "description": "The message to send to acknowledge the user's message has been received.", + "required": False, + }, ], "output_schema": { "type": "object", @@ -158,6 +164,7 @@ def init_slack_receiver(self): max_total_file_size=self.get_config("max_total_file_size"), listen_to_channels=self.get_config("listen_to_channels"), send_history_on_join=self.get_config("send_history_on_join"), + acknowledgement_message=self.get_config("acknowledgement_message"), ) self.slack_receiver.start() @@ -189,6 +196,7 @@ def __init__( max_total_file_size=20, listen_to_channels=False, send_history_on_join=False, + acknowledgement_message=None, ): threading.Thread.__init__(self) self.app = app @@ -200,6 +208,7 @@ def __init__( self.max_total_file_size = max_total_file_size self.listen_to_channels = listen_to_channels self.send_history_on_join = send_history_on_join + self.acknowledgement_message = acknowledgement_message self.register_handlers() def run(self): @@ -208,14 +217,15 @@ def run(self): def handle_channel_event(self, event): # For now, just do the normal handling - 
channel_name = (self.get_channel_name(event.get("channel")),) + channel_name = self.get_channel_name(event.get("channel")) + event["channel_name"] = channel_name - self.handle_event(event, channel_name) + self.handle_event(event) def handle_group_event(self, event): log.info("Received a private group event. Ignoring.") - def handle_event(self, event, channel_name=None): + def handle_event(self, event): files = [] total_file_size = 0 if "files" in event: @@ -246,18 +256,30 @@ def handle_event(self, event, channel_name=None): } ) + team_domain = None + try: + permalink = self.app.client.chat_getPermalink( + channel=event["channel"], message_ts=event["event_ts"] + ) + team_domain = permalink.get("permalink", "").split("//")[1] + team_domain = team_domain.split(".")[0] + except Exception as e: + log.error("Error getting team domain: %s", e) + user_email = self.get_user_email(event["user"]) (text, mention_emails) = self.process_text_for_mentions(event["text"]) payload = { "text": text, "files": files, "user_email": user_email, + "team_id": event.get("team"), + "team_domain": team_domain, "mentions": mention_emails, "type": event.get("type"), "client_msg_id": event.get("client_msg_id"), - "ts": event.get("thread_ts") or event.get("ts"), + "ts": event.get("thread_ts"), "channel": event.get("channel"), - "channel_name": channel_name or "", + "channel_name": event.get("channel_name", ""), "subtype": event.get("subtype"), "event_ts": event.get("event_ts"), "channel_type": event.get("channel_type"), @@ -265,15 +287,25 @@ def handle_event(self, event, channel_name=None): } user_properties = { "user_email": user_email, + "team_id": event.get("team"), "type": event.get("type"), "client_msg_id": event.get("client_msg_id"), - "ts": event.get("thread_ts") or event.get("ts"), + "ts": event.get("thread_ts"), "channel": event.get("channel"), "subtype": event.get("subtype"), "event_ts": event.get("event_ts"), "channel_type": event.get("channel_type"), "user_id": event.get("user"), } + 
+ if self.acknowledgement_message: + ack_msg_ts = self.app.client.chat_postMessage( + channel=event["channel"], + text=self.acknowledgement_message, + thread_ts=event.get("thread_ts"), + ).get("ts") + user_properties["ack_msg_ts"] = ack_msg_ts + message = Message(payload=payload, user_properties=user_properties) message.set_previous(payload) self.input_queue.put(message) @@ -286,7 +318,7 @@ def download_file_as_base64_string(self, file_url): def get_user_email(self, user_id): response = self.app.client.users_info(user=user_id) - return response["user"]["profile"].get("email", "") + return response["user"]["profile"].get("email", user_id) def process_text_for_mentions(self, text): mention_emails = [] @@ -312,31 +344,52 @@ def get_channel_name(self, channel_id): response = self.app.client.conversations_info(channel=channel_id) return response["channel"].get("name") - def get_channel_history(self, channel_id): + def get_channel_history(self, channel_id, team_id): response = self.app.client.conversations_history(channel=channel_id) + # First search through messages to get all their replies + messages_to_add = [] + for message in response["messages"]: + if "subtype" not in message and "text" in message: + if "reply_count" in message: + # Get the replies + replies = self.app.client.conversations_replies( + channel=channel_id, ts=message.get("ts") + ) + messages_to_add.extend(replies["messages"]) + + response["messages"].extend(messages_to_add) + # Go through the messages and remove any that have a sub_type messages = [] + emails = {} for message in response["messages"]: if "subtype" not in message and "text" in message: + if message.get("user") not in emails: + emails[message.get("user")] = self.get_user_email( + message.get("user") + ) payload = { "text": message.get("text"), - "user_email": self.get_user_email(message.get("user")), + "team_id": team_id, + "user_email": emails[message.get("user")], "mentions": [], "type": message.get("type"), - "client_msg_id": 
message.get("client_msg_id"), + "client_msg_id": message.get("client_msg_id") or message.get("ts"), "ts": message.get("ts"), + "event_ts": message.get("event_ts") or message.get("ts"), "channel": channel_id, "subtype": message.get("subtype"), "user_id": message.get("user"), + "message_id": message.get("client_msg_id"), } - messages.append(message) + messages.append(payload) return messages def handle_new_channel_join(self, event): """We have been added to a new channel. This will get all the history and send it to the input queue.""" - history = self.get_channel_history(event.get("channel")) + history = self.get_channel_history(event.get("channel"), event.get("team")) payload = { "text": "New channel joined", "user_email": "", @@ -376,6 +429,8 @@ def handle_chat_message(event): @self.app.event("app_mention") def handle_app_mention(event): print("Got app_mention event: ", event) + event["channel_type"] = "im" + event["channel_name"] = self.get_channel_name(event.get("channel")) self.handle_event(event) @self.app.event("member_joined_channel") diff --git a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_output.py b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_output.py index f4c70d0d..f9442392 100644 --- a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_output.py +++ b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_output.py @@ -115,39 +115,66 @@ def invoke(self, message, data): message_info = data.get("message_info") content = data.get("content") text = content.get("text") + stream = content.get("stream") channel = message_info.get("channel") thread_ts = message_info.get("ts") + ack_msg_ts = message_info.get("ack_msg_ts") + return { "channel": channel, "text": text, "files": content.get("files"), "thread_ts": thread_ts, + "ack_msg_ts": ack_msg_ts, + "stream": stream, } def send_message(self, message): - channel = message.get_data("previous:channel") - messages = 
message.get_data("previous:text") - files = message.get_data("previous:files") or [] - thread_ts = message.get_data("previous:ts") + try: + channel = message.get_data("previous:channel") + messages = message.get_data("previous:text") + stream = message.get_data("previous:stream") + files = message.get_data("previous:files") or [] + thread_ts = message.get_data("previous:ts") + ack_msg_ts = message.get_data("previous:ack_msg_ts") - if not isinstance(messages, list): - if messages is not None: - messages = [messages] - else: - messages = [] + if not isinstance(messages, list): + if messages is not None: + messages = [messages] + else: + messages = [] - for text in messages: - self.app.client.chat_postMessage( - channel=channel, text=text, thread_ts=thread_ts - ) + for text in messages: + if stream: + if ack_msg_ts: + try: + self.app.client.chat_update( + channel=channel, ts=ack_msg_ts, text=text + ) + except Exception: + # It is normal to possibly get an update after the final message has already + # arrived and deleted the ack message + pass + else: + self.app.client.chat_postMessage( + channel=channel, text=text, thread_ts=thread_ts + ) - for file in files: - file_content = base64.b64decode(file["content"]) - self.app.client.files_upload_v2( - channel=channel, - file=file_content, - thread_ts=thread_ts, - filename=file["name"], - ) + for file in files: + file_content = base64.b64decode(file["content"]) + self.app.client.files_upload_v2( + channel=channel, + file=file_content, + thread_ts=thread_ts, + filename=file["name"], + ) + except Exception as e: + log.error(f"Error sending slack message: {e}") super().send_message(message) + + try: + if ack_msg_ts and not stream: + self.app.client.chat_delete(channel=channel, ts=ack_msg_ts) + except Exception: + pass diff --git a/src/solace_ai_event_connector/solace_ai_event_connector.py b/src/solace_ai_event_connector/solace_ai_event_connector.py index 815b0974..00e2210b 100644 --- 
a/src/solace_ai_event_connector/solace_ai_event_connector.py +++ b/src/solace_ai_event_connector/solace_ai_event_connector.py @@ -18,6 +18,7 @@ def __init__(self, config, event_handlers=None, error_queue=None): self.flows = [] self.trace_queue = None self.trace_thread = None + self.flow_input_queues = {} self.stop_signal = threading.Event() self.event_handlers = event_handlers or {} self.error_queue = error_queue if error_queue else queue.Queue() @@ -49,6 +50,8 @@ def create_flows(self): num_instances = 1 for i in range(num_instances): flow_instance = self.create_flow(flow, index, i) + flow_input_queue = flow_instance.get_flow_input_queue() + self.flow_input_queues[flow.get("name")] = flow_input_queue self.flows.append(flow_instance) def create_flow(self, flow: dict, index: int, flow_instance_index: int): @@ -63,8 +66,17 @@ def create_flow(self, flow: dict, index: int, flow_instance_index: int): instance_name=self.instance_name, storage_manager=self.storage_manager, trace_queue=self.trace_queue, + connector=self, ) + def send_message_to_flow(self, flow_name, message): + """Send a message to a flow""" + flow_input_queue = self.flow_input_queues.get(flow_name) + if flow_input_queue: + flow_input_queue.put(message) + else: + log.error("Can't send message to flow %s. 
Not found", flow_name) + def wait_for_flows(self): """Wait for the flows to finish""" while True: From dd5b7f9ef884f8e5eb3d06ca5e30c748a46dcf3c Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Mon, 27 May 2024 10:10:10 -0400 Subject: [PATCH 04/18] feat: Update default stream_batch_size to 15 in LangChainChatModelWithHistory - also started on converting absolute paths for imports to relative ones --- fix_abs_paths.py | 29 +++++++++++++++++++ .../langchain_chat_model_with_history.py | 6 ++-- .../inputs_outputs/broker_base.py | 8 ++--- .../inputs_outputs/broker_input.py | 8 ++--- .../inputs_outputs/slack_base.py | 2 +- .../inputs_outputs/slack_input.py | 8 ++--- .../inputs_outputs/slack_output.py | 8 ++--- 7 files changed, 44 insertions(+), 25 deletions(-) create mode 100644 fix_abs_paths.py diff --git a/fix_abs_paths.py b/fix_abs_paths.py new file mode 100644 index 00000000..c9871ce6 --- /dev/null +++ b/fix_abs_paths.py @@ -0,0 +1,29 @@ +import os + + +def convert_imports(root_dir): + for dirpath, _, filenames in os.walk(root_dir): + for filename in filenames: + if filename.endswith(".py"): + file_path = os.path.join(dirpath, filename) + with open(file_path, "r") as file: + lines = file.readlines() + + with open(file_path, "w") as file: + for line in lines: + if line.startswith("from solace_ai_event_connector"): + # Calculate the relative import path + relative_path = os.path.relpath(dirpath, root_dir) + depth = len(relative_path.split(os.sep)) + relative_import = ( + "." 
* depth + + line[len("from solace_ai_event_connector") :] + ) + file.write(f"from {relative_import}") + else: + file.write(line) + + +if __name__ == "__main__": + root_directory = "solace_ai_event_connector" + convert_imports(root_directory) diff --git a/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_with_history.py b/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_with_history.py index cc7afa5f..9a677fea 100644 --- a/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_with_history.py +++ b/src/solace_ai_event_connector/flow_components/general/langchain/langchain_chat_model_with_history.py @@ -76,8 +76,8 @@ { "name": "stream_batch_size", "required": False, - "description": "The minimum number of words in a single streaming result. Default: 10.", - "default": 10, + "description": "The minimum number of words in a single streaming result. Default: 15.", + "default": 15, }, ] ) @@ -103,7 +103,7 @@ def __init__(self, **kwargs): self.history_max_tokens = self.get_config("history_max_tokens", 8000) self.stream_to_flow = self.get_config("stream_to_flow", "") self.llm_mode = self.get_config("llm_mode", "none") - self.stream_batch_size = self.get_config("stream_batch_size", 10) + self.stream_batch_size = self.get_config("stream_batch_size", 15) def invoke_model( self, input_message, messages, session_id=None, clear_history=False diff --git a/src/solace_ai_event_connector/flow_components/inputs_outputs/broker_base.py b/src/solace_ai_event_connector/flow_components/inputs_outputs/broker_base.py index 7904361f..9ee67eb5 100644 --- a/src/solace_ai_event_connector/flow_components/inputs_outputs/broker_base.py +++ b/src/solace_ai_event_connector/flow_components/inputs_outputs/broker_base.py @@ -3,11 +3,9 @@ from abc import abstractmethod # from solace_ai_event_connector.common.log import log -from solace_ai_event_connector.common.message import Message -from 
solace_ai_event_connector.flow_components.component_base import ComponentBase -from solace_ai_event_connector.common.messaging.messaging_builder import ( - MessagingServiceBuilder, -) +from ..component_base import ComponentBase +from ...common.message import Message +from ...common.messaging.messaging_builder import MessagingServiceBuilder # TBD - at the moment, there is no connection sharing supported. It should be possible # to share a connection between multiple components and even flows. The changes diff --git a/src/solace_ai_event_connector/flow_components/inputs_outputs/broker_input.py b/src/solace_ai_event_connector/flow_components/inputs_outputs/broker_input.py index 158e6996..17a04e86 100644 --- a/src/solace_ai_event_connector/flow_components/inputs_outputs/broker_input.py +++ b/src/solace_ai_event_connector/flow_components/inputs_outputs/broker_input.py @@ -5,11 +5,9 @@ import json import yaml # pylint: disable=import-error -from solace_ai_event_connector.common.log import log -from solace_ai_event_connector.flow_components.inputs_outputs.broker_base import ( - BrokerBase, -) -from solace_ai_event_connector.common.message import Message +from ...common.log import log +from .broker_base import BrokerBase +from ...common.message import Message info = { "class_name": "BrokerInput", diff --git a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_base.py b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_base.py index cbf25047..4f4f68e7 100644 --- a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_base.py +++ b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_base.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from slack_bolt import App # pylint: disable=import-error -from solace_ai_event_connector.flow_components.component_base import ComponentBase +from ..component_base import ComponentBase class SlackBase(ComponentBase, ABC): diff --git 
a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py index f520042e..98a8054a 100644 --- a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py +++ b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_input.py @@ -5,11 +5,9 @@ from slack_bolt.adapter.socket_mode import SocketModeHandler -from solace_ai_event_connector.flow_components.inputs_outputs.slack_base import ( - SlackBase, -) -from solace_ai_event_connector.common.message import Message -from solace_ai_event_connector.common.log import log +from .slack_base import SlackBase +from ...common.message import Message +from ...common.log import log info = { diff --git a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_output.py b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_output.py index f9442392..ce12cee3 100644 --- a/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_output.py +++ b/src/solace_ai_event_connector/flow_components/inputs_outputs/slack_output.py @@ -1,12 +1,8 @@ import base64 -from copy import deepcopy -from solace_ai_event_connector.flow_components.inputs_outputs.slack_base import ( - SlackBase, -) -from solace_ai_event_connector.common.message import Message -from solace_ai_event_connector.common.log import log +from .slack_base import SlackBase +from ...common.log import log info = { From 261d095b5b69157183f56989f079ee113c55f04a Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Wed, 5 Jun 2024 09:16:04 -0400 Subject: [PATCH 05/18] Update import statement in main.py The import statement in `main.py` was updated to remove the relative path and use an absolute import for `SolaceAiConnector` from `solace_ai_connector.solace_ai_connector`. This change improves the code organization and ensures proper module resolution. 
--- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index e6f11adb..e58d2f2d 100644 --- a/src/main.py +++ b/src/main.py @@ -1,7 +1,7 @@ import os import sys import yaml -from .solace_ai_connector.solace_ai_connector import SolaceAiConnector +from solace_ai_connector.solace_ai_connector import SolaceAiConnector def load_config(file): From 002e9b98ae5536f89be493c8d45873538103481e Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Thu, 6 Jun 2024 14:23:42 -0400 Subject: [PATCH 06/18] Another major reorganization of directory structure to make it more sensible. This is part of a cleanup before releasing to Pypi --- fix_abs_paths.py | 51 - prompts.txt | 1179 ----------------- prompts.yaml | 621 --------- pyproject.toml | 11 +- requirements.txt | 90 +- requirements.txt.old | 86 ++ src/__init__.py | 1 - src/solace_ai_connector/__init__.py | 3 + src/solace_ai_connector/common/utils.py | 8 +- .../components/__init__.py | 69 + .../component_base.py | 6 +- .../general}/__init__.py | 0 .../general/aggregate.py | 2 +- .../general/delay.py | 3 + .../general/for_testing}/__init__.py | 0 .../general/for_testing/fail.py | 3 + .../general/for_testing/give_ack_output.py | 3 + .../general/for_testing/need_ack_input.py | 2 +- .../general/for_testing/storage_tester.py | 3 + .../general/iterate.py | 3 + .../general/langchain}/__init__.py | 0 .../general/langchain/langchain_base.py | 4 +- .../general/langchain/langchain_chat_model.py | 0 .../langchain/langchain_chat_model_base.py | 1 - .../langchain_chat_model_with_history.py | 2 +- .../general/langchain/langchain_embeddings.py | 4 +- .../langchain_vector_store_delete.py | 7 +- .../langchain_vector_store_embedding_base.py | 6 +- .../langchain_vector_store_embedding_index.py | 7 +- ...langchain_vector_store_embedding_search.py | 7 +- .../general/message_filter.py | 2 +- .../general/pass_through.py | 3 + .../general/user_processor.py | 3 + .../inputs_outputs}/__init__.py | 0 
.../inputs_outputs/broker_base.py | 4 +- .../inputs_outputs/broker_input.py | 2 +- .../inputs_outputs/broker_output.py | 2 +- .../inputs_outputs/error_input.py | 2 +- .../inputs_outputs/file_input.py | 5 + .../inputs_outputs/slack_base.py | 4 +- .../inputs_outputs/slack_input.py | 6 +- .../inputs_outputs/slack_output.py | 6 +- .../inputs_outputs/stdin_input.py | 3 + .../inputs_outputs/stdout_output.py | 2 + .../inputs_outputs/timer_input.py | 4 +- src/solace_ai_connector/flow/flow.py | 2 +- .../general/langchain/__init__.py | 0 .../inputs_outputs/__init__.py | 0 src/{ => solace_ai_connector}/main.py | 17 +- .../transforms/__init__.py | 6 + .../{common => }/transforms/append.py | 2 +- .../{common => }/transforms/copy.py | 0 .../{common => }/transforms/copy_list_item.py | 6 +- .../{common => }/transforms/filter.py | 0 .../{common => }/transforms/map.py | 0 .../{common => }/transforms/reduce.py | 0 .../{common => }/transforms/transform_base.py | 2 +- .../{common => }/transforms/transforms.py | 0 58 files changed, 270 insertions(+), 1995 deletions(-) delete mode 100644 fix_abs_paths.py delete mode 100644 prompts.txt delete mode 100644 prompts.yaml create mode 100644 requirements.txt.old create mode 100644 src/solace_ai_connector/components/__init__.py rename src/solace_ai_connector/{flow_components => components}/component_base.py (98%) rename src/solace_ai_connector/{common/transforms => components/general}/__init__.py (100%) rename src/solace_ai_connector/{flow_components => components}/general/aggregate.py (99%) rename src/solace_ai_connector/{flow_components => components}/general/delay.py (93%) rename src/solace_ai_connector/{flow_components => components/general/for_testing}/__init__.py (100%) rename src/solace_ai_connector/{flow_components => components}/general/for_testing/fail.py (94%) rename src/solace_ai_connector/{flow_components => components}/general/for_testing/give_ack_output.py (88%) rename src/solace_ai_connector/{flow_components => 
components}/general/for_testing/need_ack_input.py (97%) rename src/solace_ai_connector/{flow_components => components}/general/for_testing/storage_tester.py (94%) rename src/solace_ai_connector/{flow_components => components}/general/iterate.py (95%) rename src/solace_ai_connector/{flow_components/general => components/general/langchain}/__init__.py (100%) rename src/solace_ai_connector/{flow_components => components}/general/langchain/langchain_base.py (94%) rename src/solace_ai_connector/{flow_components => components}/general/langchain/langchain_chat_model.py (100%) rename src/solace_ai_connector/{flow_components => components}/general/langchain/langchain_chat_model_base.py (99%) rename src/solace_ai_connector/{flow_components => components}/general/langchain/langchain_chat_model_with_history.py (99%) rename src/solace_ai_connector/{flow_components => components}/general/langchain/langchain_embeddings.py (97%) rename src/solace_ai_connector/{flow_components => components}/general/langchain/langchain_vector_store_delete.py (97%) rename src/solace_ai_connector/{flow_components => components}/general/langchain/langchain_vector_store_embedding_base.py (94%) rename src/solace_ai_connector/{flow_components => components}/general/langchain/langchain_vector_store_embedding_index.py (97%) rename src/solace_ai_connector/{flow_components => components}/general/langchain/langchain_vector_store_embedding_search.py (97%) rename src/solace_ai_connector/{flow_components => components}/general/message_filter.py (97%) rename src/solace_ai_connector/{flow_components => components}/general/pass_through.py (87%) rename src/solace_ai_connector/{flow_components => components}/general/user_processor.py (94%) rename src/solace_ai_connector/{flow_components/general/for_testing => components/inputs_outputs}/__init__.py (100%) rename src/solace_ai_connector/{flow_components => components}/inputs_outputs/broker_base.py (97%) rename src/solace_ai_connector/{flow_components => 
components}/inputs_outputs/broker_input.py (99%) rename src/solace_ai_connector/{flow_components => components}/inputs_outputs/broker_output.py (99%) rename src/solace_ai_connector/{flow_components => components}/inputs_outputs/error_input.py (99%) rename src/solace_ai_connector/{flow_components => components}/inputs_outputs/file_input.py (83%) rename src/solace_ai_connector/{flow_components => components}/inputs_outputs/slack_base.py (92%) rename src/solace_ai_connector/{flow_components => components}/inputs_outputs/slack_input.py (99%) rename src/solace_ai_connector/{flow_components => components}/inputs_outputs/slack_output.py (97%) rename src/solace_ai_connector/{flow_components => components}/inputs_outputs/stdin_input.py (93%) rename src/solace_ai_connector/{flow_components => components}/inputs_outputs/stdout_output.py (88%) rename src/solace_ai_connector/{flow_components => components}/inputs_outputs/timer_input.py (97%) delete mode 100644 src/solace_ai_connector/flow_components/general/langchain/__init__.py delete mode 100644 src/solace_ai_connector/flow_components/inputs_outputs/__init__.py rename src/{ => solace_ai_connector}/main.py (82%) create mode 100644 src/solace_ai_connector/transforms/__init__.py rename src/solace_ai_connector/{common => }/transforms/append.py (98%) rename src/solace_ai_connector/{common => }/transforms/copy.py (100%) rename src/solace_ai_connector/{common => }/transforms/copy_list_item.py (96%) rename src/solace_ai_connector/{common => }/transforms/filter.py (100%) rename src/solace_ai_connector/{common => }/transforms/map.py (100%) rename src/solace_ai_connector/{common => }/transforms/reduce.py (100%) rename src/solace_ai_connector/{common => }/transforms/transform_base.py (97%) rename src/solace_ai_connector/{common => }/transforms/transforms.py (100%) diff --git a/fix_abs_paths.py b/fix_abs_paths.py deleted file mode 100644 index 3df4ba63..00000000 --- a/fix_abs_paths.py +++ /dev/null @@ -1,51 +0,0 @@ -import os - - -def 
convert_imports(root_dir): - for dirpath, _, filenames in os.walk(root_dir): - for filename in filenames: - if filename.endswith(".py"): - file_path = os.path.join(dirpath, filename) - with open(file_path, "r") as file: - lines = file.readlines() - - with open(file_path, "w") as file: - for line in lines: - if line.startswith("from solace_ai_connector"): - # Calculate the relative import path - import_path = line[ - len("from solace_ai_connector.") : - ].strip() - the_rest = import_path.split(" ", 1)[1] - import_path = import_path.split(" ")[0] - file_relative_path = os.path.relpath(dirpath, root_dir) - - import_parts = import_path.split(".") - file_parts = file_relative_path.split(os.sep) - depth = len(file_parts) - if depth == 1 and file_parts[0] == ".": - depth = 0 - while True: - if ( - len(file_parts) - and len(import_parts) - and import_parts[0] == file_parts[0] - ): - import_parts.pop(0) - file_parts.pop(0) - depth -= 1 - else: - break - - relative_import = "." * (depth + 1) + ".".join(import_parts) - - file.write(f"from {relative_import} {the_rest}\n") - else: - file.write(line) - - -if __name__ == "__main__": - # Change directory to src - os.chdir("src") - root_directory = "solace_ai_connector" - convert_imports(root_directory) diff --git a/prompts.txt b/prompts.txt deleted file mode 100644 index 27f876f8..00000000 --- a/prompts.txt +++ /dev/null @@ -1,1179 +0,0 @@ -You are an assistant who will help users create a new configuration for the Solace AI Event Connector. The connector is a tool that allows users to create flows that process messages from a Solace event broker, generally to help interface with AI based services. A typical flow will start with a message from the broker, pass through a series of components and transforms, and then send the message back to the broker. The components and transforms are user-configurable and can be used to manipulate the message in various ways. 
The user will have to provide the message input_schema, queue, or topic, and the desired output_schema and topic. Your job is to to create an initial configuration for the user. -Make sure you use ${ENV_VARS} for any sensitive information. -Your interaction with the user will via a chat interface. Before you generate the YAML configuration, you will have to ask the user for the input_schema, queue, or topic, and the desired output_schema and topic. -You can ask as many questions as you need to get the information you need. Try to make the conversation flow naturally and confirm the user's input if there is any ambiguity - for example, if they input the schema in a mixed JSON/YAML/pseudo structure, print it back out for them in a clean YAML format and get confirmation that it is correct -Here is a structure that defines all the built-in components and transforms. - -component: -- class_name: ErrorInput - config_parameters: - - default: null - description: Maximum rate of errors to process per second. Any errors above this - rate will be dropped. If not set, all errors will be processed. - name: max_rate - required: false - description: 'Receive processing errors from the Solace AI Event Connector. Note - that the component_input configuration is ignored. This component should be used - to create a flow that handles errors from other flows. 
' - output_schema: - properties: - error: - description: Information about the error - properties: - exception: - description: The exception message - type: string - message: - description: The error message - type: string - required: - - message - - exception - type: object - location: - description: The location where the error occurred - properties: - component: - description: The component name that generated the error - type: string - flow: - description: The flow name of the component that generated the error - type: string - instance: - description: The instance number of the component that generated the error - type: integer - required: - - flow - - component - type: object - message: - description: The message that caused the error - properties: - payload: - description: The payload of the message - type: string - previous: - description: The output from the previous stage that was processed before - the error - type: object - topic: - description: The topic of the message - type: string - user_data: - description: The user data of the message that was created during the - flow - type: object - user_properties: - description: The user properties of the message - type: object - required: [] - type: object - required: - - error - - message - - location - type: object -- class_name: BrokerInput - config_parameters: - - description: Type of broker (Solace, MQTT, etc.) - name: broker_type - required: true - - description: Broker URL (e.g. 
tcp://localhost:55555) - name: broker_url - required: true - - description: Client username for broker - name: broker_username - required: true - - description: Client password for broker - name: broker_password - required: true - - description: Client VPN for broker - name: broker_vpn - required: true - - description: Queue name for broker - name: broker_queue_name - required: true - - description: Subscriptions for broker - name: broker_subscriptions - required: true - - default: utf-8 - description: Encoding for the payload (utf-8, base64, gzip, none) - name: payload_encoding - required: false - - default: json - description: Format for the payload (json, yaml, text) - name: payload_format - required: false - description: Connect to a messaging broker and receive messages from it. The component - will output the payload, topic, and user properties of the message. - output_schema: - properties: - payload: - type: string - topic: - type: string - user_properties: - type: object - required: - - payload - - topic - - user_properties - type: object -- class_name: BrokerOutput - config_parameters: - - description: Type of broker (Solace, MQTT, etc.) - name: broker_type - required: true - - description: Broker URL (e.g. 
tcp://localhost:55555) - name: broker_url - required: true - - description: Client username for broker - name: broker_username - required: true - - description: Client password for broker - name: broker_password - required: true - - description: Client VPN for broker - name: broker_vpn - required: true - - default: utf-8 - description: Encoding for the payload (utf-8, base64, gzip, none) - name: payload_encoding - required: false - - default: json - description: Format for the payload (json, yaml, text) - name: payload_format - required: false - - default: true - description: Propagate acknowledgements from the broker to the previous components - name: propagate_acknowledgements - required: false - description: Connect to a messaging broker and send messages to it. Note that this - component requires that the data is transformed into the input schema. - input_schema: - properties: - payload: - description: Payload of the message sent to the broker - type: any - topic: - description: Topic to send the message to - type: string - user_properties: - description: User properties to send with the message - type: object - required: - - payload - - topic - type: object -- class_name: Stdout - config_parameters: [] - description: STDOUT output component - input_schema: - properties: - text: - type: string - required: - - text - type: object -- class_name: Stdin - config_parameters: [] - description: STDIN input component. The component will prompt for input, which will - then be placed in the message payload using the output schema below. - output_schema: - properties: - text: - type: string - required: - - text - type: object -- class_name: SlackInput - config_parameters: - - description: The Slack bot token to connect to Slack. - name: slack_bot_token - type: string - - description: The Slack app token to connect to Slack. - name: slack_app_token - type: string - - default: 20 - description: 'The maximum file size to download from Slack in MB. 
Default: 20MB' - name: max_file_size - required: false - type: number - - default: 20 - description: 'The maximum total file size to download from Slack in MB. Default: - 20MB' - name: max_total_file_size - required: false - type: number - description: Slack input component. The component connects to Slack using the Bolt - API and receives messages from Slack channels. - output_schema: - properties: - event: - properties: - channel: - type: string - channel_type: - type: string - client_msg_id: - type: string - event_ts: - type: string - files: - items: - properties: - content: - type: string - filetype: - type: string - mime_type: - type: string - name: - type: string - size: - type: number - type: object - type: array - mentions: - items: - type: string - type: array - subtype: - type: string - text: - type: string - ts: - type: string - type: - type: string - user_email: - type: string - user_id: - type: string - type: object - required: - - event - type: object -- class_name: UserProcessor - config_parameters: [] - description: 'A component that allows the processing stage to be defined in the - configuration file using ''invoke'' statements. The configuration must be specified - with the ''component_processing:'' property alongside the ''component_module:'' - property in the component''s configuration. The input and output schemas are free-form. - The user-defined processing must line up with the input ' - input_schema: - properties: {} - type: object - output_schema: - properties: {} - type: object - short_description: A component that allows the processing stage to be defined in - the configuration file. 
-- class_name: Aggregate - config_parameters: - - default: 10 - description: Number of input messages to aggregate before sending an output message - name: max_items - required: false - type: integer - - default: 1000 - description: Number of milliseconds to wait before sending an output message - name: max_time_ms - required: false - type: integer - description: Take multiple messages and aggregate them into one. The output of this - component is a list of the exact structure of the input data. - input_schema: - description: The input message to be aggregated - properties: {} - type: object - output_schema: - description: The aggregated messages - items: - type: object - type: array - short_description: Aggregate messages into one message. -- class_name: PassThrough - config_parameters: [] - description: What goes in comes out - input_schema: - properties: {} - type: object - output_schema: - properties: {} - type: object -- class_name: Delay - config_parameters: - - default: 1 - description: The delay in seconds - name: delay - type: number - description: 'A simple component that simply passes the input to the output, but - with a configurable delay. Note that it will not service the next input until - the delay has passed. If this component has num_instances > 1, each instance will - run in parallel. ' - input_schema: - properties: {} - type: object - output_schema: - properties: {} - type: object - short_description: A simple component that simply passes the input to the output, - but with a configurable delay. 
-- class_name: Iterate - config_parameters: [] - description: Take a single message that is a list and output each item in that list - as a separate message - input_schema: - items: - type: object - type: array - output_schema: - properties: {} - type: object -- class_name: MessageFilter - config_parameters: - - description: A dynmaic invoke configuration that will return true if message should - be passed or false to drop it - name: filter_expression - required: true - description: A filtering component. This will apply a user configurable expression. - If the expression evaluates to True, the message will be passed on. If the expression - evaluates to False, the message will be discarded. If the message is discarded, - any previous components that require an acknowledgement will be acknowledged. - input_schema: - properties: {} - type: object - output_schema: - properties: {} - type: object -- class_name: LangChainEmbeddings - config_parameters: - - description: The chat model module - e.g. 'langchain_openai.chat_models' - name: langchain_module - required: true - type: string - - description: The chat model class to use - e.g. ChatOpenAI - name: langchain_class - required: true - type: string - - description: Model specific configuration for the chat model. See documentation - for valid parameter names. - name: langchain_component_config - required: true - type: object - description: Provide access to all the LangChain Text Embeddings components via - configuration - input_schema: - properties: - text: - description: The text to embed - type: string - type: - description: 'The type of embedding to use: ''document'' or ''query'' - default - is ''document''' - type: string - required: - - text - type: object - output_schema: - properties: - embedding: - description: A list of floating point numbers representing the embedding. 
- Its length is the size of vector that the embedding model produces - items: - type: float - type: array - required: - - embedding - type: object - short_description: Provide access to all the LangChain Text Embeddings components - via configuration -- class_name: LangChainVectorStoreDelete - config_parameters: - - description: The vector store library path - e.g. 'langchain_community.vectorstores' - name: vector_store_component_path - required: true - - description: The vector store to use - e.g. 'Pinecone' - name: vector_store_component_name - required: true - - description: Model specific configuration for the vector store. See LangChain - documentation for valid parameter names for this specific component (e.g. https://python.langchain.com/docs/integrations/vectorstores/pinecone). - name: vector_store_component_config - required: true - - description: The name of the index to use - name: vector_store_index_name - required: false - - description: The embedding library path - e.g. 'langchain_community.embeddings' - name: embedding_component_path - required: true - - description: The embedding model to use - e.g. BedrockEmbeddings - name: embedding_component_name - required: true - - description: Model specific configuration for the embedding model. See documentation - for valid parameter names. - name: embedding_component_config - required: true - - allow_source_expression: true - description: List of ids to delete from the vector store. - name: delete_ids - required: false - - allow_source_expression: true - description: Keyword arguments to pass to the delete method of the vector store.See - documentation for valid parameter names. - name: delete_kwargs - required: true - description: This component allows for entries in a LangChain Vector Store to be - deleted. This is needed for the continued maintenance of the vector store. 
Due - to the nature of langchain vector stores, you need to specify an embedding component - even though it is not used in this component. - input_schema: - properties: - metadata: - description: 'Metadata to associate with the text in the vector store. ' - type: object - text: - description: The text to embed - type: string - required: - - text - type: object - output_schema: - properties: {} - type: object -- class_name: LangChainChatModel - config_parameters: - - description: The chat model module - e.g. 'langchain_openai.chat_models' - name: langchain_module - required: true - - description: The chat model class to use - e.g. ChatOpenAI - name: langchain_class - required: true - - description: Model specific configuration for the chat model. See documentation - for valid parameter names. - name: langchain_component_config - required: true - - description: The response format for this LLM request. This can be 'json', 'yaml', - or 'text'. If set to 'json' or 'yaml', the response will be parsed by the appropriate - parser and the fields will be available in the response object. If set to 'text', - the response will be returned as a string. - name: llm_response_format - required: false - description: Provide access to all the LangChain chat models via configuration - input_schema: - properties: - messages: - items: - properties: - content: - description: The content of the LLM message - type: string - role: - description: The role of the LLM message (user, assistant, system) - type: string - required: - - content - type: object - type: array - required: - - messages - type: object - output_schema: - description: The result of the chat model invocation. If a format is specified, - then the result text will be parsed and the fields will be available in the - response object. 
- properties: - result: - type: string - required: - - result - type: object -- class_name: LangChainVectorStoreEmbeddingsIndex - config_parameters: - - description: The vector store library path - e.g. 'langchain_community.vectorstores' - name: vector_store_component_path - required: true - - description: The vector store to use - e.g. 'Pinecone' - name: vector_store_component_name - required: true - - description: Model specific configuration for the vector store. See LangChain - documentation for valid parameter names for this specific component (e.g. https://python.langchain.com/docs/integrations/vectorstores/pinecone). - name: vector_store_component_config - required: true - - description: The name of the index to use - name: vector_store_index_name - required: false - - description: The embedding library path - e.g. 'langchain_community.embeddings' - name: embedding_component_path - required: true - - description: The embedding model to use - e.g. BedrockEmbeddings - name: embedding_component_name - required: true - - description: Model specific configuration for the embedding model. See documentation - for valid parameter names. - name: embedding_component_config - required: true - description: Use LangChain Vector Stores to index text for later semantic searches. - This will take text, run it through an embedding model and then store it in a - vector database. - input_schema: - properties: - metadatas: - items: - type: object - type: array - texts: - items: - type: string - type: array - required: - - texts - type: object - output_schema: - properties: {} - required: - - results - type: object -- class_name: LangChainVectorStoreEmbeddingsSearch - config_parameters: - - description: The vector store library path - e.g. 'langchain_community.vectorstores' - name: vector_store_component_path - required: true - - description: The vector store to use - e.g. 
'Pinecone' - name: vector_store_component_name - required: true - - description: Model specific configuration for the vector store. See LangChain - documentation for valid parameter names for this specific component (e.g. https://python.langchain.com/docs/integrations/vectorstores/pinecone). - name: vector_store_component_config - required: true - - description: The name of the index to use - name: vector_store_index_name - required: false - - description: The embedding library path - e.g. 'langchain_community.embeddings' - name: embedding_component_path - required: true - - description: The embedding model to use - e.g. BedrockEmbeddings - name: embedding_component_name - required: true - - description: Model specific configuration for the embedding model. See documentation - for valid parameter names. - name: embedding_component_config - required: true - - description: The maximum number of results to return - name: max_results - required: true - - default: true - description: Set to False if you don't want to combine all the context from the - same source. Default is True - name: combine_context_from_same_source - required: false - description: Use LangChain Vector Stores to search a vector store with a semantic - search. This will take text, run it through an embedding model with a query embedding - and then find the closest matches in the store. - input_schema: - properties: - text: - type: string - required: - - text - type: object - output_schema: - properties: - results: - properties: - matches: - items: - properties: - metadata: - type: object - score: - type: float - text: - type: string - required: - - text - type: object - type: array - type: object - required: - - results - type: object -transform: -- class_name: MapTransform - config_parameters: - - description: Select the list to copy from - name: source_list_expression - required: true - type: string|invoke_expression - - description: 'A field to copy. 
All normal source_expression options are available, - allowing you to use the source list as the iterator, but copy the same value - from elsewhere in the message over and over. Also, two other expression datatypes - are available: ''item'' and ''index''. ''item'' allows you to select from the - source list entry itself (e.g. item:field_name). ''index'' allows you to select - the index of the source list.' - name: source_expression - required: true - type: string|invoke_expression - - description: An optional invoke function to process the source data before it - is placed in the destination list - name: processing_function - required: false - type: invoke_expression - - description: The list to copy the item into - name: dest_list_expression - required: true - type: string|invoke_expression - - description: The field within the dest list to copy the item into - name: dest_expression - required: false - type: string|invoke_expression - description: "This is a map transform where a list is iterated over. For each item,\ - \ it is possible to take a value from either the source list (or anywhere else\ - \ in the message), optionally process it and then put it in the same index in\ - \ the destination list. If the destination list is shorter than the source list,\ - \ the destination list will be extended to match the length of the source list.\ - \ In the processing function, you have access to the following keyword arguments:\n\ - \n * index: The index of the current item in the source list\n * current_value:\ - \ The value of the current item in the source list\n * source_list: The source\ - \ list\n\nThese should be accessed using `source_expression(keyword_args:)`. For example, `source_expression(keyword_args:current_value)`. See the\ - \ example below for more detail." 
- example_config: "\n``` \n input_transforms:\n - type: map\n source_list_expression:\ - \ input.payload:my_obj.my_list\n source_expression: item.my_val\n \ - \ processing_function:\n invoke:\n module: invoke_functions\n\ - \ function: add\n params:\n positional:\n\ - \ - source_expression(keyword_args:current_value)\n \ - \ - 2\n dest_expression: user_data.output:new_list\n```\nThis transform\ - \ would take a payload like this:\n\n```\n {\n \"my_obj\": {\n \ - \ \"my_list\": [\n {\"my_val\": 1},\n {\"my_val\": 2},\n \ - \ {\"my_val\": 3}\n ],\n }\n }\n```\nand produce an object\ - \ like this:\n\n```\n user_data.output:\n {\n new_list: [3, 4, 5]\n\ - \ }\n```\n" - short_description: This is a map transform where a list is iterated over, processed - and then placed at the same index in the destination list. -- class_name: CopyListItemTransform - config_parameters: - - description: Select the list to copy from - name: source_expression - required: true - type: string|invoke_expression - - description: The field within that list to select - name: source_property - required: true - type: string|invoke_expression - - description: The list to copy the item into - name: dest_expression - required: true - type: string|invoke_expression - - description: The field within the dest list to copy the item into - name: dest_property - required: false - type: string|invoke_expression - description: 'Select a source list. Iterate over the list and copy the value of - a field to a destination list at the same index. This can be used to create multiple - lists from a single list or vice versa. NOTE: this transform is deprecated - use - ''map'' instead.' -- class_name: AppendTransform - config_parameters: - - description: The field to append to the destination list. - name: source_expression - required: true - type: string|invoke_expression - - description: The field to append the source value to. 
- name: dest_expression - required: true - type: string|invoke_expression - description: 'Select a source value and append it to a destination list. ' -- class_name: ReduceTransform - config_parameters: - - description: Select the list to iterate over - name: source_list_expression - required: true - type: string|invoke_expression - - description: The field in the source list to accumulate - name: source_expression - required: false - type: string|invoke_expression - - description: The invoke expression to use to accumulate the values - name: accumulator_function - required: true - type: invoke_expression - - description: The initial value for the accumulator as a source_expression - name: initial_value - required: true - type: string|invoke_expression - - description: The field to store the accumulated value - name: dest_expression - required: true - type: string|invoke_expression - description: "This is a reduce transform where a list is iterated over. For each\ - \ item, it is possible to take a value from either the source list (or anywhere\ - \ else in the message) and accumulate it in the accumulator. The accumulated value\ - \ will then be stored in the dest_expression.\n\nIn the accumulator function,\ - \ you have access to the following keyword arguments:\n\n * index: The index of\ - \ the current item in the source list\n * accumulated_value: The current accumulated\ - \ value\n * current_value: The value of the current item in the source list\n\ - \ * source_list: The source list\n\nThese should be accessed using `source_expression(keyword_args:)`. For example, `source_expression(keyword_args:current_value)`. See the\ - \ example below for more detail." 
- example_config: "\n``` \n input_transforms:\n - type: reduce\n \ - \ source_list_expression: input.payload:my_obj.my_list\n source_expression:\ - \ item.my_val\n initial_value: 0\n accumulator_function:\n \ - \ invoke:\n module: invoke_functions\n function: add\n\ - \ params:\n positional:\n - source_expression(keyword_args:accumulated_value)\n\ - \ - source_expression(keyword_args:current_value)\n dest_expression:\ - \ user_data.output:my_obj.sum\n```\nThis transform would take a payload like this:\n\ - \n```\n {\n \"my_obj\": {\n \"my_list\": [\n {\"my_val\"\ - : 1},\n {\"my_val\": 2},\n {\"my_val\": 3}\n ],\n \ - \ }\n }\n```\nand produce an object like this:\n\n```\n user_data.output:\n\ - \ {\n \"my_obj\": {\n \"sum\": 6\n }\n }\n```\n " - short_description: Reduce a list to a single value -- class_name: CopyTransform - config_parameters: - - description: The field to copy from. - name: source_expression - required: true - type: string|invoke_expression - - description: The field to copy the source value to. - name: dest_expression - required: true - type: string|invoke_expression - description: Copy Transform - copy a value from one field to another. -- class_name: FilterTransform - config_parameters: - - description: Select the list to iterate over - name: source_list_expression - required: true - type: string|invoke_expression - - description: The field in the source list to accumulate - name: source_expression - required: false - type: string|invoke_expression - - description: The invoke function to use to filter the list - name: filter_function - required: true - type: invoke_expression - - description: The list to copy the item into - name: dest_list_expression - required: true - type: string|invoke_expression - - description: The field within the dest list to copy the item into - name: dest_expression - required: false - type: string|invoke_expression - description: "This is a filter transform where a list is iterated over. 
For each\ - \ item, the provided filter_function is run. If it evaluates to True then the\ - \ item is copied to the destination list. If it evaluates to False then the item\ - \ is not copied to the destination list.\n\nIn the filter function, you have access\ - \ to the following keyword arguments:\n\n * index: The index of the current item\ - \ in the source list\n * current_value: The value of the current item in the source\ - \ list\n * source_list: The source list\n\nThese should be accessed using `source_expression(keyword_args:<name>)`. For example, `source_expression(keyword_args:current_value)`. See the\ - \ example below for more detail." - example_config: "\n``` \n input_transforms:\n - type: filter\n \ - \ source_list_expression: input.payload:my_obj.my_list\n source_expression:\ - \ item\n filter_function:\n invoke:\n module: invoke_functions\n\ - \ function: greater_than\n params:\n positional:\n\ - \ - source_expression(keyword_args:current_value.my_val)\n \ - \ - 2\n dest_expression: user_data.output:new_list\n```\n\ - This transform would take a payload like this:\n\n```\n {\n \"my_obj\"\ - : {\n \"my_list\": [\n {\"my_val\": 1},\n {\"my_val\"\ - : 2},\n {\"my_val\": 3},\n {\"my_val\": 4}\n ],\n \ - \ }\n }\n```\nand produce an object like this:\n\n```\n user_data.output:\n\ - \ {\n new_list: [\n {\"my_val\": 3},\n {\"my_val\"\ - : 4}\n ],\n }\n```\n" - short_description: Filter a list based on a filter function - - - -Here is the markdown documentation for the configuration file: - -# Configuration for the AI Event Connector - -The AI Event Connector is highly configurable. You can define the components of each flow, the queue depths between them, and the number of instances of each component. The configuration is done through a YAML file that is loaded when the connector starts. This allows you to easily change the configuration without having to modify the code. 
- -## Configuration File Format and Rules - -The configuration file is a YAML file that is loaded when the connector starts. - -### Special values - -Within the configuration, you can have simple static values, environment variables, or dynamic values using the `invoke` keyword. - -#### Environment Variables - -You can use environment variables in the configuration file by using the `${}` syntax. For example, if you have an environment variable `MY_VAR` you can use it in the configuration file like this: - -```yaml -my_key: ${MY_VAR} -``` - -#### Dynamic Values (invoke keyword) - -You can use dynamic values in the configuration file by using the `invoke` keyword. This allows you to do such things as import a module, instantiate a class and call a function to get the value. For example, if you want to get the operating system type you can use it in the configuration file like this: - -```yaml -os_type: - invoke: - module: platform - function: system -``` - -An `invoke` block works by specifying an 'object' to act on with one (and only one) of the following keys: -- `module`: The name of the module to import in normal Python import syntax (e.g. `os.path`) -- `object`: An object to call a function on or get an attribute from. Note that this must have an `invoke` block itself to create the object. -It is also acceptable to specify neither `module` nor `object` if you are calling a function that is in the global namespace. - -In addition to the object specifier, you can specify one (and only one) of the following keys: -- `function`: The name of the function to call on the object -- `attribute`: The name of the attribute to get from the object - -In the case of a function, you can also specify a `params` key to pass parameters to the function. 
The params value has the following keys: -- `positional`: A list of positional parameters to pass to the function -- `keyword`: A dictionary of keyword parameters to pass to the function - -`invoke` blocks can be nested to build up complex objects and call functions on them. - -Here is an example of a complex `invoke` block that could be used to get AWS credentials: - -```yaml - # Get AWS credentials and give it a name to reference later - - aws_credentials: &aws_credentials - invoke: - object: - invoke: - # import boto3 - module: boto3 - # Get the session object -> boto3.Session() - function: Session - # Call the get_credentials function on the session object -> session.get_credentials() - function: get_credentials - - - aws_4_auth: - invoke: - # import requests_aws4auth - module: requests_aws4auth - # Get the AWS4Auth object -> requests_aws4auth.AWS4Auth() - function: AWS4Auth - params: - positional: - # Access key - - invoke: - object: *aws_credentials - attribute: access_key - # Secret key - - invoke: - object: *aws_credentials - attribute: secret_key - # Region (from environment variable) - - ${AWS_REGION} - # Service name (from environment variable) - - ${AWS_SERVICE} - keyword: - # Pass the session token if it exists -> session_token= - session_token: - invoke: - object: *aws_credentials - attribute: token -``` - -##### invoke_functions - -There is a module named `invoke_functions` that has a list of functions that can take the place of python operators. This is useful for when you want to use an operator in a configuration file. 
The following functions are available: -- `add`: param1 + param2 - can be used to add or concatenate two strings or lists -- `append`: Append the second value to the first -- `subtract`: Subtract the second number from the first -- `multiply`: Multiply two numbers together -- `divide`: Divide the first number by the second -- `modulus`: Get the modulus of the first number by the second -- `power`: Raise the first number to the power of the second -- `equal`: Check if two values are equal -- `not_equal`: Check if two values are not equal -- `greater_than`: Check if the first value is greater than the second -- `greater_than_or_equal`: Check if the first value is greater than or equal to the second -- `less_than`: Check if the first value is less than the second -- `less_than_or_equal`: Check if the first value is less than or equal to the second -- `and_op`: Check if both values are true -- `or_op`: Check if either value is true -- `not_op`: Check if the value is false -- `in_op`: Check if the first value is in the second value -- `negate`: Negate the value -- `empty_list`: Return an empty list -- `empty_dict`: Return an empty dictionary -- `empty_string`: Return an empty string -- `empty_set`: Return an empty set -- `empty_tuple`: Return an empty tuple -- `empty_float`: Return 0.0 -- `empty_int`: Return 0 - -Here is an example of using the `invoke_functions` module to do some simple operations: - -```yaml - # Use the invoke_functions module to do some simple operations - - simple_operations: - invoke: - module: invoke_functions - function: add - params: - positional: - - 1 - - 2 -``` - -##### source_expression() - -If the `invoke` block is used within an area of the configuration that relates to message processing -(e.g. input_transforms), an invoke function call can use the special function `source_expression()` for -any of its parameters. This function will be replaced with the value of the source expression at runtime. 
-It is an error to use `source_expression()` outside of a message processing context. - -Example: -```yaml --flows: - -my_flow: - -my_component: - input_transforms: - -type: copy - source_expression: - invoke: - module: invoke_functions - function: add - params: - positional: - - source_expression(input.payload:my_obj.val1) - - 2 - dest_expression: user_data.my_obj:result -``` - -In the above example, the `source_expression()` function is used to get the value of `input.payload:my_obj.val1` and add 2 to it. - -##### user_processor component and invoke - -The `user_processor` component is a special component that allows you to define a user-defined function to process the message. This is useful for when you want to do some processing on the input message that is not possible with the built-in transforms or other components. In order to specify the user-defined function, you must define the `component_processing` property with an `invoke` block. - -Here is an example of using the `user_processor` component with an `invoke` block: - -```yaml - - my_user_processor: - component_name: my_user_processor - component_module: user_processor - component_processing: - invoke: - module: my_module - function: my_function - params: - positional: - - source_expression(input.payload:my_key) - - 2 -``` - - - - - -## Configuration File Structure - -The configuration file is a YAML file with these top-level keys: - -- `log`: Configuration of logging for the connector -- `shared_config`: Named configurations that can be used by multiple components later in the file -- `flows`: A list of flow configurations. - -### Log Configuration - -The `log` configuration section is used to configure the logging for the connector. It configures the logging behaviour for stdout and file logs. It has the following keys: - -- `stdout_log_level`: - The log level for the stdout log -- `log_file_level`: - The log level for the file log -- `log_file`: - The file to log to. 
If not specified, no file logging will be done - -Here is an example of a log configuration: - -```yaml -log: - stdout_log_level: INFO - log_file_level: DEBUG - log_file: /var/log/ai_event_connector.log -``` - -### Shared Configurations - -The `shared_config` section is used to define configurations that can be used by multiple components later in the file. It is a dictionary of named configurations. Each named configuration is a dictionary of configuration values. Here is an example of a shared configuration: - -```yaml -shared_config: - my_shared_config: &my_shared_config - my_key: my_value - my_other_key: my_other_value -``` - -Later in the file, you can reference this shared configuration like this: - -```yaml - - my_component: - <<: *my_shared_config - my_key: my_new_value -``` - -### Flow Configuration - -The `flows` section is a list of flow configurations. Each flow configuration is a dictionary with the -following keys: -- `name`: - The unique name of the flow -- `components`: A list of component configurations - -#### Component Configuration - -Each component configuration is a dictionary with the following keys: -- `component_name`: - The unique name of the component within the flow -- `component_module`: - The module that contains the component class (python import syntax) -- `component_config`: - The configuration for the component. Its format is specific to the component -- `input_transforms`: - A list of transforms to apply to the input message before sending it to the component -- `component_input`: - A source_expression or source_value to use as the input to the component. -- `queue_depth`: - The depth of the input queue for the component -- `num_instances`: - The number of instances of the component to run - -**Note: For a list of all built-in components, see the [Components](components/index.md) documentation.** - -##### component_config - -The `component_config` is a dictionary of configuration values specific to the component. 
The format of this dictionary is specific to the component. You must refer to the component's documentation for the specific configuration values. - -##### input_transforms - -The `input_transforms` is a list of transforms to apply to the input message before sending it to the component. Each transform is a dictionary with the following keys: -- `type`: - The type of transform -- `source_expression|source_value`: - The source expression or value to use as the input to the transform -- `dest_expression`: - The destination expression for where to store the transformation output - -The following transform modules are available: -- `copy`: Copy the source value to the destination - -Here is an example of a component configuration with input transforms: - -```yaml - - my_component: - component_module: my_module.my_component - component_config: - my_key: my_value - input_transforms: - - type: copy - # Extract the my_key value from the input payload - source_expression: input.payload:my_key - # Store the value in the newly created my_obj object in the my_keys list - # at index 2 (i.e. my_obj.my_keys[2].my_key = input.payload.my_key) - dest_expression: user_data.my_obj:my_keys.2.my_key -``` - -###### Built-in Transforms - -The AI Event Connector comes with a number of built-in transforms that can be used to process messages. For a list of all built-in transforms, see the [Transforms](transforms/index.md) documentation. - -##### component_input - -The `component_input` is a dictionary with one (and only one) of the following keys: -- `source_expression`: - An expression to use as the input to the component (see below for expression syntax) -- `source_value`: - A value to use as the input to the component. 
- -Note that, as for all values in the config file, you can use the `invoke` keyword to get dynamic values - -Here is an example of a component configuration with a source expression: - -```yaml - - my_component: - component_module: my_module.my_component - component_config: - my_key: my_value - component_input: - source_expression: input.payload:my_key -``` - -##### queue_depth - -The `queue_depth` is an integer that specifies the depth of the input queue for the component. This is the number of messages that can be buffered in the queue before the component will start to block. By default, the queue depth is 100. - - -##### num_instances - -The `num_instances` is an integer that specifies the number of instances of the component to run. This is the number of threads that will be started to process messages from the input queue. By default, the number of instances is 1. - -#### Built-in components - -The AI Event Connector comes with a number of built-in components that can be used to process messages. For a list of all built-in components, see the [Components](components/index.md) documentation. - -### Expression Syntax - -The `source_expression` and `dest_expression` values in the configuration file use a simple expression syntax to reference values in the input message and to store values in the output message. The format of the expression is: - -`<data_type>[.<qualifier>][:<index>]` - -Where: - -- `data_type`: - The type of data to reference. This can be one of the following: - - `input`: The input message. It supports the qualifiers: - - `payload`: The payload of the input message - - `topic`: The topic of the input message - - `topic_levels`: A list of the levels of the topic of the input message - - `user_properties`: The user properties of the input message - - `user_data`: The user data object. The qualifier is required to specify the name of the user data object - - `static`: A static value (e.g. 
`static:my_value`) - - `template`: A template (see more below) - - `previous`: The output from the previous component in the flow. This could be of any type depending on the previous component - -- `qualifier`: - The qualifier to use to reference the data. This is specific to the `data_type` and is optional. If not specified, the entire data type will be used. - -- `index`: - Where to get the data in the data type. This is optional and is specific to the `data_type`. For templates, it is the template. For other data types, it is a dot separated string or an integer index. The index will be split on dots and used to traverse the data type. If it is an integer, it will be used as an index into the data type. If it is a string, it will be used as a key to get the value from the data type. - -Here are some examples of expressions: - -- `input.payload:my_key` - Get the value of `my_key` from the input payload -- `user_data.my_obj:my_key` - Get the value of `my_key` from the `my_obj` object in the user data -- `static:my_value` - Use the static value `my_value` -- `user_data.my_obj2:my_list.2.my_key` - Get the value of `my_key` from the 3rd item in the `my_list` list in the `my_obj2` object in the user data - -When using expressions for destination expressions, lists and objects will be created as needed. If the destination expression is a list index, the list will be extended to the index if it is not long enough. If the destination expression is an object key, the object will be created if it does not exist. - -#### Templates - -The `template` data type is a special data type that allows you to use a template to create a value. The template is a string that can contain expressions to reference values in the input message. The format of the template is: - -`text text text {{template_expression}} text text text` - -Where: - -- `{{template_expression}}` - An expression to reference values in the input message. 
It has the format: - - `<encoding>://<source_expression>` - - Where: - - - `encoding`: - The encoding/formatting to use to print out the value. This can be one of the following: - - `base64`: Use base64 encoding - - `json`: Use json format - - `yaml`: Use yaml format - - `text`: Use string format - - `datauri:<mime_type>`: Use data uri encoding with the specified mime type - - - `source_expression`: - An expression to reference values in the input message. This has the same format as the `source_expression` in the configuration file described above. - -Here is an example of a template: - -```yaml - input_transforms: - - type: copy - source_expression: | - template:Write me a dry joke about: - {{text://input.payload}} - Write the joke in the voice of {{text://input.user_properties:comedian}} - dest_expression: user_data.llm_input:messages.0.content - - type: copy - source_value: user - dest_expression: user_data.llm_input:messages.0.role -``` - -In this example, the `source_expression` for the first transform is a template that uses the `text` encoding to create a string. - - -Here is an example configuration: -Take special care to ensure that the data format is correct as it moves from component to component. input_transforms will likely need to be created to ensure that the data is in the correct format for each component. -Now, you will have to ask the user for the input_schema, queue, or topic, and the desired output_schema and topic. diff --git a/prompts.yaml b/prompts.yaml deleted file mode 100644 index 43d9fe85..00000000 --- a/prompts.yaml +++ /dev/null @@ -1,621 +0,0 @@ -{"prompt": "Here is a structure that defines all the built-in components and transforms.\ - \ \n\ncomponent:\n- class_name: ErrorInput\n config_parameters:\n\ - \ - default: null\n description: Maximum rate of errors to process per second.\ - \ Any errors above this\n rate will be dropped. 
If not set, all errors will\ - \ be processed.\n name: max_rate\n required: false\n description: 'Receive\ - \ processing errors from the Solace AI Event Connector. Note\n that the component_input\ - \ configuration is ignored. This component should be used\n to create a flow\ - \ that handles errors from other flows. '\n output_schema:\n properties:\n\ - \ error:\n description: Information about the error\n properties:\n\ - \ exception:\n description: The exception message\n \ - \ type: string\n message:\n description: The error message\n\ - \ type: string\n required:\n - message\n - exception\n\ - \ type: object\n location:\n description: The location where\ - \ the error occurred\n properties:\n component:\n description:\ - \ The component name that generated the error\n type: string\n \ - \ flow:\n description: The flow name of the component that generated\ - \ the error\n type: string\n instance:\n description:\ - \ The instance number of the component that generated the error\n type:\ - \ integer\n required:\n - flow\n - component\n type:\ - \ object\n message:\n description: The message that caused the error\n\ - \ properties:\n payload:\n description: The payload\ - \ of the message\n type: string\n previous:\n description:\ - \ The output from the previous stage that was processed before\n \ - \ the error\n type: object\n topic:\n description:\ - \ The topic of the message\n type: string\n user_data:\n \ - \ description: The user data of the message that was created during\ - \ the\n flow\n type: object\n user_properties:\n\ - \ description: The user properties of the message\n type:\ - \ object\n required: []\n type: object\n required:\n - error\n\ - \ - message\n - location\n type: object\n- class_name: BrokerInput\n\ - \ config_parameters:\n - description: Type of broker (Solace, MQTT, etc.)\n\ - \ name: broker_type\n required: true\n - description: Broker URL (e.g.\ - \ tcp://localhost:55555)\n name: broker_url\n required: true\n - description:\ - \ 
Client username for broker\n name: broker_username\n required: true\n\ - \ - description: Client password for broker\n name: broker_password\n required:\ - \ true\n - description: Client VPN for broker\n name: broker_vpn\n required:\ - \ true\n - description: Queue name for broker\n name: broker_queue_name\n\ - \ required: true\n - description: Subscriptions for broker\n name: broker_subscriptions\n\ - \ required: true\n - default: utf-8\n description: Encoding for the payload\ - \ (utf-8, base64, gzip, none)\n name: payload_encoding\n required: false\n\ - \ - default: json\n description: Format for the payload (json, yaml, text)\n\ - \ name: payload_format\n required: false\n description: Connect to a messaging\ - \ broker and receive messages from it. The component\n will output the payload,\ - \ topic, and user properties of the message.\n output_schema:\n properties:\n\ - \ payload:\n type: string\n topic:\n type: string\n \ - \ user_properties:\n type: object\n required:\n - payload\n \ - \ - topic\n - user_properties\n type: object\n- class_name: BrokerOutput\n\ - \ config_parameters:\n - description: Type of broker (Solace, MQTT, etc.)\n\ - \ name: broker_type\n required: true\n - description: Broker URL (e.g.\ - \ tcp://localhost:55555)\n name: broker_url\n required: true\n - description:\ - \ Client username for broker\n name: broker_username\n required: true\n\ - \ - description: Client password for broker\n name: broker_password\n required:\ - \ true\n - description: Client VPN for broker\n name: broker_vpn\n required:\ - \ true\n - default: utf-8\n description: Encoding for the payload (utf-8,\ - \ base64, gzip, none)\n name: payload_encoding\n required: false\n - default:\ - \ json\n description: Format for the payload (json, yaml, text)\n name:\ - \ payload_format\n required: false\n - default: true\n description: Propagate\ - \ acknowledgements from the broker to the previous components\n name: propagate_acknowledgements\n\ - \ required: false\n 
description: Connect to a messaging broker and send messages\ - \ to it. Note that this\n component requires that the data is transformed into\ - \ the input schema.\n input_schema:\n properties:\n payload:\n \ - \ description: Payload of the message sent to the broker\n type: any\n\ - \ topic:\n description: Topic to send the message to\n type:\ - \ string\n user_properties:\n description: User properties to send\ - \ with the message\n type: object\n required:\n - payload\n -\ - \ topic\n type: object\n- class_name: Stdout\n config_parameters: []\n description:\ - \ STDOUT output component\n input_schema:\n properties:\n text:\n \ - \ type: string\n required:\n - text\n type: object\n- class_name:\ - \ Stdin\n config_parameters: []\n description: STDIN input component. The component\ - \ will prompt for input, which will\n then be placed in the message payload\ - \ using the output schema below.\n output_schema:\n properties:\n text:\n\ - \ type: string\n required:\n - text\n type: object\n- class_name:\ - \ SlackInput\n config_parameters:\n - description: The Slack bot token to connect\ - \ to Slack.\n name: slack_bot_token\n type: string\n - description: The\ - \ Slack app token to connect to Slack.\n name: slack_app_token\n type: string\n\ - \ - default: 20\n description: 'The maximum file size to download from Slack\ - \ in MB. Default: 20MB'\n name: max_file_size\n required: false\n type:\ - \ number\n - default: 20\n description: 'The maximum total file size to download\ - \ from Slack in MB. Default:\n 20MB'\n name: max_total_file_size\n \ - \ required: false\n type: number\n description: Slack input component. 
The\ - \ component connects to Slack using the Bolt\n API and receives messages from\ - \ Slack channels.\n output_schema:\n properties:\n event:\n properties:\n\ - \ channel:\n type: string\n channel_type:\n \ - \ type: string\n client_msg_id:\n type: string\n \ - \ event_ts:\n type: string\n files:\n items:\n\ - \ properties:\n content:\n type:\ - \ string\n filetype:\n type: string\n \ - \ mime_type:\n type: string\n name:\n \ - \ type: string\n size:\n type:\ - \ number\n type: object\n type: array\n mentions:\n\ - \ items:\n type: string\n type: array\n \ - \ subtype:\n type: string\n text:\n type:\ - \ string\n ts:\n type: string\n type:\n \ - \ type: string\n user_email:\n type: string\n user_id:\n\ - \ type: string\n type: object\n required:\n - event\n\ - \ type: object\n- class_name: UserProcessor\n config_parameters: []\n description:\ - \ 'A component that allows the processing stage to be defined in the\n configuration\ - \ file using ''invoke'' statements. The configuration must be specified\n with\ - \ the ''component_processing:'' property alongside the ''component_module:''\n\ - \ property in the component''s configuration. The input and output schemas\ - \ are free-form.\n The user-defined processing must line up with the input\ - \ '\n input_schema:\n properties: {}\n type: object\n output_schema:\n\ - \ properties: {}\n type: object\n short_description: A component that allows\ - \ the processing stage to be defined in\n the configuration file.\n- class_name:\ - \ Aggregate\n config_parameters:\n - default: 10\n description: Number of\ - \ input messages to aggregate before sending an output message\n name: max_items\n\ - \ required: false\n type: integer\n - default: 1000\n description: Number\ - \ of milliseconds to wait before sending an output message\n name: max_time_ms\n\ - \ required: false\n type: integer\n description: Take multiple messages\ - \ and aggregate them into one. 
The output of this\n component is a list of\ - \ the exact structure of the input data.\n input_schema:\n description: The\ - \ input message to be aggregated\n properties: {}\n type: object\n output_schema:\n\ - \ description: The aggregated messages\n items:\n type: object\n \ - \ type: array\n short_description: Aggregate messages into one message.\n- class_name:\ - \ PassThrough\n config_parameters: []\n description: What goes in comes out\n\ - \ input_schema:\n properties: {}\n type: object\n output_schema:\n \ - \ properties: {}\n type: object\n- class_name: Delay\n config_parameters:\n\ - \ - default: 1\n description: The delay in seconds\n name: delay\n type:\ - \ number\n description: 'A simple component that simply passes the input to the\ - \ output, but\n with a configurable delay. Note that it will not service the\ - \ next input until\n the delay has passed. If this component has num_instances\ - \ > 1, each instance will\n run in parallel. '\n input_schema:\n properties:\ - \ {}\n type: object\n output_schema:\n properties: {}\n type: object\n\ - \ short_description: A simple component that simply passes the input to the output,\n\ - \ but with a configurable delay.\n- class_name: Iterate\n config_parameters:\ - \ []\n description: Take a single message that is a list and output each item\ - \ in that list\n as a separate message\n input_schema:\n items:\n \ - \ type: object\n type: array\n output_schema:\n properties: {}\n type:\ - \ object\n- class_name: MessageFilter\n config_parameters:\n - description:\ - \ A dynamic invoke configuration that will return true if message should\n \ - \ be passed or false to drop it\n name: filter_expression\n required:\ - \ true\n description: A filtering component. This will apply a user configurable\ - \ expression.\n If the expression evaluates to True, the message will be passed\ - \ on. 
If the expression\n evaluates to False, the message will be discarded.\ - \ If the message is discarded,\n any previous components that require an acknowledgement\ - \ will be acknowledged.\n input_schema:\n properties: {}\n type: object\n\ - \ output_schema:\n properties: {}\n type: object\n- class_name: LangChainEmbeddings\n\ - \ config_parameters:\n - description: The chat model module - e.g. 'langchain_openai.chat_models'\n\ - \ name: langchain_module\n required: true\n type: string\n - description:\ - \ The chat model class to use - e.g. ChatOpenAI\n name: langchain_class\n \ - \ required: true\n type: string\n - description: Model specific configuration\ - \ for the chat model. See documentation\n for valid parameter names.\n \ - \ name: langchain_component_config\n required: true\n type: object\n description:\ - \ Provide access to all the LangChain Text Embeddings components via\n configuration\n\ - \ input_schema:\n properties:\n text:\n description: The text\ - \ to embed\n type: string\n type:\n description: 'The type\ - \ of embedding to use: ''document'' or ''query'' - default\n is ''document'''\n\ - \ type: string\n required:\n - text\n type: object\n output_schema:\n\ - \ properties:\n embedding:\n description: A list of floating point\ - \ numbers representing the embedding.\n Its length is the size of vector\ - \ that the embedding model produces\n items:\n type: float\n \ - \ type: array\n required:\n - embedding\n type: object\n short_description:\ - \ Provide access to all the LangChain Text Embeddings components\n via configuration\n\ - - class_name: LangChainVectorStoreDelete\n config_parameters:\n - description:\ - \ The vector store library path - e.g. 'langchain_community.vectorstores'\n \ - \ name: vector_store_component_path\n required: true\n - description: The\ - \ vector store to use - e.g. 'Pinecone'\n name: vector_store_component_name\n\ - \ required: true\n - description: Model specific configuration for the vector\ - \ store. 
See LangChain\n documentation for valid parameter names for this\ - \ specific component (e.g. https://python.langchain.com/docs/integrations/vectorstores/pinecone).\n\ - \ name: vector_store_component_config\n required: true\n - description:\ - \ The name of the index to use\n name: vector_store_index_name\n required:\ - \ false\n - description: The embedding library path - e.g. 'langchain_community.embeddings'\n\ - \ name: embedding_component_path\n required: true\n - description: The\ - \ embedding model to use - e.g. BedrockEmbeddings\n name: embedding_component_name\n\ - \ required: true\n - description: Model specific configuration for the embedding\ - \ model. See documentation\n for valid parameter names.\n name: embedding_component_config\n\ - \ required: true\n - allow_source_expression: true\n description: List\ - \ of ids to delete from the vector store.\n name: delete_ids\n required:\ - \ false\n - allow_source_expression: true\n description: Keyword arguments\ - \ to pass to the delete method of the vector store.See\n documentation for\ - \ valid parameter names.\n name: delete_kwargs\n required: true\n description:\ - \ This component allows for entries in a LangChain Vector Store to be\n deleted.\ - \ This is needed for the continued maintenance of the vector store. Due\n to\ - \ the nature of langchain vector stores, you need to specify an embedding component\n\ - \ even though it is not used in this component.\n input_schema:\n properties:\n\ - \ metadata:\n description: 'Metadata to associate with the text in\ - \ the vector store. '\n type: object\n text:\n description:\ - \ The text to embed\n type: string\n required:\n - text\n type:\ - \ object\n output_schema:\n properties: {}\n type: object\n- class_name:\ - \ LangChainChatModel\n config_parameters:\n - description: The chat model module\ - \ - e.g. 'langchain_openai.chat_models'\n name: langchain_module\n required:\ - \ true\n - description: The chat model class to use - e.g. 
ChatOpenAI\n name:\ - \ langchain_class\n required: true\n - description: Model specific configuration\ - \ for the chat model. See documentation\n for valid parameter names.\n \ - \ name: langchain_component_config\n required: true\n - description: The\ - \ response format for this LLM request. This can be 'json', 'yaml',\n or\ - \ 'text'. If set to 'json' or 'yaml', the response will be parsed by the appropriate\n\ - \ parser and the fields will be available in the response object. If set\ - \ to 'text',\n the response will be returned as a string.\n name: llm_response_format\n\ - \ required: false\n description: Provide access to all the LangChain chat\ - \ models via configuration\n input_schema:\n properties:\n messages:\n\ - \ items:\n properties:\n content:\n description:\ - \ The content of the LLM message\n type: string\n role:\n\ - \ description: The role of the LLM message (user, assistant, system)\n\ - \ type: string\n required:\n - content\n \ - \ type: object\n type: array\n required:\n - messages\n type:\ - \ object\n output_schema:\n description: The result of the chat model invocation.\ - \ If a format is specified,\n then the result text will be parsed and the\ - \ fields will be available in the\n response object.\n properties:\n \ - \ result:\n type: string\n required:\n - result\n type: object\n\ - - class_name: LangChainVectorStoreEmbeddingsIndex\n config_parameters:\n - description:\ - \ The vector store library path - e.g. 'langchain_community.vectorstores'\n \ - \ name: vector_store_component_path\n required: true\n - description: The\ - \ vector store to use - e.g. 'Pinecone'\n name: vector_store_component_name\n\ - \ required: true\n - description: Model specific configuration for the vector\ - \ store. See LangChain\n documentation for valid parameter names for this\ - \ specific component (e.g. 
https://python.langchain.com/docs/integrations/vectorstores/pinecone).\n\ - \ name: vector_store_component_config\n required: true\n - description:\ - \ The name of the index to use\n name: vector_store_index_name\n required:\ - \ false\n - description: The embedding library path - e.g. 'langchain_community.embeddings'\n\ - \ name: embedding_component_path\n required: true\n - description: The\ - \ embedding model to use - e.g. BedrockEmbeddings\n name: embedding_component_name\n\ - \ required: true\n - description: Model specific configuration for the embedding\ - \ model. See documentation\n for valid parameter names.\n name: embedding_component_config\n\ - \ required: true\n description: Use LangChain Vector Stores to index text\ - \ for later semantic searches.\n This will take text, run it through an embedding\ - \ model and then store it in a\n vector database.\n input_schema:\n properties:\n\ - \ metadatas:\n items:\n type: object\n type: array\n\ - \ texts:\n items:\n type: string\n type: array\n \ - \ required:\n - texts\n type: object\n output_schema:\n properties:\ - \ {}\n required:\n - results\n type: object\n- class_name: LangChainVectorStoreEmbeddingsSearch\n\ - \ config_parameters:\n - description: The vector store library path - e.g. 'langchain_community.vectorstores'\n\ - \ name: vector_store_component_path\n required: true\n - description: The\ - \ vector store to use - e.g. 'Pinecone'\n name: vector_store_component_name\n\ - \ required: true\n - description: Model specific configuration for the vector\ - \ store. See LangChain\n documentation for valid parameter names for this\ - \ specific component (e.g. https://python.langchain.com/docs/integrations/vectorstores/pinecone).\n\ - \ name: vector_store_component_config\n required: true\n - description:\ - \ The name of the index to use\n name: vector_store_index_name\n required:\ - \ false\n - description: The embedding library path - e.g. 
'langchain_community.embeddings'\n\ - \ name: embedding_component_path\n required: true\n - description: The\ - \ embedding model to use - e.g. BedrockEmbeddings\n name: embedding_component_name\n\ - \ required: true\n - description: Model specific configuration for the embedding\ - \ model. See documentation\n for valid parameter names.\n name: embedding_component_config\n\ - \ required: true\n - description: The maximum number of results to return\n\ - \ name: max_results\n required: true\n - default: true\n description:\ - \ Set to False if you don't want to combine all the context from the\n same\ - \ source. Default is True\n name: combine_context_from_same_source\n required:\ - \ false\n description: Use LangChain Vector Stores to search a vector store with\ - \ a semantic\n search. This will take text, run it through an embedding model\ - \ with a query embedding\n and then find the closest matches in the store.\n\ - \ input_schema:\n properties:\n text:\n type: string\n required:\n\ - \ - text\n type: object\n output_schema:\n properties:\n results:\n\ - \ properties:\n matches:\n items:\n properties:\n\ - \ metadata:\n type: object\n score:\n\ - \ type: float\n text:\n type:\ - \ string\n required:\n - text\n type: object\n\ - \ type: array\n type: object\n required:\n - results\n\ - \ type: object\ntransform:\n- class_name: MapTransform\n config_parameters:\n\ - \ - description: Select the list to copy from\n name: source_list_expression\n\ - \ required: true\n type: string|invoke_expression\n - description: 'A field\ - \ to copy. All normal source_expression options are available,\n allowing\ - \ you to use the source list as the iterator, but copy the same value\n from\ - \ elsewhere in the message over and over. Also, two other expression datatypes\n\ - \ are available: ''item'' and ''index''. ''item'' allows you to select from\ - \ the\n source list entry itself (e.g. item:field_name). 
''index'' allows\ - \ you to select\n the index of the source list.'\n name: source_expression\n\ - \ required: true\n type: string|invoke_expression\n - description: An optional\ - \ invoke function to process the source data before it\n is placed in the\ - \ destination list\n name: processing_function\n required: false\n type:\ - \ invoke_expression\n - description: The list to copy the item into\n name:\ - \ dest_list_expression\n required: true\n type: string|invoke_expression\n\ - \ - description: The field within the dest list to copy the item into\n name:\ - \ dest_expression\n required: false\n type: string|invoke_expression\n \ - \ description: \"This is a map transform where a list is iterated over. For each\ - \ item,\\\n \\ it is possible to take a value from either the source list (or\ - \ anywhere else\\\n \\ in the message), optionally process it and then put\ - \ it in the same index in\\\n \\ the destination list. If the destination list\ - \ is shorter than the source list,\\\n \\ the destination list will be extended\ - \ to match the length of the source list.\\\n \\ In the processing function,\ - \ you have access to the following keyword arguments:\\n\\\n \\n * index: The\ - \ index of the current item in the source list\\n * current_value:\\\n \\ The\ - \ value of the current item in the source list\\n * source_list: The source\\\n\ - \ \\ list\\n\\nThese should be accessed using `source_expression(keyword_args:)`. 
For example, `source_expression(keyword_args:current_value)`.\ - \ See the\\\n \\ example below for more detail.\"\n example_config: \"\\n```\ - \ \\n input_transforms:\\n - type: map\\n source_list_expression:\\\ - \n \\ input.payload:my_obj.my_list\\n source_expression: item.my_val\\\ - n \\\n \\ processing_function:\\n invoke:\\n module:\ - \ invoke_functions\\n\\\n \\ function: add\\n params:\\\ - n positional:\\n\\\n \\ - source_expression(keyword_args:current_value)\\\ - n \\\n \\ - 2\\n dest_expression: user_data.output:new_list\\\ - n```\\nThis transform\\\n \\ would take a payload like this:\\n\\n```\\n \ - \ {\\n \\\"my_obj\\\": {\\n \\\n \\ \\\"my_list\\\": [\\n \ - \ {\\\"my_val\\\": 1},\\n {\\\"my_val\\\": 2},\\n \\\n \\ \ - \ {\\\"my_val\\\": 3}\\n ],\\n }\\n }\\n```\\nand produce an\ - \ object\\\n \\ like this:\\n\\n```\\n user_data.output:\\n {\\n \ - \ new_list: [3, 4, 5]\\n\\\n \\ }\\n```\\n\"\n short_description: This\ - \ is a map transform where a list is iterated over, processed\n and then placed\ - \ at the same index in the destination list.\n- class_name: CopyListItemTransform\n\ - \ config_parameters:\n - description: Select the list to copy from\n name:\ - \ source_expression\n required: true\n type: string|invoke_expression\n\ - \ - description: The field within that list to select\n name: source_property\n\ - \ required: true\n type: string|invoke_expression\n - description: The\ - \ list to copy the item into\n name: dest_expression\n required: true\n\ - \ type: string|invoke_expression\n - description: The field within the dest\ - \ list to copy the item into\n name: dest_property\n required: false\n \ - \ type: string|invoke_expression\n description: 'Select a source list. Iterate\ - \ over the list and copy the value of\n a field to a destination list at the\ - \ same index. This can be used to create multiple\n lists from a single list\ - \ or vice versa. 
NOTE: this transform is deprecated - use\n ''map'' instead.'\n\ - - class_name: AppendTransform\n config_parameters:\n - description: The field\ - \ to append to the destination list.\n name: source_expression\n required:\ - \ true\n type: string|invoke_expression\n - description: The field to append\ - \ the source value to.\n name: dest_expression\n required: true\n type:\ - \ string|invoke_expression\n description: 'Select a source value and append it\ - \ to a destination list. '\n- class_name: ReduceTransform\n config_parameters:\n\ - \ - description: Select the list to iterate over\n name: source_list_expression\n\ - \ required: true\n type: string|invoke_expression\n - description: The\ - \ field in the source list to accumulate\n name: source_expression\n required:\ - \ false\n type: string|invoke_expression\n - description: The invoke expression\ - \ to use to accumulate the values\n name: accumulator_function\n required:\ - \ true\n type: invoke_expression\n - description: The initial value for the\ - \ accumulator as a source_expression\n name: initial_value\n required: true\n\ - \ type: string|invoke_expression\n - description: The field to store the accumulated\ - \ value\n name: dest_expression\n required: true\n type: string|invoke_expression\n\ - \ description: \"This is a reduce transform where a list is iterated over. 
For\ - \ each\\\n \\ item, it is possible to take a value from either the source list\ - \ (or anywhere\\\n \\ else in the message) and accumulate it in the accumulator.\ - \ The accumulated value\\\n \\ will then be stored in the dest_expression.\\\ - n\\nIn the accumulator function,\\\n \\ you have access to the following keyword\ - \ arguments:\\n\\n * index: The index of\\\n \\ the current item in the source\ - \ list\\n * accumulated_value: The current accumulated\\\n \\ value\\n * current_value:\ - \ The value of the current item in the source list\\n\\\n \\ * source_list:\ - \ The source list\\n\\nThese should be accessed using `source_expression(keyword_args:)`. For example, `source_expression(keyword_args:current_value)`.\ - \ See the\\\n \\ example below for more detail.\"\n example_config: \"\\n```\ - \ \\n input_transforms:\\n - type: reduce\\n \\\n \\ source_list_expression:\ - \ input.payload:my_obj.my_list\\n source_expression:\\\n \\ item.my_val\\\ - n initial_value: 0\\n accumulator_function:\\n \\\n \\ \ - \ invoke:\\n module: invoke_functions\\n function: add\\\ - n\\\n \\ params:\\n positional:\\n \ - \ - source_expression(keyword_args:accumulated_value)\\n\\\n \\ \ - \ - source_expression(keyword_args:current_value)\\n dest_expression:\\\ - \n \\ user_data.output:my_obj.sum\\n```\\nThis transform would take a payload\ - \ like this:\\n\\\n \\n```\\n {\\n \\\"my_obj\\\": {\\n \\\"\ - my_list\\\": [\\n {\\\"my_val\\\"\\\n : 1},\\n {\\\"my_val\\\ - \": 2},\\n {\\\"my_val\\\": 3}\\n ],\\n \\\n \\ }\\n \ - \ }\\n```\\nand produce an object like this:\\n\\n```\\n user_data.output:\\\ - n\\\n \\ {\\n \\\"my_obj\\\": {\\n \\\"sum\\\": 6\\n }\\\ - n }\\n```\\n \"\n short_description: Reduce a list to a single value\n\ - - class_name: CopyTransform\n config_parameters:\n - description: The field\ - \ to copy from.\n name: source_expression\n required: true\n type: string|invoke_expression\n\ - \ - description: The field to copy the source value to.\n 
name: dest_expression\n\ - \ required: true\n type: string|invoke_expression\n description: Copy Transform\ - \ - copy a value from one field to another.\n- class_name: FilterTransform\n \ - \ config_parameters:\n - description: Select the list to iterate over\n name:\ - \ source_list_expression\n required: true\n type: string|invoke_expression\n\ - \ - description: The field in the source list to accumulate\n name: source_expression\n\ - \ required: false\n type: string|invoke_expression\n - description: The\ - \ invoke function to use to filter the list\n name: filter_function\n required:\ - \ true\n type: invoke_expression\n - description: The list to copy the item\ - \ into\n name: dest_list_expression\n required: true\n type: string|invoke_expression\n\ - \ - description: The field within the dest list to copy the item into\n name:\ - \ dest_expression\n required: false\n type: string|invoke_expression\n \ - \ description: \"This is a filter transform where a list is iterated over. For\ - \ each\\\n \\ item, the provided filter_functions is run. If it evaluates to\ - \ True then the\\\n \\ item is copied to the destination list. If it evaluates\ - \ to False then the item\\\n \\ is not copied to the destination list.\\n\\\ - nIn the filter function, you have access\\\n \\ to the following keyword arguments:\\\ - n\\n * index: The index of the current item\\\n \\ in the source list\\n *\ - \ current_value: The value of the current item in the source\\\n \\ list\\\ - n * source_list: The source list\\n\\nThese should be accessed using `source_expression(keyword_args:)`. 
For example, `source_expression(keyword_args:current_value)`.\ - \ See the\\\n \\ example below for more detail.\"\n example_config: \"\\n```\ - \ \\n input_transforms:\\n - type: filter\\n \\\n \\ source_list_expression:\ - \ input.payload:my_obj.my_list\\n source_expression:\\\n \\ item\\n\ - \ filter_function:\\n invoke:\\n module: invoke_functions\\\ - n\\\n \\ function: greater_than\\n params:\\n \ - \ positional:\\n\\\n \\ - source_expression(keyword_args:current_value.my_val)\\\ - n \\\n \\ - 2\\n dest_expression: user_data.output:new_list\\\ - n```\\n\\\n This transform would take a payload like this:\\n\\n```\\n {\\\ - n \\\"my_obj\\\"\\\n : {\\n \\\"my_list\\\": [\\n {\\\"\ - my_val\\\": 1},\\n {\\\"my_val\\\"\\\n : 2},\\n {\\\"my_val\\\ - \": 3},\\n {\\\"my_val\\\": 4}\\n ],\\n \\\n \\ }\\n \ - \ }\\n```\\nand produce an object like this:\\n\\n```\\n user_data.output:\\\ - n\\\n \\ {\\n new_list: [\\n {\\\"my_val\\\": 3},\\n \ - \ {\\\"my_val\\\"\\\n : 4}\\n ],\\n }\\n```\\n\"\n short_description:\ - \ Filter a list based on a filter function\n\n\n\ - \nHere is the markdown documentation for the configuration file: \n\n\ - # Configuration for the AI Event Connector\n\nThe AI Event Connector is highly\ - \ configurable. You can define the components of each flow, the queue depths between\ - \ them, and the number of instances of each component. The configuration is done\ - \ through a YAML file that is loaded when the connector starts. This allows you\ - \ to easily change the configuration without having to modify the code.\n\n##\ - \ Configuration File Format and Rules\n\nThe configuration file is a YAML file\ - \ that is loaded when the connector starts. \n\n### Special values\n\nWithin the\ - \ configuration, you can have simple static values, environment variables, or\ - \ dynamic values using the `invoke` keyword.\n\n#### Environment Variables\n\n\ - You can use environment variables in the configuration file by using the `${}`\ - \ syntax. 
For example, if you have an environment variable `MY_VAR` you can use\ - \ it in the configuration file like this:\n\n```yaml\nmy_key: ${MY_VAR}\n```\n\ - \n#### Dynamic Values (invoke keyword)\n\nYou can use dynamic values in the configuration\ - \ file by using the `invoke` keyword. This allows you to do such things as import\ - \ a module, instantiate a class and call a function to get the value. For example,\ - \ if you want to get the operating system type you can use it in the configuration\ - \ file like this:\n\n```yaml\nos_type: \n invoke:\n module: platform\n \ - \ function: system\n```\n\nAn `invoke` block works by specifying an 'object' to\ - \ act on with one (and only one) of the following keys:\n- `module`: The name\ - \ of the module to import in normal Python import syntax (e.g. `os.path`)\n-\ - \ `object`: An object to call a function on or get an attribute from. Note that\ - \ this must have an `invoke` block itself to create the object. \nIt is also acceptable\ - \ to specify neither `module` nor `object` if you are calling a function that\ - \ is in the global namespace.\n\nIn addition to the object specifier, you can\ - \ specify one (and only one) of the following keys:\n- `function`: The name of\ - \ the function to call on the object\n- `attribute`: The name of the attribute\ - \ to get from the object\n\nIn the case of a function, you can also specify a\ - \ `params` key to pass parameters to the function. 
The params value has the following\ - \ keys:\n- `positional`: A list of positional parameters to pass to the function\n\ - - `keyword`: A dictionary of keyword parameters to pass to the function\n\n`invoke`\ - \ blocks can be nested to build up complex objects and call functions on them.\n\ - \nHere is an example of a complex `invoke` block that could be used to get AWS\ - \ credentials:\n\n```yaml\n # Get AWS credentials and give it a name to reference\ - \ later\n - aws_credentials: &aws_credentials\n invoke:\n object:\n\ - \ invoke:\n # import boto3\n module: boto3\n \ - \ # Get the session object -> boto3.Session()\n function:\ - \ Session\n # Call the get_credentials function on the session object ->\ - \ session.get_credentials()\n function: get_credentials\n\n - aws_4_auth:\n\ - \ invoke:\n # import requests_aws4auth\n module: requests_aws4auth\n\ - \ # Get the AWS4Auth object -> requests_aws4auth.AWS4Auth()\n function: AWS4Auth\n params:\n positional:\n\ - \ # Access key\n - invoke:\n object: *aws_credentials\n\ - \ attribute: access_key\n # Secret key\n \ - \ - invoke:\n object: *aws_credentials\n attribute:\ - \ secret_key\n # Region (from environment variable)\n -\ - \ ${AWS_REGION}\n # Service name (from environment variable)\n \ - \ - ${AWS_SERVICE}\n keyword:\n # Pass the session\ - \ token if it exists -> session_token=\n session_token:\n\ - \ invoke:\n object: *aws_credentials\n \ - \ attribute: token\n```\n\n##### invoke_functions\n\nThere is a module named\ - \ `invoke_functions` that has a list of functions that can take the place of python\ - \ operators. This is useful for when you want to use an operator in a configuration\ - \ file. 
The following functions are available:\n- `add`: param1 + param2 - can\ - \ be used to add or concatenate two strings or lists\n- `append`: Append the second\ - \ value to the first\n- `subtract`: Subtract the second number from the first\n\ - - `multiply`: Multiply two numbers together\n- `divide`: Divide the first number\ - \ by the second\n- `modulus`: Get the modulus of the first number by the second\n\ - - `power`: Raise the first number to the power of the second\n- `equal`: Check\ - \ if two values are equal\n- `not_equal`: Check if two values are not equal\n\ - - `greater_than`: Check if the first value is greater than the second\n- `greater_than_or_equal`:\ - \ Check if the first value is greater than or equal to the second\n- `less_than`:\ - \ Check if the first value is less than the second\n- `less_than_or_equal`: Check\ - \ if the first value is less than or equal to the second\n- `and_op`: Check if\ - \ both values are true\n- `or_op`: Check if either value is true\n- `not_op`:\ - \ Check if the value is false\n- `in_op`: Check if the first value is in the second\ - \ value\n- `negate`: Negate the value\n- `empty_list`: Return an empty list\n\ - - `empty_dict`: Return an empty dictionary\n- `empty_string`: Return an empty\ - \ string\n- `empty_set`: Return an empty set\n- `empty_tuple`: Return an empty\ - \ tuple\n- `empty_float`: Return 0.0\n- `empty_int`: Return 0\n\nHere is an example\ - \ of using the `invoke_functions` module to do some simple operations:\n\n```yaml\n\ - \ # Use the invoke_functions module to do some simple operations\n - simple_operations:\n\ - \ invoke:\n module: invoke_functions\n function: add\n \ - \ params:\n positional:\n - 1\n - 2\n```\n\n\ - ##### source_expression()\n\nIf the `invoke` block is used within an area of the\ - \ configuration that relates to message processing \n(e.g. input_transforms),\ - \ an invoke function call can use the special function `source_expression()` for\ - \ \nany of its parameters. 
This function will be replaced with the value of the\ - \ source expression at runtime.\nIt is an error to use `source_expression()` outside\ - \ of a message processing.\n\nExample:\n```yaml\n-flows:\n -my_flow:\n -my_component:\n\ - \ input_transforms:\n -type: copy\n source_expression: \n\ - \ invoke:\n module: invoke_functions\n function:\ - \ add\n params:\n positional:\n -\ - \ source_expression(input.payload:my_obj.val1)\n - 2\n \ - \ dest_expression: user_data.my_obj:result\n```\n\nIn the above example, the\ - \ `source_expression()` function is used to get the value of `input.payload:my_obj.val1`\ - \ and add 2 to it.\n\n##### user_processor component and invoke\n\nThe `user_processor`\ - \ component is a special component that allows you to define a user-defined function\ - \ to process the message. This is useful for when you want to do some processing\ - \ on the input message that is not possible with the built-in transforms or other\ - \ components. In order to specify the user-defined function, you must define the\ - \ `component_processing` property with an `invoke` block. \n\nHere is an example\ - \ of using the `user_processor` component with an `invoke` block:\n\n```yaml\n\ - \ - my_user_processor:\n component_name: my_user_processor\n component_module:\ - \ user_processor\n component_processing:\n invoke:\n module:\ - \ my_module\n function: my_function\n params:\n positional:\n\ - \ - source_expression(input.payload:my_key)\n - 2\n\ - ```\n\n\n\n\n\n## Configuration File Structure\n\nThe configuration file is a\ - \ YAML file with these top-level keys:\n\n- `log`: Configuration of logging for\ - \ the connector\n- `shared_config`: Named configurations that can be used by multiple\ - \ components later in the file\n- `flows`: A list of flow configurations. \n\n\ - ### Log Configuration\n\nThe `log` configuration section is used to configure\ - \ the logging for the connector. It configures the logging behaviour for stdout\ - \ and file logs. 
It has the following keys:\n\n- `stdout_log_level`: \ - \ - The log level for the stdout log\n- `log_file_level`: \ - \ - The log level for the file log\n- `log_file`: - The file to log to.\ - \ If not specified, no file logging will be done\n\nHere is an example of a log\ - \ configuration:\n\n```yaml\nlog:\n stdout_log_level: INFO\n log_file_level:\ - \ DEBUG\n log_file: /var/log/ai_event_connector.log\n```\n\n### Shared Configurations\n\ - \nThe `shared_config` section is used to define configurations that can be used\ - \ by multiple components later in the file. It is a dictionary of named configurations.\ - \ Each named configuration is a dictionary of configuration values. Here is an\ - \ example of a shared configuration:\n\n```yaml\nshared_config:\n my_shared_config:\ - \ &my_shared_config\n my_key: my_value\n my_other_key: my_other_value\n\ - ```\n\nLater in the file, you can reference this shared configuration like this:\n\ - \n```yaml\n - my_component:\n <<: *my_shared_config\n my_key: my_new_value\n\ - ```\n\n### Flow Configuration\n\nThe `flows` section is a list of flow configurations.\ - \ Each flow configuration is a dictionary with the\nfollowing keys:\n- `name`:\ - \ - The unique name of the flow\n- `components`: A list of component\ - \ configurations\n\n#### Component Configuration\n\nEach component configuration\ - \ is a dictionary with the following keys:\n- `component_name`: - The\ - \ unique name of the component within the flow\n- `component_module`: \ - \ - The module that contains the component class (python import syntax)\n- `component_config`:\ - \ - The configuration for the component. Its format is specific to\ - \ the component\n- `input_transforms`: - A list of transforms to apply\ - \ to the input message before sending it to the component\n- `component_input`:\ - \ - A source_expression or source_value to use as the input to the\ - \ component. 
\n- `queue_depth`: - The depth of the input queue for the component\n\ - - `num_instances`: - The number of instances of the component to run\n\n\ - **Note: For a list of all built-in components, see the [Components](components/index.md)\ - \ documentation.**\n\n##### component_config\n\nThe `component_config` is a dictionary\ - \ of configuration values specific to the component. The format of this dictionary\ - \ is specific to the component. You must refer to the component's documentation\ - \ for the specific configuration values.\n\n##### input_transforms\n\nThe `input_transforms`\ - \ is a list of transforms to apply to the input message before sending it to the\ - \ component. Each transform is a dictionary with the following keys:\n- `type`:\ - \ - The type of transform\n- `source_expression|source_value`: \ - \ - The source expression or value to use as the input to the transform\n- `dest_expression`:\ - \ - The destination expression for where to store the transformation\ - \ output\n\nThe following transform modules are available:\n- `copy`: Copy the\ - \ source value to the destination\n\nHere is an example of a component configuration\ - \ with input transforms:\n\n```yaml\n - my_component:\n component_module:\ - \ my_module.my_component\n component_config:\n my_key: my_value\n\ - \ input_transforms:\n - type: copy\n # Extract the my_key\ - \ value from the input payload\n source_expression: input.payload:my_key\n\ - \ # Store the value in the newly created my_obj object in the my_keys\ - \ list \n # at index 2 (i.e. my_obj.my_keys[2].my_key = input.payload.my_key)\n\ - \ dest_expression: user_data.my_obj:my_keys.2.my_key\n```\n\n###### Built-in\ - \ Transforms\n\nThe AI Event Connector comes with a number of built-in transforms\ - \ that can be used to process messages. 
For a list of all built-in transforms,\ - \ see the [Transforms](transforms/index.md) documentation.\n\n##### component_input\n\ - \nThe `component_input` is a dictionary with one (and only one) of the following\ - \ keys:\n- `source_expression`: - An expression to use as the input to\ - \ the component (see below for expression syntax)\n- `source_value`: \ - \ - A value to use as the input to the component. \n\nNote that, as for all values\ - \ in the config file, you can use the `invoke` keyword to get dynamic values\n\ - \nHere is an example of a component configuration with a source expression:\n\n\ - ```yaml\n - my_component:\n component_module: my_module.my_component\n \ - \ component_config:\n my_key: my_value\n component_input:\n \ - \ source_expression: input.payload:my_key\n```\n\n##### queue_depth\n\nThe\ - \ `queue_depth` is an integer that specifies the depth of the input queue for\ - \ the component. This is the number of messages that can be buffered in the queue\ - \ before the component will start to block. By default, the queue depth is 100.\n\ - \n\n##### num_instances\n\nThe `num_instances` is an integer that specifies the\ - \ number of instances of the component to run. This is the number of threads that\ - \ will be started to process messages from the input queue. By default, the number\ - \ of instances is 1.\n\n#### Built-in components\n\nThe AI Event Connector comes\ - \ with a number of built-in components that can be used to process messages. For\ - \ a list of all built-in components, see the [Components](components/index.md)\ - \ documentation.\n\n### Expression Syntax\n\nThe `source_expression` and `dest_expression`\ - \ values in the configuration file use a simple expression syntax to reference\ - \ values in the input message and to store values in the output message. The format\ - \ of the expression is:\n\n`[.][:]`\n\nWhere:\n\n\ - - `data_type`: - The type of data to reference. 
This can be one of the\ - \ following:\n - `input`: The input message. It supports the qualifiers:\n \ - \ - `payload`: The payload of the input message\n - `topic`: The topic of\ - \ the input message\n - `topic_levels`: A list of the levels of the topic of\ - \ the input message\n - `user_properties`: The user properties of the input\ - \ message\n - `user_data`: The user data object. The qualifier is required to\ - \ specify the name of the user data object\n - `static`: A static value (e.g.\ - \ `static:my_value`)\n - `template`: A template (see more below)\n - `previous`:\ - \ The output from the previous component in the flow. This could be of any type\ - \ depending on the previous component\n\n- `qualifier`: - The qualifier\ - \ to use to reference the data. This is specific to the `data_type` and is optional.\ - \ If not specified, the entire data type will be used.\n\n- `index`: \ - \ - Where to get the data in the data type. This is optional and is specific to\ - \ the `data_type`. For templates, it is the template. For other data types, it\ - \ is a dot separated string or an integer index. The index will be split on dots\ - \ and used to traverse the data type. If it is an integer, it will be used as\ - \ an index into the data type. If it is a string, it will be used as a key to\ - \ get the value from the data type.\n\nHere are some examples of expressions:\n\ - \n- `input.payload:my_key` - Get the value of `my_key` from the input payload\n\ - - `user_data.my_obj:my_key` - Get the value of `my_key` from the `my_obj` object\ - \ in the user data\n- `static:my_value` - Use the static value `my_value`\n- `user_data:my_obj2:my_list.2.my_key`\ - \ - Get the value of `my_key` from the 3rd item in the `my_list` list in the `my_obj2`\ - \ object in the user data\n\nWhen using expressions for destination expressions,\ - \ lists and objects will be created as needed. 
If the destination expression is\ - \ a list index, the list will be extended to the index if it is not long enough.\ - \ If the destination expression is an object key, the object will be created if\ - \ it does not exist.\n\n#### Templates\n\nThe `template` data type is a special\ - \ data type that allows you to use a template to create a value. The template\ - \ is a string that can contain expressions to reference values in the input message.\ - \ The format of the template is:\n\n`text text text {{template_expression}} text\ - \ text text`\n\nWhere:\n\n- `{{template_expression}}` - An expression to reference\ - \ values in the input message. It has the format:\n\n `://`\n\ - \n Where:\n\n - `encoding`: - The encoding/formatting to use to print\ - \ out the value. This can be one of the following:\n - `base64`: Use base64\ - \ encoding\n - `json`: Use json format\n - `yaml`: Use yaml format\n \ - \ - `text`: Use string format\n - `datauri:`: Use data uri encoding\ - \ with the specified mime type\n\n - `source_expression`: - An expression\ - \ to reference values in the input message. This has the same format as the `source_expression`\ - \ in the configuration file described above.\n\nHere is an example of a template:\n\ - \n```yaml\n input_transforms:\n - type: copy\n source_expression: |\n\ - \ template:Write me a dry joke about:\n {{text://input.payload}}\n\ - \ Write the joke in the voice of {{text://input.user_properties:comedian}}\n\ - \ dest_expression: user_data.llm_input:messages.0.content\n - type: copy\n\ - \ source_value: user\n dest_expression: user_data.llm_input:messages.0.role\n\ - ```\n\nIn this example, the `source_expression` for the first transform is a template\ - \ that uses the `text` encoding to create a string. \n\n\n\ - Here is an example configuration: \nTake special care to ensure that the data\ - \ format is correct as it moves component to component. 
input_transforms will\ - \ likely need to be created to ensure that the data is in the correct format for\ - \ each component. \nNow, you will have to ask the user for the input_schema, queue,\ - \ or topic, and the desired output_schema and topic. \n", "system_prompt": "You\ - \ are an assistant who will help users create a new configuration for the Solace\ - \ AI Event Connector. The connector is a tool that allows users to create flows\ - \ that process messages from a Solace event broker, generally to help interface\ - \ with AI based services. A typical flow will start with a message from the broker,\ - \ pass through a series of components and transforms, and then send the message\ - \ back to the broker. The components and transforms are user-configurable and\ - \ can be used to manipulate the message in various ways. The user will have to\ - \ provide the message input_schema, queue, or topic, and the desired output_schema\ - \ and topic. Your job is to to create an initial configuration for the user. \n\ - Make sure you use ${ENV_VARS} for any sensitive information. \nYour interaction\ - \ with the user will via a chat interface. Before you generate the YAML configuration,\ - \ you will have to ask the user for the input_schema, queue, or topic, and the\ - \ desired output_schema and topic. \nYou can ask as many questions as you need\ - \ to get the information you need. 
Try to make the conversation flow naturally\ - \ and confirm the user's input if there is any ambiguity - for example, if they\ - \ input the schema in a mixed JSON/YAML/pseudo structure, print it back out for\ - \ them in a clean YAML format and get confirmation that it is correct\n"} diff --git a/pyproject.toml b/pyproject.toml index 9fbabd16..8dc87810 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,14 +17,23 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] +dependencies = [ + "boto3>=1.34.93", + "langchain_core>=0.2.4", + "PyYAML>=6.0.1", + "Requests>=2.32.3", + "slack_bolt>=1.18.1", + "solace_pubsubplus>=1.6.0", +] [project.urls] homepage = "https://github.com/SolaceLabs/solace-ai-connector" repository = "https://github.com/SolaceLabs/solace-ai-connector" documentation = "https://github.com/SolaceLabs/solace-ai-connector/blob/main/docs/index.md" + [project.scripts] -your-script = "solace_ai_connector:main" +solace-ai-connector = "solace_ai_connector.main:main" [tool.hatch.build.targets.wheel] packages = ["src/solace_ai_connector"] diff --git a/requirements.txt b/requirements.txt index 321282eb..26d24092 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,86 +1,6 @@ -aiohttp==3.9.1 -aiosignal==1.3.1 -annotated-types==0.6.0 -anyio==4.2.0 -argon2-cffi==23.1.0 -argon2-cffi-bindings==21.2.0 -astroid==3.0.2 -async-timeout==4.0.3 -attrs==23.2.0 -boto3==1.34.27 -botocore==1.34.27 -certifi==2023.11.17 -cffi==1.16.0 -charset-normalizer==3.3.2 -coverage==7.4.1 -dataclasses-json==0.6.3 -dill==0.3.7 -distro==1.9.0 -environs==9.5.0 -exceptiongroup==1.2.0 -frozenlist==1.4.1 -greenlet==3.0.3 -grpcio==1.60.0 -h11==0.14.0 -httpcore==1.0.2 -httpx==0.26.0 -idna==3.6 -iniconfig==2.0.0 -isort==5.13.2 -jmespath==1.0.1 -jsonpatch==1.33 -jsonpointer==2.4 -langchain==0.1.16 -langchain-community==0.0.34 -langchain-core==0.1.46 -langchain-openai==0.0.3 -langchain-text-splitters==0.0.1 -langsmith==0.1.51 
-marshmallow==3.20.2 -mccabe==0.7.0 -minio==7.2.4 -multidict==6.0.4 -mypy-extensions==1.0.0 -numpy==1.26.3 -openai==1.9.0 -opensearch-py==2.4.2 -orjson==3.10.1 -packaging==23.2 -pandas==2.2.0 -platformdirs==4.1.0 -pluggy==1.4.0 -protobuf==4.25.3 -pyarrow==15.0.0 -pycparser==2.21 -pycryptodome==3.20.0 -pydantic==2.5.3 -pydantic_core==2.14.6 -pylint==3.0.3 -pymilvus==2.3.6 -pytest==8.0.0 -pytest-cov==4.1.0 -python-dateutil==2.8.2 -python-dotenv==1.0.1 -pytz==2024.1 +boto3==1.34.93 +langchain_core==0.2.4 PyYAML==6.0.1 -regex==2023.12.25 -requests==2.31.0 -requests-aws4auth==1.2.3 -s3transfer==0.10.0 -six==1.16.0 -slack-bolt==1.18.1 -slack_sdk==3.27.1 -sniffio==1.3.0 -solace-pubsubplus==1.6.0 -SQLAlchemy==2.0.25 -tenacity==8.2.3 -tiktoken==0.5.2 -tomli==2.0.1 -tomlkit==0.12.3 -tqdm==4.66.1 -typing-inspect==0.9.0 -typing_extensions==4.9.0 -tzdata==2024.1 -ujson==5.9.0 -urllib3==2.0.7 -yarl==1.9.4 +Requests==2.32.3 +slack_bolt==1.18.1 +solace_pubsubplus==1.6.0 diff --git a/requirements.txt.old b/requirements.txt.old new file mode 100644 index 00000000..321282eb --- /dev/null +++ b/requirements.txt.old @@ -0,0 +1,86 @@ +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +anyio==4.2.0 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +astroid==3.0.2 +async-timeout==4.0.3 +attrs==23.2.0 +boto3==1.34.27 +botocore==1.34.27 +certifi==2023.11.17 +cffi==1.16.0 +charset-normalizer==3.3.2 +coverage==7.4.1 +dataclasses-json==0.6.3 +dill==0.3.7 +distro==1.9.0 +environs==9.5.0 +exceptiongroup==1.2.0 +frozenlist==1.4.1 +greenlet==3.0.3 +grpcio==1.60.0 +h11==0.14.0 +httpcore==1.0.2 +httpx==0.26.0 +idna==3.6 +iniconfig==2.0.0 +isort==5.13.2 +jmespath==1.0.1 +jsonpatch==1.33 +jsonpointer==2.4 +langchain==0.1.16 +langchain-community==0.0.34 +langchain-core==0.1.46 +langchain-openai==0.0.3 +langchain-text-splitters==0.0.1 +langsmith==0.1.51 +marshmallow==3.20.2 +mccabe==0.7.0 +minio==7.2.4 +multidict==6.0.4 +mypy-extensions==1.0.0 +numpy==1.26.3 +openai==1.9.0 +opensearch-py==2.4.2 
+orjson==3.10.1 +packaging==23.2 +pandas==2.2.0 +platformdirs==4.1.0 +pluggy==1.4.0 +protobuf==4.25.3 +pyarrow==15.0.0 +pycparser==2.21 +pycryptodome==3.20.0 +pydantic==2.5.3 +pydantic_core==2.14.6 +pylint==3.0.3 +pymilvus==2.3.6 +pytest==8.0.0 +pytest-cov==4.1.0 +python-dateutil==2.8.2 +python-dotenv==1.0.1 +pytz==2024.1 +PyYAML==6.0.1 +regex==2023.12.25 +requests==2.31.0 +requests-aws4auth==1.2.3 +s3transfer==0.10.0 +six==1.16.0 +slack-bolt==1.18.1 +slack_sdk==3.27.1 +sniffio==1.3.0 +solace-pubsubplus==1.6.0 +SQLAlchemy==2.0.25 +tenacity==8.2.3 +tiktoken==0.5.2 +tomli==2.0.1 +tomlkit==0.12.3 +tqdm==4.66.1 +typing-inspect==0.9.0 +typing_extensions==4.9.0 +tzdata==2024.1 +ujson==5.9.0 +urllib3==2.0.7 +yarl==1.9.4 diff --git a/src/__init__.py b/src/__init__.py index f38884b6..3dc1f76b 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,2 +1 @@ __version__ = "0.1.0" - diff --git a/src/solace_ai_connector/__init__.py b/src/solace_ai_connector/__init__.py index e69de29b..e99d83e4 100644 --- a/src/solace_ai_connector/__init__.py +++ b/src/solace_ai_connector/__init__.py @@ -0,0 +1,3 @@ +# Internal components that are dynamically loaded by the AI Connector +# Listing them here allows for them to use relative imports +__version__ = "0.1.0" diff --git a/src/solace_ai_connector/common/utils.py b/src/solace_ai_connector/common/utils.py index e6e14271..2642cd2f 100644 --- a/src/solace_ai_connector/common/utils.py +++ b/src/solace_ai_connector/common/utils.py @@ -28,9 +28,13 @@ def import_from_directories(module_name, base_path=None): if os.path.exists(module_path): try: # if module_path.startswith("src/solace_ai_connector"): - if "src/solace_ai_connector" in module_path: + if "/solace_ai_connector/" in module_path: # Remove everything up to and including src/ - module_name = re.sub(r".*src/", "", module_path) + module_name = re.sub( + r".*/solace_ai_connector/", + "solace_ai_connector/", + module_path, + ) module_name = module_name.replace("/", ".") if 
module_name.endswith(".py"): module_name = module_name[:-3] diff --git a/src/solace_ai_connector/components/__init__.py b/src/solace_ai_connector/components/__init__.py new file mode 100644 index 00000000..c60701ec --- /dev/null +++ b/src/solace_ai_connector/components/__init__.py @@ -0,0 +1,69 @@ +# Consolidate all components in one place + +from .inputs_outputs import ( + error_input, + timer_input, + slack_output, + broker_input, + broker_output, + stdout_output, + stdin_input, + slack_input, +) +from .general import ( + user_processor, + aggregate, + pass_through, + delay, + iterate, + message_filter, +) + +from .general.for_testing import ( + need_ack_input, + fail, + give_ack_output, + storage_tester, +) + +from .general.langchain import ( + langchain_embeddings, + langchain_vector_store_delete, + langchain_chat_model, + langchain_chat_model_with_history, + langchain_vector_store_embedding_index, + langchain_vector_store_embedding_search, +) + +# Also import the components from the submodules +from .inputs_outputs.error_input import ErrorInput +from .inputs_outputs.timer_input import TimerInput +from .inputs_outputs.slack_output import SlackOutput +from .inputs_outputs.broker_input import BrokerInput +from .inputs_outputs.broker_output import BrokerOutput +from .inputs_outputs.stdout_output import Stdout +from .inputs_outputs.stdin_input import Stdin +from .inputs_outputs.slack_input import SlackInput +from .general.user_processor import UserProcessor +from .general.aggregate import Aggregate +from .general.for_testing.need_ack_input import NeedAckInput +from .general.for_testing.fail import Fail +from .general.for_testing.give_ack_output import GiveAckOutput +from .general.for_testing.storage_tester import MemoryTester +from .general.pass_through import PassThrough +from .general.delay import Delay +from .general.iterate import Iterate +from .general.message_filter import MessageFilter +from .general.langchain.langchain_base import LangChainBase +from 
.general.langchain.langchain_embeddings import LangChainEmbeddings +from .general.langchain.langchain_vector_store_delete import LangChainVectorStoreDelete +from .general.langchain.langchain_chat_model import LangChainChatModel +from .general.langchain.langchain_chat_model_with_history import ( + LangChainChatModelWithHistory, +) +from .general.langchain.langchain_vector_store_embedding_index import ( + LangChainVectorStoreEmbeddingsIndex, +) +from .general.langchain.langchain_vector_store_embedding_search import ( + LangChainVectorStoreEmbeddingsSearch, +) diff --git a/src/solace_ai_connector/flow_components/component_base.py b/src/solace_ai_connector/components/component_base.py similarity index 98% rename from src/solace_ai_connector/flow_components/component_base.py rename to src/solace_ai_connector/components/component_base.py index 4f3d6ebe..4bbaf22b 100644 --- a/src/solace_ai_connector/flow_components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -9,7 +9,7 @@ from ..common.log import log from ..common.utils import resolve_config_values from ..common.utils import get_source_expression -from ..common.transforms.transforms import Transforms +from ..transforms.transforms import Transforms from ..common.message import Message from ..common.trace_message import TraceMessage @@ -18,10 +18,10 @@ class ComponentBase: - def __init__(self, **kwargs): + def __init__(self, module_info, **kwargs): + self.module_info = module_info self.config = kwargs.pop("config", {}) self.index = kwargs.pop("index", None) - self.module_info = kwargs.pop("module_info", {}) self.flow_name = kwargs.pop("flow_name", None) self.stop_signal = kwargs.pop("stop_signal", None) self.sibling_component = kwargs.pop("sibling_component", None) diff --git a/src/solace_ai_connector/common/transforms/__init__.py b/src/solace_ai_connector/components/general/__init__.py similarity index 100% rename from src/solace_ai_connector/common/transforms/__init__.py rename to 
src/solace_ai_connector/components/general/__init__.py diff --git a/src/solace_ai_connector/flow_components/general/aggregate.py b/src/solace_ai_connector/components/general/aggregate.py similarity index 99% rename from src/solace_ai_connector/flow_components/general/aggregate.py rename to src/solace_ai_connector/components/general/aggregate.py index 8ff14ca1..aa14906d 100644 --- a/src/solace_ai_connector/flow_components/general/aggregate.py +++ b/src/solace_ai_connector/components/general/aggregate.py @@ -46,7 +46,7 @@ class Aggregate(ComponentBase): def __init__(self, **kwargs): - super().__init__(**kwargs) + super().__init__(info, **kwargs) self.current_aggregation = None self.aggregate_dest = self.get_config("aggregate_dest") self.max_time_ms = self.get_config("max_time_ms") diff --git a/src/solace_ai_connector/flow_components/general/delay.py b/src/solace_ai_connector/components/general/delay.py similarity index 93% rename from src/solace_ai_connector/flow_components/general/delay.py rename to src/solace_ai_connector/components/general/delay.py index b591657f..d4a05d03 100644 --- a/src/solace_ai_connector/flow_components/general/delay.py +++ b/src/solace_ai_connector/components/general/delay.py @@ -34,6 +34,9 @@ class Delay(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + def invoke(self, message, data): sleep(self.get_config("delay")) return deepcopy(data) diff --git a/src/solace_ai_connector/flow_components/__init__.py b/src/solace_ai_connector/components/general/for_testing/__init__.py similarity index 100% rename from src/solace_ai_connector/flow_components/__init__.py rename to src/solace_ai_connector/components/general/for_testing/__init__.py diff --git a/src/solace_ai_connector/flow_components/general/for_testing/fail.py b/src/solace_ai_connector/components/general/for_testing/fail.py similarity index 94% rename from src/solace_ai_connector/flow_components/general/for_testing/fail.py rename to 
src/solace_ai_connector/components/general/for_testing/fail.py index dc579e66..9dc0a0a7 100644 --- a/src/solace_ai_connector/flow_components/general/for_testing/fail.py +++ b/src/solace_ai_connector/components/general/for_testing/fail.py @@ -36,6 +36,9 @@ class Fail(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + def invoke(self, message, data): error_message = self.get_config("error_message") exception_type = self.get_config("exception_type") diff --git a/src/solace_ai_connector/flow_components/general/for_testing/give_ack_output.py b/src/solace_ai_connector/components/general/for_testing/give_ack_output.py similarity index 88% rename from src/solace_ai_connector/flow_components/general/for_testing/give_ack_output.py rename to src/solace_ai_connector/components/general/for_testing/give_ack_output.py index 5df31a89..e3fdf886 100644 --- a/src/solace_ai_connector/flow_components/general/for_testing/give_ack_output.py +++ b/src/solace_ai_connector/components/general/for_testing/give_ack_output.py @@ -19,6 +19,9 @@ class GiveAckOutput(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + def invoke(self, message, data): message.call_acknowledgements() return data diff --git a/src/solace_ai_connector/flow_components/general/for_testing/need_ack_input.py b/src/solace_ai_connector/components/general/for_testing/need_ack_input.py similarity index 97% rename from src/solace_ai_connector/flow_components/general/for_testing/need_ack_input.py rename to src/solace_ai_connector/components/general/for_testing/need_ack_input.py index 582498e9..7d840353 100644 --- a/src/solace_ai_connector/flow_components/general/for_testing/need_ack_input.py +++ b/src/solace_ai_connector/components/general/for_testing/need_ack_input.py @@ -32,7 +32,7 @@ class NeedAckInput(ComponentBase): def __init__(self, **kwargs): - super().__init__(**kwargs) + super().__init__(info, **kwargs) self.need_acknowledgement = True def invoke(self, 
message, data): diff --git a/src/solace_ai_connector/flow_components/general/for_testing/storage_tester.py b/src/solace_ai_connector/components/general/for_testing/storage_tester.py similarity index 94% rename from src/solace_ai_connector/flow_components/general/for_testing/storage_tester.py rename to src/solace_ai_connector/components/general/for_testing/storage_tester.py index 32f89f57..97f1a683 100644 --- a/src/solace_ai_connector/flow_components/general/for_testing/storage_tester.py +++ b/src/solace_ai_connector/components/general/for_testing/storage_tester.py @@ -32,6 +32,9 @@ class MemoryTester(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + def invoke(self, message, data): storage = self.storage_manager.get_storage_handler( self.get_config("storage_name") diff --git a/src/solace_ai_connector/flow_components/general/iterate.py b/src/solace_ai_connector/components/general/iterate.py similarity index 95% rename from src/solace_ai_connector/flow_components/general/iterate.py rename to src/solace_ai_connector/components/general/iterate.py index fe521887..e0af98e8 100644 --- a/src/solace_ai_connector/flow_components/general/iterate.py +++ b/src/solace_ai_connector/components/general/iterate.py @@ -24,6 +24,9 @@ class Iterate(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + def invoke(self, message, data): # data is the list of items # Loop over them and output each one as a separate message diff --git a/src/solace_ai_connector/flow_components/general/__init__.py b/src/solace_ai_connector/components/general/langchain/__init__.py similarity index 100% rename from src/solace_ai_connector/flow_components/general/__init__.py rename to src/solace_ai_connector/components/general/langchain/__init__.py diff --git a/src/solace_ai_connector/flow_components/general/langchain/langchain_base.py b/src/solace_ai_connector/components/general/langchain/langchain_base.py similarity index 94% rename from 
src/solace_ai_connector/flow_components/general/langchain/langchain_base.py rename to src/solace_ai_connector/components/general/langchain/langchain_base.py index 36fc7fdf..4ffda1bb 100644 --- a/src/solace_ai_connector/flow_components/general/langchain/langchain_base.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_base.py @@ -7,8 +7,8 @@ class LangChainBase(ComponentBase): - def __init__(self, **kwargs): - super().__init__(**kwargs) + def __init__(self, module_info, **kwargs): + super().__init__(module_info, **kwargs) self.name = self.get_config("component_name") self.component_config = self.get_config("component_config") self.init() diff --git a/src/solace_ai_connector/flow_components/general/langchain/langchain_chat_model.py b/src/solace_ai_connector/components/general/langchain/langchain_chat_model.py similarity index 100% rename from src/solace_ai_connector/flow_components/general/langchain/langchain_chat_model.py rename to src/solace_ai_connector/components/general/langchain/langchain_chat_model.py diff --git a/src/solace_ai_connector/flow_components/general/langchain/langchain_chat_model_base.py b/src/solace_ai_connector/components/general/langchain/langchain_chat_model_base.py similarity index 99% rename from src/solace_ai_connector/flow_components/general/langchain/langchain_chat_model_base.py rename to src/solace_ai_connector/components/general/langchain/langchain_chat_model_base.py index f5895abe..8965324e 100644 --- a/src/solace_ai_connector/flow_components/general/langchain/langchain_chat_model_base.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_chat_model_base.py @@ -88,7 +88,6 @@ class LangChainChatModelBase(LangChainBase): - def invoke(self, message, data): messages = [] diff --git a/src/solace_ai_connector/flow_components/general/langchain/langchain_chat_model_with_history.py b/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py similarity index 99% rename from 
src/solace_ai_connector/flow_components/general/langchain/langchain_chat_model_with_history.py rename to src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py index 660f3d22..16f2df40 100644 --- a/src/solace_ai_connector/flow_components/general/langchain/langchain_chat_model_with_history.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py @@ -98,7 +98,7 @@ class LangChainChatModelWithHistory(LangChainChatModelBase): _lock = threading.Lock() def __init__(self, **kwargs): - super().__init__(**kwargs) + super().__init__(info, **kwargs) self.history_max_turns = self.get_config("history_max_turns", 20) self.history_max_tokens = self.get_config("history_max_tokens", 8000) self.stream_to_flow = self.get_config("stream_to_flow", "") diff --git a/src/solace_ai_connector/flow_components/general/langchain/langchain_embeddings.py b/src/solace_ai_connector/components/general/langchain/langchain_embeddings.py similarity index 97% rename from src/solace_ai_connector/flow_components/general/langchain/langchain_embeddings.py rename to src/solace_ai_connector/components/general/langchain/langchain_embeddings.py index 703b4b13..7951622e 100644 --- a/src/solace_ai_connector/flow_components/general/langchain/langchain_embeddings.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_embeddings.py @@ -62,8 +62,8 @@ class LangChainEmbeddings(LangChainBase): - # def __init__(self, config: dict): - # super().__init__(config) + def __init__(self, **kwargs): + super().__init__(info, **kwargs) def invoke(self, message, data): text = data["text"] diff --git a/src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_delete.py b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_delete.py similarity index 97% rename from src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_delete.py rename to 
src/solace_ai_connector/components/general/langchain/langchain_vector_store_delete.py index 490f0169..667d8828 100644 --- a/src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_delete.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_delete.py @@ -4,9 +4,6 @@ # embedding model configuration from ....common.log import log -from .langchain_base import ( - LangChainBase, -) from .langchain_vector_store_embedding_base import ( LangChainVectorStoreEmbeddingsBase, ) @@ -100,8 +97,8 @@ class LangChainVectorStoreDelete(LangChainVectorStoreEmbeddingsBase): - # def __init__(self, **kwargs): - # super().__init__(**kwargs) + def __init__(self, **kwargs): + super().__init__(info, **kwargs) def invoke(self, message, data): delete_ids = self.get_config("delete_ids", None) diff --git a/src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_embedding_base.py b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py similarity index 94% rename from src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_embedding_base.py rename to src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py index ea929523..aaa6ad19 100644 --- a/src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_embedding_base.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py @@ -5,9 +5,9 @@ ) -class LangChainVectorStoreEmbeddingsBase(LangChainBase): - # def __init__(self, **kwargs): - # super().__init__(**kwargs) +class LangChainVectorStoreEmbeddingsBase( + LangChainBase +): # pylint: disable=abstract-method def init(self): self.vector_store_info = { diff --git a/src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_embedding_index.py 
b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_index.py similarity index 97% rename from src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_embedding_index.py rename to src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_index.py index 801a29cf..9e41b541 100644 --- a/src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_embedding_index.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_index.py @@ -3,9 +3,6 @@ # as well, so the configuration for this component will also include the # embedding model configuration -from .langchain_base import ( - LangChainBase, -) from .langchain_vector_store_embedding_base import ( LangChainVectorStoreEmbeddingsBase, ) @@ -107,8 +104,8 @@ class LangChainVectorStoreEmbeddingsIndex(LangChainVectorStoreEmbeddingsBase): - # def __init__(self, **kwargs): - # super().__init__(**kwargs) + def __init__(self, **kwargs): + super().__init__(info, **kwargs) def invoke(self, message, data): # Get the texts and normalize them diff --git a/src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_embedding_search.py b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_search.py similarity index 97% rename from src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_embedding_search.py rename to src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_search.py index 11260c41..457db97e 100644 --- a/src/solace_ai_connector/flow_components/general/langchain/langchain_vector_store_embedding_search.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_search.py @@ -5,9 +5,6 @@ """ from ....common.log import log -from .langchain_base import ( - LangChainBase, -) from .langchain_vector_store_embedding_base import ( 
LangChainVectorStoreEmbeddingsBase, ) @@ -106,8 +103,8 @@ class LangChainVectorStoreEmbeddingsSearch(LangChainVectorStoreEmbeddingsBase): - # def __init__(self, **kwargs): - # super().__init__(**kwargs) + def __init__(self, **kwargs): + super().__init__(info, **kwargs) def invoke(self, message, data): text = data["text"] diff --git a/src/solace_ai_connector/flow_components/general/message_filter.py b/src/solace_ai_connector/components/general/message_filter.py similarity index 97% rename from src/solace_ai_connector/flow_components/general/message_filter.py rename to src/solace_ai_connector/components/general/message_filter.py index 7c34bea3..f02e0d63 100644 --- a/src/solace_ai_connector/flow_components/general/message_filter.py +++ b/src/solace_ai_connector/components/general/message_filter.py @@ -38,7 +38,7 @@ class MessageFilter(ComponentBase): def __init__(self, **kwargs): - super().__init__(**kwargs) + super().__init__(info, **kwargs) self.pass_current_message = False def invoke(self, message, data): diff --git a/src/solace_ai_connector/flow_components/general/pass_through.py b/src/solace_ai_connector/components/general/pass_through.py similarity index 87% rename from src/solace_ai_connector/flow_components/general/pass_through.py rename to src/solace_ai_connector/components/general/pass_through.py index ccc32ffa..635e1059 100644 --- a/src/solace_ai_connector/flow_components/general/pass_through.py +++ b/src/solace_ai_connector/components/general/pass_through.py @@ -19,6 +19,9 @@ class PassThrough(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + def invoke(self, message, data): if data is None: return {} diff --git a/src/solace_ai_connector/flow_components/general/user_processor.py b/src/solace_ai_connector/components/general/user_processor.py similarity index 94% rename from src/solace_ai_connector/flow_components/general/user_processor.py rename to src/solace_ai_connector/components/general/user_processor.py index 
4e9c5ec2..3f61e318 100644 --- a/src/solace_ai_connector/flow_components/general/user_processor.py +++ b/src/solace_ai_connector/components/general/user_processor.py @@ -31,6 +31,9 @@ class UserProcessor(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + def invoke(self, message, data): message.set_invoke_data(data) component_processing = self.get_config("component_processing") diff --git a/src/solace_ai_connector/flow_components/general/for_testing/__init__.py b/src/solace_ai_connector/components/inputs_outputs/__init__.py similarity index 100% rename from src/solace_ai_connector/flow_components/general/for_testing/__init__.py rename to src/solace_ai_connector/components/inputs_outputs/__init__.py diff --git a/src/solace_ai_connector/flow_components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py similarity index 97% rename from src/solace_ai_connector/flow_components/inputs_outputs/broker_base.py rename to src/solace_ai_connector/components/inputs_outputs/broker_base.py index 8871065f..ccc5f70d 100644 --- a/src/solace_ai_connector/flow_components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -28,8 +28,8 @@ class BrokerBase(ComponentBase): - def __init__(self, **kwargs): - super().__init__(**kwargs) + def __init__(self, module_info, **kwargs): + super().__init__(module_info, **kwargs) self.broker_properties = self.get_broker_properties() self.messaging_service = ( MessagingServiceBuilder().from_properties(self.broker_properties).build() diff --git a/src/solace_ai_connector/flow_components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py similarity index 99% rename from src/solace_ai_connector/flow_components/inputs_outputs/broker_input.py rename to src/solace_ai_connector/components/inputs_outputs/broker_input.py index e996a261..40053413 100644 --- 
a/src/solace_ai_connector/flow_components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -87,7 +87,7 @@ class BrokerInput(BrokerBase): def __init__(self, **kwargs): - super().__init__(**kwargs) + super().__init__(info, **kwargs) self.need_acknowledgement = True self.connect() diff --git a/src/solace_ai_connector/flow_components/inputs_outputs/broker_output.py b/src/solace_ai_connector/components/inputs_outputs/broker_output.py similarity index 99% rename from src/solace_ai_connector/flow_components/inputs_outputs/broker_output.py rename to src/solace_ai_connector/components/inputs_outputs/broker_output.py index e8416649..435d9c83 100644 --- a/src/solace_ai_connector/flow_components/inputs_outputs/broker_output.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_output.py @@ -100,7 +100,7 @@ class BrokerOutput(BrokerBase): def __init__(self, **kwargs): - super().__init__(**kwargs) + super().__init__(info, **kwargs) self.needs_acknowledgement = False self.propagate_acknowledgements = self.get_config("propagate_acknowledgements") self.copy_user_properties = self.get_config("copy_user_properties") diff --git a/src/solace_ai_connector/flow_components/inputs_outputs/error_input.py b/src/solace_ai_connector/components/inputs_outputs/error_input.py similarity index 99% rename from src/solace_ai_connector/flow_components/inputs_outputs/error_input.py rename to src/solace_ai_connector/components/inputs_outputs/error_input.py index 0ebc53c3..88a68289 100644 --- a/src/solace_ai_connector/flow_components/inputs_outputs/error_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/error_input.py @@ -101,7 +101,7 @@ class ErrorInput(ComponentBase): def __init__(self, **kwargs): - super().__init__(**kwargs) + super().__init__(info, **kwargs) self.max_rate = self.get_config("max_rate") self.error_count_in_last_second = 0 self.error_count_start_time = time.time() diff --git 
a/src/solace_ai_connector/flow_components/inputs_outputs/file_input.py b/src/solace_ai_connector/components/inputs_outputs/file_input.py similarity index 83% rename from src/solace_ai_connector/flow_components/inputs_outputs/file_input.py rename to src/solace_ai_connector/components/inputs_outputs/file_input.py index 8ea0300e..73c57a66 100644 --- a/src/solace_ai_connector/flow_components/inputs_outputs/file_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/file_input.py @@ -4,9 +4,14 @@ from ..component_base import ComponentBase from ...common.message import Message +info = {} + class File(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + def get_next_message(self): # Get the next message from the file diff --git a/src/solace_ai_connector/flow_components/inputs_outputs/slack_base.py b/src/solace_ai_connector/components/inputs_outputs/slack_base.py similarity index 92% rename from src/solace_ai_connector/flow_components/inputs_outputs/slack_base.py rename to src/solace_ai_connector/components/inputs_outputs/slack_base.py index 4f4f68e7..5d93e34a 100644 --- a/src/solace_ai_connector/flow_components/inputs_outputs/slack_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/slack_base.py @@ -8,8 +8,8 @@ class SlackBase(ComponentBase, ABC): _slack_apps = {} - def __init__(self, **kwargs): - super().__init__(**kwargs) + def __init__(self, module_info, **kwargs): + super().__init__(module_info, **kwargs) self.slack_bot_token = self.get_config("slack_bot_token") self.slack_app_token = self.get_config("slack_app_token") self.max_file_size = self.get_config("max_file_size", 20) diff --git a/src/solace_ai_connector/flow_components/inputs_outputs/slack_input.py b/src/solace_ai_connector/components/inputs_outputs/slack_input.py similarity index 99% rename from src/solace_ai_connector/flow_components/inputs_outputs/slack_input.py rename to src/solace_ai_connector/components/inputs_outputs/slack_input.py index 
98a8054a..63712111 100644 --- a/src/solace_ai_connector/flow_components/inputs_outputs/slack_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/slack_input.py @@ -143,7 +143,7 @@ class SlackInput(SlackBase): def __init__(self, **kwargs): - super().__init__(**kwargs) + super().__init__(info, **kwargs) self.slack_receiver_queue = None self.slack_receiver = None self.init_slack_receiver() @@ -220,7 +220,7 @@ def handle_channel_event(self, event): self.handle_event(event) - def handle_group_event(self, event): + def handle_group_event(self, _event): log.info("Received a private group event. Ignoring.") def handle_event(self, event): @@ -432,7 +432,7 @@ def handle_app_mention(event): self.handle_event(event) @self.app.event("member_joined_channel") - def handle_member_joined_channel(event, say, context): + def handle_member_joined_channel(event, _say, context): if ( self.send_history_on_join and event.get("user") == context["bot_user_id"] diff --git a/src/solace_ai_connector/flow_components/inputs_outputs/slack_output.py b/src/solace_ai_connector/components/inputs_outputs/slack_output.py similarity index 97% rename from src/solace_ai_connector/flow_components/inputs_outputs/slack_output.py rename to src/solace_ai_connector/components/inputs_outputs/slack_output.py index ce12cee3..2263f644 100644 --- a/src/solace_ai_connector/flow_components/inputs_outputs/slack_output.py +++ b/src/solace_ai_connector/components/inputs_outputs/slack_output.py @@ -104,8 +104,8 @@ class SlackOutput(SlackBase): - # def __init__(self, **kwargs): - # super().__init__(**kwargs) + def __init__(self, **kwargs): + super().__init__(info, **kwargs) def invoke(self, message, data): message_info = data.get("message_info") @@ -165,7 +165,7 @@ def send_message(self, message): filename=file["name"], ) except Exception as e: - log.error(f"Error sending slack message: {e}") + log.error("Error sending slack message: %s", e) super().send_message(message) diff --git 
a/src/solace_ai_connector/flow_components/inputs_outputs/stdin_input.py b/src/solace_ai_connector/components/inputs_outputs/stdin_input.py similarity index 93% rename from src/solace_ai_connector/flow_components/inputs_outputs/stdin_input.py rename to src/solace_ai_connector/components/inputs_outputs/stdin_input.py index e54facc3..a4fb83eb 100644 --- a/src/solace_ai_connector/flow_components/inputs_outputs/stdin_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/stdin_input.py @@ -25,6 +25,9 @@ class Stdin(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + def get_next_message(self): # Get the next message from STDIN obj = {"text": input(self.config.get("prompt", "Enter text: "))} diff --git a/src/solace_ai_connector/flow_components/inputs_outputs/stdout_output.py b/src/solace_ai_connector/components/inputs_outputs/stdout_output.py similarity index 88% rename from src/solace_ai_connector/flow_components/inputs_outputs/stdout_output.py rename to src/solace_ai_connector/components/inputs_outputs/stdout_output.py index 8bb97d7e..0309f1f9 100644 --- a/src/solace_ai_connector/flow_components/inputs_outputs/stdout_output.py +++ b/src/solace_ai_connector/components/inputs_outputs/stdout_output.py @@ -20,6 +20,8 @@ class Stdout(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) def invoke(self, message, data): # Print the message to STDOUT diff --git a/src/solace_ai_connector/flow_components/inputs_outputs/timer_input.py b/src/solace_ai_connector/components/inputs_outputs/timer_input.py similarity index 97% rename from src/solace_ai_connector/flow_components/inputs_outputs/timer_input.py rename to src/solace_ai_connector/components/inputs_outputs/timer_input.py index f38adebe..dc2821e4 100644 --- a/src/solace_ai_connector/flow_components/inputs_outputs/timer_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/timer_input.py @@ -5,7 +5,7 @@ from ..component_base import ComponentBase 
from ...common.message import Message -from ...common.log import log +# from ...common.log import log info = { @@ -35,7 +35,7 @@ class TimerInput(ComponentBase): def __init__(self, **kwargs): - super().__init__(**kwargs) + super().__init__(info, **kwargs) self.interval_ms = self.get_config("interval_ms") if not self.interval_ms: raise ValueError( diff --git a/src/solace_ai_connector/flow/flow.py b/src/solace_ai_connector/flow/flow.py index 2b4ec53d..20a0a3a7 100644 --- a/src/solace_ai_connector/flow/flow.py +++ b/src/solace_ai_connector/flow/flow.py @@ -78,7 +78,7 @@ def create_component_group(self, component, index): component_instance = component_class( config=component, index=index, - module_info=self.module_info, + # module_info=self.module_info, flow_name=self.name, stop_signal=self.stop_signal, sibling_component=sibling_component, diff --git a/src/solace_ai_connector/flow_components/general/langchain/__init__.py b/src/solace_ai_connector/flow_components/general/langchain/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/solace_ai_connector/flow_components/inputs_outputs/__init__.py b/src/solace_ai_connector/flow_components/inputs_outputs/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/main.py b/src/solace_ai_connector/main.py similarity index 82% rename from src/main.py rename to src/solace_ai_connector/main.py index e58d2f2d..fd785887 100644 --- a/src/main.py +++ b/src/solace_ai_connector/main.py @@ -1,7 +1,7 @@ import os import sys import yaml -from solace_ai_connector.solace_ai_connector import SolaceAiConnector +from .solace_ai_connector import SolaceAiConnector def load_config(file): @@ -38,7 +38,17 @@ def merge_config(dict1, dict2): return merged -def main(files): +def main(): + + files = sys.argv[1:] + + if not files: + print("No configuration files provided", file=sys.stderr) + base_file = os.path.basename(sys.argv[0]) + print( + f"Usage: {base_file} [ ...]", file=sys.stderr + ) + sys.exit(1) 
# Loop over the configuration files full_config = {} @@ -59,6 +69,5 @@ def main(files): if __name__ == "__main__": # Read in the configuration yaml filenames from the args - config_files = sys.argv[1:] - main(config_files) + main() diff --git a/src/solace_ai_connector/transforms/__init__.py b/src/solace_ai_connector/transforms/__init__.py new file mode 100644 index 00000000..b7406e44 --- /dev/null +++ b/src/solace_ai_connector/transforms/__init__.py @@ -0,0 +1,6 @@ +from .map import MapTransform +from .copy_list_item import CopyListItemTransform +from .append import AppendTransform +from .reduce import ReduceTransform +from .copy import CopyTransform +from .filter import FilterTransform diff --git a/src/solace_ai_connector/common/transforms/append.py b/src/solace_ai_connector/transforms/append.py similarity index 98% rename from src/solace_ai_connector/common/transforms/append.py rename to src/solace_ai_connector/transforms/append.py index 588c00b9..4b99a2c2 100644 --- a/src/solace_ai_connector/common/transforms/append.py +++ b/src/solace_ai_connector/transforms/append.py @@ -1,6 +1,6 @@ """Append Transform - add a value to a list""" -from ..log import log +from ..common.log import log from .transform_base import TransformBase info = { diff --git a/src/solace_ai_connector/common/transforms/copy.py b/src/solace_ai_connector/transforms/copy.py similarity index 100% rename from src/solace_ai_connector/common/transforms/copy.py rename to src/solace_ai_connector/transforms/copy.py diff --git a/src/solace_ai_connector/common/transforms/copy_list_item.py b/src/solace_ai_connector/transforms/copy_list_item.py similarity index 96% rename from src/solace_ai_connector/common/transforms/copy_list_item.py rename to src/solace_ai_connector/transforms/copy_list_item.py index a2ede7f2..1451637e 100644 --- a/src/solace_ai_connector/common/transforms/copy_list_item.py +++ b/src/solace_ai_connector/transforms/copy_list_item.py @@ -107,6 +107,6 @@ def invoke(self, message, 
calling_object=None): return message - def extend_list_if_needed(self, list, index): - while len(list) <= index: - list.append(None) + def extend_list_if_needed(self, list_to_extend, index): + while len(list_to_extend) <= index: + list_to_extend.append(None) diff --git a/src/solace_ai_connector/common/transforms/filter.py b/src/solace_ai_connector/transforms/filter.py similarity index 100% rename from src/solace_ai_connector/common/transforms/filter.py rename to src/solace_ai_connector/transforms/filter.py diff --git a/src/solace_ai_connector/common/transforms/map.py b/src/solace_ai_connector/transforms/map.py similarity index 100% rename from src/solace_ai_connector/common/transforms/map.py rename to src/solace_ai_connector/transforms/map.py diff --git a/src/solace_ai_connector/common/transforms/reduce.py b/src/solace_ai_connector/transforms/reduce.py similarity index 100% rename from src/solace_ai_connector/common/transforms/reduce.py rename to src/solace_ai_connector/transforms/reduce.py diff --git a/src/solace_ai_connector/common/transforms/transform_base.py b/src/solace_ai_connector/transforms/transform_base.py similarity index 97% rename from src/solace_ai_connector/common/transforms/transform_base.py rename to src/solace_ai_connector/transforms/transform_base.py index d40ed76c..6492326c 100644 --- a/src/solace_ai_connector/common/transforms/transform_base.py +++ b/src/solace_ai_connector/transforms/transform_base.py @@ -1,6 +1,6 @@ # Coordinator for performing transforms on data -from ..utils import get_source_expression +from ..common.utils import get_source_expression class TransformBase: diff --git a/src/solace_ai_connector/common/transforms/transforms.py b/src/solace_ai_connector/transforms/transforms.py similarity index 100% rename from src/solace_ai_connector/common/transforms/transforms.py rename to src/solace_ai_connector/transforms/transforms.py From fea275911ab7292308dc734f5583f5631aa35fe1 Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: 
Thu, 6 Jun 2024 15:17:15 -0400 Subject: [PATCH 07/18] Fixed some documentation generation after package reorganization --- .gitignore | 2 + Makefile | 7 +- docs/components/broker_output.md | 6 ++ docs/components/index.md | 3 + .../langchain_chat_model_with_history.md | 74 +++++++++++++++++++ docs/components/slack_input.md | 8 ++ docs/components/slack_output.md | 74 +++++++++++++++++++ docs/components/timer_input.md | 26 +++++++ .../general/langchain/langchain_chat_model.py | 6 +- .../langchain_chat_model_with_history.py | 3 +- .../components/inputs_outputs/file_input.py | 4 +- src/tools/gen_component_docs.py | 37 +++++++--- 12 files changed, 231 insertions(+), 19 deletions(-) create mode 100644 docs/components/langchain_chat_model_with_history.md create mode 100644 docs/components/slack_output.md create mode 100644 docs/components/timer_input.md diff --git a/.gitignore b/.gitignore index 7c97f0e6..f67ffbab 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ temp/ *.trace examples/private launch.json +prompts.txt +prompts.yaml # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/Makefile b/Makefile index bd7250d3..57f406eb 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,10 @@ VERSION ?= local gen-docs: @python3 src/tools/gen_component_docs.py -build: gen-docs +build-pypi: + @python3 -m build + +build: gen-docs build-pypi @docker build --platform=linux/amd64 -t solace/solace-ai-connector:${VERSION} . 
run-local: @@ -19,7 +22,7 @@ structure-test: --config container-structure-test-file.yaml pytest: - @pytest + @pytest pytest-docker: @docker run --rm --entrypoint pytest solace/solace-ai-connector:${VERSION} diff --git a/docs/components/broker_output.md b/docs/components/broker_output.md index 71562fcb..3389bfca 100644 --- a/docs/components/broker_output.md +++ b/docs/components/broker_output.md @@ -16,6 +16,9 @@ component_config: payload_encoding: payload_format: propagate_acknowledgements: + copy_user_properties: + decrement_ttl: + discard_on_ttl_expiration: ``` | Parameter | Required | Default | Description | @@ -28,6 +31,9 @@ component_config: | payload_encoding | False | utf-8 | Encoding for the payload (utf-8, base64, gzip, none) | | payload_format | False | json | Format for the payload (json, yaml, text) | | propagate_acknowledgements | False | True | Propagate acknowledgements from the broker to the previous components | +| copy_user_properties | False | False | Copy user properties from the input message | +| decrement_ttl | False | | If present, decrement the user_properties.ttl by 1 | +| discard_on_ttl_expiration | False | False | If present, discard the message when the user_properties.ttl is 0 | ## Component Input Schema diff --git a/docs/components/index.md b/docs/components/index.md index 1e7e90ac..a6b7cd54 100644 --- a/docs/components/index.md +++ b/docs/components/index.md @@ -9,6 +9,7 @@ | [error_input](error_input.md) | Receive processing errors from the Solace AI Event Connector. Note that the component_input configuration is ignored. This component should be used to create a flow that handles errors from other flows. 
| | [iterate](iterate.md) | Take a single message that is a list and output each item in that list as a separate message | | [langchain_chat_model](langchain_chat_model.md) | Provide access to all the LangChain chat models via configuration | +| [langchain_chat_model_with_history](langchain_chat_model_with_history.md) | A chat model based on LangChain that includes keeping per-session history of the conversation. Note that this component will only take the first system message and the first human message in the messages array. | | [langchain_embeddings](langchain_embeddings.md) | Provide access to all the LangChain Text Embeddings components via configuration | | [langchain_vector_store_delete](langchain_vector_store_delete.md) | This component allows for entries in a LangChain Vector Store to be deleted. This is needed for the continued maintenance of the vector store. Due to the nature of langchain vector stores, you need to specify an embedding component even though it is not used in this component. | | [langchain_vector_store_embedding_index](langchain_vector_store_embedding_index.md) | Use LangChain Vector Stores to index text for later semantic searches. This will take text, run it through an embedding model and then store it in a vector database. | @@ -16,6 +17,8 @@ | [message_filter](message_filter.md) | A filtering component. This will apply a user configurable expression. If the expression evaluates to True, the message will be passed on. If the expression evaluates to False, the message will be discarded. If the message is discarded, any previous components that require an acknowledgement will be acknowledged. | | [pass_through](pass_through.md) | What goes in comes out | | [slack_input](slack_input.md) | Slack input component. The component connects to Slack using the Bolt API and receives messages from Slack channels. | +| [slack_output](slack_output.md) | Slack output component. The component sends messages to Slack channels using the Bolt API. 
| | [stdin_input](stdin_input.md) | STDIN input component. The component will prompt for input, which will then be placed in the message payload using the output schema below. | | [stdout_output](stdout_output.md) | STDOUT output component | +| [timer_input](timer_input.md) | An input that will generate a message at a specified interval. | | [user_processor](user_processor.md) | A component that allows the processing stage to be defined in the configuration file. | diff --git a/docs/components/langchain_chat_model_with_history.md b/docs/components/langchain_chat_model_with_history.md new file mode 100644 index 00000000..5f828b9d --- /dev/null +++ b/docs/components/langchain_chat_model_with_history.md @@ -0,0 +1,74 @@ +# LangChainChatModelWithHistory + +A chat model based on LangChain that includes keeping per-session history of the conversation. Note that this component will only take the first system message and the first human message in the messages array. + +## Configuration Parameters + +```yaml +component_name: +component_module: langchain_chat_model_with_history +component_config: + langchain_module: + langchain_class: + langchain_component_config: + llm_response_format: + history_max_turns: + history_max_tokens: + history_module: + history_class: + history_config: + stream_to_flow: + llm_mode: + stream_batch_size: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| langchain_module | True | | The chat model module - e.g. 'langchain_openai.chat_models' | +| langchain_class | True | | The chat model class to use - e.g. ChatOpenAI | +| langchain_component_config | True | | Model specific configuration for the chat model. See documentation for valid parameter names. | +| llm_response_format | False | | The response format for this LLM request. This can be 'json', 'yaml', or 'text'. If set to 'json' or 'yaml', the response will be parsed by the appropriate parser and the fields will be available in the response object. 
If set to 'text', the response will be returned as a string. | +| history_max_turns | False | 20 | The maximum number of turns to keep in the history. If not set, the history will be limited to 20 turns. | +| history_max_tokens | False | 8000 | The maximum number of tokens to keep in the history. If not set, the history will be limited to 8000 tokens. | +| history_module | False | langchain_community.chat_message_histories | The module that contains the history class. Default: 'langchain_community.chat_message_histories' | +| history_class | False | ChatMessageHistory | The class to use for the history. Default: 'ChatMessageHistory' | +| history_config | False | | The configuration for the history class. | +| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. | +| llm_mode | False | | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | + + +## Component Input Schema + +``` +{ + messages: [ + { + role: , + content: + }, + ... + ], + session_id: , + clear_history: +} +``` +| Field | Required | Description | +| --- | --- | --- | +| messages | True | | +| messages[].role | False | The role of the LLM message (user, assistant, system) | +| messages[].content | True | The content of the LLM message | +| session_id | True | The session ID for the conversation. | +| clear_history | False | Whether to clear the history for the session. 
| + + +## Component Output Schema + +``` +{ + result: +} +``` +| Field | Required | Description | +| --- | --- | --- | +| result | True | | diff --git a/docs/components/slack_input.md b/docs/components/slack_input.md index 5351c940..de6110b1 100644 --- a/docs/components/slack_input.md +++ b/docs/components/slack_input.md @@ -10,16 +10,24 @@ component_module: slack_input component_config: slack_bot_token: slack_app_token: + share_slack_connection: max_file_size: max_total_file_size: + listen_to_channels: + send_history_on_join: + acknowledgement_message: ``` | Parameter | Required | Default | Description | | --- | --- | --- | --- | | slack_bot_token | False | | The Slack bot token to connect to Slack. | | slack_app_token | False | | The Slack app token to connect to Slack. | +| share_slack_connection | False | | Share the Slack connection with other components in this instance. | | max_file_size | False | 20 | The maximum file size to download from Slack in MB. Default: 20MB | | max_total_file_size | False | 20 | The maximum total file size to download from Slack in MB. Default: 20MB | +| listen_to_channels | False | False | Whether to listen to channels or not. Default: False | +| send_history_on_join | False | False | Send history on join. Default: False | +| acknowledgement_message | False | | The message to send to acknowledge the user's message has been received. | diff --git a/docs/components/slack_output.md b/docs/components/slack_output.md new file mode 100644 index 00000000..e1ba4383 --- /dev/null +++ b/docs/components/slack_output.md @@ -0,0 +1,74 @@ +# SlackOutput + +Slack output component. The component sends messages to Slack channels using the Bolt API. 
+ +## Configuration Parameters + +```yaml +component_name: +component_module: slack_output +component_config: + slack_bot_token: + slack_app_token: + share_slack_connection: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| slack_bot_token | False | | The Slack bot token to connect to Slack. | +| slack_app_token | False | | The Slack app token to connect to Slack. | +| share_slack_connection | False | | Share the Slack connection with other components in this instance. | + + +## Component Input Schema + +``` +{ + message_info: { + channel: , + type: , + user_email: , + client_msg_id: , + ts: , + subtype: , + event_ts: , + channel_type: , + user_id: , + session_id: + }, + content: { + text: , + files: [ + { + name: , + content: , + mime_type: , + filetype: , + size: + }, + ... + ] + } +} +``` +| Field | Required | Description | +| --- | --- | --- | +| message_info | True | | +| message_info.channel | True | | +| message_info.type | False | | +| message_info.user_email | False | | +| message_info.client_msg_id | False | | +| message_info.ts | False | | +| message_info.subtype | False | | +| message_info.event_ts | False | | +| message_info.channel_type | False | | +| message_info.user_id | False | | +| message_info.session_id | True | | +| content | True | | +| content.text | False | | +| content.files | False | | +| contentfiles[].name | False | | +| contentfiles[].content | False | | +| contentfiles[].mime_type | False | | +| contentfiles[].filetype | False | | +| contentfiles[].size | False | | diff --git a/docs/components/timer_input.md b/docs/components/timer_input.md new file mode 100644 index 00000000..0b35cbc1 --- /dev/null +++ b/docs/components/timer_input.md @@ -0,0 +1,26 @@ +# TimerInput + +An input that will generate a message at a specified interval. 
+ +## Configuration Parameters + +```yaml +component_name: +component_module: timer_input +component_config: + interval_ms: + skip_messages_if_behind: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| interval_ms | False | | The interval in milliseconds at which to generate a message. | +| skip_messages_if_behind | False | False | If false, when the component is blocked for some time, it will catch up by generating multiple messages in quick succession. If true, then the component will always wait at least the interval time before generating the next message. Note that due to some messages in the pipeline, there will always be a couple of quick messages generated. | + + + +## Component Output Schema + +``` + +``` diff --git a/src/solace_ai_connector/components/general/langchain/langchain_chat_model.py b/src/solace_ai_connector/components/general/langchain/langchain_chat_model.py index 29a71502..a025b518 100644 --- a/src/solace_ai_connector/components/general/langchain/langchain_chat_model.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_chat_model.py @@ -1,13 +1,13 @@ # This is a wrapper around all the LangChain chat models # The configuration will control dynamic loading of the chat models - +from copy import deepcopy from .langchain_chat_model_base import ( LangChainChatModelBase, info_base, ) - -info = info_base +# Deepcopy info_base +info = deepcopy(info_base) info["class_name"] = "LangChainChatModel" diff --git a/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py b/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py index 16f2df40..62f3e1dd 100644 --- a/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py @@ -2,6 +2,7 @@ import threading from collections import namedtuple +from copy import 
deepcopy from langchain_core.chat_history import BaseChatMessageHistory from langchain_core.runnables.history import RunnableWithMessageHistory @@ -20,7 +21,7 @@ ) -info = info_base +info = deepcopy(info_base) info["class_name"] = "LangChainChatModelWithHistory" info["description"] = ( "A chat model based on LangChain that includes keeping per-session history of " diff --git a/src/solace_ai_connector/components/inputs_outputs/file_input.py b/src/solace_ai_connector/components/inputs_outputs/file_input.py index 73c57a66..f2cba7c5 100644 --- a/src/solace_ai_connector/components/inputs_outputs/file_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/file_input.py @@ -4,13 +4,11 @@ from ..component_base import ComponentBase from ...common.message import Message -info = {} - class File(ComponentBase): def __init__(self, **kwargs): - super().__init__(info, **kwargs) + super().__init__({}, **kwargs) def get_next_message(self): # Get the next message from the file diff --git a/src/tools/gen_component_docs.py b/src/tools/gen_component_docs.py index 314e0650..2556a308 100644 --- a/src/tools/gen_component_docs.py +++ b/src/tools/gen_component_docs.py @@ -23,7 +23,19 @@ def find_python_files(directory): def find_info_dicts(directory): for file in find_python_files(directory): # Dynamically import the module - spec = importlib.util.spec_from_file_location("module.name", file) + if file.endswith("__init__.py"): + continue + if "/solace_ai_connector/" in file: + module_name = re.sub( + r".*/solace_ai_connector/", + "solace_ai_connector/", + file, + ) + module_name = module_name.replace("/", ".") + if module_name.endswith(".py"): + module_name = module_name[:-3] + + spec = importlib.util.spec_from_file_location(module_name, file) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) # Check if the module has an info dictionary @@ -122,7 +134,7 @@ def create_markdown_documentation(directory, output_dir, module_type): # Create the component 
index table # Capitalize the type - type_title = type.capitalize() + type_title = module_type.capitalize() if isinstance(module_type, str) else "" markdown += f"# Built-in {type_title}s\n\n" markdown += "| Component | Description |\n" @@ -356,12 +368,17 @@ def schema_as_human_readable_string(schema): return schema["type"] -# Call the function -create_markdown_documentation( - "src/solace_ai_connector/flow_components", "docs/components", "component" -) -create_markdown_documentation( - "src/solace_ai_connector/common/transforms", "docs/transforms", "transform" -) +def main(): + # Call the function + create_markdown_documentation( + "src/solace_ai_connector/components", "docs/components", "component" + ) + create_markdown_documentation( + "src/solace_ai_connector/transforms", "docs/transforms", "transform" + ) + + create_ai_prompt(full_info) + -create_ai_prompt(full_info) +if __name__ == "__main__": + main() From bcac8ea7e6b7b8e144cafdb30d27ceeda984204b Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Thu, 6 Jun 2024 15:22:07 -0400 Subject: [PATCH 08/18] chore: Remove unused slack.yaml configuration file --- slack.yaml | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 slack.yaml diff --git a/slack.yaml b/slack.yaml deleted file mode 100644 index f6cfeb25..00000000 --- a/slack.yaml +++ /dev/null @@ -1,29 +0,0 @@ ---- -log: - stdout_log_level: INFO - log_file_level: DEBUG - log_file: solace_ai_connector.log - - -# List of flows -flows: - - name: slack_input - trace_level: DEBUG - components: - - component_name: slack_input - component_module: slack_input - component_config: - slack_bot_token: ${SLACK_BOT_TOKEN} - slack_app_token: ${SLACK_APP_TOKEN} - - - component_name: stdout_output - component_module: stdout_output - - # - name: broker_output - # component: outputs.solace_event_broker - # config: - # broker_connection_config: - # share_connection: solace - # topic_config: - # reply: true - \ No newline at end of file From 
5e4d35069b2096cede9ca8e94f395c1ff05e139b Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Sat, 8 Jun 2024 15:01:58 -0400 Subject: [PATCH 09/18] Made some changes in utils.py for dynamic loading. We will no longer do a directory search looking for modules. We are more explicit now Also promote the gen_docs tool to an installed script so that it can be used for plugins --- Makefile | 2 +- pyproject.toml | 1 + src/solace_ai_connector/common/utils.py | 35 +- .../components/__init__.py | 1 + .../tools/gen_component_docs.py | 401 ++++++++++++++++++ src/tools/gen_component_docs.py | 26 +- 6 files changed, 453 insertions(+), 13 deletions(-) create mode 100644 src/solace_ai_connector/tools/gen_component_docs.py diff --git a/Makefile b/Makefile index 57f406eb..94518aaa 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ include .env VERSION ?= local gen-docs: - @python3 src/tools/gen_component_docs.py + @python3 src/solace_ai_connector/tools/gen_component_docs.py build-pypi: @python3 -m build diff --git a/pyproject.toml b/pyproject.toml index 8dc87810..1899d481 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ documentation = "https://github.com/SolaceLabs/solace-ai-connector/blob/main/doc [project.scripts] solace-ai-connector = "solace_ai_connector.main:main" +solace-ai-connector-gen-docs = "solace_ai_connector.tools.gen_component_docs:main" [tool.hatch.build.targets.wheel] packages = ["src/solace_ai_connector"] diff --git a/src/solace_ai_connector/common/utils.py b/src/solace_ai_connector/common/utils.py index 2642cd2f..31e4c27b 100644 --- a/src/solace_ai_connector/common/utils.py +++ b/src/solace_ai_connector/common/utils.py @@ -98,18 +98,35 @@ def import_module(name, base_path=None): """Import a module by name""" if base_path: - if not os.path.exists(base_path): + if base_path not in sys.path: sys.path.append(base_path) try: module = importlib.import_module(name) - except ModuleNotFoundError: - try: - module = import_from_directories(name, 
base_path=base_path) - except Exception as e: - raise ImportError( - f"Module load error for {name}, base_path={base_path} ", e - ) from e - return module + return module + except ModuleNotFoundError as exc: + # If the module does not have a path associated with it, try + # importing it from the known prefixes - annoying that this + # is necessary. It seems you can't dynamically import a module + # that is listed in an __init__.py file :( + if "." not in name: + for prefix in [ + "solace_ai_connector.components", + "solace_ai_connector.components.general", + "solace_ai_connector.components.general.for_testing", + "solace_ai_connector.components.general.langchain", + "solace_ai_connector.components.inputs_outputs", + "solace_ai_connector.transforms", + "solace_ai_connector.common", + ]: + full_name = f"{prefix}.{name}" + try: + module = importlib.import_module(full_name) + return module + except ModuleNotFoundError: + pass + except Exception as e: + raise ImportError(f"Module load error for {full_name}: {e}") from e + raise ImportError(f"Module load error for {name}") from exc def invoke_config(config, allow_source_expression=False): diff --git a/src/solace_ai_connector/components/__init__.py b/src/solace_ai_connector/components/__init__.py index c60701ec..b025a4eb 100644 --- a/src/solace_ai_connector/components/__init__.py +++ b/src/solace_ai_connector/components/__init__.py @@ -10,6 +10,7 @@ stdin_input, slack_input, ) + from .general import ( user_processor, aggregate, diff --git a/src/solace_ai_connector/tools/gen_component_docs.py b/src/solace_ai_connector/tools/gen_component_docs.py new file mode 100644 index 00000000..e8bd0be9 --- /dev/null +++ b/src/solace_ai_connector/tools/gen_component_docs.py @@ -0,0 +1,401 @@ +import os +import re +import sys +import json +import glob +import importlib.util +import yaml # pylint: disable=import-error + +sys.path.append("src") + + +# Function to descend into a directory and find all Python files +def 
find_python_files(directory): + for root, _, files in os.walk(directory): + for file in files: + # Skip if 'for_testing' is in the path + if "for_testing" in root: + continue + if file.endswith(".py"): + yield os.path.join(root, file) + + +# For each Python file, import it and see if it has a info dictionary at the top level +def find_info_dicts(directory): + for file in find_python_files(directory): + # Dynamically import the module + if file.endswith("__init__.py"): + continue + if "/solace_ai_connector/" in file: + module_name = re.sub( + r".*/solace_ai_connector/", + "solace_ai_connector/", + file, + ) + module_name = module_name.replace("/", ".") + if module_name.endswith(".py"): + module_name = module_name[:-3] + + spec = importlib.util.spec_from_file_location(module_name, file) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + # Check if the module has an info dictionary + if hasattr(module, "info"): + yield file, module.info + + +# For each info dictionary, create the markdown documentation +current_component = "" +full_info = {} + + +def create_markdown_documentation(directory, output_dir, module_type): + components = [] + + # full_info contains all the info dictionaries. 
This will be used later + # to produce an AI prompt to help users create a new configuration + full_info[module_type] = [] + for file, info in find_info_dicts(directory): + # Get the base file name without the extension + name = re.sub(r".*/", "", file) + name = re.sub(r".py$", "", name) + global current_component # pylint: disable=global-statement + current_component = name + + full_info[module_type].append(info) + + # Create the markdown documentation + markdown = f"# {info['class_name']}\n\n" + markdown += f"{info['description']}\n\n" + markdown += "## Configuration Parameters\n\n" + markdown += "```yaml\n" + if module_type == "component": + markdown += "component_name: \n" + markdown += f"component_module: {name}\n" + markdown += "component_config:\n" + elif module_type == "transform": + markdown += "input_transforms:\n" + markdown += f" type: {name}\n" + for param in info["config_parameters"]: + markdown += f" {param['name']}: <{param.get('type', 'string')}>\n" + markdown += "```\n\n" + + if "config_parameters" in info and len(info["config_parameters"]) > 0: + markdown += "| Parameter | Required | Default | Description |\n" + markdown += "| --- | --- | --- | --- |\n" + for param in info["config_parameters"]: + markdown += f"| {param['name']} | {param.get('required', False)} | {param.get('default', '')} | {param['description']} |\n" + markdown += "\n" + else: + markdown += "No configuration parameters\n\n" + + if "request_schema" in info: + print(f"{name} has a request schema") + if "input_schema" in info: + fields = [] + markdown += "\n## Component Input Schema\n\n```\n" + markdown += format_json_schema(info["input_schema"], fields) + markdown += "\n```\n" + # markdown += "\n## Component Input Schema Fields\n\n" + markdown += format_fields(fields) + + if "output_schema" in info: + fields = [] + markdown += "\n\n## Component Output Schema\n\n```\n" + markdown += format_json_schema(info["output_schema"], fields) + markdown += "\n```\n" + # markdown += "\n## 
Component Output Schema Fields\n\n" + markdown += format_fields(fields) + + if "example_config" in info: + markdown += "\n\n## Example Configuration\n\n" + markdown += info["example_config"] + + # Write all the files into "./docs" and change the .py to .md + # The files are uniquely named without the path, so we can remove that + file = re.sub(r".*/", "", file) + file = re.sub(r".py$", ".md", file) + components.append( + { + "file": file, + "name": re.sub(r"\..*", "", file), + "description": info.get( + "short_description", info.get("description", "") + ), + } + ) + file = f"{output_dir}/{file}" + + # Write the markdown to a file + with open(file, "w", encoding="utf-8") as f: + f.write(markdown) + + markdown = "" + + # Create the component index table + + # Capitalize the type + type_title = module_type.capitalize() if isinstance(module_type, str) else "" + markdown += f"# Built-in {type_title}s\n\n" + + markdown += "| Component | Description |\n" + markdown += "| --- | --- |\n" + + # Sort the components by name + components = sorted(components, key=lambda x: x["name"]) + + for component in components: + markdown += f"| [{component['name']}]({component['file']}) | {component['description']} |\n" + + with open(f"{output_dir}/index.md", "w", encoding="utf-8") as f: + f.write(markdown) + + +def create_ai_prompt(info): + """Use the info dictionary to create an AI prompt to help users create a + new configuration. This prompt will contain all the component and transform information, + information about the purpose of the connector and an example configuration. Later, the + user will have to provide the message {input_schema, queue, topic}, and the desired + output_schema and topic. + + """ + + system_prompt = ( + "You are an assistant who will help users create a new configuration for the " + "Solace AI Event Connector. 
The connector is a tool that allows users to create " + "flows that process messages from a Solace event broker, generally to help interface " + "with AI based services. A typical flow will start with a message from the broker, " + "pass through a series of components and transforms, and then send the message back to " + "the broker. The components and transforms are user-configurable and can be used to " + "manipulate the message in various ways. The user will have to provide the message " + "input_schema, queue, or topic, and the desired output_schema and topic. Your job is to " + "to create an initial configuration for the user. \n" + "Make sure you use ${ENV_VARS} for any sensitive information. \n" + "Your interaction with the user will via a chat interface. Before you generate the " + "YAML configuration, you will have to ask the user for the input_schema, queue, or topic, " + "and the desired output_schema and topic. \n" + "You can ask as many questions as you need to get the information you need. Try to make " + "the conversation flow naturally and confirm the user's input if there is any ambiguity - " + "for example, if they input the schema in a mixed JSON/YAML/pseudo structure, print it " + "back out for them in a clean YAML format and get confirmation that it is correct\n" + ) + + # Read in docs/configuration.md + with open("docs/configuration.md", "r", encoding="utf-8") as f: + configuration_prompt = f.read() + + # Read in an example configuration + # with open("examples/milvus_store.yaml", "r", encoding="utf-8") as f: + # example_config = f.read() + + prompt = ( + "Here is a structure that defines all the built-in components and transforms. \n" + f"\n{yaml.dump(info, default_flow_style=False)}\n" + "\n\n" + "Here is the markdown documentation for the configuration file: \n" + f"\n{configuration_prompt}\n\n" + "Here is an example configuration: \n" + "Take special care to ensure that the data format is correct as it moves component to " + "component. 
input_transforms will likely need to be created to ensure that the data is " + "in the correct format for each component. \n" + "Now, you will have to ask the user for the input_schema, queue, or topic, and the desired " + "output_schema and topic. \n" + ) + + # Write out a prompts.yaml file + prompts = { + "system_prompt": system_prompt, + "prompt": prompt, + } + with open("prompts.yaml", "w", encoding="utf-8") as f: + f.write(yaml.dump(prompts, default_style=">", default_flow_style=True)) + + print(prompts["system_prompt"]) + print(prompts["prompt"]) + with open("prompts.txt", "w", encoding="utf-8") as f: + f.write(prompts["system_prompt"]) + f.write(prompts["prompt"]) + + +def format_json_schema( + schema_dict, field_list, level=0, first_line_string="", prop_path="" +): + indent = " " * level + output = "" + if schema_dict is None: + print(f"Schema is None for {current_component}") + return "" + if "type" not in schema_dict: + print(f"Missing type in schema: {schema_dict} for {current_component}") + return "" + if schema_dict["type"] == "object": + # output += f"{indent}{{{first_line_string}\n" + output += f"{indent}{{{first_line_string}\n" + required = schema_dict.get("required", []) + for prop_name, prop_data in schema_dict.get("properties", {}).items(): + field_list.append( + { + "name": prop_path + "." 
+ prop_name if prop_path else prop_name, + "required": prop_name in required, + "description": prop_data.get("description", ""), + "data": prop_data, + } + ) + output += f"{indent} {prop_name}: " + output += format_json_schema( + prop_data, + field_list, + level + 1, + "", + prop_path + f"{prop_name}", + ) + # If not the last property, add a comma + if prop_name != list(schema_dict["properties"].keys())[-1]: + output += "," + output += "\n" + # If there were no properties, add to indicate that any object is allowed + if not schema_dict.get("properties"): + output += f"{indent} \n" + output += f"{indent}}}" + elif schema_dict["type"] == "array": + # output += f"{indent}[{first_line_string}\n" + output += f"[{first_line_string}\n" + output += format_json_schema( + schema_dict.get("items"), field_list, level + 1, "", prop_path + "[]" + ) + output += f",\n{indent} ...\n" + output += f"{indent}]" + else: + output += f"{indent}<{schema_dict['type']}>" + + return output + + +def format_fields(fields): + if not fields or len(fields) == 0: + return "" + # Put the fields in a markdown table + output = "| Field | Required | Description |\n" + output += "| --- | --- | --- |\n" + for field in fields: + output += ( + f"| {field['name']} | {field['required']} | {field['description']} |\n" + ) + return output + + +def format_response_schema_for_markdown(response_schema): + """ + Converts a response schema dictionary into a Markdown-formatted string. + + Args: + response_schema (dict): The response schema dictionary. + + Returns: + str: A Markdown-formatted string representing the schema. 
+ """ + + def recursive_markdown(data, level=0): + """Recursively builds the Markdown.""" + lines = [] + indent = " " * level + + if data["type"] == "object": + lines.append(f"{indent}" "{") + for prop_name, prop_data in data.get("properties", {}).items(): + if prop_data.get("type", "invalid") == "object": + lines.append(f"{indent} {prop_name}:") + lines.extend(recursive_markdown(prop_data, level + 2)) + lines.append(f"{indent} {prop_name}:") + lines.extend(recursive_markdown(prop_data, level + 2)) + lines.append(f"{indent}" "}") + + elif data["type"] == "array": + lines.append(f"{indent}* **Array of:**") + lines.extend(recursive_markdown(data.get("items"), level + 1)) + + else: # Base type + lines.append(f"{indent}* **{data['type']}**") + + if "required" in data: + lines.append(f"{indent}_(Required fields: {', '.join(data['required'])})_") + + return lines + + # Start the Markdown output + output = "```json\n" + output += json.dumps(response_schema, indent=2) # Pretty-print JSON + output += "\n```\n\n" + + # Add formatted description using the recursive helper + output += "**Detailed Schema Description**\n\n" + output += "\n".join(recursive_markdown(response_schema)) + + return output + + +# Example schema: +# "output_schema": { +# "type": "object", +# "properties": { +# "results": { +# "type": "object", +# "properties": { +# "matches": { +# "type": "array", +# "items": { +# "type": "object", +# "properties": { +# "text": {"type": "string"}, +# "metadata": {"type": "object"}, +# "score": {"type": "float"}, +# }, +# "required": ["text"], +# }, +# }, +# }, +# } +# }, +# "required": ["results"], +# }, + + +def schema_as_human_readable_string(schema): + if schema["type"] == "object": + return schema_as_human_readable_string(schema["properties"]) + elif schema["type"] == "array": + return schema_as_human_readable_string(schema["items"]) + else: + return schema["type"] + +def print_usage(): + # Get the basename of the script (remove dirs) + name = 
os.path.basename(sys.argv[0]) + print(f"Usage: {name} [base_directory]") + +def main(): + # Get a base directory from the command line + if len(sys.argv) > 1: + base_dir = sys.argv[1] + elif not os.path.exists("src/solace_ai_connector"): + if glob.glob("src/*/components"): + base_dir = "." + else: + print("You must specify a base directory for the components\n") + print_usage() + else: + base_dir = "src/solace_ai_connector" + + # Call the function + create_markdown_documentation( + f"{base_dir}/components", "docs/components", "component" + ) + create_markdown_documentation( + f"{base_dir}/transforms", "docs/transforms", "transform" + ) + + # create_ai_prompt(full_info) + + +if __name__ == "__main__": + main() diff --git a/src/tools/gen_component_docs.py b/src/tools/gen_component_docs.py index 2556a308..3a215cb1 100644 --- a/src/tools/gen_component_docs.py +++ b/src/tools/gen_component_docs.py @@ -2,6 +2,7 @@ import re import sys import json +import glob import importlib.util import yaml # pylint: disable=import-error @@ -368,16 +369,35 @@ def schema_as_human_readable_string(schema): return schema["type"] +def print_usage(): + # Get the basename of the script (remove dirs) + name = os.path.basename(sys.argv[0]) + print(f"Usage: {name} [base_directory]") + + def main(): + # Get a base directory from the command line + if len(sys.argv) > 1: + base_dir = sys.argv[1] + elif not os.path.exists("src/solace_ai_connector"): + if glob.glob("src/*/components"): + base_dir = "." 
+ else: + print("You must specify a base directory for the components\n") + print_usage() + sys.exit(1) + else: + base_dir = "src/solace_ai_connector" + # Call the function create_markdown_documentation( - "src/solace_ai_connector/components", "docs/components", "component" + f"{base_dir}/components", "docs/components", "component" ) create_markdown_documentation( - "src/solace_ai_connector/transforms", "docs/transforms", "transform" + f"{base_dir}/transforms", "docs/transforms", "transform" ) - create_ai_prompt(full_info) + # create_ai_prompt(full_info) if __name__ == "__main__": From 8c809e8f9e8568cc1f8df0d4c2c3c2482982c85c Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Sun, 9 Jun 2024 11:11:36 -0400 Subject: [PATCH 10/18] Moved slack components into their own plugin: solace-ai-connector-slack. Adjusted the importing of modules from config files to be more friendly for plugins and added a 'component_package' properly that will be auto-installed if it is specified and the package is not present. 
--- docs/components/index.md | 2 - docs/components/slack_input.md | 85 ---- docs/components/slack_output.md | 74 --- pyproject.toml | 13 +- requirements.txt | 12 +- src/solace_ai_connector/common/utils.py | 16 +- .../components/__init__.py | 4 - .../components/inputs_outputs/slack_base.py | 36 -- .../components/inputs_outputs/slack_input.py | 440 ------------------ .../components/inputs_outputs/slack_output.py | 176 ------- src/solace_ai_connector/flow/flow.py | 3 +- .../tools/gen_component_docs.py | 14 +- tests/test_invoke.py | 2 +- 13 files changed, 40 insertions(+), 837 deletions(-) delete mode 100644 docs/components/slack_input.md delete mode 100644 docs/components/slack_output.md delete mode 100644 src/solace_ai_connector/components/inputs_outputs/slack_base.py delete mode 100644 src/solace_ai_connector/components/inputs_outputs/slack_input.py delete mode 100644 src/solace_ai_connector/components/inputs_outputs/slack_output.py diff --git a/docs/components/index.md b/docs/components/index.md index a6b7cd54..0fa9b6d5 100644 --- a/docs/components/index.md +++ b/docs/components/index.md @@ -16,8 +16,6 @@ | [langchain_vector_store_embedding_search](langchain_vector_store_embedding_search.md) | Use LangChain Vector Stores to search a vector store with a semantic search. This will take text, run it through an embedding model with a query embedding and then find the closest matches in the store. | | [message_filter](message_filter.md) | A filtering component. This will apply a user configurable expression. If the expression evaluates to True, the message will be passed on. If the expression evaluates to False, the message will be discarded. If the message is discarded, any previous components that require an acknowledgement will be acknowledged. | | [pass_through](pass_through.md) | What goes in comes out | -| [slack_input](slack_input.md) | Slack input component. The component connects to Slack using the Bolt API and receives messages from Slack channels. 
| -| [slack_output](slack_output.md) | Slack output component. The component sends messages to Slack channels using the Bolt API. | | [stdin_input](stdin_input.md) | STDIN input component. The component will prompt for input, which will then be placed in the message payload using the output schema below. | | [stdout_output](stdout_output.md) | STDOUT output component | | [timer_input](timer_input.md) | An input that will generate a message at a specified interval. | diff --git a/docs/components/slack_input.md b/docs/components/slack_input.md deleted file mode 100644 index de6110b1..00000000 --- a/docs/components/slack_input.md +++ /dev/null @@ -1,85 +0,0 @@ -# SlackInput - -Slack input component. The component connects to Slack using the Bolt API and receives messages from Slack channels. - -## Configuration Parameters - -```yaml -component_name: -component_module: slack_input -component_config: - slack_bot_token: - slack_app_token: - share_slack_connection: - max_file_size: - max_total_file_size: - listen_to_channels: - send_history_on_join: - acknowledgement_message: -``` - -| Parameter | Required | Default | Description | -| --- | --- | --- | --- | -| slack_bot_token | False | | The Slack bot token to connect to Slack. | -| slack_app_token | False | | The Slack app token to connect to Slack. | -| share_slack_connection | False | | Share the Slack connection with other components in this instance. | -| max_file_size | False | 20 | The maximum file size to download from Slack in MB. Default: 20MB | -| max_total_file_size | False | 20 | The maximum total file size to download from Slack in MB. Default: 20MB | -| listen_to_channels | False | False | Whether to listen to channels or not. Default: False | -| send_history_on_join | False | False | Send history on join. Default: False | -| acknowledgement_message | False | | The message to send to acknowledge the user's message has been received. 
| - - - -## Component Output Schema - -``` -{ - event: { - text: , - files: [ - { - name: , - content: , - mime_type: , - filetype: , - size: - }, - ... - ], - user_email: , - mentions: [ - , - ... - ], - type: , - user_id: , - client_msg_id: , - ts: , - channel: , - subtype: , - event_ts: , - channel_type: - } -} -``` -| Field | Required | Description | -| --- | --- | --- | -| event | True | | -| event.text | False | | -| event.files | False | | -| eventfiles[].name | False | | -| eventfiles[].content | False | | -| eventfiles[].mime_type | False | | -| eventfiles[].filetype | False | | -| eventfiles[].size | False | | -| event.user_email | False | | -| event.mentions | False | | -| event.type | False | | -| event.user_id | False | | -| event.client_msg_id | False | | -| event.ts | False | | -| event.channel | False | | -| event.subtype | False | | -| event.event_ts | False | | -| event.channel_type | False | | diff --git a/docs/components/slack_output.md b/docs/components/slack_output.md deleted file mode 100644 index e1ba4383..00000000 --- a/docs/components/slack_output.md +++ /dev/null @@ -1,74 +0,0 @@ -# SlackOutput - -Slack output component. The component sends messages to Slack channels using the Bolt API. - -## Configuration Parameters - -```yaml -component_name: -component_module: slack_output -component_config: - slack_bot_token: - slack_app_token: - share_slack_connection: -``` - -| Parameter | Required | Default | Description | -| --- | --- | --- | --- | -| slack_bot_token | False | | The Slack bot token to connect to Slack. | -| slack_app_token | False | | The Slack app token to connect to Slack. | -| share_slack_connection | False | | Share the Slack connection with other components in this instance. 
| - - -## Component Input Schema - -``` -{ - message_info: { - channel: , - type: , - user_email: , - client_msg_id: , - ts: , - subtype: , - event_ts: , - channel_type: , - user_id: , - session_id: - }, - content: { - text: , - files: [ - { - name: , - content: , - mime_type: , - filetype: , - size: - }, - ... - ] - } -} -``` -| Field | Required | Description | -| --- | --- | --- | -| message_info | True | | -| message_info.channel | True | | -| message_info.type | False | | -| message_info.user_email | False | | -| message_info.client_msg_id | False | | -| message_info.ts | False | | -| message_info.subtype | False | | -| message_info.event_ts | False | | -| message_info.channel_type | False | | -| message_info.user_id | False | | -| message_info.session_id | True | | -| content | True | | -| content.text | False | | -| content.files | False | | -| contentfiles[].name | False | | -| contentfiles[].content | False | | -| contentfiles[].mime_type | False | | -| contentfiles[].filetype | False | | -| contentfiles[].size | False | | diff --git a/pyproject.toml b/pyproject.toml index 1899d481..9ead989c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,12 +18,13 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "boto3>=1.34.93", - "langchain_core>=0.2.4", - "PyYAML>=6.0.1", - "Requests>=2.32.3", - "slack_bolt>=1.18.1", - "solace_pubsubplus>=1.6.0", + "boto3~=1.34.122", + "langchain_core~=0.2.5", + "langchain~=0.2.3", + "PyYAML~=6.0.1", + "Requests~=2.32.3", + "solace_pubsubplus~=1.6.0", + ] [project.urls] diff --git a/requirements.txt b/requirements.txt index 26d24092..273945fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -boto3==1.34.93 -langchain_core==0.2.4 -PyYAML==6.0.1 -Requests==2.32.3 -slack_bolt==1.18.1 -solace_pubsubplus==1.6.0 +boto3~=1.34.122 +langchain_core~=0.2.5 +langchain~=0.2.3 +PyYAML~=6.0.1 +Requests~=2.32.3 +solace_pubsubplus~=1.6.0 diff --git a/src/solace_ai_connector/common/utils.py 
b/src/solace_ai_connector/common/utils.py index 31e4c27b..22a8e79f 100644 --- a/src/solace_ai_connector/common/utils.py +++ b/src/solace_ai_connector/common/utils.py @@ -5,6 +5,7 @@ import sys import re import builtins +import subprocess from .log import log @@ -94,9 +95,12 @@ def resolve_config_values(config, allow_source_expression=False): return config -def import_module(name, base_path=None): +def import_module(name, base_path=None, component_package=None): """Import a module by name""" + if component_package: + install_package(component_package) + if base_path: if base_path not in sys.path: sys.path.append(base_path) @@ -126,7 +130,7 @@ def import_module(name, base_path=None): pass except Exception as e: raise ImportError(f"Module load error for {full_name}: {e}") from e - raise ImportError(f"Module load error for {name}") from exc + raise ModuleNotFoundError(f"Module '{name}' not found") from exc def invoke_config(config, allow_source_expression=False): @@ -230,6 +234,14 @@ def call_function(function, params, allow_source_expression): return function(**params) +def install_package(package_name): + """Install a package using pip if it isn't already installed""" + try: + importlib.import_module(package_name) + except ImportError: + subprocess.run(["pip", "install", package_name], check=True) + + def extract_source_expression(se_call): # First remove the source_expression( and the trailing ) # Account for possible whitespace diff --git a/src/solace_ai_connector/components/__init__.py b/src/solace_ai_connector/components/__init__.py index b025a4eb..a7b8043a 100644 --- a/src/solace_ai_connector/components/__init__.py +++ b/src/solace_ai_connector/components/__init__.py @@ -3,12 +3,10 @@ from .inputs_outputs import ( error_input, timer_input, - slack_output, broker_input, broker_output, stdout_output, stdin_input, - slack_input, ) from .general import ( @@ -39,12 +37,10 @@ # Also import the components from the submodules from .inputs_outputs.error_input import 
ErrorInput from .inputs_outputs.timer_input import TimerInput -from .inputs_outputs.slack_output import SlackOutput from .inputs_outputs.broker_input import BrokerInput from .inputs_outputs.broker_output import BrokerOutput from .inputs_outputs.stdout_output import Stdout from .inputs_outputs.stdin_input import Stdin -from .inputs_outputs.slack_input import SlackInput from .general.user_processor import UserProcessor from .general.aggregate import Aggregate from .general.for_testing.need_ack_input import NeedAckInput diff --git a/src/solace_ai_connector/components/inputs_outputs/slack_base.py b/src/solace_ai_connector/components/inputs_outputs/slack_base.py deleted file mode 100644 index 5d93e34a..00000000 --- a/src/solace_ai_connector/components/inputs_outputs/slack_base.py +++ /dev/null @@ -1,36 +0,0 @@ -"""Base class for all Slack components""" - -from abc import ABC, abstractmethod -from slack_bolt import App # pylint: disable=import-error -from ..component_base import ComponentBase - - -class SlackBase(ComponentBase, ABC): - _slack_apps = {} - - def __init__(self, module_info, **kwargs): - super().__init__(module_info, **kwargs) - self.slack_bot_token = self.get_config("slack_bot_token") - self.slack_app_token = self.get_config("slack_app_token") - self.max_file_size = self.get_config("max_file_size", 20) - self.max_total_file_size = self.get_config("max_total_file_size", 20) - self.share_slack_connection = self.get_config("share_slack_connection") - - if self.share_slack_connection: - if self.slack_bot_token not in SlackBase._slack_apps: - self.app = App(token=self.slack_bot_token) - SlackBase._slack_apps[self.slack_bot_token] = self.app - else: - self.app = SlackBase._slack_apps[self.slack_bot_token] - else: - self.app = App(token=self.slack_bot_token) - - @abstractmethod - def invoke(self, message, data): - pass - - def __str__(self): - return self.__class__.__name__ + " " + str(self.config) - - def __repr__(self): - return self.__str__() diff --git 
a/src/solace_ai_connector/components/inputs_outputs/slack_input.py b/src/solace_ai_connector/components/inputs_outputs/slack_input.py deleted file mode 100644 index 63712111..00000000 --- a/src/solace_ai_connector/components/inputs_outputs/slack_input.py +++ /dev/null @@ -1,440 +0,0 @@ -import threading -import queue -import base64 -import requests - - -from slack_bolt.adapter.socket_mode import SocketModeHandler -from .slack_base import SlackBase -from ...common.message import Message -from ...common.log import log - - -info = { - "class_name": "SlackInput", - "description": ( - "Slack input component. The component connects to Slack using the Bolt API " - "and receives messages from Slack channels." - ), - "config_parameters": [ - { - "name": "slack_bot_token", - "type": "string", - "description": "The Slack bot token to connect to Slack.", - }, - { - "name": "slack_app_token", - "type": "string", - "description": "The Slack app token to connect to Slack.", - }, - { - "name": "share_slack_connection", - "type": "string", - "description": "Share the Slack connection with other components in this instance.", - }, - { - "name": "max_file_size", - "type": "number", - "description": "The maximum file size to download from Slack in MB. Default: 20MB", - "default": 20, - "required": False, - }, - { - "name": "max_total_file_size", - "type": "number", - "description": "The maximum total file size to download " - "from Slack in MB. Default: 20MB", - "default": 20, - "required": False, - }, - { - "name": "listen_to_channels", - "type": "boolean", - "description": "Whether to listen to channels or not. Default: False", - "default": False, - "required": False, - }, - { - "name": "send_history_on_join", - "type": "boolean", - "description": "Send history on join. 
Default: False", - "default": False, - "required": False, - }, - { - "name": "acknowledgement_message", - "type": "string", - "description": "The message to send to acknowledge the user's message has been received.", - "required": False, - }, - ], - "output_schema": { - "type": "object", - "properties": { - "event": { - "type": "object", - "properties": { - "text": { - "type": "string", - }, - "files": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - }, - "content": { - "type": "string", - }, - "mime_type": { - "type": "string", - }, - "filetype": { - "type": "string", - }, - "size": { - "type": "number", - }, - }, - }, - }, - "user_email": { - "type": "string", - }, - "mentions": { - "type": "array", - "items": { - "type": "string", - }, - }, - "type": { - "type": "string", - }, - "user_id": { - "type": "string", - }, - "client_msg_id": { - "type": "string", - }, - "ts": { - "type": "string", - }, - "channel": { - "type": "string", - }, - "subtype": { - "type": "string", - }, - "event_ts": { - "type": "string", - }, - "channel_type": { - "type": "string", - }, - }, - }, - }, - "required": ["event"], - }, -} - - -class SlackInput(SlackBase): - def __init__(self, **kwargs): - super().__init__(info, **kwargs) - self.slack_receiver_queue = None - self.slack_receiver = None - self.init_slack_receiver() - - def init_slack_receiver(self): - # Create a queue to get messages from the Slack receiver - self.slack_receiver_queue = queue.Queue() - self.stop_receiver_event = threading.Event() - self.slack_receiver = SlackReceiver( - app=self.app, - slack_app_token=self.slack_app_token, - slack_bot_token=self.slack_bot_token, - input_queue=self.slack_receiver_queue, - stop_event=self.stop_receiver_event, - max_file_size=self.get_config("max_file_size"), - max_total_file_size=self.get_config("max_total_file_size"), - listen_to_channels=self.get_config("listen_to_channels"), - 
send_history_on_join=self.get_config("send_history_on_join"), - acknowledgement_message=self.get_config("acknowledgement_message"), - ) - self.slack_receiver.start() - - def stop_component(self): - self.stop_slack_receiver() - - def stop_slack_receiver(self): - self.stop_receiver_event.set() - self.slack_receiver.join() - - def get_next_message(self): - # Get the next message from the Slack receiver queue - message = self.slack_receiver_queue.get() - return message - - def invoke(self, _message, data): - return data - - -class SlackReceiver(threading.Thread): - def __init__( - self, - app, - slack_app_token, - slack_bot_token, - input_queue, - stop_event, - max_file_size=20, - max_total_file_size=20, - listen_to_channels=False, - send_history_on_join=False, - acknowledgement_message=None, - ): - threading.Thread.__init__(self) - self.app = app - self.slack_app_token = slack_app_token - self.slack_bot_token = slack_bot_token - self.input_queue = input_queue - self.stop_event = stop_event - self.max_file_size = max_file_size - self.max_total_file_size = max_total_file_size - self.listen_to_channels = listen_to_channels - self.send_history_on_join = send_history_on_join - self.acknowledgement_message = acknowledgement_message - self.register_handlers() - - def run(self): - SocketModeHandler(self.app, self.slack_app_token).connect() - self.stop_event.wait() - - def handle_channel_event(self, event): - # For now, just do the normal handling - channel_name = self.get_channel_name(event.get("channel")) - event["channel_name"] = channel_name - - self.handle_event(event) - - def handle_group_event(self, _event): - log.info("Received a private group event. 
Ignoring.") - - def handle_event(self, event): - files = [] - total_file_size = 0 - if "files" in event: - for file in event["files"]: - file_url = file["url_private"] - file_name = file["name"] - size = file["size"] - total_file_size += size - if size > self.max_file_size * 1024 * 1024: - log.warning( - "File %s is too large to download. Skipping download.", - file_name, - ) - continue - if total_file_size > self.max_total_file_size * 1024 * 1024: - log.warning( - "Total file size exceeds the maximum limit. Skipping download." - ) - break - b64_file = self.download_file_as_base64_string(file_url) - files.append( - { - "name": file_name, - "content": b64_file, - "mime_type": file["mimetype"], - "filetype": file["filetype"], - "size": size, - } - ) - - team_domain = None - try: - permalink = self.app.client.chat_getPermalink( - channel=event["channel"], message_ts=event["event_ts"] - ) - team_domain = permalink.get("permalink", "").split("//")[1] - team_domain = team_domain.split(".")[0] - except Exception as e: - log.error("Error getting team domain: %s", e) - - user_email = self.get_user_email(event["user"]) - (text, mention_emails) = self.process_text_for_mentions(event["text"]) - payload = { - "text": text, - "files": files, - "user_email": user_email, - "team_id": event.get("team"), - "team_domain": team_domain, - "mentions": mention_emails, - "type": event.get("type"), - "client_msg_id": event.get("client_msg_id"), - "ts": event.get("thread_ts"), - "channel": event.get("channel"), - "channel_name": event.get("channel_name", ""), - "subtype": event.get("subtype"), - "event_ts": event.get("event_ts"), - "channel_type": event.get("channel_type"), - "user_id": event.get("user"), - } - user_properties = { - "user_email": user_email, - "team_id": event.get("team"), - "type": event.get("type"), - "client_msg_id": event.get("client_msg_id"), - "ts": event.get("thread_ts"), - "channel": event.get("channel"), - "subtype": event.get("subtype"), - "event_ts": 
event.get("event_ts"), - "channel_type": event.get("channel_type"), - "user_id": event.get("user"), - } - - if self.acknowledgement_message: - ack_msg_ts = self.app.client.chat_postMessage( - channel=event["channel"], - text=self.acknowledgement_message, - thread_ts=event.get("thread_ts"), - ).get("ts") - user_properties["ack_msg_ts"] = ack_msg_ts - - message = Message(payload=payload, user_properties=user_properties) - message.set_previous(payload) - self.input_queue.put(message) - - def download_file_as_base64_string(self, file_url): - headers = {"Authorization": "Bearer " + self.slack_bot_token} - response = requests.get(file_url, headers=headers, timeout=10) - base64_string = base64.b64encode(response.content).decode("utf-8") - return base64_string - - def get_user_email(self, user_id): - response = self.app.client.users_info(user=user_id) - return response["user"]["profile"].get("email", user_id) - - def process_text_for_mentions(self, text): - mention_emails = [] - for mention in text.split("<@"): - if mention.startswith("!"): - mention = mention[1:] - if mention.startswith("U"): - user_id = mention.split(">")[0] - response = self.app.client.users_info(user=user_id) - profile = response.get("user", {}).get("profile") - if profile: - replacement = profile.get( - "email", "<@" + profile.get("real_name_normalized") + ">" - ) - mention_emails.append(replacement) - text = text.replace( - f"<@{user_id}>", - replacement, - ) - return text, mention_emails - - def get_channel_name(self, channel_id): - response = self.app.client.conversations_info(channel=channel_id) - return response["channel"].get("name") - - def get_channel_history(self, channel_id, team_id): - response = self.app.client.conversations_history(channel=channel_id) - - # First search through messages to get all their replies - messages_to_add = [] - for message in response["messages"]: - if "subtype" not in message and "text" in message: - if "reply_count" in message: - # Get the replies - replies = 
self.app.client.conversations_replies( - channel=channel_id, ts=message.get("ts") - ) - messages_to_add.extend(replies["messages"]) - - response["messages"].extend(messages_to_add) - - # Go through the messages and remove any that have a sub_type - messages = [] - emails = {} - for message in response["messages"]: - if "subtype" not in message and "text" in message: - if message.get("user") not in emails: - emails[message.get("user")] = self.get_user_email( - message.get("user") - ) - payload = { - "text": message.get("text"), - "team_id": team_id, - "user_email": emails[message.get("user")], - "mentions": [], - "type": message.get("type"), - "client_msg_id": message.get("client_msg_id") or message.get("ts"), - "ts": message.get("ts"), - "event_ts": message.get("event_ts") or message.get("ts"), - "channel": channel_id, - "subtype": message.get("subtype"), - "user_id": message.get("user"), - "message_id": message.get("client_msg_id"), - } - messages.append(payload) - - return messages - - def handle_new_channel_join(self, event): - """We have been added to a new channel. 
This will get all the history and send it to the input queue.""" - history = self.get_channel_history(event.get("channel"), event.get("team")) - payload = { - "text": "New channel joined", - "user_email": "", - "mentions": [], - "type": "channel_join", - "client_msg_id": "", - "ts": "", - "channel": event.get("channel"), - "subtype": "channel_join", - "event_ts": "", - "channel_type": "channel", - "channel_name": self.get_channel_name(event.get("channel")), - "user_id": "", - "history": history, - } - user_properties = { - "type": "channel_join", - "channel": event.get("channel"), - "subtype": "channel_join", - "channel_type": "channel", - } - message = Message(payload=payload, user_properties=user_properties) - message.set_previous(payload) - self.input_queue.put(message) - - def register_handlers(self): - @self.app.event("message") - def handle_chat_message(event): - print("Got message event: ", event, event.get("channel_type")) - if event.get("channel_type") == "im": - self.handle_event(event) - elif event.get("channel_type") == "channel": - self.handle_channel_event(event) - elif event.get("channel_type") == "group": - self.handle_group_event(event) - - @self.app.event("app_mention") - def handle_app_mention(event): - print("Got app_mention event: ", event) - event["channel_type"] = "im" - event["channel_name"] = self.get_channel_name(event.get("channel")) - self.handle_event(event) - - @self.app.event("member_joined_channel") - def handle_member_joined_channel(event, _say, context): - if ( - self.send_history_on_join - and event.get("user") == context["bot_user_id"] - ): - self.handle_new_channel_join(event) diff --git a/src/solace_ai_connector/components/inputs_outputs/slack_output.py b/src/solace_ai_connector/components/inputs_outputs/slack_output.py deleted file mode 100644 index 2263f644..00000000 --- a/src/solace_ai_connector/components/inputs_outputs/slack_output.py +++ /dev/null @@ -1,176 +0,0 @@ -import base64 - - -from .slack_base import SlackBase 
-from ...common.log import log - - -info = { - "class_name": "SlackOutput", - "description": ( - "Slack output component. The component sends messages to Slack channels using the Bolt API." - ), - "config_parameters": [ - { - "name": "slack_bot_token", - "type": "string", - "description": "The Slack bot token to connect to Slack.", - }, - { - "name": "slack_app_token", - "type": "string", - "description": "The Slack app token to connect to Slack.", - }, - { - "name": "share_slack_connection", - "type": "string", - "description": "Share the Slack connection with other components in this instance.", - }, - ], - "input_schema": { - "type": "object", - "properties": { - "message_info": { - "type": "object", - "properties": { - "channel": { - "type": "string", - }, - "type": { - "type": "string", - }, - "user_email": { - "type": "string", - }, - "client_msg_id": { - "type": "string", - }, - "ts": { - "type": "string", - }, - "subtype": { - "type": "string", - }, - "event_ts": { - "type": "string", - }, - "channel_type": { - "type": "string", - }, - "user_id": { - "type": "string", - }, - "session_id": { - "type": "string", - }, - }, - "required": ["channel", "session_id"], - }, - "content": { - "type": "object", - "properties": { - "text": { - "type": "string", - }, - "files": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - }, - "content": { - "type": "string", - }, - "mime_type": { - "type": "string", - }, - "filetype": { - "type": "string", - }, - "size": { - "type": "number", - }, - }, - }, - }, - }, - }, - }, - "required": ["message_info", "content"], - }, -} - - -class SlackOutput(SlackBase): - def __init__(self, **kwargs): - super().__init__(info, **kwargs) - - def invoke(self, message, data): - message_info = data.get("message_info") - content = data.get("content") - text = content.get("text") - stream = content.get("stream") - channel = message_info.get("channel") - thread_ts = message_info.get("ts") - 
ack_msg_ts = message_info.get("ack_msg_ts") - - return { - "channel": channel, - "text": text, - "files": content.get("files"), - "thread_ts": thread_ts, - "ack_msg_ts": ack_msg_ts, - "stream": stream, - } - - def send_message(self, message): - try: - channel = message.get_data("previous:channel") - messages = message.get_data("previous:text") - stream = message.get_data("previous:stream") - files = message.get_data("previous:files") or [] - thread_ts = message.get_data("previous:ts") - ack_msg_ts = message.get_data("previous:ack_msg_ts") - - if not isinstance(messages, list): - if messages is not None: - messages = [messages] - else: - messages = [] - - for text in messages: - if stream: - if ack_msg_ts: - try: - self.app.client.chat_update( - channel=channel, ts=ack_msg_ts, text=text - ) - except Exception: - # It is normal to possibly get an update after the final message has already - # arrived and deleted the ack message - pass - else: - self.app.client.chat_postMessage( - channel=channel, text=text, thread_ts=thread_ts - ) - - for file in files: - file_content = base64.b64decode(file["content"]) - self.app.client.files_upload_v2( - channel=channel, - file=file_content, - thread_ts=thread_ts, - filename=file["name"], - ) - except Exception as e: - log.error("Error sending slack message: %s", e) - - super().send_message(message) - - try: - if ack_msg_ts and not stream: - self.app.client.chat_delete(channel=channel, ts=ack_msg_ts) - except Exception: - pass diff --git a/src/solace_ai_connector/flow/flow.py b/src/solace_ai_connector/flow/flow.py index 20a0a3a7..bd61badf 100644 --- a/src/solace_ai_connector/flow/flow.py +++ b/src/solace_ai_connector/flow/flow.py @@ -55,12 +55,13 @@ def create_components(self): def create_component_group(self, component, index): component_module = component.get("component_module", "") base_path = component.get("component_base_path", None) + component_package = component.get("component_package", None) num_instances = 
component.get("num_instances", 1) # component_config = component.get("component_config", {}) # component_name = component.get("component_name", "") # imported_module = import_from_directories(component_module) - imported_module = import_module(component_module, base_path) + imported_module = import_module(component_module, base_path, component_package) try: self.module_info = getattr(imported_module, "info") diff --git a/src/solace_ai_connector/tools/gen_component_docs.py b/src/solace_ai_connector/tools/gen_component_docs.py index e8bd0be9..98610885 100644 --- a/src/solace_ai_connector/tools/gen_component_docs.py +++ b/src/solace_ai_connector/tools/gen_component_docs.py @@ -32,9 +32,13 @@ def find_info_dicts(directory): "solace_ai_connector/", file, ) - module_name = module_name.replace("/", ".") - if module_name.endswith(".py"): - module_name = module_name[:-3] + else: + # This does assume that the plugin is conforming to + # the standard directory structure + module_name = re.sub(r"src/", "", file) + + module_name = re.sub(r".py$", "", module_name) + module_name = re.sub(r"/", ".", module_name) spec = importlib.util.spec_from_file_location(module_name, file) module = importlib.util.module_from_spec(spec) @@ -247,7 +251,7 @@ def format_json_schema( field_list, level + 1, "", - prop_path + f"{prop_name}", + prop_path + f".{prop_name}" if prop_path else prop_name, ) # If not the last property, add a comma if prop_name != list(schema_dict["properties"].keys())[-1]: @@ -368,11 +372,13 @@ def schema_as_human_readable_string(schema): else: return schema["type"] + def print_usage(): # Get the basename of the script (remove dirs) name = os.path.basename(sys.argv[0]) print(f"Usage: {name} [base_directory]") + def main(): # Get a base directory from the command line if len(sys.argv) > 1: diff --git a/tests/test_invoke.py b/tests/test_invoke.py index 80e2ceaa..e62d0303 100644 --- a/tests/test_invoke.py +++ b/tests/test_invoke.py @@ -339,7 +339,7 @@ def 
test_resolve_config_values(test): # Test the resolve_config_values function with a missing module def test_resolve_config_values_missing_module(): - with pytest.raises(ImportError, match="Could not import module 'missing_module'"): + with pytest.raises(ImportError, match="Module 'missing_module' not found"): resolve_config_values( { "a": { From 9cbba1de07ed1cc9b63c2da47227f9d5a979165b Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Tue, 18 Jun 2024 12:52:37 -0400 Subject: [PATCH 11/18] chore: Update component_base.py to include flow_lock_manager and flow_kv_store This commit updates the component_base.py file to include the flow_lock_manager and flow_kv_store attributes. These attributes are necessary for components to access the flow-level lock manager and key-value store. By adding these attributes, components can now utilize the lock manager to synchronize access to shared resources and the key-value store to store and retrieve data across multiple components within the flow. This change improves the flexibility and functionality of the component_base.py file. Changed the default location of the trust_store for the Solace API to be provided by the certifi module. 
Added a configuration item for the ChatModel with History component to be able to limit the size of entries being added to the chat history --- .../langchain_chat_model_with_history.md | 2 ++ ...langchain_vector_store_embedding_search.md | 6 ++-- .../common/messaging/solace_messaging.py | 2 ++ .../components/component_base.py | 11 ++++++ .../langchain_chat_model_with_history.py | 26 ++++++++++++++ src/solace_ai_connector/flow/flow.py | 34 +++++++++++++++++++ 6 files changed, 78 insertions(+), 3 deletions(-) diff --git a/docs/components/langchain_chat_model_with_history.md b/docs/components/langchain_chat_model_with_history.md index 5f828b9d..04ef74b1 100644 --- a/docs/components/langchain_chat_model_with_history.md +++ b/docs/components/langchain_chat_model_with_history.md @@ -13,6 +13,7 @@ component_config: langchain_component_config: llm_response_format: history_max_turns: + history_max_message_size: history_max_tokens: history_module: history_class: @@ -29,6 +30,7 @@ component_config: | langchain_component_config | True | | Model specific configuration for the chat model. See documentation for valid parameter names. | | llm_response_format | False | | The response format for this LLM request. This can be 'json', 'yaml', or 'text'. If set to 'json' or 'yaml', the response will be parsed by the appropriate parser and the fields will be available in the response object. If set to 'text', the response will be returned as a string. | | history_max_turns | False | 20 | The maximum number of turns to keep in the history. If not set, the history will be limited to 20 turns. | +| history_max_message_size | False | 1000 | The maximum amount of characters to keep in a single message in the history. | | history_max_tokens | False | 8000 | The maximum number of tokens to keep in the history. If not set, the history will be limited to 8000 tokens. | | history_module | False | langchain_community.chat_message_histories | The module that contains the history class. 
Default: 'langchain_community.chat_message_histories' | | history_class | False | ChatMessageHistory | The class to use for the history. Default: 'ChatMessageHistory' | diff --git a/docs/components/langchain_vector_store_embedding_search.md b/docs/components/langchain_vector_store_embedding_search.md index 76ae4e5a..6e24d839 100644 --- a/docs/components/langchain_vector_store_embedding_search.md +++ b/docs/components/langchain_vector_store_embedding_search.md @@ -66,6 +66,6 @@ component_config: | --- | --- | --- | | results | True | | | results.matches | False | | -| resultsmatches[].text | True | | -| resultsmatches[].metadata | False | | -| resultsmatches[].score | False | | +| results.matches[].text | True | | +| results.matches[].metadata | False | | +| results.matches[].score | False | | diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index 601298cc..d8515276 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -2,6 +2,7 @@ import logging import os +import certifi from solace.messaging.messaging_service import ( MessagingService, @@ -130,6 +131,7 @@ def connect(self): "trust_store_path" ) or os.environ.get("TRUST_STORE") + or certifi.where() or "/usr/share/ca-certificates/mozilla/", } # print (f"Broker Properties: {self.broker_properties}") diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 4bbaf22b..aef1aa36 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -23,6 +23,8 @@ def __init__(self, module_info, **kwargs): self.config = kwargs.pop("config", {}) self.index = kwargs.pop("index", None) self.flow_name = kwargs.pop("flow_name", None) + self.flow_lock_manager = kwargs.pop("flow_lock_manager", None) + self.flow_kv_store = 
kwargs.pop("flow_kv_store", None) self.stop_signal = kwargs.pop("stop_signal", None) self.sibling_component = kwargs.pop("sibling_component", None) self.component_index = kwargs.pop("component_index", None) @@ -283,6 +285,15 @@ def set_queue_timeout(self, timeout_ms): def get_default_queue_timeout(self): return DEFAULT_QUEUE_TIMEOUT_MS + def get_lock(self, lock_name): + return self.flow_lock_manager.get_lock(lock_name) + + def kv_store_get(self, key): + return self.flow_kv_store.get(key) + + def kv_store_set(self, key, value): + self.flow_kv_store.set(key, value) + def setup_communications(self): self.queue_timeout_ms = None # pylint: disable=assignment-from-none self.queue_max_depth = self.config.get( diff --git a/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py b/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py index 62f3e1dd..bce75528 100644 --- a/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py @@ -37,6 +37,12 @@ "If not set, the history will be limited to 20 turns.", "default": 20, }, + { + "name": "history_max_message_size", + "required": False, + "description": "The maximum amount of characters to keep in a single message in the history. 
", + "default": 1000, + }, { "name": "history_max_tokens", "required": False, @@ -101,6 +107,9 @@ class LangChainChatModelWithHistory(LangChainChatModelBase): def __init__(self, **kwargs): super().__init__(info, **kwargs) self.history_max_turns = self.get_config("history_max_turns", 20) + self.history_max_message_size = self.get_config( + "history_max_message_size", 1000 + ) self.history_max_tokens = self.get_config("history_max_tokens", 8000) self.stream_to_flow = self.get_config("stream_to_flow", "") self.llm_mode = self.get_config("llm_mode", "none") @@ -176,6 +185,8 @@ def invoke_model( result = namedtuple("Result", ["content"])(aggregate_result) + self.prune_large_message_from_history(session_id) + return result def send_streaming_message(self, input_message, chunk, aggregate_result): @@ -215,6 +226,21 @@ def get_history(self, session_id: str) -> BaseChatMessageHistory: ] return self._histories[session_id] + def prune_large_message_from_history(self, session_id: str): + with self._lock: + # Loop over the last 2 messages in the history and truncate if needed + if ( + session_id in self._histories + and len(self._histories[session_id].messages) > 1 + ): + last_two_messages = self._histories[session_id].messages[-2:] + for message in last_two_messages: + if len(message.content) > self.history_max_message_size: + message.content = ( + message.content[: self.history_max_message_size] + + " ...truncated..." 
+ ) + def clear_history(self, session_id: str): with self._lock: if session_id in self._histories: diff --git a/src/solace_ai_connector/flow/flow.py b/src/solace_ai_connector/flow/flow.py index bd61badf..413be2b8 100644 --- a/src/solace_ai_connector/flow/flow.py +++ b/src/solace_ai_connector/flow/flow.py @@ -1,10 +1,40 @@ """Main class for the flow""" +import threading + # from solace_ai_connector.common.log import log from ..common.utils import import_module +class FlowLockManager: + def __init__(self): + self._lock = threading.Lock() + self.locks = {} + + def get_lock(self, lock_name): + with self._lock: + if lock_name not in self.locks: + self.locks[lock_name] = threading.Lock() + + return self.locks[lock_name] + + +class FlowKVStore: + def __init__(self): + self.store = {} + + def set(self, key, value): + self.store[key] = value + + def get(self, key): + return self.store.get(key, None) + + class Flow: + + _lock_manager = FlowLockManager() + _kv_store = FlowKVStore() + def __init__( self, flow_config, @@ -31,6 +61,8 @@ def __init__( self.connector = connector self.flow_input_queue = None self.threads = [] + self.flow_lock_manager = Flow._lock_manager + self.flow_kv_store = Flow._kv_store self.create_components() def create_components(self): @@ -81,6 +113,8 @@ def create_component_group(self, component, index): index=index, # module_info=self.module_info, flow_name=self.name, + flow_lock_manager=self.flow_lock_manager, + flow_kv_store=self.flow_kv_store, stop_signal=self.stop_signal, sibling_component=sibling_component, component_index=component_index, From fc21829719fce5c2304ba8b5cd00330cbe4e0aec Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Tue, 18 Jun 2024 16:01:39 -0400 Subject: [PATCH 12/18] chore: Update trust_store_path for Solace API This commit updates the trust_store_path for the Solace API in the solace_messaging.py file. 
The trust_store_path is now provided by the certifi module, which ensures that the default location of the trust store is used. This change improves the security and reliability of the Solace API integration. ``` --- src/solace_ai_connector/common/messaging/solace_messaging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index d8515276..ef1e29c5 100644 --- a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -131,7 +131,7 @@ def connect(self): "trust_store_path" ) or os.environ.get("TRUST_STORE") - or certifi.where() + or os.path.dirname(certifi.where()) or "/usr/share/ca-certificates/mozilla/", } # print (f"Broker Properties: {self.broker_properties}") From 85bf89abed2d50fcd3cbf4043ce3a1e7021a202d Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Mon, 8 Jul 2024 13:32:22 -0400 Subject: [PATCH 13/18] Bump up to latest Solace API and small fix in a debug log --- pyproject.toml | 2 +- requirements.txt | 2 +- .../components/inputs_outputs/broker_input.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9ead989c..ff665adf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "langchain~=0.2.3", "PyYAML~=6.0.1", "Requests~=2.32.3", - "solace_pubsubplus~=1.6.0", + "solace_pubsubplus>=1.8.0", ] diff --git a/requirements.txt b/requirements.txt index 273945fa..702aa66d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ langchain_core~=0.2.5 langchain~=0.2.3 PyYAML~=6.0.1 Requests~=2.32.3 -solace_pubsubplus~=1.6.0 +solace_pubsubplus~=1.8.0 diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index 40053413..a5fad8ef 100644 --- 
a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -115,7 +115,7 @@ def get_next_message(self, timeout_ms=None): "Received message from broker: topic=%s, user_properties=%s, payload length=%d", topic, user_properties, - len(payload), + len(payload) if payload is not None else 0, ) return Message(payload=payload, topic=topic, user_properties=user_properties) From f140ca61e87de263a2d4f167b6738c7ba8779497 Mon Sep 17 00:00:00 2001 From: Aman Riat Date: Mon, 8 Jul 2024 13:43:54 -0400 Subject: [PATCH 14/18] DATAGO-79372: Add Publish workflow (#3) --- .github/workflows/release.yaml | 74 ++++++++++++++++++++++++++++++++++ pyproject.toml | 5 ++- 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/release.yaml diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 00000000..f0b98f25 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,74 @@ +name: Release +on: + workflow_dispatch: + inputs: + version: + type: choice + required: true + description: "Version bump type" + options: + - patch + - minor + - major + +jobs: + release: + name: Release + timeout-minutes: 20 + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/solace_ai_connector + permissions: + id-token: write + contents: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ssh-key: ${{ secrets.COMMIT_KEY }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Install hatch + run: | + pip install --upgrade pip + pip install hatch + + - name: Bump Version + run: | + CURRENT_VERSION=$(hatch version) + echo "CURRENT_VERSION=${CURRENT_VERSION}" >> $GITHUB_ENV + hatch version "${{ github.event.inputs.version }}" + NEW_VERSION=$(hatch version) + echo "NEW_VERSION=${NEW_VERSION}" >> $GITHUB_ENV + + - name: Fail if the current 
version doesn't exist + if: env.CURRENT_VERSION == '' + run: exit 1 + + - name: Build project for distribution + run: hatch build + + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + - name: Create Release + uses: ncipollo/release-action@v1 + with: + artifacts: "dist/*.whl" + makeLatest: true + generateReleaseNotes: true + tag: ${{ env.CURRENT_VERSION }} + + - name: Commit new version + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git commit -a -m "[ci skip] Bump version to $NEW_VERSION" + git push diff --git a/pyproject.toml b/pyproject.toml index ff665adf..a4443c86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "solace_ai_connector" -version = "0.0.1" +dynamic = ["version"] authors = [ { name="Edward Funnekotter", email="edward.funnekotter@solace.com" }, ] @@ -39,3 +39,6 @@ solace-ai-connector-gen-docs = "solace_ai_connector.tools.gen_component_docs:mai [tool.hatch.build.targets.wheel] packages = ["src/solace_ai_connector"] + +[tool.hatch.version] +path = "src/solace_ai_connector/__init__.py" From df0fd3d2ffd04e545922256721f21961098315a8 Mon Sep 17 00:00:00 2001 From: Art Morozov Date: Mon, 8 Jul 2024 13:55:59 -0400 Subject: [PATCH 15/18] DATAGO-78654 : Add CI (#4) DATAGO-78654: Add ci --- .github/workflows/ci.yml | 156 +++++++++++++++++++++++++++++ Dockerfile | 4 +- pyproject.toml | 21 +++- tests/test_message_get_set_data.py | 2 +- 4 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..425048ff --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,156 @@ +name: CI +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize] + +permissions: + id-token: write + checks: write + issues: read + pull-requests: write + +jobs: + 
test: + runs-on: ubuntu-latest + env: + HATCH_CACHE_DIR: ${{ github.workspace }}/.hatch_cache + HATCH_DATA_DIR: ${{ github.workspace }}/.hatch_data + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install Hatch + uses: pypa/hatch@install + + - name: Restore Hatch Directory + uses: actions/cache/restore@v4 + id: cache-restore + with: + path: | + ${{ env.HATCH_CACHE_DIR }} + ${{ env.HATCH_DATA_DIR }} + key: ${{ runner.os }}-hatch-${{ hashFiles('pyproject.toml','requirements.txt') }} + + - name: Install Dependencies + if: steps.cache-restore.outputs.cache-hit != 'true' + run: | + hatch python install 3.8 3.12 + + - name: Install Dependencies + if: steps.cache-restore.outputs.cache-hit != 'true' + run: | + hatch env create test + + - name: Cache Hatch Directory + uses: actions/cache/save@v4 + if: steps.cache-restore.outputs.cache-hit != 'true' + id: cache-hatch + with: + path: | + ${{ env.HATCH_CACHE_DIR }} + ${{ env.HATCH_DATA_DIR }} + key: ${{ runner.os }}-hatch-${{ hashFiles('pyproject.toml','requirements.txt') }} + + - name: Set up Docker Buildx + id: builder + uses: docker/setup-buildx-action@v3 + + - name: Prepare env file + run: | + cp .env_template .env + shell: bash + + - name: Build Docker Image + uses: docker/build-push-action@v6 + with: + push: false + tags: solace/solace-ai-connector:local + platforms: linux/amd64 + builder: ${{ steps.builder.outputs.name }} + load: true + + - name: Run Lint + continue-on-error: true + run: | + hatch run +py=312 lint:ruff check -o lint.json --output-format json ./src ./tests + shell: bash + + - name: Run Structured Tests + run: | + hatch run +py=312 test:make structure-test + shell: bash + + - name: Run Unit Tests + shell: bash + run: | + hatch test --cover --all --parallel --junitxml=junit.xml + + - name: Combine Coverage Reports + continue-on-error: true + run: | + hatch run +py=312 test:coverage combine + shell: bash + + - name: Report coverage + run: | + hatch run +py=312 test:coverage xml 
+ shell: bash + + - name: SonarQube Scan + if: always() + uses: sonarsource/sonarqube-scan-action@v2.2.0 + env: + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + SONAR_HOST_URL: ${{ vars.SONAR_HOST_URL }} + with: + args: > + -Dsonar.tests=tests/ + -Dsonar.verbose=true + -Dsonar.sources=src/ + -Dsonar.projectKey=${{github.repository_owner}}_${{github.event.repository.name}} + -Dsonar.python.coverage.reportPaths=coverage.xml + -Dsonar.python.ruff.reportPaths=lint.json + + - name: SonarQube Quality Gate check + id: sonarqube-quality-gate-check + uses: sonarsource/sonarqube-quality-gate-action@master + env: + SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + SONAR_HOST_URL: ${{ vars.SONAR_HOST_URL }} + + # Build and verify packages + - name: Build + run: hatch build + + - name: Verify Packages + run: | + ls dist/*.tar.gz | hatch run +py=312 test:xargs -n1 twine check + ls dist/*.whl | hatch run +py=312 test:xargs -n1 twine check + shell: bash + + - name: Surface failing tests + if: always() + uses: pmeier/pytest-results-action@main + with: + # A list of JUnit XML files, directories containing the former, and wildcard + # patterns to process. + # See @actions/glob for supported patterns. + path: junit.xml + + # (Optional) Add a summary of the results at the top of the report + summary: true + + # (Optional) Select which results should be included in the report. + # Follows the same syntax as `pytest -r` + display-options: fEX + + # (Optional) Fail the workflow if no JUnit XML was found. + fail-on-empty: true + + # (Optional) Title of the test results section in the workflow summary + title: Unit Test results diff --git a/Dockerfile b/Dockerfile index f18f7828..2f919ea6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,9 @@ RUN apt-get update && \ apt-get clean #Install main program -COPY . 
/app +COPY /src /app/src +COPY requirements.txt /app + RUN python3.10 -m pip install -r requirements.txt ENV PYTHONUNBUFFERED=1 diff --git a/pyproject.toml b/pyproject.toml index a4443c86..e3154b17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,6 @@ dependencies = [ "PyYAML~=6.0.1", "Requests~=2.32.3", "solace_pubsubplus>=1.8.0", - ] [project.urls] @@ -42,3 +41,23 @@ packages = ["src/solace_ai_connector"] [tool.hatch.version] path = "src/solace_ai_connector/__init__.py" + +[tool.hatch.envs.test] +dependencies = [ + "pytest>=8.2.2", + "coverage>=7.5.4", + "twine>=5.1.1", +] + +[tool.hatch.envs.lint] +detached = true +dependencies = [ + "ruff>=0.5.0", +] + +[tool.ruff] +lint.select = ["E4", "E7", "E9", "F"] +lint.ignore = ["F401", "E731"] + +[[tool.hatch.envs.test.matrix]] +python = ["38", "312"] diff --git a/tests/test_message_get_set_data.py b/tests/test_message_get_set_data.py index d525b486..f2258d1f 100644 --- a/tests/test_message_get_set_data.py +++ b/tests/test_message_get_set_data.py @@ -427,6 +427,6 @@ def test_get_set_user_properties(): def test_get_set_previous(): """Test getting and setting the previous data of a message""" message = Message(payload=payloads["simple"]) - assert message.get_previous() == None + assert message.get_previous() is None message.set_previous(payloads["complex"]) assert message.get_previous() == payloads["complex"] From 714e2393c5112b8f524ef776da6bef69e507ddde Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Thu, 11 Jul 2024 10:24:22 -0400 Subject: [PATCH 16/18] chore: Fix a bug in the user_processor component to properly return scalar values after processing (#5) --- .../components/general/user_processor.py | 2 +- tests/test_filter.py | 57 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/solace_ai_connector/components/general/user_processor.py b/src/solace_ai_connector/components/general/user_processor.py index 3f61e318..c70ec111 100644 --- 
a/src/solace_ai_connector/components/general/user_processor.py +++ b/src/solace_ai_connector/components/general/user_processor.py @@ -39,4 +39,4 @@ def invoke(self, message, data): component_processing = self.get_config("component_processing") if component_processing and callable(component_processing): return component_processing(message) - return data + return component_processing diff --git a/tests/test_filter.py b/tests/test_filter.py index 17b72f9b..aa4e6730 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -99,3 +99,60 @@ def test_missing_item_filter(): # Clean up dispose_connector(connector) + + +def test_filter_with_multi_stage_data(): + """Test the filter component with a previous stage passing on data and the filter + input_transforms copying that data into a user_data area""" + config_yaml = """ +log: + log_file_level: DEBUG + log_file: solace_ai_connector.log +flows: + - name: test_flow + components: + - component_name: user_processor + component_module: user_processor + component_config: + component_processing: + invoke: + module: invoke_functions + function: add + params: + positional: + - 5 + - 6 + - component_name: message_filter + component_module: message_filter + component_config: + filter_expression: + invoke: + module: invoke_functions + function: not_equal + params: + positional: + - 1 + - 2 + input_transforms: + - type: copy + source_expression: previous + dest_expression: user_data.output + - component_name: pass_through + component_module: pass_through + + +""" + connector, flows = create_test_flows(config_yaml) + flow = flows[0] + + # Send 1 message + message = Message(payload={"my_list": [1, 2, 3], "my_obj": {"a": 1, "b": 2}}) + send_message_to_flow(flow, message) + + # Expect a message + output_message = get_message_from_flow(flow) + assert output_message.get_data("input.payload:my_list") == [1, 2, 3] + assert output_message.get_data("user_data.output") == 11 + + # Clean up + dispose_connector(connector) From 
1873ae4698c92cd4a65e30c2885546c39855ebd1 Mon Sep 17 00:00:00 2001 From: Art Morozov Date: Thu, 11 Jul 2024 13:26:45 -0400 Subject: [PATCH 17/18] Test clean up prints (#7) * Cleanup of some lingering prints and add a couple more tests * Print cleanup * Exclude coverage.py result from sonarqube --------- Co-authored-by: Edward Funnekotter Co-authored-by: Edward Funnekotter --- .github/workflows/ci.yml | 1 - .../common/messaging/messaging_builder.py | 1 - .../common/messaging/solace_messaging.py | 32 ++++---- src/solace_ai_connector/common/utils.py | 1 - .../langchain_chat_model_with_history.py | 1 - .../components/inputs_outputs/broker_input.py | 1 - test | 0 tests/test_filter.py | 76 +++++++++++++++++-- 8 files changed, 84 insertions(+), 29 deletions(-) create mode 100644 test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 425048ff..5e37b9bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -113,7 +113,6 @@ jobs: -Dsonar.verbose=true -Dsonar.sources=src/ -Dsonar.projectKey=${{github.repository_owner}}_${{github.event.repository.name}} - -Dsonar.python.coverage.reportPaths=coverage.xml -Dsonar.python.ruff.reportPaths=lint.json - name: SonarQube Quality Gate check diff --git a/src/solace_ai_connector/common/messaging/messaging_builder.py b/src/solace_ai_connector/common/messaging/messaging_builder.py index 3005b92d..423d2465 100644 --- a/src/solace_ai_connector/common/messaging/messaging_builder.py +++ b/src/solace_ai_connector/common/messaging/messaging_builder.py @@ -14,7 +14,6 @@ def from_properties(self, broker_properties: dict): def build(self): if self.broker_properties["broker_type"] == "solace": - print("Building Solace Messaging Service", self.broker_properties) return SolaceMessaging(self.broker_properties) raise ValueError( diff --git a/src/solace_ai_connector/common/messaging/solace_messaging.py b/src/solace_ai_connector/common/messaging/solace_messaging.py index ef1e29c5..af97469d 100644 --- 
a/src/solace_ai_connector/common/messaging/solace_messaging.py +++ b/src/solace_ai_connector/common/messaging/solace_messaging.py @@ -47,14 +47,10 @@ def on_message(self, message: InboundMessage): else message.get_payload_as_bytes() ) if isinstance(payload, bytearray): - print(f"Received a message of type: {type(payload)}. Decoding to string") payload = payload.decode() - topic = message.get_destination_name() - print("\n" + f"Received message on: {topic}") - print("\n" + f"Message payload: {payload} \n") + # topic = message.get_destination_name() self.receiver.ack(message) - # print("\n" + f"Message dump: {message} \n") class MessagePublishReceiptListenerImpl(MessagePublishReceiptListener): @@ -72,19 +68,16 @@ class ServiceEventHandler( ReconnectionListener, ReconnectionAttemptListener, ServiceInterruptionListener ): def on_reconnected(self, service_event: ServiceEvent): - print("\non_reconnected") - print(f"Error cause: {service_event.get_cause()}") - print(f"Message: {service_event.get_message()}") + log.debug("Reconnected to broker: %s", service_event.get_cause()) + log.debug("Message: %s", service_event.get_message()) def on_reconnecting(self, event: "ServiceEvent"): - print("\non_reconnecting") - print(f"Error cause: {event.get_cause()}") - print(f"Message: {event.get_message()}") + log.debug("Reconnecting - Error cause: %s", event.get_cause()) + log.debug("Message: %s", event.get_message()) def on_service_interrupted(self, event: "ServiceEvent"): - print("\non_service_interrupted") - print(f"Error cause: {event.get_cause()}") - print(f"Message: {event.get_message()}") + log.debug("Service interrupted - Error cause: %s", event.get_cause()) + log.debug("Message: %s", event.get_message()) def set_python_solace_log_level(level: str): @@ -112,7 +105,6 @@ def __init__(self, broker_properties: dict): # set_python_solace_log_level("DEBUG") def __del__(self): - print("DESTRUCTOR: SolaceMessaging") self.disconnect() def connect(self): @@ -196,7 +188,11 @@ def 
bind_to_queue(self, queue_name: str, subscriptions: list = None): # Handle API exception except PubSubPlusClientError as exception: - print(f"\nMake sure queue {queue_name} exists on broker!", exception) + log.warning( + "Error creating persistent receiver for queue [%s], %s", + queue_name, + exception, + ) # Add to list of receivers self.persistent_receivers.append(self.persistent_receiver) @@ -206,7 +202,7 @@ def bind_to_queue(self, queue_name: str, subscriptions: list = None): for subscription in subscriptions: sub = TopicSubscription.of(subscription.get("topic")) self.persistent_receiver.add_subscription(sub) - print(f"Subscribed to topic: {subscription}") + log.debug("Subscribed to topic: %s", subscription) return self.persistent_receiver @@ -214,7 +210,7 @@ def disconnect(self): try: self.messaging_service.disconnect() except Exception as exception: # pylint: disable=broad-except - print(f"Error disconnecting: {exception}") + log.debug("Error disconnecting: %s", exception) def is_connected(self): return self.messaging_service.is_connected() diff --git a/src/solace_ai_connector/common/utils.py b/src/solace_ai_connector/common/utils.py index 22a8e79f..596f2741 100644 --- a/src/solace_ai_connector/common/utils.py +++ b/src/solace_ai_connector/common/utils.py @@ -25,7 +25,6 @@ def import_from_directories(module_name, base_path=None): if "." 
in module_name: module_file = module_name.replace(".", os.sep) module_path = os.path.join(directory, module_file + ".py") - # print(f"module_path: {module_path}") if os.path.exists(module_path): try: # if module_path.startswith("src/solace_ai_connector"): diff --git a/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py b/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py index bce75528..02ea5962 100644 --- a/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_chat_model_with_history.py @@ -167,7 +167,6 @@ def invoke_model( "configurable": {"session_id": session_id}, }, ): - # print(f"Streaming chunk: {chunk.content}") aggregate_result += chunk.content current_batch += chunk.content if len(current_batch.split()) >= self.stream_batch_size: diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index a5fad8ef..10d61c38 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -137,7 +137,6 @@ def decode_payload(self, payload): return payload def acknowledge_message(self, broker_message): - # print("Acknowledging message") self.messaging_service.ack_message(broker_message) def get_acknowledgement_callback(self): diff --git a/test b/test new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_filter.py b/tests/test_filter.py index aa4e6730..651ee9da 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -142,7 +142,7 @@ def test_filter_with_multi_stage_data(): """ - connector, flows = create_test_flows(config_yaml) + connector, flows = create_test_flows(config_yaml, queue_timeout=1) flow = flows[0] # Send 1 message @@ -150,9 +150,73 @@ def test_filter_with_multi_stage_data(): 
send_message_to_flow(flow, message) # Expect a message - output_message = get_message_from_flow(flow) - assert output_message.get_data("input.payload:my_list") == [1, 2, 3] - assert output_message.get_data("user_data.output") == 11 + try: + output_message = get_message_from_flow(flow) + assert output_message.get_data("input.payload:my_list") == [1, 2, 3] + assert output_message.get_data("user_data.output") == 11 + finally: + # Clean up + dispose_connector(connector) + + +def test_filter_with_multi_stage_data_with_timer_input(): + """Test the filter component with a previous stage passing on data and the filter + input_transforms copying that data into a user_data area - this time with a timer causing the message to be sent + """ + config_yaml = """ +log: + log_file_level: DEBUG + log_file: solace_ai_connector.log +trace: + trace_file: solace_ai_connector.trace +flows: + - name: test_flow + components: + - component_name: timer_input + component_module: timer_input + component_config: + interval_ms: 500 + skip_messages_if_behind: false + - component_name: user_processor + component_module: user_processor + component_config: + component_processing: + invoke: + module: invoke_functions + function: add + params: + positional: + - 5 + - 6 + - component_name: message_filter + component_module: message_filter + component_config: + filter_expression: + invoke: + module: invoke_functions + function: not_equal + params: + positional: + - 1 + - 2 + input_transforms: + - type: copy + source_expression: previous + dest_expression: user_data.output + - component_name: pass_through + component_module: pass_through + + +""" + connector, flows = create_test_flows(config_yaml, queue_timeout=3) + flow = flows[0] + + try: + # Get the output messages (should be at least 3 seconds worth) + for _ in range(3): + msg = get_message_from_flow(flow) + assert msg.get_data("user_data.output") == 11 + finally: + # Clean up + dispose_connector(connector) - # Clean up - 
dispose_connector(connector) From c1a5ba8c1192bf597dc667961869923ca2aa0212 Mon Sep 17 00:00:00 2001 From: Art Morozov Date: Fri, 12 Jul 2024 11:08:51 -0400 Subject: [PATCH 18/18] Add coverage CI workflow --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5e37b9bb..c69ce975 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -102,7 +102,7 @@ jobs: shell: bash - name: SonarQube Scan - if: always() + if: always() && github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository uses: sonarsource/sonarqube-scan-action@v2.2.0 env: SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} @@ -113,10 +113,12 @@ jobs: -Dsonar.verbose=true -Dsonar.sources=src/ -Dsonar.projectKey=${{github.repository_owner}}_${{github.event.repository.name}} + -Dsonar.python.coverage.reportPaths=coverage.xml -Dsonar.python.ruff.reportPaths=lint.json - name: SonarQube Quality Gate check id: sonarqube-quality-gate-check + if: always() && github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository uses: sonarsource/sonarqube-quality-gate-action@master env: SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}