From 67e01f9363c509183ba5d3f29af662062c0e7406 Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:04:33 -0400 Subject: [PATCH 01/26] Examples Update + Code Refactor (#25) * Removed StorageManager * Added examples for OpenAI, Bedrock, Anthropic, and VertexAI * Updating old examples (1/2) * Updating old examples (2/2) --- examples/ack_test.yaml | 7 ++ examples/anthropic_bedrock.yaml | 33 +++--- examples/chat_model_with_history.yaml | 9 +- examples/error_handler.yaml | 46 ++++++-- examples/llm/anthropic_chat.yaml | 99 +++++++++++++++++ examples/llm/bedrock_anthropic_chat.yaml | 14 ++- .../langchain_openai_with_history_chat.yaml | 98 +++++++++++++++++ examples/llm/openai_chat.yaml | 70 ++++++------ examples/llm/vertexai_chat.yaml | 100 +++++++++++++++++ examples/milvus_store.yaml | 49 +++++---- examples/request_reply.yaml | 7 ++ examples/vector_store_search.yaml | 63 ++++++++--- src/solace_ai_connector/common/utils.py | 1 + .../components/__init__.py | 2 - .../components/component_base.py | 2 +- .../general/for_testing/storage_tester.py | 51 --------- .../general/langchain/langchain_chat_model.py | 3 + .../langchain_vector_store_embedding_base.py | 44 ++++++-- .../general/openai/openai_chat_model_base.py | 2 +- src/solace_ai_connector/flow/flow.py | 3 - .../solace_ai_connector.py | 3 - src/solace_ai_connector/storage/storage.py | 28 ----- .../storage/storage_file.py | 53 --------- .../storage/storage_manager.py | 42 ------- .../storage/storage_memory.py | 27 ----- src/solace_ai_connector/storage/storage_s3.py | 31 ------ tests/test_storage.py | 103 ------------------ 27 files changed, 534 insertions(+), 456 deletions(-) create mode 100644 examples/llm/anthropic_chat.yaml create mode 100644 examples/llm/langchain_openai_with_history_chat.yaml create mode 100644 examples/llm/vertexai_chat.yaml delete mode 100644 src/solace_ai_connector/components/general/for_testing/storage_tester.py delete mode 100644 
src/solace_ai_connector/storage/storage.py delete mode 100644 src/solace_ai_connector/storage/storage_file.py delete mode 100644 src/solace_ai_connector/storage/storage_manager.py delete mode 100644 src/solace_ai_connector/storage/storage_memory.py delete mode 100644 src/solace_ai_connector/storage/storage_s3.py delete mode 100644 tests/test_storage.py diff --git a/examples/ack_test.yaml b/examples/ack_test.yaml index 13b8deeb..278f9f04 100644 --- a/examples/ack_test.yaml +++ b/examples/ack_test.yaml @@ -1,6 +1,13 @@ --- # Simple loopback flow # Solace -> Pass Through -> Solace +# +# required ENV variables: +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN + log: stdout_log_level: DEBUG log_file_level: DEBUG diff --git a/examples/anthropic_bedrock.yaml b/examples/anthropic_bedrock.yaml index 517d3f6a..1f3c60a9 100644 --- a/examples/anthropic_bedrock.yaml +++ b/examples/anthropic_bedrock.yaml @@ -4,6 +4,22 @@ # sends the response back to the Solace broker # It will ask the model to write a dry joke about the input # message. 
It takes the entire payload of the input message +# +# Dependencies: +# pip install langchain_aws langchain_community +# +# Dependencies: +# - langchain_aws +# pip install langchain_aws +# +# required ENV variables: +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN +# - AWS_BEDROCK_ANTHROPIC_CLAUDE_MODEL_ID + + instance_name: LLM log: stdout_log_level: DEBUG @@ -19,21 +35,6 @@ shared_config: broker_password: ${SOLACE_BROKER_PASSWORD} broker_vpn: ${SOLACE_BROKER_VPN} -# Storage -storage: - - storage_name: default - storage_type: file - storage_config: - path: app/data.json - - storage_name: backup - storage_type: aws_s3 - storage_config: - aws_access_key_id: ${AWS_ACCESS_KEY_ID} - aws_secret_access_key: ${AWS_SECRET_ACCESS_KEY} - aws_region_name: ${AWS_REGION_NAME} - bucket_name: ${AWS_BUCKET_NAME} - path: app/data.json - # List of flows flows: - name: test_flow @@ -90,7 +91,7 @@ flows: payload_format: text input_transforms: - type: copy - source_expression: user_data.temp + source_expression: previous dest_expression: user_data.output:payload - type: copy source_expression: template:response/{{text://input.topic}} diff --git a/examples/chat_model_with_history.yaml b/examples/chat_model_with_history.yaml index 080236b2..c889ea56 100644 --- a/examples/chat_model_with_history.yaml +++ b/examples/chat_model_with_history.yaml @@ -1,5 +1,12 @@ --- -# Example uses goes from STDIN to STDOUT with a chat model with history +# Example uses goes from STDIN to STDOUT with a chat model with history hosted on AWS Bedrock + +# Dependencies: +# pip install langchain_aws + +# required ENV variables: +# - AWS_DEFAULT_REGION + log: stdout_log_level: INFO log_file_level: DEBUG diff --git a/examples/error_handler.yaml b/examples/error_handler.yaml index 9e478564..416d1bcd 100644 --- a/examples/error_handler.yaml +++ b/examples/error_handler.yaml @@ -1,8 +1,21 @@ --- # This is an example configuration file that contains an # error 
handler flow and a test flow. The error handler flow -# will log any error messages locally and will also -# send them to a Solace broker. +# will log any error messages locally to a file and will also +# send them to a Solace broker. +# +# It will subscribe to `my/topic1` and expect an event with the payload: +# { +# "value": +# } +# If value is not a number, the error will be caught, logged to file and send back to the Solace broker. +# +# required ENV variables: +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN + instance: name: solace_ai_connector1 log: @@ -27,12 +40,19 @@ flows: component_module: error_input component_config: - component_name: error_logger - component_module: logger - component_config: - log_level: ERROR - max_log_line_size: 1000 + component_module: file_output + input_transforms: + - type: copy + source_expression: input.payload + dest_expression: user_data.log:content + - type: copy + source_value: a + dest_expression: user_data.log:mode + - type: copy + source_value: error_log.log + dest_expression: user_data.log:file_path component_input: - source_expression: input.payload + source_expression: user_data.log - component_name: solace_sw_broker component_module: broker_output component_config: @@ -66,7 +86,7 @@ flows: - topic: my/topic1 qos: 1 payload_encoding: utf-8 - payload_format: text + payload_format: json - component_name: pass_through component_module: pass_through @@ -89,6 +109,16 @@ flows: - type: copy source_expression: input.payload dest_expression: user_data.output:payload.original_payload + - type: copy + source_expression: + invoke: + module: invoke_functions + function: power + params: + positional: + - source_expression(input.payload:value) # This will throw an error if value is not a number + - 2 + dest_expression: user_data.output:payload.valueSquared - type: copy source_expression: input.user_properties dest_expression: user_data.output:payload.user_properties diff --git 
a/examples/llm/anthropic_chat.yaml b/examples/llm/anthropic_chat.yaml new file mode 100644 index 00000000..5bc1c164 --- /dev/null +++ b/examples/llm/anthropic_chat.yaml @@ -0,0 +1,99 @@ +# This will create a flow like this: +# Solace -> Anthropic -> Solace +# +# It will subscribe to `demo/question` and expect an event with the payload: +# +# The input message has the following schema: +# { +# "text": "" +# } +# +# It will then send an event back to Solace with the topic: `demo/question/response` +# +# Dependencies: +# pip install -U langchain-anthropic +# +# required ENV variables: +# - ANTHROPIC_API_KEY +# - ANTHROPIC_API_ENDPOINT +# - MODEL_NAME +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN + +--- +log: + stdout_log_level: INFO + log_file_level: DEBUG + log_file: solace_ai_connector.log + +shared_config: + - broker_config: &broker_connection + broker_type: solace + broker_url: ${SOLACE_BROKER_URL} + broker_username: ${SOLACE_BROKER_USERNAME} + broker_password: ${SOLACE_BROKER_PASSWORD} + broker_vpn: ${SOLACE_BROKER_VPN} + +# Take from Slack and publish to Solace +flows: + # Slack chat input processing + - name: Simple template to LLM + components: + # Input from a Solace broker + - component_name: solace_sw_broker + component_module: broker_input + component_config: + <<: *broker_connection + broker_queue_name: demo_question + broker_subscriptions: + - topic: demo/question + qos: 1 + payload_encoding: utf-8 + payload_format: json + + # + # Do an LLM request + # + - component_name: llm_request + component_module: langchain_chat_model + component_config: + langchain_module: langchain_anthropic + langchain_class: ChatAnthropic + langchain_component_config: + api_key: ${ANTHROPIC_API_KEY} + base_url: ${ANTHROPIC_API_ENDPOINT} + model: ${MODEL_NAME} + temperature: 0.01 + input_transforms: + - type: copy + source_expression: | + template:You are a helpful AI assistant. 
Please help with the user's request below: + + {{text://input.payload:text}} + + dest_expression: user_data.llm_input:messages.0.content + - type: copy + source_expression: static:user + dest_expression: user_data.llm_input:messages.0.role + component_input: + source_expression: user_data.llm_input + + # Send response back to broker + - component_name: send_response + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: previous + dest_expression: user_data.output:payload + - type: copy + source_expression: template:{{text://input.topic}}/response + dest_expression: user_data.output:topic + component_input: + source_expression: user_data.output diff --git a/examples/llm/bedrock_anthropic_chat.yaml b/examples/llm/bedrock_anthropic_chat.yaml index 3064a0e8..879bb03b 100644 --- a/examples/llm/bedrock_anthropic_chat.yaml +++ b/examples/llm/bedrock_anthropic_chat.yaml @@ -7,8 +7,16 @@ # # The input message has the following schema: # { -# "text": "", +# "text": "" # } +# +# Dependencies: +# pip install langchain_aws +# +# required ENV variables: +# - AWS_BEDROCK_ANTHROPIC_CLAUDE_MODEL_ID +# - AWS_BEDROCK_ANTHROPIC_CLAUDE_REGION + --- log: stdout_log_level: DEBUG @@ -35,8 +43,8 @@ flows: - component_name: llm_request component_module: langchain_chat_model component_config: - langchain_module: langchain_community.chat_models - langchain_class: BedrockChat + langchain_module: langchain_aws + langchain_class: ChatBedrock langchain_component_config: model_id: ${AWS_BEDROCK_ANTHROPIC_CLAUDE_MODEL_ID} region_name: ${AWS_BEDROCK_ANTHROPIC_CLAUDE_REGION} diff --git a/examples/llm/langchain_openai_with_history_chat.yaml b/examples/llm/langchain_openai_with_history_chat.yaml new file mode 100644 index 00000000..1440b332 --- /dev/null +++ b/examples/llm/langchain_openai_with_history_chat.yaml @@ -0,0 +1,98 @@ +# This will 
create a flow like this: +# Solace -> OpenAI -> Solace +# +# It will subscribe to `demo/joke/subject` and expect an event with the payload: +# +# { +# "joke": { +# "subject": "" +# } +# } +# +# It will then send an event back to Solace with the topic: `demo/joke/subject/response` +# +# Dependencies: +# pip install -U langchain_openai openai +# +# required ENV variables: +# - OPENAI_API_KEY +# - OPENAI_API_ENDPOINT - optional +# - MODEL_NAME +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN + +--- +log: + stdout_log_level: INFO + log_file_level: DEBUG + log_file: solace_ai_connector.log + +shared_config: + - broker_config: &broker_connection + broker_type: solace + broker_url: ${SOLACE_BROKER_URL} + broker_username: ${SOLACE_BROKER_USERNAME} + broker_password: ${SOLACE_BROKER_PASSWORD} + broker_vpn: ${SOLACE_BROKER_VPN} + +# Take from Slack and publish to Solace +flows: + # Slack chat input processing + - name: Simple template to LLM + components: + # Input from a Solace broker + - component_name: solace_sw_broker + component_module: broker_input + component_config: + <<: *broker_connection + broker_queue_name: ed_demo_joke + broker_subscriptions: + - topic: demo/joke/subject + qos: 1 + payload_encoding: utf-8 + payload_format: json + + # Go to the LLM and keep history + - component_name: chat_request_llm + component_module: langchain_chat_model_with_history + component_config: + langchain_module: langchain_openai + langchain_class: ChatOpenAI + langchain_component_config: + api_key: ${OPENAI_API_KEY} + base_url: ${OPENAI_API_ENDPOINT} + model: ${MODEL_NAME} + temperature: 0.01 + history_module: langchain_core.chat_history + history_class: InMemoryChatMessageHistory + history_max_turns: 20 + history_max_length: 6000 + input_transforms: + - type: copy + source_expression: template:Write a joke about {{text://input.payload:joke.subject}} + dest_expression: user_data.input:messages.0.content + - type: copy + 
source_value: user + dest_expression: user_data.input:messages.0.role + component_input: + source_expression: user_data.input + + # Send response back to broker + - component_name: send_response + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: previous + dest_expression: user_data.output:payload + - type: copy + source_expression: template:{{text://input.topic}}/response + dest_expression: user_data.output:topic + component_input: + source_expression: user_data.output diff --git a/examples/llm/openai_chat.yaml b/examples/llm/openai_chat.yaml index 15fc937c..65e98975 100644 --- a/examples/llm/openai_chat.yaml +++ b/examples/llm/openai_chat.yaml @@ -1,15 +1,26 @@ # This will create a flow like this: # Solace -> OpenAI -> Solace # -# It will subscribe to `demo/joke/subject` and expect an event with the payload: +# It will subscribe to `demo/question` and expect an event with the payload: # +# The input message has the following schema: # { -# "joke": { -# "subject": "" -# } +# "text": "" # } # -# It will then send an event back to Solace with the topic: `demo/joke/subject/response` +# It will then send an event back to Solace with the topic: `demo/question/response` +# +# Dependencies: +# pip install -U langchain_openai openai +# +# required ENV variables: +# - OPENAI_API_KEY +# - OPENAI_API_ENDPOINT +# - MODEL_NAME +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN --- log: @@ -27,49 +38,44 @@ shared_config: # Take from Slack and publish to Solace flows: - - # Slack chat input processing - name: Simple template to LLM components: - # Input from a Solace broker - component_name: solace_sw_broker component_module: broker_input - component_config: + component_config: <<: *broker_connection - broker_queue_name: ed_demo_joke + broker_queue_name: demo_question 
broker_subscriptions: - - topic: demo/joke/subject + - topic: demo/question qos: 1 payload_encoding: utf-8 payload_format: json - # Go to the LLM and keep history - - component_name: chat_request_llm - component_module: langchain_chat_model_with_history + # + # Do an LLM request + # + - component_name: llm_request + component_module: openai_chat_model component_config: - langchain_module: langchain_openai - langchain_class: ChatOpenAI - langchain_component_config: - openai_api_key: ${OPENAI_API_KEY} - openai_api_base: ${OPENAI_API_ENDPOINT} - model: ${MODEL_NAME} - temperature: 0.01 - history_module: langchain_core.chat_history - history_class: InMemoryChatMessageHistory - history_max_turns: 20 - history_max_length: 6000 + api_key: ${OPENAI_API_KEY} + base_url: ${OPENAI_API_ENDPOINT} + model: ${MODEL_NAME} + temperature: 0.01 input_transforms: - type: copy - source_expression: template:Write a joke about {{text://input.payload:joke.subject}} - dest_expression: user_data.input:messages.0.content + source_expression: | + template:You are a helpful AI assistant. 
Please help with the user's request below: + + {{text://input.payload:text}} + + dest_expression: user_data.llm_input:messages.0.content - type: copy - source_value: user - dest_expression: user_data.input:messages.0.role + source_expression: static:user + dest_expression: user_data.llm_input:messages.0.role component_input: - source_expression: user_data.input - + source_expression: user_data.llm_input # Send response back to broker - component_name: send_response @@ -88,5 +94,3 @@ flows: dest_expression: user_data.output:topic component_input: source_expression: user_data.output - - diff --git a/examples/llm/vertexai_chat.yaml b/examples/llm/vertexai_chat.yaml new file mode 100644 index 00000000..16201b84 --- /dev/null +++ b/examples/llm/vertexai_chat.yaml @@ -0,0 +1,100 @@ +# This will create a flow like this: +# Solace -> Google Vertex AI -> Solace +# +# It will subscribe to `demo/question` and expect an event with the payload: +# +# The input message has the following schema: +# { +# "text": "" +# } +# +# It will then send an event back to Solace with the topic: `demo/question/response` +# +# Dependencies: +# pip install -U langchain-google-vertexai +# +# required ENV variables: +# - GOOGLE_APPLICATION_CREDENTIALS: the path to a service account JSON file +# - VERTEX_REGION +# - VERTEX_API_ENDPOINT - optional +# - MODEL_NAME +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN + +--- +log: + stdout_log_level: INFO + log_file_level: DEBUG + log_file: solace_ai_connector.log + +shared_config: + - broker_config: &broker_connection + broker_type: solace + broker_url: ${SOLACE_BROKER_URL} + broker_username: ${SOLACE_BROKER_USERNAME} + broker_password: ${SOLACE_BROKER_PASSWORD} + broker_vpn: ${SOLACE_BROKER_VPN} + +# Take from Slack and publish to Solace +flows: + # Slack chat input processing + - name: Simple template to LLM + components: + # Input from a Solace broker + - component_name: solace_sw_broker + 
component_module: broker_input + component_config: + <<: *broker_connection + broker_queue_name: demo_question + broker_subscriptions: + - topic: demo/question + qos: 1 + payload_encoding: utf-8 + payload_format: json + + # + # Do an LLM request + # + - component_name: llm_request + component_module: langchain_chat_model + component_config: + langchain_module: langchain_google_vertexai + langchain_class: ChatVertexAI + langchain_component_config: + base_url: ${VERTEX_API_ENDPOINT} + location: ${VERTEX_REGION} + model: ${MODEL_NAME} + temperature: 0.01 + input_transforms: + - type: copy + source_expression: | + template:You are a helpful AI assistant. Please help with the user's request below: + + {{text://input.payload:text}} + + dest_expression: user_data.llm_input:messages.0.content + - type: copy + source_expression: static:user + dest_expression: user_data.llm_input:messages.0.role + component_input: + source_expression: user_data.llm_input + + # Send response back to broker + - component_name: send_response + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: previous + dest_expression: user_data.output:payload + - type: copy + source_expression: template:{{text://input.topic}}/response + dest_expression: user_data.output:topic + component_input: + source_expression: user_data.output diff --git a/examples/milvus_store.yaml b/examples/milvus_store.yaml index 6cc8d105..4c6211b3 100644 --- a/examples/milvus_store.yaml +++ b/examples/milvus_store.yaml @@ -1,21 +1,24 @@ --- # Example configuration file for adding a Milvus vector store and a Cohere embedding model # The input comes from STDIN and goes to STDOUT +# +# Dependencies: +# pip install langchain_milvus pymilvus +# +# required ENV variables: +# - MILVUS_HOST +# - MILVUS_PORT +# - MILVUS_COLLECTION_NAME +# - ACCESS_KEY: AWS access key +# - SECRET_KEY: 
AWS secret key +# - AWS_BEDROCK_COHERE_EMBED_MODEL_ID +# - AWS_BEDROCK_COHERE_EMBED_REGION + log: stdout_log_level: DEBUG log_file_level: DEBUG log_file: solace_ai_connector.log -shared_config: - - broker_config: &broker_connection - broker_connection_share: ${SOLACE_BROKER_URL} - broker_type: solace - broker_url: ${SOLACE_BROKER_URL} - broker_username: ${SOLACE_BROKER_USERNAME} - broker_password: ${SOLACE_BROKER_PASSWORD} - broker_vpn: ${SOLACE_BROKER_VPN} - - # List of flows flows: - name: test_flow @@ -24,25 +27,24 @@ flows: # Test input from STDIN - component_name: stdin component_module: stdin_input - component_config: - component_name: milvus_cohere_embed component_module: langchain_vector_store_embedding_index component_config: - vector_store_component_path: langchain_community.vectorstores + vector_store_component_path: langchain_milvus vector_store_component_name: Milvus vector_store_component_config: - collection_name: collection_2 + auto_id: true + collection_name: ${MILVUS_COLLECTION_NAME} connection_args: host: ${MILVUS_HOST} port: ${MILVUS_PORT} - # vector_store_index_name: solace-index-3 - embedding_component_path: langchain_community.embeddings + embedding_component_path: langchain_aws embedding_component_name: BedrockEmbeddings embedding_component_config: model_id: ${AWS_BEDROCK_COHERE_EMBED_MODEL_ID} region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION} - credentials_profile_name: default + credentials_profile_name: default # Profile name in ~/.aws/credentials input_transforms: - type: copy source_value: @@ -51,31 +53,30 @@ flows: function: system dest_expression: user_data.vector_input:metadata.system - type: copy - source_value: efunneko + source_value: username dest_expression: user_data.vector_input:metadata.user - type: copy - source_value: input.payload - dest_expression: user_data.vector_input:text + source_expression: input.payload:text + dest_expression: user_data.vector_input:texts component_input: source_expression: user_data.vector_input - 
component_name: milvus_cohere_embed_search component_module: langchain_vector_store_embedding_search component_config: - vector_store_component_path: langchain_community.vectorstores + vector_store_component_path: langchain_milvus vector_store_component_name: Milvus vector_store_component_config: - collection_name: collection_1 + collection_name: ${MILVUS_COLLECTION_NAME} connection_args: host: ${MILVUS_HOST} port: ${MILVUS_PORT} - # vector_store_index_name: solace-index-3 - embedding_component_path: langchain_community.embeddings + embedding_component_path: langchain_aws embedding_component_name: BedrockEmbeddings embedding_component_config: model_id: ${AWS_BEDROCK_COHERE_EMBED_MODEL_ID} region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION} - credentials_profile_name: default + credentials_profile_name: default # Profile name in ~/.aws/credentials max_results: 5 component_input: source_expression: input.payload diff --git a/examples/request_reply.yaml b/examples/request_reply.yaml index 87d775db..97552036 100644 --- a/examples/request_reply.yaml +++ b/examples/request_reply.yaml @@ -2,6 +2,13 @@ # Example for a request-reply flow # Flow 1: stdin -> broker_request_reply -> stdout # Flow 2: broker_input -> pass_through -> broker_output +# +# required ENV variables: +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN + log: stdout_log_level: INFO log_file_level: INFO diff --git a/examples/vector_store_search.yaml b/examples/vector_store_search.yaml index e4276171..ca4b3f8b 100644 --- a/examples/vector_store_search.yaml +++ b/examples/vector_store_search.yaml @@ -1,28 +1,40 @@ --- -# Example that uses Cohere embeddings and OpenSearch for vector store +# Example that uses Cohere embeddings and Amazon OpenSearch Service Serverless for vector store # This also shows how to use AWS credentials and AWS4Auth for OpenSearch # which involves using 'invoke' to create the required auth objects +# +# +# Follow Boto3 documentation for 
AWS credentials: +# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration +# https://python.langchain.com/v0.2/docs/integrations/vectorstores/opensearch/#using-aoss-amazon-opensearch-service-serverless +# +# Dependencies: +# pip install -U langchain_community opensearch-py requests_aws4auth +# +# required ENV variables: +# - AWS_BEDROCK_COHERE_EMBED_MODEL_ID +# - AWS_BEDROCK_COHERE_EMBED_REGION +# - AWS_OPENSEARCH_INDEX_NAME +# - AWS_OPENSEARCH_ENDPOINT + log: stdout_log_level: DEBUG log_file_level: DEBUG log_file: solace_ai_connector.log shared_config: - - broker_config: &broker_connection - broker_connection_share: ${SOLACE_BROKER_URL} - broker_type: solace - broker_url: ${SOLACE_BROKER_URL} - broker_username: ${SOLACE_BROKER_USERNAME} - broker_password: ${SOLACE_BROKER_PASSWORD} - broker_vpn: ${SOLACE_BROKER_VPN} - - # Get AWS credentials object + # Get AWS credentials object from .aws credentials + # You can pass the ACCESS/SECRET/SESSION keys directly as ENV variables as well + # eg: aws_secret_access_key: ${AWS_SECRET_ACCESS_KEY} - aws_credentials: &aws_credentials invoke: object: invoke: module: boto3 function: Session + params: + keyword: + profile_name: default # The profile to choose from .aws/credentials function: get_credentials # Get AWS4Auth object @@ -47,42 +59,57 @@ shared_config: object: *aws_credentials attribute: token + # Create a bedrock client for use with AWS components + - bedrock_client_config: &bedrock_client_config + invoke: + module: boto3 + function: client + params: + keyword: + service_name: bedrock-runtime + region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION} + aws_access_key_id: + invoke: + object: *aws_credentials + attribute: access_key + aws_secret_access_key: + invoke: + object: *aws_credentials + attribute: secret_key + # List of flows flows: - name: test_flow trace_level: DEBUG components: - # Input from a Solace broker + # Input from a standard in - component_name: stdin component_module: 
stdin_input - component_config: - component_name: opensearch_cohere_embed component_module: langchain_vector_store_embedding_search component_config: vector_store_component_path: langchain_community.vectorstores vector_store_component_name: OpenSearchVectorSearch + vector_store_index_name: ${AWS_OPENSEARCH_INDEX_NAME} vector_store_component_config: - index_name: ${AWS_OPENSEARCH_JIRA_INDEX_NAME} - opensearch_url: ${AWS_OPENSEARCH_JIRA_ENDPOINT} + opensearch_url: ${AWS_OPENSEARCH_ENDPOINT} connection_class: invoke: module: opensearchpy attribute: RequestsHttpConnection http_auth: *aws_4_auth_aoss timeout: 300 - vector_store_index_name: solace-index-3 - embedding_component_path: langchain_community.embeddings + embedding_component_path: langchain_aws embedding_component_name: BedrockEmbeddings embedding_component_config: + client: *bedrock_client_config model_id: ${AWS_BEDROCK_COHERE_EMBED_MODEL_ID} region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION} - credentials_profile_name: default max_results: 7 component_input: source_expression: input.payload - - component_name: stdout component_module: stdout_output diff --git a/src/solace_ai_connector/common/utils.py b/src/solace_ai_connector/common/utils.py index 43770474..c04a0ca8 100644 --- a/src/solace_ai_connector/common/utils.py +++ b/src/solace_ai_connector/common/utils.py @@ -118,6 +118,7 @@ def import_module(name, base_path=None, component_package=None): ".components.general", ".components.general.for_testing", ".components.general.langchain", + ".components.general.openai", ".components.inputs_outputs", ".transforms", ".common", diff --git a/src/solace_ai_connector/components/__init__.py b/src/solace_ai_connector/components/__init__.py index a7b8043a..f373125e 100644 --- a/src/solace_ai_connector/components/__init__.py +++ b/src/solace_ai_connector/components/__init__.py @@ -22,7 +22,6 @@ need_ack_input, fail, give_ack_output, - storage_tester, ) from .general.langchain import ( @@ -46,7 +45,6 @@ from 
.general.for_testing.need_ack_input import NeedAckInput from .general.for_testing.fail import Fail from .general.for_testing.give_ack_output import GiveAckOutput -from .general.for_testing.storage_tester import MemoryTester from .general.pass_through import PassThrough from .general.delay import Delay from .general.iterate import Iterate diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index f7365b57..82203ee2 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -28,7 +28,6 @@ def __init__(self, module_info, **kwargs): self.component_index = kwargs.pop("component_index", None) self.error_queue = kwargs.pop("error_queue", None) self.instance_name = kwargs.pop("instance_name", None) - self.storage_manager = kwargs.pop("storage_manager", None) self.trace_queue = kwargs.pop("trace_queue", False) self.connector = kwargs.pop("connector", None) self.timer_manager = kwargs.pop("timer_manager", None) @@ -311,6 +310,7 @@ def handle_error(self, exception, event): "component_index": self.component_index, }, } + message = None if event and event.event_type == EventType.MESSAGE: message = event.data if message: diff --git a/src/solace_ai_connector/components/general/for_testing/storage_tester.py b/src/solace_ai_connector/components/general/for_testing/storage_tester.py deleted file mode 100644 index 97f1a683..00000000 --- a/src/solace_ai_connector/components/general/for_testing/storage_tester.py +++ /dev/null @@ -1,51 +0,0 @@ -"""Simple memory tester component""" - -from ...component_base import ComponentBase - - -info = { - "class_name": "MemoryTester", - "description": ("A component that will exchange a value from the memory storage"), - "config_parameters": [ - { - "name": "storage_name", - "required": True, - "description": "The name of the storage to use", - "type": "string", - }, - ], - "input_schema": { - "type": "object", - 
"properties": { - "test_value": { - "type": "string", - "description": "The value to store in the memory storage", - }, - }, - "required": ["test_value"], - }, - "output_schema": { - "type": "object", - "properties": {}, - }, -} - - -class MemoryTester(ComponentBase): - def __init__(self, **kwargs): - super().__init__(info, **kwargs) - - def invoke(self, message, data): - storage = self.storage_manager.get_storage_handler( - self.get_config("storage_name") - ) - storage_data = storage.get("test") - - if storage_data is None: - storage_data = {"test": "initial_value"} - - old_value = storage_data.get("test") - new_value = data.get("test_value") - - storage.put("test", {"test": new_value}) - return {"test_value": old_value} diff --git a/src/solace_ai_connector/components/general/langchain/langchain_chat_model.py b/src/solace_ai_connector/components/general/langchain/langchain_chat_model.py index a025b518..85cd194e 100644 --- a/src/solace_ai_connector/components/general/langchain/langchain_chat_model.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_chat_model.py @@ -13,6 +13,9 @@ class LangChainChatModel(LangChainChatModelBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + def invoke_model( self, input_message, messages, session_id=None, clear_history=False ): diff --git a/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py index aaa6ad19..f5543ccc 100644 --- a/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py @@ -1,5 +1,5 @@ # This is the base class for vector store embedding classes - +import inspect from .langchain_base import ( LangChainBase, ) @@ -36,14 +36,42 @@ def init(self): self.vector_store_info["path"], self.vector_store_info["name"] 
) - if "index" not in self.vector_store_info["config"]: - self.vector_store_info["config"]["index"] = self.vector_store_info["index"] - self.vector_store_info["config"]["embeddings"] = self.embedding - self.vector_store_info["config"]["embedding_function"] = self.embedding + # Get the expected parameter names of the vector store class + class_init_signature = inspect.signature(vector_store_class.__init__) + class_param_names = [ + param.name + for param in class_init_signature.parameters.values() + if param.name != "self" + ] + + # index is optional - not using it if "index" or "index_name" is provided in the config + if self.vector_store_info["index"] and ( + "index" not in self.vector_store_info["config"] + or "index_name" not in self.vector_store_info["config"] + ): + # Checking if the class expects 'index' or 'index_name' as a parameter + if "index" in class_param_names: + self.vector_store_info["config"]["index"] = self.vector_store_info[ + "index" + ] + elif "index_name" in class_param_names: + self.vector_store_info["config"]["index_name"] = self.vector_store_info[ + "index" + ] + else: + # If not defined, used "index" as a parameter + self.vector_store_info["config"]["index"] = self.vector_store_info[ + "index" + ] - # index is optional - remove it from the config if it is None - if self.vector_store_info["config"]["index"] is None: - del self.vector_store_info["config"]["index"] + # Checking if the vector store uses "embedding_function" or "embeddings" as a parameter + if "embedding_function" in class_param_names: + self.vector_store_info["config"]["embedding_function"] = self.embedding + elif "embeddings" in class_param_names: + self.vector_store_info["config"]["embeddings"] = self.embedding + else: + # If not defined, used "embeddings" as a parameter + self.vector_store_info["config"]["embeddings"] = self.embedding try: self.vector_store = self.create_component( diff --git a/src/solace_ai_connector/components/general/openai/openai_chat_model_base.py 
b/src/solace_ai_connector/components/general/openai/openai_chat_model_base.py index 1ae3e981..6a0884b3 100644 --- a/src/solace_ai_connector/components/general/openai/openai_chat_model_base.py +++ b/src/solace_ai_connector/components/general/openai/openai_chat_model_base.py @@ -119,7 +119,7 @@ def invoke(self, message, data): response = client.chat.completions.create( messages=messages, model=self.model, temperature=self.temperature ) - return {"content": response.choices[0].message["content"]} + return {"content": response.choices[0].message.content} def invoke_stream(self, client, message, messages): response_uuid = str(uuid.uuid4()) diff --git a/src/solace_ai_connector/flow/flow.py b/src/solace_ai_connector/flow/flow.py index caa934eb..c13ea71d 100644 --- a/src/solace_ai_connector/flow/flow.py +++ b/src/solace_ai_connector/flow/flow.py @@ -42,7 +42,6 @@ def __init__( stop_signal, error_queue=None, instance_name=None, - storage_manager=None, trace_queue=None, flow_instance_index=0, connector=None, @@ -55,7 +54,6 @@ def __init__( self.stop_signal = stop_signal self.error_queue = error_queue self.instance_name = instance_name - self.storage_manager = storage_manager self.trace_queue = trace_queue self.flow_instance_index = flow_instance_index self.connector = connector @@ -120,7 +118,6 @@ def create_component_group(self, component, index): component_index=component_index, error_queue=self.error_queue, instance_name=self.instance_name, - storage_manager=self.storage_manager, trace_queue=self.trace_queue, connector=self.connector, timer_manager=self.connector.timer_manager, diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index 4952f629..83c3c2f6 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -9,7 +9,6 @@ from .common.utils import resolve_config_values from .flow.flow import Flow from .flow.timer_manager import TimerManager -from 
.storage.storage_manager import StorageManager from .common.event import Event, EventType from .services.cache_service import CacheService, create_storage_backend @@ -31,7 +30,6 @@ def __init__(self, config, event_handlers=None, error_queue=None): resolve_config_values(self.config) self.validate_config() self.instance_name = self.config.get("instance_name", "solace_ai_connector") - self.storage_manager = StorageManager(self.config.get("storage", [])) self.timer_manager = TimerManager(self.stop_signal) self.cache_service = self.setup_cache_service() @@ -75,7 +73,6 @@ def create_flow(self, flow: dict, index: int, flow_instance_index: int): stop_signal=self.stop_signal, error_queue=self.error_queue, instance_name=self.instance_name, - storage_manager=self.storage_manager, trace_queue=self.trace_queue, connector=self, ) diff --git a/src/solace_ai_connector/storage/storage.py b/src/solace_ai_connector/storage/storage.py deleted file mode 100644 index 835313ce..00000000 --- a/src/solace_ai_connector/storage/storage.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Top level storage module for the Solace AI Event Connector. 
This abstracts the -actual storage implementation and provides a common interface for the rest of -the application to use.""" - -from abc import abstractmethod - - -class Storage: - """Abstract storage class for the Solace AI Event Connector.""" - - def __init__(self, config: dict): - """Initialize the storage class.""" - - @abstractmethod - def put(self, key: str, value: dict): - """Put a value into the storage.""" - - @abstractmethod - def get(self, key: str) -> dict: - """Get a value from the storage.""" - - @abstractmethod - def delete(self, key: str): - """Delete a value from the storage.""" - - @abstractmethod - def list(self) -> list: - """List all keys in the storage.""" diff --git a/src/solace_ai_connector/storage/storage_file.py b/src/solace_ai_connector/storage/storage_file.py deleted file mode 100644 index 0ffb0c55..00000000 --- a/src/solace_ai_connector/storage/storage_file.py +++ /dev/null @@ -1,53 +0,0 @@ -"""File storage implementation for the storage interface.""" - -import os -import json -from .storage import Storage - -info = { - "class_name": "StorageFile", - "description": ("File storage class for the Solace AI Event Connector."), - "config_parameters": [ - { - "name": "file", - "required": True, - "description": "The file to use for storage", - "type": "string", - }, - ], -} - - -class StorageFile(Storage): - """File storage class for the Solace AI Event Connector.""" - - def __init__(self, config: dict): - """Initialize the file storage class.""" - self.storage_file = config["file"] - self.storage = {} - if os.path.exists(self.storage_file): - with open(self.storage_file, "r", encoding="utf-8") as file: - self.storage = json.load(file) - else: - with open(self.storage_file, "w", encoding="utf-8") as file: - json.dump(self.storage, file, ensure_ascii=False) - - def put(self, key: str, value: dict): - """Put a value into the file storage as a JSON object.""" - self.storage[key] = value - with open(self.storage_file, "w", encoding="utf-8") as 
file: - json.dump(self.storage, file, ensure_ascii=False) - - def get(self, key: str) -> dict: - """Get a value from the file storage""" - return self.storage.get(key, None) - - def delete(self, key: str): - """Delete a value from the file storage.""" - del self.storage[key] - with open(self.storage_file, "w", encoding="utf-8") as file: - json.dump(self.storage, file, ensure_ascii=False) - - def list(self) -> list: - """List all keys in the file storage.""" - return list(self.storage.keys()) diff --git a/src/solace_ai_connector/storage/storage_manager.py b/src/solace_ai_connector/storage/storage_manager.py deleted file mode 100644 index 8d47b193..00000000 --- a/src/solace_ai_connector/storage/storage_manager.py +++ /dev/null @@ -1,42 +0,0 @@ -"""Create and hold the storage handlers""" - -from .storage import Storage -from .storage_file import StorageFile -from .storage_memory import StorageMemory -from .storage_s3 import StorageS3 - - -class StorageManager: - """Storage manager class for the Solace AI Event Connector.""" - - def __init__(self, storage_config: dict): - """Initialize the storage manager class.""" - self.storage_handlers = {} - self.create_storage_handlers(storage_config) - - def create_storage_handlers(self, storage_configs: list): - """Create the storage handlers""" - for storage in storage_configs: - storage_handler = self.create_storage_handler(storage) - self.storage_handlers[storage["name"]] = storage_handler - - def create_storage_handler(self, storage_config: dict): - """Create the storage handler""" - storage_handler = self.create_storage(storage_config) - return storage_handler - - def get_storage_handler(self, storage_name: str): - """Get the storage handler""" - return self.storage_handlers.get(storage_name) - - def create_storage(self, config: dict) -> "Storage": - """Static factory method to create a storage object of the correct type.""" - storage_config = config.get("storage_config", {}) - if config["storage_type"] == "file": - return 
StorageFile(storage_config) - elif config["storage_type"] == "memory": - return StorageMemory(storage_config) - elif config["storage_type"] == "aws_s3": - return StorageS3(storage_config) - else: - raise ValueError(f"Unsupported storage type: {config['storage_type']}") diff --git a/src/solace_ai_connector/storage/storage_memory.py b/src/solace_ai_connector/storage/storage_memory.py deleted file mode 100644 index de8cad6e..00000000 --- a/src/solace_ai_connector/storage/storage_memory.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Memory storage class""" - -from .storage import Storage - - -class StorageMemory(Storage): - """Memory storage class for the Solace AI Event Connector.""" - - def __init__(self, config: dict): - """Initialize the memory storage class.""" - self.storage = {} - - def put(self, key: str, value: str): - """Put a value into the memory storage.""" - self.storage[key] = value - - def get(self, key: str) -> str: - """Get a value from the memory storage.""" - return self.storage.get(key, None) - - def delete(self, key: str): - """Delete a value from the memory storage.""" - del self.storage[key] - - def list(self) -> list: - """List all keys in the memory storage.""" - return list(self.storage.keys()) diff --git a/src/solace_ai_connector/storage/storage_s3.py b/src/solace_ai_connector/storage/storage_s3.py deleted file mode 100644 index 2b2a5828..00000000 --- a/src/solace_ai_connector/storage/storage_s3.py +++ /dev/null @@ -1,31 +0,0 @@ -"""AWS S3 storage implementation for the storage interface.""" - -import boto3 -from .storage import Storage - - -class StorageS3(Storage): - """AWS S3 storage class for the Solace AI Event Connector. 
The data is stored as JSON""" - - def __init__(self, config: dict): - """Initialize the AWS S3 storage class.""" - self.bucket_name = config["bucket_name"] - self.s3 = boto3.resource("s3") - - def put(self, key: str, value: dict): - """Put a value into the AWS S3 storage as a JSON object.""" - self.s3.Object(self.bucket_name, key).put(Body=value) - - def get(self, key: str) -> dict: - """Get a value from the AWS S3 storage""" - return ( - self.s3.Object(self.bucket_name, key).get()["Body"].read().decode("utf-8") - ) - - def delete(self, key: str): - """Delete a value from the AWS S3 storage.""" - self.s3.Object(self.bucket_name, key).delete() - - def list(self) -> list: - """List all keys in the AWS S3 storage.""" - return [obj.key for obj in self.s3.Bucket(self.bucket_name).objects.all()] diff --git a/tests/test_storage.py b/tests/test_storage.py deleted file mode 100644 index 174c0113..00000000 --- a/tests/test_storage.py +++ /dev/null @@ -1,103 +0,0 @@ -"""This file contains tests for for memory and file storage""" - -import sys -import os - -# import queue - -sys.path.append("src") - -from utils_for_test_files import ( # pylint: disable=wrong-import-position - create_test_flows, - # create_and_run_component, - dispose_connector, - send_message_to_flow, - get_message_from_flow, -) -from solace_ai_connector.common.message import ( # pylint: disable=wrong-import-position - Message, -) - - -def test_memory_storage(): - """Test the memory storage""" - # Create a simple configuration - config_yaml = """ -instance_name: test_instance -log: - log_file_level: DEBUG - log_file: solace_ai_connector.log -storage: - - name: memory - storage_type: memory -flows: - # This will fail with the specified error - - name: flow - components: - - component_name: storage_tester - component_module: storage_tester - component_config: - storage_name: memory - component_input: - source_expression: input.payload - -""" - connector, flows = create_test_flows(config_yaml) - flow = flows[0] - 
- # Send a message to the input flow - send_message_to_flow(flow, Message(payload={"test_value": "second_value"})) - output_message = get_message_from_flow(flow) - assert output_message.get_data("previous") == {"test_value": "initial_value"} - - send_message_to_flow(flow, Message(payload={"test_value": "third_value"})) - output_message = get_message_from_flow(flow) - assert output_message.get_data("previous") == {"test_value": "second_value"} - - dispose_connector(connector) - - -def test_file_storage(): - """Test the file storage""" - # Create a simple configuration - config_yaml = """ -instance_name: test_instance -log: - log_file_level: DEBUG - log_file: solace_ai_connector.log -storage: - - name: file - storage_type: file - storage_config: - file: test_storage.json -flows: - # This will fail with the specified error - - name: flow - components: - - component_name: storage_tester - component_module: storage_tester - component_config: - storage_name: file - component_input: - source_expression: input.payload - -""" - # If the file exists, delete it - if os.path.exists("test_storage.json"): - os.remove("test_storage.json") - - connector, flows = create_test_flows(config_yaml) - flow = flows[0] - - # Send a message to the input flow - send_message_to_flow(flow, Message(payload={"test_value": "second_value"})) - output_message = get_message_from_flow(flow) - assert output_message.get_data("previous") == {"test_value": "initial_value"} - - send_message_to_flow(flow, Message(payload={"test_value": "third_value"})) - output_message = get_message_from_flow(flow) - assert output_message.get_data("previous") == {"test_value": "second_value"} - - dispose_connector(connector) - - os.remove("test_storage.json") From 31adf198e8d1a3b4138ee0eaa2b77c509fb5c7d7 Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Wed, 28 Aug 2024 08:05:10 -0400 Subject: [PATCH 02/26] Added support for temporary queue + UUID queue name (#26) --- 
.../components/inputs_outputs/broker_base.py | 4 ++++ .../components/inputs_outputs/broker_input.py | 13 +++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index df4bb5e9..157177b1 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -11,6 +11,7 @@ from ..component_base import ComponentBase from ...common.message import Message from ...common.messaging.messaging_builder import MessagingServiceBuilder +import uuid # TBD - at the moment, there is no connection sharing supported. It should be possible # to share a connection between multiple components and even flows. The changes @@ -138,3 +139,6 @@ def get_acknowledgement_callback(self): def start(self): pass + + def generate_uuid(self): + return str(uuid.uuid4()) diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_input.py b/src/solace_ai_connector/components/inputs_outputs/broker_input.py index 507702a0..841c8ee5 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_input.py @@ -38,13 +38,13 @@ }, { "name": "broker_queue_name", - "required": True, - "description": "Queue name for broker", + "required": False, + "description": "Queue name for broker, if not provided it will use a temporary queue", }, { "name": "temporary_queue", "required": False, - "description": "Whether to create a temporary queue that will be deleted after disconnection", + "description": "Whether to create a temporary queue that will be deleted after disconnection, defaulted to True if broker_queue_name is not provided", "default": False, }, { @@ -91,7 +91,12 @@ def __init__(self, **kwargs): super().__init__(info, **kwargs) self.need_acknowledgement = True self.temporary_queue = 
self.get_config("temporary_queue", False) - self.broker_properties["temporary_queue"] = self.temporary_queue + # If broker_queue_name is not provided, use temporary queue + if not self.get_config("broker_queue_name"): + self.temporary_queue = True + self.broker_properties["temporary_queue"] = True + # Generating a UUID for the queue name + self.broker_properties["queue_name"] = self.generate_uuid() self.connect() def invoke(self, message, data): From 3ede57a88d5eb7a215e104853640b162562c550e Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Thu, 29 Aug 2024 11:39:11 -0400 Subject: [PATCH 03/26] Add assembly component and auto-generated documents (#27) * Added the assembly component * Auto-generated documents * Added type check * Update the cache service expiry logic + Update the assembly component to use cache expiry for timeout * Moved assembly to the correct place --- docs/components/assembly.md | 50 ++++++++ docs/components/broker_input.md | 4 +- docs/components/index.md | 3 + .../langchain_chat_model_with_history.md | 2 + docs/components/openai_chat_model.md | 62 +++++++++ .../openai_chat_model_with_history.md | 68 ++++++++++ examples/assembly_inputs.yaml | 74 +++++++++++ .../langchain_openai_with_history_chat.yaml | 4 +- examples/llm/openai_chat.yaml | 4 +- .../components/component_base.py | 6 + .../components/general/assembly.py | 119 ++++++++++++++++++ .../services/cache_service.py | 38 ++++-- 12 files changed, 417 insertions(+), 17 deletions(-) create mode 100644 docs/components/assembly.md create mode 100644 docs/components/openai_chat_model.md create mode 100644 docs/components/openai_chat_model_with_history.md create mode 100644 examples/assembly_inputs.yaml create mode 100644 src/solace_ai_connector/components/general/assembly.py diff --git a/docs/components/assembly.md b/docs/components/assembly.md new file mode 100644 index 00000000..5d651925 --- /dev/null +++ b/docs/components/assembly.md @@ -0,0 +1,50 @@ 
+# Assembly + +Assembles messages till criteria is met, the output will be the assembled message + +## Configuration Parameters + +```yaml +component_name: +component_module: assembly +component_config: + assemble_key: + max_items: + max_time_ms: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| assemble_key | True | | The key from input message that would cluster the similar messages together | +| max_items | False | 10 | Maximum number of messages to assemble. Once this value is reached, the messages would be flushed to the output | +| max_time_ms | False | 10000 | The timeout in seconds to wait for the messages to assemble. If timeout is reached before the max size is reached, the messages would be flushed to the output | + + +## Component Input Schema + +``` +{ + +} +``` + + +## Component Output Schema + +``` +[ + { + payload: , + topic: , + user_properties: { + + } + }, + ... +] +``` +| Field | Required | Description | +| --- | --- | --- | +| [].payload | False | | +| [].topic | False | | +| [].user_properties | False | | diff --git a/docs/components/broker_input.md b/docs/components/broker_input.md index 51c02d06..ece65fbc 100644 --- a/docs/components/broker_input.md +++ b/docs/components/broker_input.md @@ -27,8 +27,8 @@ component_config: | broker_username | True | | Client username for broker | | broker_password | True | | Client password for broker | | broker_vpn | True | | Client VPN for broker | -| broker_queue_name | True | | Queue name for broker | -| temporary_queue | False | False | Whether to create a temporary queue that will be deleted after disconnection | +| broker_queue_name | False | | Queue name for broker, if not provided it will use a temporary queue | +| temporary_queue | False | False | Whether to create a temporary queue that will be deleted after disconnection, defaulted to True if broker_queue_name is not provided | | broker_subscriptions | True | | Subscriptions for broker | | payload_encoding | 
False | utf-8 | Encoding for the payload (utf-8, base64, gzip, none) | | payload_format | False | json | Format for the payload (json, yaml, text) | diff --git a/docs/components/index.md b/docs/components/index.md index 58cf8ada..5d61fbd7 100644 --- a/docs/components/index.md +++ b/docs/components/index.md @@ -3,6 +3,7 @@ | Component | Description | | --- | --- | | [aggregate](aggregate.md) | Aggregate messages into one message. | +| [assembly](assembly.md) | Assembles messages till criteria is met, the output will be the assembled message | | [broker_input](broker_input.md) | Connect to a messaging broker and receive messages from it. The component will output the payload, topic, and user properties of the message. | | [broker_output](broker_output.md) | Connect to a messaging broker and send messages to it. Note that this component requires that the data is transformed into the input schema. | | [broker_request_response](broker_request_response.md) | Connect to a messaging broker, send request messages, and receive responses. This component combines the functionality of broker_input and broker_output with additional request-response handling. | @@ -17,6 +18,8 @@ | [langchain_vector_store_embedding_index](langchain_vector_store_embedding_index.md) | Use LangChain Vector Stores to index text for later semantic searches. This will take text, run it through an embedding model and then store it in a vector database. | | [langchain_vector_store_embedding_search](langchain_vector_store_embedding_search.md) | Use LangChain Vector Stores to search a vector store with a semantic search. This will take text, run it through an embedding model with a query embedding and then find the closest matches in the store. | | [message_filter](message_filter.md) | A filtering component. This will apply a user configurable expression. If the expression evaluates to True, the message will be passed on. If the expression evaluates to False, the message will be discarded. 
If the message is discarded, any previous components that require an acknowledgement will be acknowledged. | +| [openai_chat_model](openai_chat_model.md) | OpenAI chat model component | +| [openai_chat_model_with_history](openai_chat_model_with_history.md) | OpenAI chat model component with conversation history | | [pass_through](pass_through.md) | What goes in comes out | | [stdin_input](stdin_input.md) | STDIN input component. The component will prompt for input, which will then be placed in the message payload using the output schema below. | | [stdout_output](stdout_output.md) | STDOUT output component | diff --git a/docs/components/langchain_chat_model_with_history.md b/docs/components/langchain_chat_model_with_history.md index 100e4298..e66e9a6a 100644 --- a/docs/components/langchain_chat_model_with_history.md +++ b/docs/components/langchain_chat_model_with_history.md @@ -15,6 +15,7 @@ component_config: history_max_turns: history_max_message_size: history_max_tokens: + history_max_time: history_module: history_class: history_config: @@ -33,6 +34,7 @@ component_config: | history_max_turns | False | 20 | The maximum number of turns to keep in the history. If not set, the history will be limited to 20 turns. | | history_max_message_size | False | 1000 | The maximum amount of characters to keep in a single message in the history. | | history_max_tokens | False | 8000 | The maximum number of tokens to keep in the history. If not set, the history will be limited to 8000 tokens. | +| history_max_time | False | None | The maximum time (in seconds) to keep messages in the history. If not set, messages will not expire based on time. | | history_module | False | langchain_community.chat_message_histories | The module that contains the history class. Default: 'langchain_community.chat_message_histories' | | history_class | False | ChatMessageHistory | The class to use for the history. 
Default: 'ChatMessageHistory' | | history_config | False | | The configuration for the history class. | diff --git a/docs/components/openai_chat_model.md b/docs/components/openai_chat_model.md new file mode 100644 index 00000000..fa8a506e --- /dev/null +++ b/docs/components/openai_chat_model.md @@ -0,0 +1,62 @@ +# OpenAIChatModel + +OpenAI chat model component + +## Configuration Parameters + +```yaml +component_name: +component_module: openai_chat_model +component_config: + api_key: + model: + temperature: + base_url: + stream_to_flow: + llm_mode: + stream_batch_size: + set_response_uuid_in_user_properties: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| api_key | True | | OpenAI API key | +| model | True | | OpenAI model to use (e.g., 'gpt-3.5-turbo') | +| temperature | False | 0.7 | Sampling temperature to use | +| base_url | False | None | Base URL for OpenAI API | +| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. | +| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | +| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. | + + +## Component Input Schema + +``` +{ + messages: [ + { + role: , + content: + }, + ... 
+ ] +} +``` +| Field | Required | Description | +| --- | --- | --- | +| messages | True | | +| messages[].role | True | | +| messages[].content | True | | + + +## Component Output Schema + +``` +{ + content: +} +``` +| Field | Required | Description | +| --- | --- | --- | +| content | True | The generated response from the model | diff --git a/docs/components/openai_chat_model_with_history.md b/docs/components/openai_chat_model_with_history.md new file mode 100644 index 00000000..262bc72d --- /dev/null +++ b/docs/components/openai_chat_model_with_history.md @@ -0,0 +1,68 @@ +# OpenAIChatModelWithHistory + +OpenAI chat model component with conversation history + +## Configuration Parameters + +```yaml +component_name: +component_module: openai_chat_model_with_history +component_config: + api_key: + model: + temperature: + base_url: + stream_to_flow: + llm_mode: + stream_batch_size: + set_response_uuid_in_user_properties: + history_max_turns: + history_max_time: +``` + +| Parameter | Required | Default | Description | +| --- | --- | --- | --- | +| api_key | True | | OpenAI API key | +| model | True | | OpenAI model to use (e.g., 'gpt-3.5-turbo') | +| temperature | False | 0.7 | Sampling temperature to use | +| base_url | False | None | Base URL for OpenAI API | +| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. | +| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. | +| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. | +| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. 
| +| history_max_turns | False | 10 | Maximum number of conversation turns to keep in history | +| history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) | + + +## Component Input Schema + +``` +{ + messages: [ + { + role: , + content: + }, + ... + ], + clear_history_but_keep_depth: +} +``` +| Field | Required | Description | +| --- | --- | --- | +| messages | True | | +| messages[].role | True | | +| messages[].content | True | | +| clear_history_but_keep_depth | False | Clear history but keep the last N messages. If 0, clear all history. If not set, do not clear history. | + + +## Component Output Schema + +``` +{ + content: +} +``` +| Field | Required | Description | +| --- | --- | --- | +| content | True | The generated response from the model | diff --git a/examples/assembly_inputs.yaml b/examples/assembly_inputs.yaml new file mode 100644 index 00000000..5a4feb43 --- /dev/null +++ b/examples/assembly_inputs.yaml @@ -0,0 +1,74 @@ +# Example for assembling inputs for a Solace AI connector +# +# It will subscribe to `demo/messages` and expect an event with the payload: +# +# The input message has the following schema: +# { +# "content": "", +# "id": "" +# } +# +# It will then send an event back to Solace with the topic: `demo/messages/assembled` +# +# +# required ENV variables: +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN + +--- +log: + stdout_log_level: INFO + log_file_level: INFO + log_file: solace_ai_connector.log + +shared_config: + - broker_config: &broker_connection + broker_type: solace + broker_url: ${SOLACE_BROKER_URL} + broker_username: ${SOLACE_BROKER_USERNAME} + broker_password: ${SOLACE_BROKER_PASSWORD} + broker_vpn: ${SOLACE_BROKER_VPN} + + +flows: + - name: Simple assemble flow + components: + # Input from a Solace broker + - component_name: solace_input + component_module: broker_input + component_config: + <<: *broker_connection + broker_subscriptions: + - 
topic: demo/messages + payload_encoding: utf-8 + payload_format: json + + # Assemble messages + - component_name: assemble_messages + component_module: assembly + component_config: + assemble_key: id + max_items: 3 + max_time_ms: 10000 + component_input: + source_expression: input.payload + + # Send assembled messages back to broker + - component_name: send_response + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: previous + dest_expression: user_data.output:payload + - type: copy + source_expression: template:{{text://input.topic}}/assembled + dest_expression: user_data.output:topic + component_input: + source_expression: user_data.output diff --git a/examples/llm/langchain_openai_with_history_chat.yaml b/examples/llm/langchain_openai_with_history_chat.yaml index 1440b332..639567f7 100644 --- a/examples/llm/langchain_openai_with_history_chat.yaml +++ b/examples/llm/langchain_openai_with_history_chat.yaml @@ -37,9 +37,9 @@ shared_config: broker_password: ${SOLACE_BROKER_PASSWORD} broker_vpn: ${SOLACE_BROKER_VPN} -# Take from Slack and publish to Solace +# Take from input broker and publish back to Solace flows: - # Slack chat input processing + # broker input processing - name: Simple template to LLM components: # Input from a Solace broker diff --git a/examples/llm/openai_chat.yaml b/examples/llm/openai_chat.yaml index 65e98975..4c3f7538 100644 --- a/examples/llm/openai_chat.yaml +++ b/examples/llm/openai_chat.yaml @@ -36,9 +36,9 @@ shared_config: broker_password: ${SOLACE_BROKER_PASSWORD} broker_vpn: ${SOLACE_BROKER_VPN} -# Take from Slack and publish to Solace +# Take from input broker and publish back to Solace flows: - # Slack chat input processing + # broker input processing - name: Simple template to LLM components: # Input from a Solace broker diff --git 
a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 82203ee2..8fc62701 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -126,6 +126,8 @@ def process_event(self, event): self.current_message = None elif event.event_type == EventType.TIMER: self.handle_timer_event(event.data) + elif event.event_type == EventType.CACHE_EXPIRY: + self.handle_cache_expiry_event(event.data) else: log.warning( "%sUnknown event type: %s", self.log_identifier, event.event_type @@ -158,6 +160,10 @@ def handle_timer_event(self, timer_data): # This method can be overridden by components that need to handle timer events pass + def handle_cache_expiry_event(self, timer_data): + # This method can be overridden by components that need to handle cache expiry events + pass + def discard_current_message(self): # If the message is to be discarded, we need to acknowledge any previous components self.current_message_has_been_discarded = True diff --git a/src/solace_ai_connector/components/general/assembly.py b/src/solace_ai_connector/components/general/assembly.py new file mode 100644 index 00000000..72f69b23 --- /dev/null +++ b/src/solace_ai_connector/components/general/assembly.py @@ -0,0 +1,119 @@ +"""Assembly component for the Solace AI Event Connector""" + +from ...common.log import log +from ..component_base import ComponentBase +from ...common.message import Message + + +info = { + "class_name": "Assembly", + "description": ( + "Assembles messages till criteria is met, " + "the output will be the assembled message" + ), + "config_parameters": [ + { + "name": "assemble_key", + "required": True, + "description": "The key from input message that would cluster the similar messages together", + }, + { + "name": "max_items", + "required": False, + "default": 10, + "description": "Maximum number of messages to assemble. 
Once this value is reached, the messages would be flushed to the output",
+        },
+        {
+            "name": "max_time_ms",
+            "required": False,
+            "default": 10000,
+            "description": "The timeout in milliseconds to wait for the messages to assemble. If timeout is reached before the max size is reached, the messages would be flushed to the output",
+        },
+    ],
+    "input_schema": {
+        "type": "object",
+        "properties": {},
+        "required": [],
+    },
+    "output_schema": {
+        "type": "array",
+        "items": {
+            "type": "object",
+            "properties": {
+                "payload": {
+                    "type": "string",
+                },
+                "topic": {
+                    "type": "string",
+                },
+                "user_properties": {
+                    "type": "object",
+                },
+            },
+        },
+    },
+}
+
+# Default timeout to flush the messages
+DEFAULT_FLUSH_TIMEOUT_MS = 10000
+ASSEMBLY_EXPIRY_ID = "assembly_expiry"
+
+
+class Assembly(ComponentBase):
+    def __init__(self, **kwargs):
+        super().__init__(info, **kwargs)
+        self.assemble_key = self.get_config("assemble_key")
+        self.max_items = self.get_config("max_items")
+        self.max_time_ms = self.get_config("max_time_ms", DEFAULT_FLUSH_TIMEOUT_MS)
+
+    def invoke(self, message, data):
+        # Check if the message has the assemble key
+        if self.assemble_key not in data or type(data[self.assemble_key]) is not str:
+            log.error(
+                f"Message does not have the key {self.assemble_key} or it is not a string"
+            )
+            raise ValueError(
+                f"Message does not have the key {self.assemble_key} or it is not a string"
+            )
+
+        event_key = data[self.assemble_key]
+        # Fetch the current assembly from cache
+        current_assembly = self.cache_service.get_data(event_key)
+
+        # Set expiry timeout only on cache creation (not on update)
+        expiry = None
+        # Create a new assembly if not present
+        if not current_assembly:
+            expiry = self.max_time_ms / 1000
+            current_assembly = {
+                "list": [],
+                "message": Message(),
+            }
+
+        # Update cache with the new data
+        message.combine_with_message(current_assembly["message"])
+        current_assembly["message"] = message
+        current_assembly["list"].append(data)
+        
self.cache_service.add_data( + event_key, + current_assembly, + expiry=expiry, + metadata=ASSEMBLY_EXPIRY_ID, + component=self, + ) + + # Flush the assembly if the max size is reached + if len(current_assembly["list"]) >= self.max_items: + log.debug(f"Flushing data by size - {len(current_assembly['list'])} items") + return self.flush_assembly(event_key)["list"] + + def handle_cache_expiry_event(self, data): + if data["metadata"] == ASSEMBLY_EXPIRY_ID: + assembled_data = data["expired_data"] + log.debug(f"Flushing data by timeout - {len(assembled_data['list'])} items") + self.process_post_invoke(assembled_data["list"], assembled_data["message"]) + + def flush_assembly(self, assemble_key): + assembly = self.cache_service.get_data(assemble_key) + self.cache_service.remove_data(assemble_key) + return assembly diff --git a/src/solace_ai_connector/services/cache_service.py b/src/solace_ai_connector/services/cache_service.py index 885ec294..19f08bbd 100644 --- a/src/solace_ai_connector/services/cache_service.py +++ b/src/solace_ai_connector/services/cache_service.py @@ -13,7 +13,7 @@ class CacheStorageBackend(ABC): @abstractmethod - def get(self, key: str) -> Any: + def get(self, key: str, include_meta=False) -> Any: pass @abstractmethod @@ -40,7 +40,7 @@ def __init__(self): self.store: Dict[str, Dict[str, Any]] = {} self.lock = Lock() - def get(self, key: str) -> Any: + def get(self, key: str, include_meta=False) -> Any: with self.lock: item = self.store.get(key) if item is None: @@ -48,7 +48,7 @@ def get(self, key: str) -> Any: if item["expiry"] and time.time() > item["expiry"]: del self.store[key] return None - return item["value"] + return item if include_meta else item["value"] def set( self, @@ -61,7 +61,7 @@ def set( with self.lock: self.store[key] = { "value": value, - "expiry": time.time() + expiry if expiry else None, + "expiry": expiry, "metadata": metadata, "component": component, } @@ -103,7 +103,7 @@ def __init__(self, connection_string: str): 
Base.metadata.create_all(self.engine) self.Session = sessionmaker(bind=self.engine) - def get(self, key: str) -> Any: + def get(self, key: str, include_meta=False) -> Any: session = self.Session() try: item = session.query(CacheItem).filter_by(key=key).first() @@ -113,6 +113,13 @@ def get(self, key: str) -> Any: session.delete(item) session.commit() return None + if include_meta: + return { + "value": pickle.loads(item.value), + "metadata": pickle.loads(item.item_metadata) if item.item_metadata else None, + "expiry": item.expiry, + "component": self._get_component_from_reference(item.component_reference), + } return pickle.loads(item.value), ( pickle.loads(item.item_metadata) if item.item_metadata else None ) @@ -207,6 +214,16 @@ def add_data( metadata: Optional[Dict] = None, component=None, ): + # Calculate the expiry time + expiry = time.time() + expiry if expiry else None + + # Check if the key already exists + cache = self.storage.get(key, include_meta=True) + if cache: + # Use the cache data to combine with the new data + expiry = expiry or cache["expiry"] + metadata = metadata or cache["metadata"] + component = component or cache["component"] self.storage.set(key, value, expiry, metadata, component) with self.lock: if expiry: @@ -242,23 +259,22 @@ def _check_expirations(self): # Use the storage backend to get all items all_items = self.storage.get_all() - for key, (value, metadata, expiry, component) in all_items.items(): if expiry and current_time > expiry: - expired_keys.append((key, metadata, component)) + expired_keys.append((key, metadata, component, value)) elif expiry and (next_expiry is None or expiry < next_expiry): next_expiry = expiry with self.lock: - for key, _, _ in expired_keys: + for key, _, _, _ in expired_keys: self.storage.delete(key) self.next_expiry = next_expiry - - for key, metadata, component in expired_keys: + + for key, metadata, component, value in expired_keys: if component: event = Event( - EventType.CACHE_EXPIRY, {"key": key, 
"metadata": metadata} + EventType.CACHE_EXPIRY, {"key": key, "metadata": metadata, "expired_data": value} ) component.enqueue(event) From c95b7f39bb1485a14988df474c95751d6a6039e3 Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Thu, 29 Aug 2024 13:08:32 -0400 Subject: [PATCH 04/26] Added MoA Example + UUID Invoke Function (#28) * MoA example: Broadcast to multiple agents * Added MoA event manager, added uuid invoke_function + test, updated auto-generated docs * Added assembly layer to MoA example --- docs/configuration.md | 2 + examples/llm/mixture_of_agents.yaml | 486 ++++++++++++++++++ .../common/invoke_functions.py | 4 +- .../components/inputs_outputs/broker_base.py | 2 +- tests/test_invoke.py | 20 + 5 files changed, 512 insertions(+), 2 deletions(-) create mode 100644 examples/llm/mixture_of_agents.yaml diff --git a/docs/configuration.md b/docs/configuration.md index 841ea216..5e6e5e80 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -116,6 +116,8 @@ There is a module named `invoke_functions` that has a list of functions that can - `empty_tuple`: Return an empty tuple - `empty_float`: Return 0.0 - `empty_int`: Return 0 +- `if_else`: If the first value is true, return the second value, otherwise return the third value +- `uuid`: returns a universally unique identifier (UUID) Use positional parameters to pass values to the functions that expect arguments. 
Here is an example of using the `invoke_functions` module to do some simple operations: diff --git a/examples/llm/mixture_of_agents.yaml b/examples/llm/mixture_of_agents.yaml new file mode 100644 index 00000000..27af1f6f --- /dev/null +++ b/examples/llm/mixture_of_agents.yaml @@ -0,0 +1,486 @@ +# This will create a flow using the mixture of agents pattern (https://arxiv.org/abs/2406.04692) +# +# It will subscribe to `moa/question` and expect an event with the payload: +# The input message has the following schema: +# { +# "query": "" +# } +# +# It will then send an event back to Solace with the topic: `moa/question/response` +# +# NOTE: For horizontal scaling, partitioned queues must be used. This is not implemented in this example. +# +# Dependencies: +# pip install -U langchain-google-vertexai langchain_anthropic langchain_openai openai +# +# required ENV variables: +# - GOOGLE_APPLICATION_CREDENTIALS: the path to a service account JSON file +# - VERTEX_REGION +# - VERTEX_API_ENDPOINT - optional +# - VERTEX_MODEL_NAME +# - OPENAI_API_KEY +# - OPENAI_API_ENDPOINT - optional +# - OPENAI_MODEL_NAME +# - ANTHROPIC_API_KEY +# - ANTHROPIC_API_ENDPOINT - optional +# - ANTHROPIC_MODEL_NAME +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN +# - NUMBER_OF_MOA_LAYERS: the number of layers in the mixture of agents + +--- +log: + stdout_log_level: INFO + log_file_level: INFO + log_file: solace_ai_connector.log + +shared_config: + # Broker connection configuration + - broker_config: &broker_connection + broker_type: solace + broker_url: ${SOLACE_BROKER_URL} + broker_username: ${SOLACE_BROKER_USERNAME} + broker_password: ${SOLACE_BROKER_PASSWORD} + broker_vpn: ${SOLACE_BROKER_VPN} + + # Agent broker input configuration + - agent_broker_input: &agent_broker_input + component_name: solace_agent_broker + component_module: broker_input + component_config: + <<: *broker_connection + broker_subscriptions: + - topic: moa/broadcast + 
qos: 1 + payload_encoding: utf-8 + payload_format: json + + # Agent broker output configuration + - agent_broker_output: &agent_broker_output + component_name: solace_agent_broker + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + # Copy the contents of the input event (query, id, layer_number) + - type: copy + source_expression: input.payload + dest_expression: user_data.output:payload + # Copy the output from the LLM + - type: copy + source_expression: user_data.formatted_response:content + dest_expression: user_data.output:payload.content + # Copy the agent name + - type: copy + source_expression: user_data.formatted_response:agent + dest_expression: user_data.output:payload.agent + # Copy the response topic based on input topic + - type: copy + source_expression: template:{{text://input.topic}}/next + dest_expression: user_data.output:topic + component_input: + source_expression: user_data.output + + # Agent input transformations + - agent_input_transformations: &agent_input_transformations + input_transforms: + - type: copy + source_expression: | + template:You are a helpful AI assistant. 
Please help with the user's request below: + + {{text://input.payload:query}} + + dest_expression: user_data.llm_input:messages.0.content + - type: copy + source_expression: static:user + dest_expression: user_data.llm_input:messages.0.role + component_input: + source_expression: user_data.llm_input + +flows: + # Event manager - Updates user message and send to all agents + - name: event manager + components: + # Broker input for user query + - component_name: user_query_input + component_module: broker_input + component_config: + <<: *broker_connection + broker_subscriptions: + - topic: moa/question + qos: 1 + - topic: moa/question/aggregate + qos: 1 + payload_encoding: utf-8 + payload_format: json + + # Broker output for agents - Update input event with layer number and UUID + - component_name: solace_agent_broker + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + # Copy the original user query + - type: copy + source_expression: input.payload:query + dest_expression: user_data.output:payload.query + # Increase layer number by 1, if none exists, set to 1 + - type: copy + source_expression: + invoke: + # Check if layer number exists + module: invoke_functions + function: if_else + params: + positional: + - source_expression(input.payload:layer_number) + - invoke: + # Add 1 to the layer number + module: invoke_functions + function: add + params: + positional: + - invoke: + # Default to zero + module: invoke_functions + function: or_op + params: + positional: + - source_expression(input.payload:layer_number) + - 0 + - 1 + # No layer number, set to 1 + - 1 + dest_expression: user_data.output:payload.layer_number + + # Copy over the UUID, if doesn't exists create one + - type: copy + source_value: + invoke: + module: invoke_functions + function: if_else + params: + positional: + - source_expression(input.payload:id) + - 
source_expression(input.payload:id) + - invoke: + module: invoke_functions + function: uuid + dest_expression: user_data.output:payload.id + + # Copy the response topic based on input topic + - type: copy + source_value: moa/broadcast + dest_expression: user_data.output:topic + + component_input: + source_expression: user_data.output + + # Agent 1 - Google Vertex AI + - name: Agent 1 - Google Vertex AI + components: + # Broker input for Vertex AI + - <<: *agent_broker_input + + # Vertex AI LLM Request + - component_name: llm_request + component_module: langchain_chat_model + component_config: + langchain_module: langchain_google_vertexai + langchain_class: ChatVertexAI + langchain_component_config: + base_url: ${VERTEX_API_ENDPOINT} + location: ${VERTEX_REGION} + model: ${VERTEX_MODEL_NAME} + temperature: 0.01 + <<: *agent_input_transformations + + # Format Vertex AI response for broker output + - component_name: format_response + component_module: pass_through + input_transforms: + - type: copy + source_value: vertex_ai + dest_expression: user_data.formatted_response:agent + - type: copy + source_expression: previous + dest_expression: user_data.formatted_response:content + component_input: + source_expression: user_data.formatted_response + + # Broker output for Vertex AI + - <<: *agent_broker_output + + # Agent 2 - OpenAI + - name: Agent 2 - OpenAI + components: + # Broker input for OpenAI + - <<: *agent_broker_input + + # OpenAI LLM Request + - component_name: llm_request + component_module: openai_chat_model + component_config: + api_key: ${OPENAI_API_KEY} + base_url: ${OPENAI_API_ENDPOINT} + model: ${OPENAI_MODEL_NAME} + temperature: 0.01 + <<: *agent_input_transformations + + # Format OpenAI response for broker output + - component_name: format_response + component_module: pass_through + input_transforms: + - type: copy + source_value: openai + dest_expression: user_data.formatted_response:agent + - type: copy + source_expression: previous:content + 
dest_expression: user_data.formatted_response:content + component_input: + source_expression: user_data.formatted_response + + # Broker output for OpenAI + - <<: *agent_broker_output + + # Agent 3 - Anthropic + - name: Agent 3 - Anthropic + components: + # Broker input for Anthropic + - <<: *agent_broker_input + + # Anthropic LLM Request + - component_name: llm_request + component_module: langchain_chat_model + component_config: + langchain_module: langchain_anthropic + langchain_class: ChatAnthropic + langchain_component_config: + api_key: ${ANTHROPIC_API_KEY} + base_url: ${ANTHROPIC_API_ENDPOINT} + model: ${ANTHROPIC_MODEL_NAME} + temperature: 0.01 + <<: *agent_input_transformations + + # Format Anthropic response for broker output + - component_name: format_response + component_module: pass_through + input_transforms: + - type: copy + source_value: anthropic + dest_expression: user_data.formatted_response:agent + - type: copy + source_expression: previous + dest_expression: user_data.formatted_response:content + component_input: + source_expression: user_data.formatted_response + + # Broker output for Anthropic + - <<: *agent_broker_output + + # Assemble the responses and send to user/next layer + - name: Assemble agent responses + components: + # Agents responses from solace broker + - component_name: agent_responses + component_module: broker_input + component_config: + <<: *broker_connection + broker_subscriptions: + - topic: moa/broadcast/next + qos: 1 + payload_encoding: utf-8 + payload_format: json + + # Assemble Agent responses + - component_name: assemble_responses + component_module: assembly + component_config: + assemble_key: id + max_time_ms: 30000 + max_items: 3 # Number of Agents + component_input: + source_expression: input.payload + + # Format response for the LLM request + - component_name: format_response + component_module: pass_through + input_transforms: + # Copy the ID + - type: copy + source_expression: previous:0.id + dest_expression: 
user_data.aggregated_data:id + # Copy the layer number + - type: copy + source_expression: previous:0.layer_number + dest_expression: user_data.aggregated_data:layer_number + # Copy the initial user query + - type: copy + source_expression: previous:0.query + dest_expression: user_data.aggregated_data:query + # Transform each response to use the template + - type: map + source_list_expression: previous + source_expression: | + template: + {{text://item:content}} + \n + dest_list_expression: user_data.temp:responses + # Transform and reduce the responses to one message + - type: reduce + source_list_expression: user_data.temp:responses + source_expression: item + initial_value: "" + accumulator_function: + invoke: + module: invoke_functions + function: add + params: + positional: + - source_expression(keyword_args:accumulated_value) + - source_expression(keyword_args:current_value) + dest_expression: user_data.aggregated_data:responses + component_input: + source_expression: user_data.aggregated_data + + # Aggregate all the outcomes from the agents + - component_name: aggregate_generations + component_module: openai_chat_model + component_config: + api_key: ${OPENAI_API_KEY} + base_url: ${OPENAI_API_ENDPOINT} + model: ${OPENAI_MODEL_NAME} + temperature: 0.01 + input_transforms: + - type: copy + source_expression: | + template:You have been provided with a set of responses from various large language models to a user query. Your task is to synthesize these responses into a single, high-quality response. It is crucial to critically evaluate the information provided in these responses, recognizing that some of it may be biased or incorrect. Your response should not simply replicate the given answers but should offer a refined, accurate, and comprehensive reply to the instruction. Ensure your response is well-structured, coherent, and adheres to the highest standards of accuracy and reliability. 
Do not add any extra comments about how you created the response, just synthesize these responses as instructed. Do not mention that the result is created of multiple responses. + + User Query: + + + {{text://user_data.aggregated_data:query}} + <\user-query> + + Responses: + + {{text://user_data.aggregated_data:responses}} + dest_expression: user_data.llm_input:messages.0.content + - type: copy + source_expression: static:user + dest_expression: user_data.llm_input:messages.0.role + component_input: + source_expression: user_data.llm_input + + - component_name: aggregator_output + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + # Copy the contents of the required info for next layer + - type: copy + source_expression: user_data.aggregated_data:id + dest_expression: user_data.output:payload.id + - type: copy + source_expression: user_data.aggregated_data:layer_number + dest_expression: user_data.output:payload.layer_number + # Copy to a temporary location the modified query for the next layer + - type: copy + source_expression: | + template: For the given query, the following draft response is created. Update and enhance the response to be logically accurate, sound natural, and free of errors. + Think before you reply. And only reply with the updated response. Do not add any extra comments. 
+ + + {{text://user_data.aggregated_data:query}} + <\query> + + + {{text://previous:content}} + <\response> + dest_expression: user_data.temp:new_query + # Copy the results of the aggregation by LLM + # The LLM result is added under query for the next layer + - type: copy + source_expression: + invoke: + # If the layer number is less than the number of layers, + # modify the response for the next layer of agents + module: invoke_functions + function: if_else + params: + positional: + - invoke: + module: invoke_functions + function: less_than + params: + positional: + - source_expression(user_data.aggregated_data:layer_number, int) + - ${NUMBER_OF_MOA_LAYERS} + - source_expression(user_data.temp:new_query) + - source_expression(previous:content) + dest_expression: user_data.output:payload.query + # Copy the response topic based on layer number + - type: copy + source_expression: + invoke: + # If the layer number is less than the number of layers, + # send to the next layer, otherwise send to the user + module: invoke_functions + function: if_else + params: + positional: + - invoke: + module: invoke_functions + function: less_than + params: + positional: + - source_expression(user_data.aggregated_data:layer_number, int) + - ${NUMBER_OF_MOA_LAYERS} + - moa/question/aggregate + - moa/question/cleanup + dest_expression: user_data.output:topic + component_input: + source_expression: user_data.output + + # Cleanup the responses from the assembly and send to the user + - name: Cleanup assembled responses + components: + # Response from the assembly + - component_name: assembly_response + component_module: broker_input + component_config: + <<: *broker_connection + broker_subscriptions: + - topic: moa/question/cleanup + qos: 1 + payload_encoding: utf-8 + payload_format: json + + - component_name: aggregator_output + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + 
input_transforms: + # Copy the user query and response for the final response + - type: copy + source_expression: input.payload:query + dest_expression: user_data.output:payload.response + - type: copy + source_value: moa/question/response + dest_expression: user_data.output:topic + component_input: + source_expression: user_data.output diff --git a/src/solace_ai_connector/common/invoke_functions.py b/src/solace_ai_connector/common/invoke_functions.py index db0365dd..bdde26e1 100644 --- a/src/solace_ai_connector/common/invoke_functions.py +++ b/src/solace_ai_connector/common/invoke_functions.py @@ -1,5 +1,7 @@ """Set of simple functions to take the place of operators in the config file""" +import uuid as uuid_module + add = lambda x, y: x + y append = lambda x, y: x + [y] subtract = lambda x, y: x - y @@ -26,7 +28,7 @@ empty_float = lambda: 0.0 empty_int = lambda: 0 if_else = lambda x, y, z: y if x else z - +uuid = lambda: str(uuid_module.uuid4()) # A few test functions def _test_positional_and_keyword_args(*args, **kwargs): diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 157177b1..6b91a018 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -4,6 +4,7 @@ import gzip import json import yaml +import uuid from abc import abstractmethod @@ -11,7 +12,6 @@ from ..component_base import ComponentBase from ...common.message import Message from ...common.messaging.messaging_builder import MessagingServiceBuilder -import uuid # TBD - at the moment, there is no connection sharing supported. It should be possible # to share a connection between multiple components and even flows. 
The changes diff --git a/tests/test_invoke.py b/tests/test_invoke.py index 46c6267f..48346048 100644 --- a/tests/test_invoke.py +++ b/tests/test_invoke.py @@ -1076,3 +1076,23 @@ def test_filter_transform_sub_field_greater_than_2(): assert output_message.get_data("user_data.temp") == { "new_list": [{"my_val": 3}, {"my_val": 4}] } + + +def test_invoke_with_uuid_generator(): + """Verify that the uuid invoke_function returns an ID""" + response = resolve_config_values( + { + "a": { + "invoke": { + "module": "invoke_functions", + "function": "uuid" + }, + }, + } + ) + + # Check if the output is of type string + assert type(response["a"]) == str + + # Check if the output is a valid UUID + assert len(response["a"]) == 36 \ No newline at end of file From 8fa025eb43d3084e2beabba3d1563fa99d721064 Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Tue, 3 Sep 2024 08:11:12 -0400 Subject: [PATCH 05/26] Update documentation for new users + Refactored component_input & source_expression (#29) * Refactored component_input to input_selection * Updated, added, and enhanced the documentation with new users in mind * Refactored source_expression function to evaluate_expression (backward compatible) * Added tips and tricks section + info and examples on custom modules * tiny format update * tiny update --- README.md | 12 +- config.yaml | 6 +- docs/components/aggregate.md | 19 + docs/components/error_input.md | 2 +- docs/components/index.md | 2 +- docs/components/iterate.md | 13 + docs/configuration.md | 698 +++++++++++------- docs/getting_started.md | 6 +- docs/index.md | 10 +- docs/overview.md | 32 +- docs/tips_and_tricks.md | 153 ++++ docs/transforms/filter.md | 4 +- docs/transforms/map.md | 6 +- docs/transforms/reduce.md | 6 +- docs/usage.md | 85 --- examples/ack_test.yaml | 6 +- examples/anthropic_bedrock.yaml | 4 +- examples/assembly_inputs.yaml | 4 +- examples/chat_model_with_history.yaml | 2 +- examples/error_handler.yaml | 12 +- 
examples/llm/anthropic_chat.yaml | 4 +- examples/llm/bedrock_anthropic_chat.yaml | 2 +- .../langchain_openai_with_history_chat.yaml | 4 +- examples/llm/mixture_of_agents.yaml | 42 +- examples/llm/openai_chat.yaml | 4 +- examples/llm/vertexai_chat.yaml | 4 +- examples/milvus_store.yaml | 4 +- examples/request_reply.yaml | 6 +- examples/vector_store_search.yaml | 2 +- src/solace_ai_connector/common/utils.py | 28 +- .../components/component_base.py | 12 +- .../components/general/aggregate.py | 23 +- .../components/general/iterate.py | 10 + .../components/inputs_outputs/error_input.py | 2 +- .../services/cache_service.py | 3 +- src/solace_ai_connector/transforms/filter.py | 6 +- src/solace_ai_connector/transforms/map.py | 8 +- src/solace_ai_connector/transforms/reduce.py | 8 +- tests/test_aggregate.py | 6 +- tests/test_config_file.py | 4 +- tests/test_error_flows.py | 2 +- tests/test_filter.py | 4 +- tests/test_flows.py | 12 +- tests/test_invoke.py | 108 +-- tests/test_iterate.py | 4 +- tests/test_timer_input.py | 2 +- tests/test_transforms.py | 22 +- 47 files changed, 866 insertions(+), 552 deletions(-) create mode 100644 docs/tips_and_tricks.md delete mode 100644 docs/usage.md diff --git a/README.md b/README.md index bad79090..994a5c11 100644 --- a/README.md +++ b/README.md @@ -6,27 +6,27 @@ This project provides a standalone, Python-based application to allow Solace eve a wide range of AI models and services. The application is designed to be easily extensible to support new AI models and services. -## Getting started quickly - -Please see the [getting started guide](docs/getting_started.md) for instructions on how to get started quickly. - ## Documentation Please see the [documentation](docs/index.md) for more information. +## Getting started quickly + +Please see the [getting started guide](docs/getting_started.md) for instructions on how to get started quickly. + ## Support This is not an officially supported Solace product. 
For more information try these resources: + - Ask the [Solace Community](https://solace.community) - The Solace Developer Portal website at: https://solace.dev - ## Contributing Contributions are encouraged! Please read [CONTRIBUTING](CONTRIBUTING.md) for details on our code of conduct, and the process for submitting pull requests to us. - ## License + See the LICENSE file for details. diff --git a/config.yaml b/config.yaml index 0f0d7a1f..fcb46677 100644 --- a/config.yaml +++ b/config.yaml @@ -41,7 +41,7 @@ flows: - type: copy source_expression: input.payload dest_expression: user_data.temp:text - component_input: + input_selection: source_expression: user_data.temp:text - component_name: solace_sw_broker @@ -49,8 +49,6 @@ flows: component_config: <<: *broker_connection payload_format: json - component_input: - source_expression: user_data.output input_transforms: - type: copy source_expression: input.payload @@ -67,5 +65,5 @@ flows: - type: copy source_expression: user_data.temp dest_expression: user_data.output:user_properties - component_input: + input_selection: source_expression: user_data.output diff --git a/docs/components/aggregate.md b/docs/components/aggregate.md index d1bcf876..825c1b37 100644 --- a/docs/components/aggregate.md +++ b/docs/components/aggregate.md @@ -1,6 +1,7 @@ # Aggregate Take multiple messages and aggregate them into one. The output of this component is a list of the exact structure of the input data. +This can be useful for batch processing or for aggregating events together before processing them. The Aggregate component will take a sequence of events and combine them into a single event before enqueuing it to the next component in the flow so that it can perform batch processing. ## Configuration Parameters @@ -37,3 +38,21 @@ component_config: ... 
]
```

+
+## Example Configuration
+
+
+```yaml
+  - component_name: aggregator_example
+    component_module: aggregate
+    component_config:
+      # The maximum number of items to aggregate before sending the data to the next component
+      max_items: 3
+      # The maximum time to wait before sending the data to the next component
+      max_time_ms: 1000
+    input_selection:
+      # Take the text field from the message and use it as the input to the aggregator
+      source_expression: input.payload:text
+```
+
diff --git a/docs/components/error_input.md b/docs/components/error_input.md
index c0d81274..de06a883 100644
--- a/docs/components/error_input.md
+++ b/docs/components/error_input.md
@@ -1,6 +1,6 @@
 # ErrorInput
 
-Receive processing errors from the Solace AI Event Connector. Note that the component_input configuration is ignored. This component should be used to create a flow that handles errors from other flows.
+Receive processing errors from the Solace AI Event Connector. Note that the input_selection configuration is ignored. This component should be used to create a flow that handles errors from other flows.
 
 ## Configuration Parameters
 
diff --git a/docs/components/index.md b/docs/components/index.md
index 5d61fbd7..5832e4df 100644
--- a/docs/components/index.md
+++ b/docs/components/index.md
@@ -8,7 +8,7 @@
 | [broker_output](broker_output.md) | Connect to a messaging broker and send messages to it. Note that this component requires that the data is transformed into the input schema. |
 | [broker_request_response](broker_request_response.md) | Connect to a messaging broker, send request messages, and receive responses. This component combines the functionality of broker_input and broker_output with additional request-response handling. |
 | [delay](delay.md) | A simple component that simply passes the input to the output, but with a configurable delay. |
-| [error_input](error_input.md) | Receive processing errors from the Solace AI Event Connector. 
Note that the component_input configuration is ignored. This component should be used to create a flow that handles errors from other flows. | +| [error_input](error_input.md) | Receive processing errors from the Solace AI Event Connector. Note that the input_selection configuration is ignored. This component should be used to create a flow that handles errors from other flows. | | [file_output](file_output.md) | File output component | | [iterate](iterate.md) | Take a single message that is a list and output each item in that list as a separate message | | [langchain_chat_model](langchain_chat_model.md) | Provide access to all the LangChain chat models via configuration | diff --git a/docs/components/iterate.md b/docs/components/iterate.md index 157943fc..53aad801 100644 --- a/docs/components/iterate.md +++ b/docs/components/iterate.md @@ -32,3 +32,16 @@ No configuration parameters } ``` + + +## Example Configuration + + +```yaml + - component_name: iterate_example + component_module: iterate + component_config: + input_selection: + # Take the list field from the message and use it as the input to the iterator + source_expression: input.payload:embeddings +``` diff --git a/docs/configuration.md b/docs/configuration.md index 5e6e5e80..8c2b5113 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,16 +1,52 @@ # Configuration for the AI Event Connector +Table of Contents + +- [Configuration for the AI Event Connector](#configuration-for-the-ai-event-connector) + - [Configuration File Format and Rules](#configuration-file-format-and-rules) + - [Special values](#special-values) + - [Configuration File Structure](#configuration-file-structure) + - [Log Configuration](#log-configuration) + - [Trace Configuration](#trace-configuration) + - [Shared Configurations](#shared-configurations) + - [Flow Configuration](#flow-configuration) + - [Message Data](#message-data) + - [Expression Syntax](#expression-syntax) + - [Templates](#templates) + - [Component 
Configuration](#component-configuration) + - [component\_module](#component_module) + - [component\_config](#component_config) + - [input\_transforms](#input_transforms) + - [input\_selection](#input_selection) + - [queue\_depth](#queue_depth) + - [num\_instances](#num_instances) + - [Built-in components](#built-in-components) + - [Invoke Keyword](#invoke-keyword) + - [Invoke with custom function](#invoke-with-custom-function) + - [invoke\_functions](#invoke_functions) + - [evaluate\_expression()](#evaluate_expression) + - [user\_processor Component and invoke](#user_processor-component-and-invoke) + - [Usecase Examples](#usecase-examples) + The AI Event Connector is highly configurable. You can define the components of each flow, the queue depths between them, and the number of instances of each component. The configuration is done through a YAML file that is loaded when the connector starts. This allows you to easily change the configuration without having to modify the code. ## Configuration File Format and Rules -The configuration file is a YAML file that is loaded when the connector starts. +The configuration file is a YAML file that is loaded when the connector starts. Multiple YAML files can be passed to the connector at startup. The files will be merged; the latest file will overwrite duplicate keys from previous files. Arrays will be concatenated. This is useful for separating flows into different files. + +For example, if you have two files: + +```bash +python3 -m solace_ai_connector.main config1.yaml config2.yaml +``` + +Since this application uses `pyyaml`, it is possible to use the `!include` directive to include the template from a file. This can be useful for very large templates or for templates that are shared across multiple components. ### Special values Within the configuration, you can have simple static values, environment variables, or dynamic values using the `invoke` keyword. 
-#### Environment Variables +- ***Environment Variables*** You can use environment variables in the configuration file by using the `${}` syntax. For example, if you have an environment variable `MY_VAR` you can use it in the configuration file like this: @@ -18,193 +54,31 @@ You can use environment variables in the configuration file by using the `${}` s my_key: ${MY_VAR} ``` -#### Dynamic Values (invoke keyword) +- ***Dynamic Values (invoke keyword)*** You can use dynamic values in the configuration file by using the `invoke` keyword. This allows you to do such things as import a module, instantiate a class and call a function to get the value. For example, if you want to get the operating system type you can use it in the configuration file like this: ```yaml -os_type: +os_type: invoke: module: platform function: system ``` -An `invoke` block works by specifying an 'object' to act on with one (and only one) of the following keys: -- `module`: The name of the module to import in normal Python import syntax (e.g. `os.path`) -- `object`: An object to call a function on or get an attribute from. Note that this must have an `invoke` block itself to create the object. Objects can be nested to build up complex objects. An object is the returned value from a function call or attribute get from a module or a nested object. - -It is also acceptable to specify neither `module` nor `object` if you are calling a function that is in the global namespace. - -In addition to the object specifier, you can specify one (and only one) of the following keys: -- `function`: The name of the function to call on the object -- `attribute`: The name of the attribute to get from the object - -In the case of a function, you can also specify a `params` key to pass parameters to the function. 
The params value has the following keys: -- `positional`: A list of positional parameters to pass to the function -- `keyword`: A dictionary of keyword parameters to pass to the function - -`invoke` blocks can be nested to build up complex objects and call functions on them. - -Here is an example of a complex `invoke` block that could be used to get AWS credentials: - -```yaml - # Get AWS credentials and give it a name to reference later - - aws_credentials: &aws_credentials - invoke: - object: - invoke: - # import boto3 - module: boto3 - # Get the session object -> boto3.Session() - function: Session - # Call the get_credentials function on the session object -> session.get_credentials() - function: get_credentials - - - aws_4_auth: - invoke: - # import requests_aws4auth - module: requests_aws4auth - # Get the AWS4Auth object -> requests_aws4auth.AWS4Auth() - function: AWS4Auth - params: - positional: - # Access key - - invoke: - object: *aws_credentials - attribute: access_key - # Secret key - - invoke: - object: *aws_credentials - attribute: secret_key - # Region (from environment variable) - - ${AWS_REGION} - # Service name (from environment variable) - - ${AWS_SERVICE} - keyword: - # Pass the session token if it exists -> session_token= - session_token: - invoke: - object: *aws_credentials - attribute: token -``` - -##### invoke_functions - -There is a module named `invoke_functions` that has a list of functions that can take the place of python operators. This is useful for when you want to use an operator in a configuration file. 
The following functions are available: -- `add`: param1 + param2 - can be used to add or concatenate two strings or lists -- `append`: Append the second value to the first -- `subtract`: Subtract the second number from the first -- `multiply`: Multiply two numbers together -- `divide`: Divide the first number by the second -- `modulus`: Get the modulus of the first number by the second -- `power`: Raise the first number to the power of the second -- `equal`: Check if two values are equal -- `not_equal`: Check if two values are not equal -- `greater_than`: Check if the first value is greater than the second -- `greater_than_or_equal`: Check if the first value is greater than or equal to the second -- `less_than`: Check if the first value is less than the second -- `less_than_or_equal`: Check if the first value is less than or equal to the second -- `and_op`: Check if both values are true -- `or_op`: Check if either value is true -- `not_op`: Check if the value is false -- `in_op`: Check if the first value is in the second value -- `negate`: Negate the value -- `empty_list`: Return an empty list -- `empty_dict`: Return an empty dictionary -- `empty_string`: Return an empty string -- `empty_set`: Return an empty set -- `empty_tuple`: Return an empty tuple -- `empty_float`: Return 0.0 -- `empty_int`: Return 0 -- `if_else`: If the first value is true, return the second value, otherwise return the third value -- `uuid`: returns a universally unique identifier (UUID) - -Use positional parameters to pass values to the functions that expect arguments. -Here is an example of using the `invoke_functions` module to do some simple operations: - -```yaml - # Use the invoke_functions module to do some simple operations - - simple_operations: - invoke: - module: invoke_functions - function: add - params: - positional: - - 1 - - 2 -``` - -##### source_expression() - -If the `invoke` block is used within an area of the configuration that relates to message processing -(e.g. 
input_transforms), an invoke function call can use the special function `source_expression([, type])` for -any of its parameters. This function will be replaced with the value of the source expression at runtime. -It is an error to use `source_expression()` outside of a message processing. The second parameter is optional -and will convert the result to the specified type. The following types are supported: -- `int` -- `float` -- `bool` -- `str` -If the value is a dict or list, the type request will be ignored - -Example: -```yaml --flows: - -my_flow: - -my_component: - input_transforms: - -type: copy - source_expression: - invoke: - module: invoke_functions - function: add - params: - positional: - - source_expression(input.payload:my_obj.val1, int) - - 2 - dest_expression: user_data.my_obj:result -``` - -In the above example, the `source_expression()` function is used to get the value of `input.payload:my_obj.val1`, -convert it to an `int` and add 2 to it. - -**Note:** In places where the yaml keys `source_expression` and `dest_expressions` are used, you can use the same type of expression to access a value. Check [Expression Syntax](#expression-syntax) for more details. - -##### user_processor component and invoke - -The `user_processor` component is a special component that allows you to define a user-defined function to process the message. This is useful for when you want to do some processing on the input message that is not possible with the built-in transforms or other components. In order to specify the user-defined function, you must define the `component_processing` property with an `invoke` block. 
- -Here is an example of using the `user_processor` component with an `invoke` block: - -```yaml - - my_user_processor: - component_name: my_user_processor - component_module: user_processor - component_processing: - invoke: - module: my_module - function: my_function - params: - positional: - - source_expression(input.payload:my_key) - - 2 -``` - - - - +More details [here](#invoke-keyword). ## Configuration File Structure The configuration file is a YAML file with these top-level keys: - `log`: Configuration of logging for the connector +- `trace`: Configuration of tracing for the connector - `shared_config`: Named configurations that can be used by multiple components later in the file -- `flows`: A list of flow configurations. +- `flows`: A list of flow configurations. ### Log Configuration -The `log` configuration section is used to configure the logging for the connector. It configures the logging behaviour for stdout and file logs. It has the following keys: +The `log` configuration section is used to configure the logging for the connector. It configures the logging behavior for stdout and file logs. It has the following keys: - `stdout_log_level`: - The log level for the stdout log - `log_file_level`: - The log level for the file log @@ -219,6 +93,15 @@ log: log_file: /var/log/ai_event_connector.log ``` +### Trace Configuration + +The trace option will output logs to a trace log that has all the detail of the message at each point. It gives an output when a message is pulled out of an input queue and another one before invoke is called (i.e. after transforms). + +```yaml +trace: + trace_file: /var/log/ai_event_connector_trace.log +``` + ### Shared Configurations The `shared_config` section is used to define configurations that can be used by multiple components later in the file. It is a dictionary of named configurations. Each named configuration is a dictionary of configuration values. 
Here is an example of a shared configuration: @@ -240,158 +123,451 @@ Later in the file, you can reference this shared configuration like this: ### Flow Configuration +A flow is an instance of a pipeline that processes events in a sequential manner. Each `flow` is completely independent of the others and can have its own set of components and configurations. + +Flows can communicate with each other if programmed to do so. For example, a flow can send a message to a broker and another flow can subscribe to the same topic to receive the message. + +Flows can be spread across multiple configuration files. The connector will merge the flows from all the files and run them together. + The `flows` section is a list of flow configurations. Each flow configuration is a dictionary with the following keys: + - `name`: - The unique name of the flow -- `components`: A list of component configurations +- `components`: A list of component configurations. Check [Component Configuration](#component-configuration) for more details + +## Message Data + +Between each component in a flow, a message is passed. This message is a dictionary that is used to pass data between components within the same flow. The message object has different properties, some are available throughout the whole flow, some only between two immediate components, and some have other characteristics. + +The message object has the following properties: + +- `input`: The Solace broker input message. It has the following properties: + - `payload`: The payload of the input message + - `topic`: The topic of the input message + - `topic_levels`: A list of the levels of the topic of the input message + - `user_properties`: The user properties of the input message + +This data type is available only after a topic subscription and then it will be available from that component onwards until overwritten by another input message. + +- `user_data`: The user data object. 
This is a storage area where the user can write and read values to be used in different places. It is an object that is passed through the flows, and can hold any valid Python data type. To write to this object, you can use the `dest_expression` in the configuration file. To read from this object, you can use the `source_expression` in the configuration file. (This object is also available in the `evaluate_expression()` function). + +- `previous`: The complete output of the previous component in the flow. This can be used to completely forward the output of the previous component as an input to the next component or be modified in the `input_transforms` section of the next component. + +- transform specific variables: Some transform functions will add specific variables to the message object that are ONLY accessible in that transform. For example, the [`map` transform](./transforms/map.md) will add `item`, `index`, and `source_list` to the message object or the [`reduce` transform](./transforms/reduce.md) will add `accumulated_value`, `current_value`, and `source_list` to the message object. You can find these details in each transform's documentation. + +## Expression Syntax -#### Component Configuration +The `source_expression` and `dest_expression` values in the configuration file use a simple expression syntax to reference values in the input message and to store values in the output message. The format of the expression is: + +**`<data_type>[.<qualifier>][:<index>]`** + +Where: + +- `data_type`: - The type of data to reference. This can be one of the [message data types](#message-data) or one of the following: + - message data type: input, user_data, previous, etc., mentioned in the [Message Data](#message-data) section + - `static`: A static value (e.g. `static:my_value`) + - `template`: A template ([see more below](#templates)) + + +- `qualifier`: - The qualifier to use to reference the data. This is specific to the `data_type` and is optional. 
If not specified, the entire data type will be used. + +- `index`: - Where to get the data in the data type. This is optional and is specific to the `data_type`. For templates, it is the template. For other data types, it is a dot-separated string or an integer index. The index will be split on dots and used to traverse the data type. If it is an integer, it will be used as an index into the data type. If it is a string, it will be used as a key to get the value from the data type. + +Here are some examples of expressions: + +- `input.payload:my_key` - Get the value of `my_key` from the input payload +- `user_data.my_obj:my_key` - Get the value of `my_key` from the `my_obj` object in the user data +- `static:my_value` - Use the static value `my_value` +- `user_data.my_obj2:my_list.2.my_key` - Get the value of `my_key` from the 3rd item in the `my_list` list in the `my_obj2` object in the user data + +When using expressions for destination expressions, lists and objects will be created as needed. If the destination expression is a list index, the list will be extended to the index if it is not long enough. If the destination expression is an object key, the object will be created if it does not exist. + +### Templates + +The `template` data type is a special data type that allows you to use a template to create a value. The template is a string that can contain expressions to reference values in the input message. The format of the template is: + +**`template:text text text {{template_expression}} text text text`** + +Where: + +- `template:` is the template data type indicator. +- `{{template_expression}}` - An expression to reference values in the input message. It has the format: + + **`<encoding>://<source_expression>`** + + Where: + + - `encoding`: - The encoding/formatting to use to print out the value. 
This can be one of the following (Optional, defaulted to `text`): + + - `base64`: Use base64 encoding + - `json`: Use json format + - `yaml`: Use yaml format + - `text`: Use string format + - `datauri:`: Use data uri encoding with the specified mime type + + - `source_expression`: - An expression to reference values in the input message. This has the same format as the `source_expression` in the configuration file described above. + +Here is an example of a template: + +```yaml +input_transforms: + - type: copy + source_expression: | + template:Write me a dry joke about: + {{text://input.payload}} + + Write the joke in the voice of {{text://input.user_properties:comedian}} + + dest_expression: user_data.llm_input:messages.0.content + - type: copy + source_value: user + dest_expression: user_data.llm_input:messages.0.role +``` + +In this example, the `source_expression` for the first transform is a template that uses the `text` encoding to create a string. + + +## Component Configuration Each component configuration is a dictionary with the following keys: -- `component_name`: - The unique name of the component within the flow -- `component_module`: - The module that contains the component class (python import syntax) -- `component_config`: - The configuration for the component. Its format is specific to the component -- `input_transforms`: - A list of transforms to apply to the input message before sending it to the component -- `component_input`: - A source_expression or source_value to use as the input to the component. -- `queue_depth`: - The depth of the input queue for the component -- `num_instances`: - The number of instances of the component to run -**Note: For a list of all built-in components, see the [Components](components/index.md) documentation.** +- `component_name`: - The unique name of the component within the flow. 
+- `component_module`: - The module that contains the component class (python import syntax) or the name of the [built-in component](#built-in-components) +- `component_config`: - The configuration for the component. Its format is specific to the component. [Optional: if the component does not require configuration] +- `input_transforms`: - A list of transforms to apply to the input message before sending it to the component. This is to ensure that the input message is in the correct format for the component. [Optional] +- `input_selection`: - A `source_expression` or `source_value` to use as the input to the component. Check [Expression Syntax](#expression-syntax) for more details. [Optional: If not specified, the complete previous component output will be used] +- `queue_depth`: - The depth of the input queue for the component. +- `num_instances`: - The number of instances of the component to run (Starts multiple threads to process messages) + + +### component_module + +The `component_module` is a string that specifies the module that contains the component class. + +Solace-ai-connector comes with a number of flexible and highly customizable [built-in components](./components/index.md) that should cover a wide range of use cases. To use a built-in component, you can specify the name of the component in the `component_module` key and configure it using the `component_config` key. For example, to use the `aggregate` component, you would specify the following: + +```yaml +- my_component: + component_module: aggregate + component_config: + max_items: 3 + max_time_ms: 1000 +``` + +The `component_module` can also be the python import syntax for the module. When using a custom component, you can also use `component_base_path` to specify the base path of the python module. + +Your module file should also export a variable named `info` that has the name of the class to instantiate under the key `class_name`. 
-##### component_config +For example: -The `component_config` is a dictionary of configuration values specific to the component. The format of this dictionary is specific to the component. You must refer to the component's documentation for the specific configuration values. +```python +from solace_ai_connector.components.component_base import ComponentBase -##### input_transforms +info = { + "class_name": "CustomClass", +} + +class CustomClass(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + + def invoke(self, _, data): + return data["text"] + " + custom class" +``` + +For example, if the component class is in a module named `my_module` in the `src` directory, you can use it in the configuration file like this: + +```yaml + - component_name: custom_module_example + component_base_path: . + component_module: src.my_module +``` + +You can find an example of a custom component in the [tips and tricks](tips_and_tricks.md/#using-custom-modules-with-the-ai-connector) section. + +**Note:** If you are using a custom component, you must ensure that you're using proper relative paths and that your paths are at the correct level relative to where you're running the connector from. + +### component_config + +The `component_config` is a dictionary of configuration values specific to the component. The format of this dictionary is specific to the component. You must refer to the component's documentation for the specific configuration values. For example, the [`aggregate` component](./components/aggregate.md) has the following configuration: + +```yaml + component_module: aggregate + component_config: + max_items: 3 + max_time_ms: 1000 +``` + +### input_transforms + +The `input_transforms` is a list of transforms to apply to the input message before sending it to the component. 
Each transform is a dictionary with the following keys: + - `type`: - The type of transform -- `source_expression|source_value`: - The source expression or value to use as the input to the transform +- `source_expression|source_value`: - The source expression or static value to use as the input to the transform - `dest_expression`: - The destination expression for where to store the transformation output -For a list of all available transform functions check [Transforms](transforms/index.md) page. +The AI Event Connector comes with a number of built-in transforms that can be used to process messages. **For a list of all built-in transforms, see the [Transforms](transforms/index.md) documentation.** Here is an example of a component configuration with input transforms: ```yaml - - my_component: - component_module: my_module.my_component - component_config: - my_key: my_value - input_transforms: - - type: copy - # Extract the my_key value from the input payload - source_expression: input.payload:my_key - # Store the value in the newly created my_obj object in the my_keys list - # at index 2 (i.e. my_obj.my_keys[2].my_key = input.payload.my_key) - dest_expression: user_data.my_obj:my_keys.2.my_key +- my_component: + component_module: my_module.my_component + component_config: + my_key: my_value + input_transforms: + - type: copy + # Extract the my_key value from the input payload + source_expression: input.payload:my_key + # Store the value in the newly created my_obj object in the my_keys list + # at index 2 (i.e. my_obj.my_keys[2].my_key = input.payload.my_key) + dest_expression: user_data.my_obj:my_keys.2.my_key + - type: copy + # Use a static value + source_value: my_static_value + # Store the value in the newly created my_obj object in the my_keys list + # at index 3 (i.e. 
my_obj.my_keys[3].my_key = my_static_value) + dest_expression: user_data.my_obj:my_keys.3.my_key ``` -###### Built-in Transforms -The AI Event Connector comes with a number of built-in transforms that can be used to process messages. For a list of all built-in transforms, see the [Transforms](transforms/index.md) documentation. +### input_selection -##### component_input +The `input_selection` is a dictionary with one (and only one) of the following keys: -The `component_input` is a dictionary with one (and only one) of the following keys: - `source_expression`: - An expression to use as the input to the component (see below for expression syntax) -- `source_value`: - A value to use as the input to the component. +- `source_value`: - A static value to use as the input to the component. -Note that, as for all values in the config file, you can use the `invoke` keyword to get dynamic values +Note that, as for all values in the config file, you can use the [`invoke`](#invoke-keyword) keyword to get dynamic values Here is an example of a component configuration with a source expression: ```yaml - - my_component: - component_module: my_module.my_component - component_config: - my_key: my_value - component_input: - source_expression: input.payload:my_key +- my_component: + component_module: my_module.my_component + component_config: + my_key: my_value + input_selection: + source_expression: input.payload:my_key ``` -##### queue_depth +### queue_depth The `queue_depth` is an integer that specifies the depth of the input queue for the component. This is the number of messages that can be buffered in the queue before the component will start to block. By default, the queue depth is 100. - -##### num_instances +### num_instances The `num_instances` is an integer that specifies the number of instances of the component to run. This is the number of threads that will be started to process messages from the input queue. By default, the number of instances is 1. 
-#### Built-in components +### Built-in components The AI Event Connector comes with a number of built-in components that can be used to process messages. For a list of all built-in components, see the [Components](components/index.md) documentation. -### Expression Syntax +## Invoke Keyword -The `source_expression` and `dest_expression` values in the configuration file use a simple expression syntax to reference values in the input message and to store values in the output message. The format of the expression is: +The `invoke` keyword is used to get dynamic values in the configuration file. An `invoke` block works by specifying an 'object' to act on with one (and only one) of the following keys: -`[.][:]` +- `module`: The name of the module to import in normal Python import syntax (e.g. `os.path`) +- `object`: An object to call a function on or get an attribute from. Note that this must have an `invoke` block itself to create the object. Objects can be nested to build up complex objects. An object is the returned value from a function call or get attribute from a module or a nested object. -Where: +It is also acceptable to specify neither `module` nor `object` if you are calling a function that is in the global namespace. -- `data_type`: - The type of data to reference. This can be one of the following: - - `input`: The input message. It supports the qualifiers: - - `payload`: The payload of the input message - - `topic`: The topic of the input message - - `topic_levels`: A list of the levels of the topic of the input message - - `user_properties`: The user properties of the input message - - `user_data`: The user data object. The qualifier is required to specify the name of the user data object. `user_data` is an object that is passed through the flows, where the user can read and write values to it to be accessed at the different places. - - `static`: A static value (e.g. 
`static:my_value`) - - `template`: A template ([see more below](#templates)) - - `previous`: The output from the previous component in the flow. This could be of any type depending on the previous component +In addition to the object specifier, you can specify one (and only one) of the following keys: -- `qualifier`: - The qualifier to use to reference the data. This is specific to the `data_type` and is optional. If not specified, the entire data type will be used. +- `function`: The name of the function to call on the object +- `attribute`: The name of the attribute to get from the object -- `index`: - Where to get the data in the data type. This is optional and is specific to the `data_type`. For templates, it is the template. For other data types, it is a dot separated string or an integer index. The index will be split on dots and used to traverse the data type. If it is an integer, it will be used as an index into the data type. If it is a string, it will be used as a key to get the value from the data type. +In the case of a function, you can also specify a `params` key to pass parameters to the function. The params value has the following keys: -Here are some examples of expressions: +- `positional`: A list of positional parameters to pass to the function +- `keyword`: A dictionary of keyword parameters to pass to the function -- `input.payload:my_key` - Get the value of `my_key` from the input payload -- `user_data.my_obj:my_key` - Get the value of `my_key` from the `my_obj` object in the user data -- `static:my_value` - Use the static value `my_value` -- `user_data:my_obj2:my_list.2.my_key` - Get the value of `my_key` from the 3rd item in the `my_list` list in the `my_obj2` object in the user data +`invoke` blocks can be nested to build up complex objects and call functions on them. -When using expressions for destination expressions, lists and objects will be created as needed. 
If the destination expression is a list index, the list will be extended to the index if it is not long enough. If the destination expression is an object key, the object will be created if it does not exist. +Here is an example of a complex `invoke` block that could be used to get AWS credentials: -#### Templates +```yaml +# Get AWS credentials and give it a name to reference later +- aws_credentials: &aws_credentials + invoke: + object: + invoke: + # import boto3 + module: boto3 + # Get the session object -> boto3.Session() + function: Session + # Passing a parameter to the Session function + params: + keyword: + # Using a keyword parameter + profile_name: default + # Call the get_credentials function on the session object -> session.get_credentials() + function: get_credentials + +- aws_4_auth: + invoke: + # import requests_aws4auth + module: requests_aws4auth + # Get the AWS4Auth object -> requests_aws4auth.AWS4Auth() + function: AWS4Auth + params: + positional: + # Access key + - invoke: + object: *aws_credentials + attribute: access_key + # Secret key + - invoke: + object: *aws_credentials + attribute: secret_key + # Region (from environment variable) + - ${AWS_REGION} + # Service name (from environment variable) + - ${AWS_SERVICE} + keyword: + # Pass the session token if it exists -> session_token= + session_token: + invoke: + object: *aws_credentials + attribute: token +``` -The `template` data type is a special data type that allows you to use a template to create a value. The template is a string that can contain expressions to reference values in the input message. The format of the template is: +**Note:** The function parameters do not support expression syntax outside of the `evaluate_expression()` function. If you need to use an expression like template, you'd have to write it to a temporary user data value and reference it in the `source_expression` function. 
-`template:text text text {{template_expression}} text text text` +### Invoke with custom function -Where: +You can use invoke with your own custom functions. When using a custom functions, you can use the `path` to specify the base path of the python module. -- `template:` is the template data type indicator. -- `{{template_expression}}` - An expression to reference values in the input message. It has the format: +For example, if you have a custom function in a module named `my_module` in `src` directory and the function is named `my_function`, you can use it in the configuration file like this: - `://` +```yaml +- my_custom_function: + invoke: + path: . + module: src.my_module + function: my_function + params: + positional: + - 1 + - 2 +``` - Where: - - `encoding`: - The encoding/formatting to use to print out the value. This can be one of the following (Optional, defaulted to `text`): - - `base64`: Use base64 encoding - - `json`: Use json format - - `yaml`: Use yaml format - - `text`: Use string format - - `datauri:`: Use data uri encoding with the specified mime type +### invoke_functions - - `source_expression`: - An expression to reference values in the input message. This has the same format as the `source_expression` in the configuration file described above. +There is a module named `invoke_functions` that has a list of functions that can take the place of python operators used inside of `invoke`. This is useful for when you want to use an operator in a configuration file. 
-Here is an example of a template: +The following functions are available: + +- `add`: param1 + param2 - can be used to add or concatenate two strings or lists +- `append`: Append the second value to the first +- `subtract`: Subtract the second number from the first +- `multiply`: Multiply two numbers together +- `divide`: Divide the first number by the second +- `modulus`: Get the modulus of the first number by the second +- `power`: Raise the first number to the power of the second +- `equal`: Check if two values are equal +- `not_equal`: Check if two values are not equal +- `greater_than`: Check if the first value is greater than the second +- `greater_than_or_equal`: Check if the first value is greater than or equal to the second +- `less_than`: Check if the first value is less than the second +- `less_than_or_equal`: Check if the first value is less than or equal to the second +- `and_op`: Check if both values are true +- `or_op`: Check if either value is true +- `not_op`: Check if the value is false +- `in_op`: Check if the first value is in the second value +- `negate`: Negate the value +- `empty_list`: Return an empty list +- `empty_dict`: Return an empty dictionary +- `empty_string`: Return an empty string +- `empty_set`: Return an empty set +- `empty_tuple`: Return an empty tuple +- `empty_float`: Return 0.0 +- `empty_int`: Return 0 +- `if_else`: If the first value is true, return the second value, otherwise return the third value +- `uuid`: returns a universally unique identifier (UUID) + +Use positional parameters to pass values to the functions that expect arguments. 
+ +Here is an example of using the `invoke_functions` module to do some simple operations: ```yaml - input_transforms: - - type: copy - source_expression: | - template:Write me a dry joke about: - {{text://input.payload}} - Write the joke in the voice of {{text://input.user_properties:comedian}} - dest_expression: user_data.llm_input:messages.0.content - - type: copy - source_value: user - dest_expression: user_data.llm_input:messages.0.role +# Use the invoke_functions module to do some simple operations +- simple_operations: + invoke: + module: invoke_functions + function: add + params: + positional: + - 1 + - 2 ``` -In this example, the `source_expression` for the first transform is a template that uses the `text` encoding to create a string. +### evaluate_expression() + +If the `invoke` block is used within an area of the configuration that relates to message processing +(e.g. input_transforms), an invoke function call can use the special function `evaluate_expression([, type])` for any of its parameters. This function will be replaced with the value of the source expression at runtime. + +It is an error to use `evaluate_expression()` outside of a message processing. The second parameter is optional +and will convert the result to the specified type. The following types are supported: + +- `int` +- `float` +- `bool` +- `str` + +If the value is a dict or list, the type request will be ignored + +Example: + +```yaml +- flows: + - my_flow: + - my_component: + input_transforms: + -type: copy + source_expression: + invoke: + module: invoke_functions + function: add + params: + positional: + - evaluate_expression(input.payload:my_obj.val1, int) + - 2 + dest_expression: user_data.my_obj:result +``` + +In the above example, the `evaluate_expression()` function is used to get the value of `input.payload:my_obj.val1`, +convert it to an `int` and add 2 to it. 
+ +**Note:** In places where the yaml keys `source_expression` and `dest_expressions` are used, you can use the same type of expression to access a value. Check [Expression Syntax](#expression-syntax) for more details. + +### user_processor Component and invoke + +The `user_processor` component is a special component that allows you to define a user-defined function to process the message. This is useful for when you want to do some processing on the input message that is not possible with the built-in transforms or other components. In order to specify the user-defined function, you must define the `component_processing` property with an `invoke` block. + +Here is an example of using the `user_processor` component with an `invoke` block: + +```yaml +- my_user_processor: + component_name: my_user_processor + component_module: user_processor + component_processing: + invoke: + module: my_module + function: my_function + params: + positional: + - evaluate_expression(input.payload:my_key) + - 2 +``` + +## Usecase Examples + +You can find various usecase examples in the [examples directory](../examples/) + + + +--- + +Checkout [components.md](./components/index.md), [transforms.md](./transforms/index.md), or [tips_and_tricks](tips_and_tricks.md) next. 
diff --git a/docs/getting_started.md b/docs/getting_started.md index f2f7306b..14b88456 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -75,7 +75,7 @@ Download the OpenAI connector example configuration file: curl https://raw.githubusercontent.com/SolaceLabs/solace-ai-connector/main/examples/llm/openai_chat.yaml > openai_chat.yaml ``` -For this one, you need to also define the following environment variables: +For this one, you need to also define the following additional environment variables: ```sh export OPENAI_API_KEY= @@ -174,4 +174,6 @@ To build a Docker image, run the following command: make build ``` -Please now visit the [Documentation Page](index.md) for more information +--- + +Checkout [configuration.md](configuration.md) or [overview.md](overview.md) next \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 85baa9e2..64501650 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,11 +7,9 @@ This connector application makes it easy to connect your AI/ML models to Solace - [Overview](overview.md) - [Getting Started](getting_started.md) - [Configuration](configuration.md) -- [Usage](usage.md) -- [Components](components/index.md) -- [Transforms](transforms/index.md) + - [Components](components/index.md) + - [Transforms](transforms/index.md) +- [Tips and Tricks](tips_and_tricks.md) +- [Examples](../examples/) - [Contributing](../CONTRIBUTING.md) - [License](../LICENSE) - - - diff --git a/docs/overview.md b/docs/overview.md index b8e8efbe..67540f88 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -1,5 +1,14 @@ # AI Event Connector for Solace event brokers - Overview +- [AI Event Connector for Solace event brokers - Overview](#ai-event-connector-for-solace-event-brokers---overview) + - [Architecture](#architecture) + - [Components](#components) + - [Built-in Components](#built-in-components) + - [Configuration](#configuration) + - [Extensibility](#extensibility) + - [Resilience and Fault 
Tolerance](#resilience-and-fault-tolerance) + - [Scalability](#scalability) + The AI Event Connector makes it easy to connect your AI/ML models to Solace event brokers. It provides a simple way to build pipelines that consume events from Solace, process them with your models, and then publish the results back to Solace. By doing this, you can greatly enhance the value of your event-driven architecture by adding AI capabilities to your event-driven applications. This connector is built using Python and the Solace Python API. It also heavily leverages LangChain, a Python library for abstracting the interface to many AI models. This allows you to easily swap out different AI models and model providers without having to change your pipeline. @@ -25,25 +34,20 @@ As shown in the flow diagram above, each flow is comprised of a sequence of comp ![Component](images/parts_of_a_component.png) The component is made up of the following parts: - - **Input Queue**: This is the queue that the component reads from. It is where the events are buffered as they flow through the system. Note that if there are multiple instances of the same component, they will all read from the same queue. - - - **Input Transforms**: This is an optional step that allows you to transform the event before it is processed. This can be useful for normalizing the data or for adding additional context to the event. - - - **Input Selection**: This selects what data should be processed by the component. The data selected should conform to the input schema of the component. It is normal to use Input Transforms and Input Selection together to ensure that the data is in the correct format for the component. If the Input Selection configuration is omitted, the component will select "previous" as the default, which will take the exact output of the previous component in the flow as the input to this component. 
- - **Processing**: This is where the actual processing of the event happens, such as where the AI model would be called to process the event. This is the only required part of the component. +- **Input Queue**: This is a python built queue that the component reads from. It is where the events are buffered as they flow through the system. Note that if there are multiple instances of the same component, they will all read from the same queue. - After these steps, the component will write the result to the next component's queue. The data written should conform to the output schema of the component. Some components are output components and will send the data to a Solace broker or other data sink. +- **Input Transforms**: This is an optional step that allows you to transform the event before it is processed. This can be useful for normalizing the data or for adding additional context to the event. In the yaml config file this is indicated by the **`input_transforms`** key. +- **Input Selection**: This selects what data should be processed by the component. The data selected should conform to the input schema of the component. It is normal to use Input Transforms and Input Selection together to ensure that the data is in the correct format for the component. If the Input Selection configuration is omitted, the component will select **"previous" as the default**, which will take the exact output of the previous component in the flow as the input to this component. In the yaml config file this is indicated by the **`input_selection`** key. -### Iterate and Aggregate Components +- **Processing**: This is where the actual processing of the event happens, such as where the AI model would be called to process the event. This is the only required part of the component. In the yaml config file this is controlled by the **`component_module`** and **`component_config`** keys. 
-In addition to the standard components, there are two special components that can be used to iterate over a list of events and to aggregate a list of events. These components can be used to process multiple events in a single component. This can be useful for batch processing or for aggregating events together before processing them. +After these steps, the component will write the result to the next component's queue. The data written should conform to the output schema of the component. Some components are output components and will send the data to a Solace broker or other data sink. -The Iterate component will take a list of events and enqueue each one individually to the next component. The Aggregate component will take a sequence of events and combine them into a single event before enqueuing it to the next component in the flow so that it can perform batch processing. - -Example usage of the Iterate and Aggregate components can be found in the [Usage](usage.md) section. +### Built-in Components +In addition to the standard components, there are a series of other built-in components that can be used to help process events. You can find a list of all built-in components in the [Components](components/index.md) section. ## Configuration @@ -63,3 +67,7 @@ The AI Event Connector is designed to be resilient and fault-tolerant. It uses q The AI Event Connector is designed to be scalable. You can increase the number of instances of a component to handle more load. This allows you to scale your pipelines to handle more events and process them faster. Additionally, you can run multiple flows or even multiple connectors that connect to the same broker queue to handle more events in parallel. Note that for Solace broker queues, they must be configured to be non-exclusive to have multiple flows receive messages. + +--- + +Checkout [Getting Started](getting_started.md) next. 
diff --git a/docs/tips_and_tricks.md b/docs/tips_and_tricks.md new file mode 100644 index 00000000..221ae1dc --- /dev/null +++ b/docs/tips_and_tricks.md @@ -0,0 +1,153 @@ +# Some tips and tricks for using the Solace AI Connector + +- [Some tips and tricks for using the Solace AI Connector](#some-tips-and-tricks-for-using-the-solace-ai-connector) + - [Using `user_data` as temporary storage](#using-user_data-as-temporary-storage) + - [Using custom modules with the AI Connector](#using-custom-modules-with-the-ai-connector) + + +## Using `user_data` as temporary storage + +Some times you might need to chain multiple transforms together, but transforms do not support nesting. For example if you'd want to `map` through a list of strings first and then reduce them to a single string, you can write your transforms sequentially and write to a temporary place in `user_data` like `user_data.temp`: + +For example: + +```yaml + input_transforms: + # Transform each response to use the template + - type: map + source_list_expression: previous + source_expression: | + template: + {{text://item:content}} + \n + dest_list_expression: user_data.temp:responses # Temporary storage + + # Transform and reduce the responses to one message + - type: reduce + source_list_expression: user_data.temp:responses # Using the value in temporary storage + source_expression: item + initial_value: "" + accumulator_function: + invoke: + module: invoke_functions + function: add + params: + positional: + - evaluate_expression(keyword_args:accumulated_value) + - evaluate_expression(keyword_args:current_value) + dest_expression: user_data.output:responses # Value to be used in the component + + input_selection: + source_expression: user_data.output +``` + +## Using custom modules with the AI Connector + +This is a simple example that utilizes a custom component, a class based transform and a function based transform. 
+ +First follow the following steps to create a repository to run the ai connector: + +```bash +mkdir -p module-example/src +cd module-example +python3 -m venv env +source env/bin/activate +pip install solace-ai-connector +touch config.yaml src/custom_component.py src/custom_function.py src/__init__.py +``` + +Write the following code to `src/custom_function.py`: + +```python +def custom_function(input_data): + return input_data + " + custom function value" + +class CustomFunctionClass: + def get_custom_value(self, input_data): + return input_data + " + custom function class" +``` + +Write the following code to `src/custom_component.py`: + +```python +from solace_ai_connector.components.component_base import ComponentBase + +info = { + "class_name": "CustomClass", +} + +class CustomClass(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + + def invoke(self, _, data): + return data["text"] + " + custom class" + +``` + +Write the following config to `config.yaml`: + +```yaml +log: + stdout_log_level: INFO + log_file_level: INFO + log_file: solace_ai_connector.log + +flows: + - name: custom_module_flow + components: + # Input from a standard in + - component_name: stdin + component_module: stdin_input + + # Using Custom component + - component_name: custom_component_example + component_base_path: . + component_module: src.custom_component + input_selection: + source_expression: previous + + # Output to a standard out + - component_name: stdout + component_module: stdout_output + # Using custom transforms + input_transforms: + # Instantiating a class and calling its function + - type: copy + source_expression: + invoke: + # Creating an object of the class + object: + invoke: + path: . 
+ module: src.custom_function + function: CustomFunctionClass + # Calling the function of the class + function: get_custom_value + params: + positional: + - source_expression(previous) + dest_expression: user_data.output:class + # Calling a function directly + - type: copy + source_expression: + invoke: + module: src.custom_function + function: custom_function + params: + positional: + - source_expression(previous) + dest_expression: user_data.output:function + component_input: + source_expression: user_data.output +``` + +Then run the AI connector with the following command: + +```bash +solace-ai-connector config.yaml +``` + +--- + +Find more examples in the [examples](../examples/) directory. diff --git a/docs/transforms/filter.md b/docs/transforms/filter.md index 310bba93..bd88f061 100644 --- a/docs/transforms/filter.md +++ b/docs/transforms/filter.md @@ -8,7 +8,7 @@ In the filter function, you have access to the following keyword arguments: * current_value: The value of the current item in the source list * source_list: The source list -These should be accessed using `source_expression(keyword_args:)`. For example, `source_expression(keyword_args:current_value)`. See the example below for more detail. +These should be accessed using `evaluate_expression(keyword_args:)`. For example, `evaluate_expression(keyword_args:current_value)`. See the example below for more detail. ## Configuration Parameters @@ -46,7 +46,7 @@ input_transforms: function: greater_than params: positional: - - source_expression(keyword_args:current_value.my_val) + - evaluate_expression(keyword_args:current_value.my_val) - 2 dest_expression: user_data.output:new_list ``` diff --git a/docs/transforms/map.md b/docs/transforms/map.md index f6f56b80..90d0d41f 100644 --- a/docs/transforms/map.md +++ b/docs/transforms/map.md @@ -6,7 +6,7 @@ This is a map transform where a list is iterated over. 
For each item, it is poss * current_value: The value of the current item in the source list * source_list: The source list -These should be accessed using `source_expression(keyword_args:)`. For example, `source_expression(keyword_args:current_value)`. See the example below for more detail. +These should be accessed using `evaluate_expression(keyword_args:)`. For example, `evaluate_expression(keyword_args:current_value)`. See the example below for more detail. ## Configuration Parameters @@ -44,9 +44,9 @@ input_transforms: function: add params: positional: - - source_expression(keyword_args:current_value) + - evaluate_expression(keyword_args:current_value) - 2 - dest_expression: user_data.output:new_list + dest_list_expression: user_data.output:new_list ``` This transform would take a payload like this: diff --git a/docs/transforms/reduce.md b/docs/transforms/reduce.md index e23dc7fd..5401308d 100644 --- a/docs/transforms/reduce.md +++ b/docs/transforms/reduce.md @@ -9,7 +9,7 @@ In the accumulator function, you have access to the following keyword arguments: * current_value: The value of the current item in the source list * source_list: The source list -These should be accessed using `source_expression(keyword_args:)`. For example, `source_expression(keyword_args:current_value)`. See the example below for more detail. +These should be accessed using `evaluate_expression(keyword_args:)`. For example, `evaluate_expression(keyword_args:current_value)`. See the example below for more detail. 
## Configuration Parameters @@ -48,8 +48,8 @@ input_transforms: function: add params: positional: - - source_expression(keyword_args:accumulated_value) - - source_expression(keyword_args:current_value) + - evaluate_expression(keyword_args:accumulated_value) + - evaluate_expression(keyword_args:current_value) dest_expression: user_data.output:my_obj.sum ``` This transform would take a payload like this: diff --git a/docs/usage.md b/docs/usage.md deleted file mode 100644 index 7abe92f2..00000000 --- a/docs/usage.md +++ /dev/null @@ -1,85 +0,0 @@ -## Selecting Data - -Within the configuration, it is necessary to select data for processing. For example, this happens -in the `component_input` section of the configuration or for the source data for input transforms. -The selection of data uses a simple expression language that allows you to select data from the -input message. - -The details of the expression language can be found in the [Configuration](configuration.md#expression-syntax) page in the Expression Syntax section. The expression language allows for the detailed selection of data from the input message or for the -creation of new data. It even supports filling a template with data from the input message as described in detail in the next section. - -### Selecting Data by Filling Templates - -As part of the data selection expressions, it is possible to provide a full template string that will be filled with data from the input message. This is very useful for components that are interacting with Large Language Models (LLMs) or other AI models that typically take large amounts of text with some additional metadata sprinkled in. 
- -Here is an example configuration that uses a template to provide data for a component: - -```yaml - - component_name: template_example - component_module: some_llm_component - component_input: - # Take the text field from the message and use it as the input to the component - source_expression: | - template:You are a helpful assistant who is an expert in animal husbandry. I would like you to answer this - question by using the information following the question. Make sure to include the links to the data sources - that you used to answer the question. - - Question: {{input.payload:question}} - - Context: {{user_data.vector_store_results:results}} - -``` - -In this example, a previous component did a vector store lookup on the question to get some context data. -Those results in addition to the original question are used to fill in the template for the LLM component. - -Since this application usings `pyyaml`, it is possible to use the `!include` directive to include the template from -a file. This can be useful for very large templates or for templates that are shared across multiple components. - -## Built-in Components - -### Aggregating Messages - -The AI Event Connector has a special component called the `Aggregate` component that can be used to combine multiple events into a single event. This can be useful for batch processing or for aggregating events together before processing them. The `Aggregate` component will take a sequence of events and combine them into a single event before enqueuing it to the next component in the flow so that it can perform batch processing. 
- -The `Aggregate` component has the following configuration options: - - max_items: The maximum number of items to aggregate before sending the data to the next component - - max_time_ms: The maximum time to wait (in milliseconds) before sending the data to the next component - - -Example Configuration: - -```yaml - - component_name: aggretator_example - component_module: aggregate - component_config: - # The maximum number of items to aggregate before sending the data to the next component - max_items: 3 - # The maximum time to wait before sending the data to the next component - max_time_ms: 1000 - component_input: - # Take the text field from the message and use it as the input to the aggregator - source_expression: input.payload:text -``` - - -### Iterating Over Messages - -The AI Event Connector has a special component called the `Iterate` component that can be used to iterate over a list within one message to create many messages for the next component. - -There is no specific configuration for the Iterate component other than the normal component_input configuration. That source must select a list of items to iterate over. - -Example Configuration: - -```yaml - - component_name: iterate_example - component_module: iterate - component_config: - component_input: - # Take the list field from the message and use it as the input to the iterator - source_expression: input.payload:embeddings -``` - -**Note: For a list of all built-in components, see the [Components](components/index.md) documentation.** - -In addition to these, you also can create your own custom components. 
\ No newline at end of file diff --git a/examples/ack_test.yaml b/examples/ack_test.yaml index 278f9f04..08314aa9 100644 --- a/examples/ack_test.yaml +++ b/examples/ack_test.yaml @@ -47,7 +47,7 @@ flows: - type: copy source_expression: input.payload dest_expression: user_data.temp:text - component_input: + input_selection: source_expression: user_data.temp:text - component_name: solace_sw_broker @@ -55,8 +55,6 @@ flows: component_config: <<: *broker_connection payload_format: json - component_input: - source_expression: user_data.output input_transforms: - type: copy source_expression: input.payload @@ -73,5 +71,5 @@ flows: - type: copy source_expression: user_data.temp dest_expression: user_data.output:user_properties - component_input: + input_selection: source_expression: user_data.output diff --git a/examples/anthropic_bedrock.yaml b/examples/anthropic_bedrock.yaml index 1f3c60a9..7c35bb60 100644 --- a/examples/anthropic_bedrock.yaml +++ b/examples/anthropic_bedrock.yaml @@ -75,7 +75,7 @@ flows: - type: copy source_expression: static:user dest_expression: user_data.llm_input:messages.0.role - component_input: + input_selection: source_expression: user_data.llm_input - component_name: solace_sw_broker @@ -96,5 +96,5 @@ flows: - type: copy source_expression: template:response/{{text://input.topic}} dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: user_data.output diff --git a/examples/assembly_inputs.yaml b/examples/assembly_inputs.yaml index 5a4feb43..b215cd68 100644 --- a/examples/assembly_inputs.yaml +++ b/examples/assembly_inputs.yaml @@ -52,7 +52,7 @@ flows: assemble_key: id max_items: 3 max_time_ms: 10000 - component_input: + input_selection: source_expression: input.payload # Send assembled messages back to broker @@ -70,5 +70,5 @@ flows: - type: copy source_expression: template:{{text://input.topic}}/assembled dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: 
user_data.output diff --git a/examples/chat_model_with_history.yaml b/examples/chat_model_with_history.yaml index c889ea56..820d232d 100644 --- a/examples/chat_model_with_history.yaml +++ b/examples/chat_model_with_history.yaml @@ -50,7 +50,7 @@ flows: - type: copy source_value: user dest_expression: user_data.temp:messages.1.role - component_input: + input_selection: source_expression: user_data.temp - component_name: stdout diff --git a/examples/error_handler.yaml b/examples/error_handler.yaml index 416d1bcd..a8c700e6 100644 --- a/examples/error_handler.yaml +++ b/examples/error_handler.yaml @@ -51,7 +51,7 @@ flows: - type: copy source_value: error_log.log dest_expression: user_data.log:file_path - component_input: + input_selection: source_expression: user_data.log - component_name: solace_sw_broker component_module: broker_output @@ -68,7 +68,7 @@ flows: - type: copy source_expression: input.user_properties dest_expression: user_data.output:user_properties - component_input: + input_selection: source_expression: user_data.output @@ -95,7 +95,7 @@ flows: - type: copy source_expression: input.payload dest_expression: user_data.temp:text - component_input: + input_selection: source_expression: user_data.temp:text - component_name: solace_sw_broker @@ -103,8 +103,6 @@ flows: component_config: <<: *broker_connection payload_format: json - component_input: - source_expression: user_data.output input_transforms: - type: copy source_expression: input.payload @@ -116,7 +114,7 @@ flows: function: power params: positional: - - source_expression(input.payload:value) # This will throw an error if value is not a number + - evaluate_expression(input.payload:value) # This will throw an error if value is not a number - 2 dest_expression: user_data.output:payload.valueSquared - type: copy @@ -131,5 +129,5 @@ flows: - type: copy source_expression: user_data.temp dest_expression: user_data.output:user_properties - component_input: + input_selection: source_expression: 
user_data.output diff --git a/examples/llm/anthropic_chat.yaml b/examples/llm/anthropic_chat.yaml index 5bc1c164..76fe7ef0 100644 --- a/examples/llm/anthropic_chat.yaml +++ b/examples/llm/anthropic_chat.yaml @@ -77,7 +77,7 @@ flows: - type: copy source_expression: static:user dest_expression: user_data.llm_input:messages.0.role - component_input: + input_selection: source_expression: user_data.llm_input # Send response back to broker @@ -95,5 +95,5 @@ flows: - type: copy source_expression: template:{{text://input.topic}}/response dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: user_data.output diff --git a/examples/llm/bedrock_anthropic_chat.yaml b/examples/llm/bedrock_anthropic_chat.yaml index 879bb03b..421ce428 100644 --- a/examples/llm/bedrock_anthropic_chat.yaml +++ b/examples/llm/bedrock_anthropic_chat.yaml @@ -59,7 +59,7 @@ flows: - type: copy source_expression: static:user dest_expression: user_data.llm_input:messages.0.role - component_input: + input_selection: source_expression: user_data.llm_input # diff --git a/examples/llm/langchain_openai_with_history_chat.yaml b/examples/llm/langchain_openai_with_history_chat.yaml index 639567f7..bd922d3c 100644 --- a/examples/llm/langchain_openai_with_history_chat.yaml +++ b/examples/llm/langchain_openai_with_history_chat.yaml @@ -76,7 +76,7 @@ flows: - type: copy source_value: user dest_expression: user_data.input:messages.0.role - component_input: + input_selection: source_expression: user_data.input # Send response back to broker @@ -94,5 +94,5 @@ flows: - type: copy source_expression: template:{{text://input.topic}}/response dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: user_data.output diff --git a/examples/llm/mixture_of_agents.yaml b/examples/llm/mixture_of_agents.yaml index 27af1f6f..d71f46c1 100644 --- a/examples/llm/mixture_of_agents.yaml +++ b/examples/llm/mixture_of_agents.yaml @@ -83,7 +83,7 @@ 
shared_config: - type: copy source_expression: template:{{text://input.topic}}/next dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: user_data.output # Agent input transformations @@ -99,7 +99,7 @@ shared_config: - type: copy source_expression: static:user dest_expression: user_data.llm_input:messages.0.role - component_input: + input_selection: source_expression: user_data.llm_input flows: @@ -141,7 +141,7 @@ flows: function: if_else params: positional: - - source_expression(input.payload:layer_number) + - evaluate_expression(input.payload:layer_number) - invoke: # Add 1 to the layer number module: invoke_functions @@ -154,7 +154,7 @@ flows: function: or_op params: positional: - - source_expression(input.payload:layer_number) + - evaluate_expression(input.payload:layer_number) - 0 - 1 # No layer number, set to 1 @@ -169,8 +169,8 @@ flows: function: if_else params: positional: - - source_expression(input.payload:id) - - source_expression(input.payload:id) + - evaluate_expression(input.payload:id) + - evaluate_expression(input.payload:id) - invoke: module: invoke_functions function: uuid @@ -181,7 +181,7 @@ flows: source_value: moa/broadcast dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: user_data.output # Agent 1 - Google Vertex AI @@ -213,7 +213,7 @@ flows: - type: copy source_expression: previous dest_expression: user_data.formatted_response:content - component_input: + input_selection: source_expression: user_data.formatted_response # Broker output for Vertex AI @@ -245,7 +245,7 @@ flows: - type: copy source_expression: previous:content dest_expression: user_data.formatted_response:content - component_input: + input_selection: source_expression: user_data.formatted_response # Broker output for OpenAI @@ -280,7 +280,7 @@ flows: - type: copy source_expression: previous dest_expression: user_data.formatted_response:content - component_input: + input_selection: 
source_expression: user_data.formatted_response # Broker output for Anthropic @@ -307,7 +307,7 @@ flows: assemble_key: id max_time_ms: 30000 max_items: 3 # Number of Agents - component_input: + input_selection: source_expression: input.payload # Format response for the LLM request @@ -345,10 +345,10 @@ flows: function: add params: positional: - - source_expression(keyword_args:accumulated_value) - - source_expression(keyword_args:current_value) + - evaluate_expression(keyword_args:accumulated_value) + - evaluate_expression(keyword_args:current_value) dest_expression: user_data.aggregated_data:responses - component_input: + input_selection: source_expression: user_data.aggregated_data # Aggregate all the outcomes from the agents @@ -377,7 +377,7 @@ flows: - type: copy source_expression: static:user dest_expression: user_data.llm_input:messages.0.role - component_input: + input_selection: source_expression: user_data.llm_input - component_name: aggregator_output @@ -425,10 +425,10 @@ flows: function: less_than params: positional: - - source_expression(user_data.aggregated_data:layer_number, int) + - evaluate_expression(user_data.aggregated_data:layer_number, int) - ${NUMBER_OF_MOA_LAYERS} - - source_expression(user_data.temp:new_query) - - source_expression(previous:content) + - evaluate_expression(user_data.temp:new_query) + - evaluate_expression(previous:content) dest_expression: user_data.output:payload.query # Copy the response topic based on layer number - type: copy @@ -445,12 +445,12 @@ flows: function: less_than params: positional: - - source_expression(user_data.aggregated_data:layer_number, int) + - evaluate_expression(user_data.aggregated_data:layer_number, int) - ${NUMBER_OF_MOA_LAYERS} - moa/question/aggregate - moa/question/cleanup dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: user_data.output # Cleanup the responses from the assembly and send to the user @@ -482,5 +482,5 @@ flows: - type: copy 
source_value: moa/question/response dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: user_data.output diff --git a/examples/llm/openai_chat.yaml b/examples/llm/openai_chat.yaml index 4c3f7538..038903cf 100644 --- a/examples/llm/openai_chat.yaml +++ b/examples/llm/openai_chat.yaml @@ -74,7 +74,7 @@ flows: - type: copy source_expression: static:user dest_expression: user_data.llm_input:messages.0.role - component_input: + input_selection: source_expression: user_data.llm_input # Send response back to broker @@ -92,5 +92,5 @@ flows: - type: copy source_expression: template:{{text://input.topic}}/response dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: user_data.output diff --git a/examples/llm/vertexai_chat.yaml b/examples/llm/vertexai_chat.yaml index 16201b84..19e77ece 100644 --- a/examples/llm/vertexai_chat.yaml +++ b/examples/llm/vertexai_chat.yaml @@ -78,7 +78,7 @@ flows: - type: copy source_expression: static:user dest_expression: user_data.llm_input:messages.0.role - component_input: + input_selection: source_expression: user_data.llm_input # Send response back to broker @@ -96,5 +96,5 @@ flows: - type: copy source_expression: template:{{text://input.topic}}/response dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: user_data.output diff --git a/examples/milvus_store.yaml b/examples/milvus_store.yaml index 4c6211b3..c1b707c9 100644 --- a/examples/milvus_store.yaml +++ b/examples/milvus_store.yaml @@ -58,7 +58,7 @@ flows: - type: copy source_expression: input.payload:text dest_expression: user_data.vector_input:texts - component_input: + input_selection: source_expression: user_data.vector_input - component_name: milvus_cohere_embed_search @@ -78,7 +78,7 @@ flows: region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION} credentials_profile_name: default # Profile name in ~/.aws/credentials max_results: 5 - component_input: + 
input_selection: source_expression: input.payload - component_name: stdout diff --git a/examples/request_reply.yaml b/examples/request_reply.yaml index 97552036..3cdae477 100644 --- a/examples/request_reply.yaml +++ b/examples/request_reply.yaml @@ -41,7 +41,7 @@ flows: - type: copy source_value: request/topic dest_expression: user_data.request:topic - component_input: + input_selection: source_expression: user_data.request - component_name: stdout @@ -63,7 +63,7 @@ flows: - type: copy source_expression: input.payload dest_expression: user_data.reply:payload.wrapper - component_input: + input_selection: source_expression: user_data.reply:payload - component_name: broker_output @@ -80,6 +80,6 @@ flows: - type: copy source_expression: input.user_properties:__solace_ai_connector_broker_request_reply_topic__ dest_expression: user_data.output:topic - component_input: + input_selection: source_expression: user_data.output diff --git a/examples/vector_store_search.yaml b/examples/vector_store_search.yaml index ca4b3f8b..3316d8c8 100644 --- a/examples/vector_store_search.yaml +++ b/examples/vector_store_search.yaml @@ -108,7 +108,7 @@ flows: model_id: ${AWS_BEDROCK_COHERE_EMBED_MODEL_ID} region_name: ${AWS_BEDROCK_COHERE_EMBED_REGION} max_results: 7 - component_input: + input_selection: source_expression: input.payload - component_name: stdout diff --git a/src/solace_ai_connector/common/utils.py b/src/solace_ai_connector/common/utils.py index c04a0ca8..003e2ff7 100644 --- a/src/solace_ai_connector/common/utils.py +++ b/src/solace_ai_connector/common/utils.py @@ -82,7 +82,7 @@ def resolve_config_values(config, allow_source_expression=False): log.debug("Resolved config value to %s", config) return config for key, value in config.items(): - # If the key is source_expression, we sub config to use the 'source_expression()' value in + # If the key is source_expression, we sub config to use the 'evaluate_expression()' value in # invoke parameters config[key] = 
resolve_config_values( value, @@ -202,12 +202,13 @@ def call_function(function, params, allow_source_expression): have_lambda = False if positional: for index, value in enumerate(positional): - if isinstance(value, str) and value.startswith("source_expression("): + # source_expression check for backwards compatibility + if isinstance(value, str) and (value.startswith("evaluate_expression(") or value.startswith("source_expression(")): # if not allow_source_expression: # raise ValueError( - # "source_expression() is not allowed in this context" + # "evaluate_expression() is not allowed in this context" # ) - (expression, data_type) = extract_source_expression(value) + (expression, data_type) = extract_evaluate_expression(value) positional[index] = create_lambda_function_for_source_expression( expression, data_type=data_type ) @@ -216,12 +217,13 @@ def call_function(function, params, allow_source_expression): have_lambda = True if keyword: for key, value in keyword.items(): - if isinstance(value, str) and value.startswith("source_expression("): + # source_expression check for backwards compatibility + if isinstance(value, str) and (value.startswith("evaluate_expression(") or value.startswith("source_expression(")): if not allow_source_expression: raise ValueError( - "source_expression() is not allowed in this context" + "evaluate_expression() is not allowed in this context" ) - (expression, data_type) = extract_source_expression(value) + (expression, data_type) = extract_evaluate_expression(value) keyword[key] = create_lambda_function_for_source_expression( expression, data_type=data_type ) @@ -250,16 +252,20 @@ def install_package(package_name): subprocess.run(["pip", "install", package_name], check=True) -def extract_source_expression(se_call): - # First remove the source_expression( and the trailing ) +def extract_evaluate_expression(se_call): + # First remove the evaluate_expression( and the trailing ) # Account for possible whitespace - expression = 
se_call.split("source_expression(")[1].split(")")[0].strip() + if (se_call.startswith("evaluate_expression(")): + expression = se_call.split("evaluate_expression(")[1].split(")")[0].strip() + else: + # For backwards compatibility + expression = se_call.split("source_expression(")[1].split(")")[0].strip() data_type = None if "," in expression: (expression, data_type) = re.split(r"\s*,\s*", expression) if not expression: - raise ValueError("source_expression() must contain an expression") + raise ValueError("evaluate_expression() must contain an expression") return (expression, data_type) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 8fc62701..bd4c52c4 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -173,10 +173,12 @@ def get_acknowledgement_callback(self): return None def get_input_data(self, message): - component_input = self.config.get("component_input") or { - "source_expression": "previous" - } - source_expression = get_source_expression(component_input) + input_selection = ( + self.config.get("input_selection") + or self.config.get("component_input") + or {"source_expression": "previous"} + ) + source_expression = get_source_expression(input_selection) # This should be overridden by the component if it needs to extract data from the message return message.get_data(source_expression, self) @@ -216,7 +218,7 @@ def get_config(self, key=None, default=None): if self.current_message is None: raise ValueError( f"Component {self.log_identifier} is trying to use an `invoke` config " - "that contains a 'source_expression()' in a context that does not " + "that contains a 'evaluate_expression()' in a context that does not " "have a message available. This is likely a bug in the " "component's configuration." 
) diff --git a/src/solace_ai_connector/components/general/aggregate.py b/src/solace_ai_connector/components/general/aggregate.py index ef517734..3a7b33c2 100644 --- a/src/solace_ai_connector/components/general/aggregate.py +++ b/src/solace_ai_connector/components/general/aggregate.py @@ -8,7 +8,11 @@ "class_name": "Aggregate", "description": "Take multiple messages and aggregate them into one. " "The output of this component is a list of the exact structure " - "of the input data.", + "of the input data.\n" + "This can be useful for batch processing or for aggregating events " + "together before processing them. The Aggregate component will take a " + "sequence of events and combine them into a single event before enqueuing " + "it to the next component in the flow so that it can perform batch processing.", "short_description": "Aggregate messages into one message.", "config_parameters": [ { @@ -38,6 +42,21 @@ "type": "object", }, }, + "example_config": """ +```yaml + - component_name: aggretator_example + component_module: aggregate + component_config: + # The maximum number of items to aggregate before sending the data to the next component + max_items: 3 + # The maximum time to wait before sending the data to the next component + max_time_ms: 1000 + input_selection: + # Take the text field from the message and use it as the input to the aggregator + source_expression: input.payload:text +``` + +""", } @@ -50,7 +69,7 @@ def __init__(self, **kwargs): self.max_items = self.get_config("max_items") def invoke(self, message, data): - # The passed in data is the date specified by component_input + # The passed in data is the date specified by input_selection # from the config file if self.current_aggregation is None: self.current_aggregation = self.start_new_aggregation() diff --git a/src/solace_ai_connector/components/general/iterate.py b/src/solace_ai_connector/components/general/iterate.py index e0af98e8..43e9588c 100644 --- 
a/src/solace_ai_connector/components/general/iterate.py +++ b/src/solace_ai_connector/components/general/iterate.py @@ -20,6 +20,16 @@ "type": "object", "properties": {}, }, + "example_config": """ +```yaml + - component_name: iterate_example + component_module: iterate + component_config: + input_selection: + # Take the list field from the message and use it as the input to the iterator + source_expression: input.payload:embeddings +``` +""", } diff --git a/src/solace_ai_connector/components/inputs_outputs/error_input.py b/src/solace_ai_connector/components/inputs_outputs/error_input.py index 88a68289..62720134 100644 --- a/src/solace_ai_connector/components/inputs_outputs/error_input.py +++ b/src/solace_ai_connector/components/inputs_outputs/error_input.py @@ -15,7 +15,7 @@ "class_name": "ErrorInput", "description": ( "Receive processing errors from the Solace AI Event Connector. Note that " - "the component_input configuration is ignored. " + "the input_selection configuration is ignored. " "This component should be used to create a flow that handles errors from other flows. 
" ), "config_parameters": [ diff --git a/src/solace_ai_connector/services/cache_service.py b/src/solace_ai_connector/services/cache_service.py index 19f08bbd..0b0ff3f0 100644 --- a/src/solace_ai_connector/services/cache_service.py +++ b/src/solace_ai_connector/services/cache_service.py @@ -5,8 +5,7 @@ from typing import Any, Optional, Dict, Tuple from threading import Lock from sqlalchemy import create_engine, Column, String, Float, LargeBinary -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import declarative_base, sessionmaker from ..common.event import Event, EventType from ..common.log import log diff --git a/src/solace_ai_connector/transforms/filter.py b/src/solace_ai_connector/transforms/filter.py index a1e17c7c..3f7fa90b 100644 --- a/src/solace_ai_connector/transforms/filter.py +++ b/src/solace_ai_connector/transforms/filter.py @@ -13,8 +13,8 @@ " * index: The index of the current item in the source list\n" " * current_value: The value of the current item in the source list\n" " * source_list: The source list\n\n" - "These should be accessed using `source_expression(keyword_args:)`. " - "For example, `source_expression(keyword_args:current_value)`. " + "These should be accessed using `evaluate_expression(keyword_args:)`. " + "For example, `evaluate_expression(keyword_args:current_value)`. " "See the example below for more detail." 
), "short_description": "Filter a list based on a filter function", @@ -62,7 +62,7 @@ function: greater_than params: positional: - - source_expression(keyword_args:current_value.my_val) + - evaluate_expression(keyword_args:current_value.my_val) - 2 dest_expression: user_data.output:new_list ``` diff --git a/src/solace_ai_connector/transforms/map.py b/src/solace_ai_connector/transforms/map.py index faf7359a..b09865ea 100644 --- a/src/solace_ai_connector/transforms/map.py +++ b/src/solace_ai_connector/transforms/map.py @@ -17,8 +17,8 @@ " * index: The index of the current item in the source list\n" " * current_value: The value of the current item in the source list\n" " * source_list: The source list\n\n" - "These should be accessed using `source_expression(keyword_args:)`. " - "For example, `source_expression(keyword_args:current_value)`. " + "These should be accessed using `evaluate_expression(keyword_args:)`. " + "For example, `evaluate_expression(keyword_args:current_value)`. " "See the example below for more detail." ), "short_description": ( @@ -69,9 +69,9 @@ function: add params: positional: - - source_expression(keyword_args:current_value) + - evaluate_expression(keyword_args:current_value) - 2 - dest_expression: user_data.output:new_list + dest_list_expression: user_data.output:new_list ``` This transform would take a payload like this: diff --git a/src/solace_ai_connector/transforms/reduce.py b/src/solace_ai_connector/transforms/reduce.py index 681e0415..de08210b 100644 --- a/src/solace_ai_connector/transforms/reduce.py +++ b/src/solace_ai_connector/transforms/reduce.py @@ -14,8 +14,8 @@ " * accumulated_value: The current accumulated value\n" " * current_value: The value of the current item in the source list\n" " * source_list: The source list\n\n" - "These should be accessed using `source_expression(keyword_args:)`. " - "For example, `source_expression(keyword_args:current_value)`. " + "These should be accessed using `evaluate_expression(keyword_args:)`. 
" + "For example, `evaluate_expression(keyword_args:current_value)`. " "See the example below for more detail." ), "short_description": "Reduce a list to a single value", @@ -64,8 +64,8 @@ function: add params: positional: - - source_expression(keyword_args:accumulated_value) - - source_expression(keyword_args:current_value) + - evaluate_expression(keyword_args:accumulated_value) + - evaluate_expression(keyword_args:current_value) dest_expression: user_data.output:my_obj.sum ``` This transform would take a payload like this: diff --git a/tests/test_aggregate.py b/tests/test_aggregate.py index 6cccd411..a288410e 100644 --- a/tests/test_aggregate.py +++ b/tests/test_aggregate.py @@ -28,7 +28,7 @@ def test_aggregate_by_time(): component_config: max_items: 10 max_time_ms: {TIMEOUT_MS} - component_input: + input_selection: source_expression: input.payload """ connector, flows = create_test_flows(config_yaml) @@ -78,7 +78,7 @@ def test_aggregate_by_items(): component_config: max_items: 3 max_time_ms: 1000 - component_input: + input_selection: source_expression: input.payload """ connector, flows = create_test_flows(config_yaml) @@ -126,7 +126,7 @@ def test_both_items_and_time(): component_config: max_items: 3 max_time_ms: {MAX_TIME_MS} - component_input: + input_selection: source_expression: input.payload """ connector, flows = create_test_flows(config_yaml) diff --git a/tests/test_config_file.py b/tests/test_config_file.py index 54bd4a3a..5bd34f73 100644 --- a/tests/test_config_file.py +++ b/tests/test_config_file.py @@ -58,7 +58,7 @@ def test_no_flow_name(): - type: append source_expression: self:component_index dest_expression: user_data.path:my_path - component_input: + input_selection: source_expression: input.payload:text """ SolaceAiConnector( @@ -114,7 +114,7 @@ def test_no_component_name(): - name: test_flow components: - component_module: delay - component_input: + input_selection: source_expression: input.payload:text """ SolaceAiConnector( diff --git 
a/tests/test_error_flows.py b/tests/test_error_flows.py index a2ee53fa..b8ff4d72 100644 --- a/tests/test_error_flows.py +++ b/tests/test_error_flows.py @@ -43,7 +43,7 @@ def test_basic_error_flow(): component_module: error_input - component_name: pass_through component_module: pass_through - component_input: + input_selection: source_expression: previous:error.text """ connector, flows = create_test_flows(config_yaml) diff --git a/tests/test_filter.py b/tests/test_filter.py index 550114c3..b43b72f8 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -33,7 +33,7 @@ def test_simple_filter(): function: equal params: positional: - - source_expression(input.payload:my_list.1) + - evaluate_expression(input.payload:my_list.1) - 2 """ connector, flows = create_test_flows(config_yaml) @@ -77,7 +77,7 @@ def test_missing_item_filter(): function: not_equal params: positional: - - source_expression(input.payload:my_list) + - evaluate_expression(input.payload:my_list) - null """ connector, flows = create_test_flows(config_yaml) diff --git a/tests/test_flows.py b/tests/test_flows.py index 5500f36c..6196ae1e 100644 --- a/tests/test_flows.py +++ b/tests/test_flows.py @@ -46,7 +46,7 @@ # - type: append # source_expression: self:component_index # dest_expression: user_data.path:my_path -# component_input: +# input_selection: # source_expression: input.payload:text # - component_name: delay2 # component_module: delay @@ -63,7 +63,7 @@ # - type: append # source_expression: self:component_index # dest_expression: user_data.path:my_path -# component_input: +# input_selection: # source_expression: user_data.temp # - component_name: delay3 # component_module: delay @@ -74,7 +74,7 @@ # - type: append # source_expression: self:component_index # dest_expression: user_data.path:my_path -# component_input: +# input_selection: # source_expression: previous # """ # ], @@ -131,13 +131,13 @@ def test_on_flow_creation_event(): components: - component_name: delay1 component_module: delay 
- component_input: + input_selection: source_expression: input.payload:text - name: test_flow2 components: - component_name: delay2 component_module: delay - component_input: + input_selection: source_expression: input.payload:text """ event_handler_called = False @@ -174,7 +174,7 @@ def test_multiple_flow_instances(): components: - component_name: delay1 component_module: delay - component_input: + input_selection: source_expression: input.payload:text """ # Create the connector diff --git a/tests/test_invoke.py b/tests/test_invoke.py index 48346048..58d0b771 100644 --- a/tests/test_invoke.py +++ b/tests/test_invoke.py @@ -580,8 +580,8 @@ def test_invoke_import_os_module(): ) == {"a": "posix"} -def test_invoke_with_source_expression_simple(): - """Verify that the source expression is evaluated""" +def test_invoke_with_evaluate_expression_simple(): + """Verify that the evaluate expression is evaluated""" config = resolve_config_values( { "source_expression": { @@ -590,8 +590,8 @@ def test_invoke_with_source_expression_simple(): "function": "add", "params": { "positional": [ - "source_expression(input.payload:my_obj.val1)", - "source_expression(input.payload:my_obj.val2)", + "evaluate_expression(input.payload:my_obj.val1)", + "evaluate_expression(input.payload:my_obj.val2)", ], }, }, @@ -603,8 +603,8 @@ def test_invoke_with_source_expression_simple(): assert config == {"source_expression": 3} -def test_invoke_with_source_expression_cast_to_int(): - """Verify that the source expression is evaluated""" +def test_invoke_with_evaluate_expression_cast_to_int(): + """Verify that the evaluate expression is evaluated""" config = resolve_config_values( { "source_expression": { @@ -613,7 +613,7 @@ def test_invoke_with_source_expression_cast_to_int(): "function": "add", "params": { "positional": [ - "source_expression(input.payload:my_obj.val1, int )", + "evaluate_expression(input.payload:my_obj.val1, int )", 2, ], }, @@ -626,8 +626,8 @@ def 
test_invoke_with_source_expression_cast_to_int(): assert config == {"source_expression": 3} -def test_invoke_with_source_expression_cast_to_float(): - """Verify that the source expression is evaluated""" +def test_invoke_with_evaluate_expression_cast_to_float(): + """Verify that the evaluate expression is evaluated""" config = resolve_config_values( { "source_expression": { @@ -636,7 +636,7 @@ def test_invoke_with_source_expression_cast_to_float(): "function": "add", "params": { "positional": [ - "source_expression(input.payload:my_obj.val1, float )", + "evaluate_expression(input.payload:my_obj.val1, float )", 2, ], }, @@ -659,7 +659,7 @@ def test_invoke_with_source_expression_cast_to_bool(): "function": "and_op", "params": { "positional": [ - "source_expression(input.payload:my_obj.val1 , bool )", + "evaluate_expression(input.payload:my_obj.val1 , bool )", True, ], }, @@ -672,8 +672,8 @@ def test_invoke_with_source_expression_cast_to_bool(): assert config == {"source_expression": True} -def test_invoke_with_source_expression_cast_to_str(): - """Verify that the source expression is evaluated""" +def test_invoke_with_evaluate_expression_cast_to_str(): + """Verify that the evaluate expression is evaluated""" config = resolve_config_values( { "source_expression": { @@ -682,7 +682,7 @@ def test_invoke_with_source_expression_cast_to_str(): "function": "add", "params": { "positional": [ - "source_expression(input.payload:my_obj.val1,str)", + "evaluate_expression(input.payload:my_obj.val1,str)", "2", ], }, @@ -695,8 +695,8 @@ def test_invoke_with_source_expression_cast_to_str(): assert config == {"source_expression": "12"} -def test_invoke_with_source_expression_keyword(): - """Verify that the source expression is evaluated""" +def test_invoke_with_evaluate_expression_keyword(): + """Verify that the evaluate expression is evaluated""" config = resolve_config_values( { "source_value": { @@ -705,8 +705,8 @@ def test_invoke_with_source_expression_keyword(): "function": 
"_test_keyword_args", "params": { "keyword": { - "x": "source_expression(input.payload:my_obj.val1)", - "y": "source_expression(input.payload:my_obj.val2)", + "x": "evaluate_expression(input.payload:my_obj.val1)", + "y": "evaluate_expression(input.payload:my_obj.val2)", }, }, }, @@ -718,8 +718,8 @@ def test_invoke_with_source_expression_keyword(): assert config == {"source_value": {"x": 1, "y": 2}} -def test_invoke_with_source_expression_complex(): - """Verify that the source expression is evaluated""" +def test_invoke_with_evaluate_expression_complex(): + """Verify that the evaluate expression is evaluated""" config = resolve_config_values( { "source_expression": { @@ -728,22 +728,22 @@ def test_invoke_with_source_expression_complex(): "function": "_test_positional_and_keyword_args", "params": { "positional": [ - "source_expression(input.payload:my_obj.val1)", + "evaluate_expression(input.payload:my_obj.val1)", { "invoke": { "module": "invoke_functions", "function": "add", "params": { "positional": [ - "source_expression(input.payload:my_obj.val2)", + "evaluate_expression(input.payload:my_obj.val2)", { "invoke": { "module": "invoke_functions", "function": "multiply", "params": { "positional": [ - "source_expression(input.payload:my_obj.val2)", - "source_expression(input.payload:my_obj.val2)", + "evaluate_expression(input.payload:my_obj.val2)", + "evaluate_expression(input.payload:my_obj.val2)", ], }, }, @@ -754,15 +754,15 @@ def test_invoke_with_source_expression_complex(): }, ], "keyword": { - "x": "source_expression(input.payload:my_obj.val1)", + "x": "evaluate_expression(input.payload:my_obj.val1)", "y": { "invoke": { "module": "invoke_functions", "function": "subtract", "params": { "positional": [ - "source_expression(input.payload:my_obj.val2)", - "source_expression(input.payload:my_obj.val3)", + "evaluate_expression(input.payload:my_obj.val2)", + "evaluate_expression(input.payload:my_obj.val3)", ], }, }, @@ -778,8 +778,8 @@ def 
test_invoke_with_source_expression_complex(): assert config == {"source_expression": ((1, 6), {"x": 1, "y": -1})} -def test_invoke_with_source_expression_missing(): - """Verify that the source expression is evaluated""" +def test_invoke_with_evaluate_expression_missing(): + """Verify that the evaluate expression is evaluated""" config = resolve_config_values( { "source_expression": { @@ -788,8 +788,8 @@ def test_invoke_with_source_expression_missing(): "function": "add", "params": { "positional": [ - "source_expression(input.payload:my_obj.val1)", - "source_expression(input.payload:my_obj.val2)", + "evaluate_expression(input.payload:my_obj.val1)", + "evaluate_expression(input.payload:my_obj.val2)", ], }, }, @@ -803,10 +803,10 @@ def test_invoke_with_source_expression_missing(): config["source_expression"] = config["source_expression"](message) -def test_invoke_with_source_expression_no_source_expression(): - """Verify that the source expression is evaluated""" +def test_invoke_with_source_expression_no_evaluate_expression(): + """Verify that the evaluated expression is evaluated""" with pytest.raises( - ValueError, match=r"source_expression\(\) must contain an expression" + ValueError, match=r"evaluate_expression\(\) must contain an expression" ): resolve_config_values( { @@ -816,7 +816,7 @@ def test_invoke_with_source_expression_no_source_expression(): "function": "add", "params": { "positional": [ - "source_expression()", + "evaluate_expression()", 2, ], }, @@ -826,8 +826,8 @@ def test_invoke_with_source_expression_no_source_expression(): ) -def test_invoke_with_source_expression_with_real_flow(): - """Verify that the source expression is evaluated properly in transforms and component_input""" +def test_invoke_with_evaluate_expression_with_real_flow(): + """Verify that the evaluate expression is evaluated properly in transforms and input_selection""" config_yaml = """ instance_name: test_instance log: @@ -846,17 +846,17 @@ def 
test_invoke_with_source_expression_with_real_flow(): function: add params: positional: - - source_expression(input.payload:my_obj.val1.1) + - evaluate_expression(input.payload:my_obj.val1.1) - 2 dest_expression: user_data.temp:my_val - component_input: + input_selection: source_expression: invoke: module: invoke_functions function: add params: positional: - - source_expression(input.payload:my_obj.obj2) + - evaluate_expression(input.payload:my_obj.obj2) - " test" """ message = Message(payload={"my_obj": {"val1": [1, 2, 3], "obj2": "Hello, World!"}}) @@ -868,7 +868,7 @@ def test_invoke_with_source_expression_with_real_flow(): } # The copy transform assert ( output_message.get_data("previous") == "Hello, World! test" - ) # The component_input + ) # The input_selection def atest_user_processing_component(): @@ -886,7 +886,7 @@ def atest_user_processing_component(): function: add params: positional: - - source_expression(input.payload:my_obj.val1.1) + - evaluate_expression(input.payload:my_obj.val1.1) - 2 """ @@ -919,10 +919,10 @@ def test_reduce_transform_accumulator(): function: add params: positional: - - source_expression(keyword_args:accumulated_value) - - source_expression(keyword_args:current_value) + - evaluate_expression(keyword_args:accumulated_value) + - evaluate_expression(keyword_args:current_value) dest_expression: user_data.temp:my_val - component_input: + input_selection: source_expression: user_data.temp:my_val """ message = Message(payload={"my_list": [1, 2, 3, 4, 5]}) @@ -957,10 +957,10 @@ def test_reduce_transform_make_list(): function: append params: positional: - - source_expression(keyword_args:accumulated_value) - - source_expression(keyword_args:current_value) + - evaluate_expression(keyword_args:accumulated_value) + - evaluate_expression(keyword_args:current_value) dest_expression: user_data.temp:my_val - component_input: + input_selection: source_expression: user_data.temp:my_val """ message = Message(payload={"my_list": [1, 2, 3, 4, 5]}) @@ 
-991,10 +991,10 @@ def test_map_transform_add_2(): function: add params: positional: - - source_expression(keyword_args:current_value) + - evaluate_expression(keyword_args:current_value) - 2 dest_list_expression: user_data.temp:new_list - component_input: + input_selection: source_expression: user_data.temp:new_list """ message = Message(payload={"my_list": [1, 2, 3, 4, 5]}) @@ -1025,10 +1025,10 @@ def test_filter_transform_greater_than_2(): function: greater_than params: positional: - - source_expression(keyword_args:current_value) + - evaluate_expression(keyword_args:current_value) - 2 dest_list_expression: user_data.temp:new_list - component_input: + input_selection: source_expression: user_data.temp:new_list """ message = Message(payload={"my_list": [1, 2, 3, 4, 5]}) @@ -1059,10 +1059,10 @@ def test_filter_transform_sub_field_greater_than_2(): function: greater_than params: positional: - - source_expression(keyword_args:current_value.my_val) + - evaluate_expression(keyword_args:current_value.my_val) - 2 dest_list_expression: user_data.temp:new_list - component_input: + input_selection: source_expression: user_data.temp:new_list """ message = Message( diff --git a/tests/test_iterate.py b/tests/test_iterate.py index 0dc947b6..a33baccc 100644 --- a/tests/test_iterate.py +++ b/tests/test_iterate.py @@ -26,7 +26,7 @@ def test_small_list(): components: - component_name: iterate component_module: iterate - component_input: + input_selection: source_expression: input.payload:my_list """ connector, flows = create_test_flows(config_yaml) @@ -56,7 +56,7 @@ def test_large_list(): components: - component_name: iterate component_module: iterate - component_input: + input_selection: source_expression: input.payload:my_list """ connector, flows = create_test_flows(config_yaml) diff --git a/tests/test_timer_input.py b/tests/test_timer_input.py index c1935385..343a7d87 100644 --- a/tests/test_timer_input.py +++ b/tests/test_timer_input.py @@ -40,7 +40,7 @@ def 
test_basic_timer(): module: time function: time dest_expression: user_data.timestamp - component_input: + input_selection: source_expression: user_data.timestamp """ diff --git a/tests/test_transforms.py b/tests/test_transforms.py index d9ed555a..9b001505 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -30,7 +30,7 @@ def test_basic_copy_transform(): - type: copy source_value: "Static Greeting!" dest_expression: user_data.temp:payload.greeting - component_input: + input_selection: source_expression: user_data.temp:payload.text """ @@ -64,7 +64,7 @@ def test_basic_map_transform(): source_expression: item dest_list_expression: user_data.temp:my_list dest_expression: my_obj.full - component_input: + input_selection: source_expression: user_data.temp """ @@ -101,7 +101,7 @@ def test_map_with_index_transform(): source_expression: index dest_list_expression: user_data.temp:my_list dest_expression: my_obj.index - component_input: + input_selection: source_expression: user_data.temp """ @@ -138,7 +138,7 @@ def test_map_with_message_source_expression(): source_expression: input.payload:my_obj.two dest_list_expression: user_data.temp:my_list dest_expression: my_obj.my_obj_two - component_input: + input_selection: source_expression: user_data.temp """ @@ -176,7 +176,7 @@ def test_basic_append_transform(): - type: append source_expression: input.payload:three dest_expression: user_data.temp:my_list - component_input: + input_selection: source_expression: user_data.temp """ @@ -204,7 +204,7 @@ def test_overwrite_non_list_with_list(): - type: append source_expression: input.payload:one dest_expression: user_data.temp:my_list - component_input: + input_selection: source_expression: user_data.temp """ @@ -228,7 +228,7 @@ def test_transform_without_a_type(): input_transforms: - source_expression: input.payload:one dest_expression: user_data.temp:my_list - component_input: + input_selection: source_expression: user_data.temp """ create_connector(config_yaml) 
@@ -249,7 +249,7 @@ def test_transform_with_unknown_type(): - type: unknown source_expression: input.payload:one dest_expression: user_data.temp:my_list - component_input: + input_selection: source_expression: user_data.temp """ create_connector(config_yaml) @@ -270,7 +270,7 @@ def test_missing_source_expression(): input_transforms: - type: copy dest_expression: user_data.temp:my_list - component_input: + input_selection: source_expression: user_data.temp """ create_connector(config_yaml) @@ -291,7 +291,7 @@ def test_missing_dest_expression(): input_transforms: - type: copy source_expression: input.payload:one - component_input: + input_selection: source_expression: user_data.temp """ create_connector(config_yaml) @@ -314,7 +314,7 @@ def test_source_value_as_an_object(): one: 1 two: 2 dest_expression: user_data.temp:my_obj - component_input: + input_selection: source_expression: user_data.temp """ From 3f339855db057880c70430a38ac2d21bb82a112d Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:08:49 -0400 Subject: [PATCH 06/26] Fixed solace disconnection issues on shutting down (#30) --- examples/llm/mixture_of_agents.yaml | 3 +++ .../components/inputs_outputs/broker_base.py | 3 +++ src/solace_ai_connector/flow/flow.py | 4 +++- src/solace_ai_connector/main.py | 2 ++ src/solace_ai_connector/solace_ai_connector.py | 8 ++++---- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/examples/llm/mixture_of_agents.yaml b/examples/llm/mixture_of_agents.yaml index d71f46c1..dd72e93e 100644 --- a/examples/llm/mixture_of_agents.yaml +++ b/examples/llm/mixture_of_agents.yaml @@ -193,6 +193,7 @@ flows: # Vertex AI LLM Request - component_name: llm_request component_module: langchain_chat_model + num_instances: 3 component_config: langchain_module: langchain_google_vertexai langchain_class: ChatVertexAI @@ -228,6 +229,7 @@ flows: # OpenAI LLM Request - component_name: llm_request component_module: 
openai_chat_model + num_instances: 3 component_config: api_key: ${OPENAI_API_KEY} base_url: ${OPENAI_API_ENDPOINT} @@ -260,6 +262,7 @@ flows: # Anthropic LLM Request - component_name: llm_request component_module: langchain_chat_model + num_instances: 3 component_config: langchain_module: langchain_anthropic langchain_class: ChatAnthropic diff --git a/src/solace_ai_connector/components/inputs_outputs/broker_base.py b/src/solace_ai_connector/components/inputs_outputs/broker_base.py index 6b91a018..b6fd1137 100644 --- a/src/solace_ai_connector/components/inputs_outputs/broker_base.py +++ b/src/solace_ai_connector/components/inputs_outputs/broker_base.py @@ -59,6 +59,9 @@ def disconnect(self): self.messaging_service.disconnect() self.connected = False + def stop_component(self): + self.disconnect() + def decode_payload(self, payload): encoding = self.get_config("payload_encoding") payload_format = self.get_config("payload_format") diff --git a/src/solace_ai_connector/flow/flow.py b/src/solace_ai_connector/flow/flow.py index c13ea71d..9adfb27e 100644 --- a/src/solace_ai_connector/flow/flow.py +++ b/src/solace_ai_connector/flow/flow.py @@ -1,7 +1,9 @@ """Main class for the flow""" import threading +from typing import List +from ..components.component_base import ComponentBase from ..common.log import log from ..common.utils import import_module @@ -48,7 +50,7 @@ def __init__( ): self.flow_config = flow_config self.flow_index = flow_index - self.component_groups = [] + self.component_groups: List[List[ComponentBase]] = [] self.name = flow_config.get("name") self.module_info = None self.stop_signal = stop_signal diff --git a/src/solace_ai_connector/main.py b/src/solace_ai_connector/main.py index fd785887..0adb4143 100644 --- a/src/solace_ai_connector/main.py +++ b/src/solace_ai_connector/main.py @@ -66,6 +66,8 @@ def main(): app.wait_for_flows() + print("Solace AI Connector exited successfully!") + if __name__ == "__main__": # Read in the configuration yaml filenames from 
the args diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index 83c3c2f6..f41f50f4 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -2,9 +2,9 @@ import threading import queue -import time from datetime import datetime +from typing import List from .common.log import log, setup_log from .common.utils import resolve_config_values from .flow.flow import Flow @@ -18,7 +18,7 @@ class SolaceAiConnector: def __init__(self, config, event_handlers=None, error_queue=None): self.config = config or {} - self.flows = [] + self.flows: List[Flow] = [] self.trace_queue = None self.trace_thread = None self.flow_input_queues = {} @@ -95,8 +95,8 @@ def wait_for_flows(self): break except KeyboardInterrupt: log.info("Received keyboard interrupt - stopping") - self.stop_signal.set() - # sys.exit(0) + self.stop() + self.cleanup() def stop(self): """Stop the Solace AI Event Connector""" From 8c9434f2d7e96f10cb204cefe77cd60509fb22e3 Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Wed, 4 Sep 2024 12:54:04 -0400 Subject: [PATCH 07/26] Add RAG example for AI connector + delete action for vector index (#31) * Added a RAG example for AI connector * Added delete option to vectordb * Changed id to ids --- .../langchain_vector_store_embedding_index.md | 9 +- ...langchain_vector_store_embedding_search.md | 2 +- examples/llm/openai_chat.yaml | 4 +- examples/llm/openai_chroma_rag.yaml | 183 ++++++++++++++++++ examples/milvus_store.yaml | 4 +- .../langchain_vector_store_embedding_base.py | 6 +- .../langchain_vector_store_embedding_index.py | 41 +++- ...langchain_vector_store_embedding_search.py | 3 +- 8 files changed, 240 insertions(+), 12 deletions(-) create mode 100644 examples/llm/openai_chroma_rag.yaml diff --git a/docs/components/langchain_vector_store_embedding_index.md 
b/docs/components/langchain_vector_store_embedding_index.md index a9b3c9d1..5566b8b8 100644 --- a/docs/components/langchain_vector_store_embedding_index.md +++ b/docs/components/langchain_vector_store_embedding_index.md @@ -41,13 +41,20 @@ component_config: }, ... - ] + ], + ids: [ + , + ... + ], + action: } ``` | Field | Required | Description | | --- | --- | --- | | texts | True | | | metadatas | False | | +| ids | False | The ID of the text to add to the index. required for 'delete' action | +| action | False | The action to perform on the index from one of 'add', 'delete' | ## Component Output Schema diff --git a/docs/components/langchain_vector_store_embedding_search.md b/docs/components/langchain_vector_store_embedding_search.md index 7baf102c..68c96782 100644 --- a/docs/components/langchain_vector_store_embedding_search.md +++ b/docs/components/langchain_vector_store_embedding_search.md @@ -28,7 +28,7 @@ component_config: | embedding_component_path | True | | The embedding library path - e.g. 'langchain_community.embeddings' | | embedding_component_name | True | | The embedding model to use - e.g. BedrockEmbeddings | | embedding_component_config | True | | Model specific configuration for the embedding model. See documentation for valid parameter names. | -| max_results | True | | The maximum number of results to return | +| max_results | True | 3 | The maximum number of results to return | | combine_context_from_same_source | False | True | Set to False if you don't want to combine all the context from the same source. 
Default is True | diff --git a/examples/llm/openai_chat.yaml b/examples/llm/openai_chat.yaml index 038903cf..71aee14b 100644 --- a/examples/llm/openai_chat.yaml +++ b/examples/llm/openai_chat.yaml @@ -16,7 +16,7 @@ # required ENV variables: # - OPENAI_API_KEY # - OPENAI_API_ENDPOINT -# - MODEL_NAME +# - OPENAI_MODEL_NAME # - SOLACE_BROKER_URL # - SOLACE_BROKER_USERNAME # - SOLACE_BROKER_PASSWORD @@ -61,7 +61,7 @@ flows: component_config: api_key: ${OPENAI_API_KEY} base_url: ${OPENAI_API_ENDPOINT} - model: ${MODEL_NAME} + model: ${OPENAI_MODEL_NAME} temperature: 0.01 input_transforms: - type: copy diff --git a/examples/llm/openai_chroma_rag.yaml b/examples/llm/openai_chroma_rag.yaml new file mode 100644 index 00000000..f78bfc09 --- /dev/null +++ b/examples/llm/openai_chroma_rag.yaml @@ -0,0 +1,183 @@ +# OpenAI RAG (Retrieval Augmented Generation) example using ChromaDB +# This will create 2 flows like these: +# +# Solace[topic:demo/rag/data] -> embed and store in ChromaDB +# Solace[topic:demo/rag/query] -> search in ChromaDB -> OpenAI -> Solace[topic:demo/rag/query/response] +# +# Load Data: +# Send data to Solace topic `demo/rag/data` with the following payload format: +# { +# "texts": [. , ...] 
+# } +# +# RAG Query: +# Send query to Solace topic `demo/rag/query` with the following payload format: +# { +# "query": "" +# } +# The response will be sent to Solace topic `demo/rag/query/response` +# +# Dependencies: +# pip install -U langchain_openai openai chromadb langchain-chroma +# +# Required ENV variables: +# - OPENAI_API_KEY +# - OPENAI_API_ENDPOINT +# - OPENAI_EMBEDDING_MODEL_NAME +# - OPENAI_MODEL_NAME +# - SOLACE_BROKER_URL +# - SOLACE_BROKER_USERNAME +# - SOLACE_BROKER_PASSWORD +# - SOLACE_BROKER_VPN + +--- +log: + stdout_log_level: INFO + log_file_level: INFO + log_file: solace_ai_connector.log + +shared_config: + - broker_config: &broker_connection + broker_type: solace + broker_url: ${SOLACE_BROKER_URL} + broker_username: ${SOLACE_BROKER_USERNAME} + broker_password: ${SOLACE_BROKER_PASSWORD} + broker_vpn: ${SOLACE_BROKER_VPN} + +# Data ingestion and augmented inference flows +flows: + # Data ingestion to chromaDB for RAG + - name: chroma_ingest + components: + # Data Input from a Solace broker for ingestion + - component_name: solace_data_input + component_module: broker_input + component_config: + <<: *broker_connection + broker_queue_name: demo_rag_data + broker_subscriptions: + - topic: demo/rag/data + qos: 1 + payload_encoding: utf-8 + payload_format: json + + # Embedding data & ChromaDB ingest + - component_name: chroma_embed + component_module: langchain_vector_store_embedding_index + component_config: + vector_store_component_path: langchain_chroma + vector_store_component_name: Chroma + vector_store_component_config: + persist_directory: ./chroma_data + collection_name: rag + embedding_component_path: langchain_openai + embedding_component_name: OpenAIEmbeddings + embedding_component_config: + api_key: ${OPENAI_API_KEY} + base_url: ${OPENAI_API_ENDPOINT} + model: ${OPENAI_EMBEDDING_MODEL_NAME} + input_transforms: + - type: copy + source_value: topic:demo/rag/data + dest_expression: user_data.vector_input:metadatas.source + - type: copy + 
source_expression: input.payload:texts + dest_expression: user_data.vector_input:texts + input_selection: + source_expression: user_data.vector_input + + # RAG Inference flow + - name: OpenAI_RAG + components: + # Inference Input from a Solace broker for completion + - component_name: solace_completion_broker + component_module: broker_input + component_config: + <<: *broker_connection + broker_queue_name: demo_rag_query + broker_subscriptions: + - topic: demo/rag/query + qos: 1 + payload_encoding: utf-8 + payload_format: json + + # Retrieve the top-k documents from ChromaDB + - component_name: chroma_search + component_module: langchain_vector_store_embedding_search + component_config: + vector_store_component_path: langchain_chroma + vector_store_component_name: Chroma + vector_store_component_config: + persist_directory: ./chroma_data + collection_name: rag + embedding_component_path: langchain_openai + embedding_component_name: OpenAIEmbeddings + embedding_component_config: + api_key: ${OPENAI_API_KEY} + base_url: ${OPENAI_API_ENDPOINT} + model: ${OPENAI_EMBEDDING_MODEL_NAME} + max_results: 5 + input_transforms: + - type: copy + source_expression: input.payload:query + dest_expression: user_data.vector_input:text + input_selection: + source_expression: user_data.vector_input + + # Generate response using the retrieved data + - component_name: llm_request + component_module: openai_chat_model + component_config: + api_key: ${OPENAI_API_KEY} + base_url: ${OPENAI_API_ENDPOINT} + model: ${OPENAI_MODEL_NAME} + temperature: 0.01 + input_transforms: + # Extract and format the retrieved data + - type: map + source_list_expression: previous:result + source_expression: | + template:{{text://item:text}}\n\n + dest_list_expression: user_data.retrieved_data + + - type: copy + source_expression: | + template:You are a helpful AI assistant. Using the provided context, help with the user's request below. Refrain to use any knowledge outside from the provided context. 
If the user query can not be answered using the provided context, reject user's query. + + + {{text://user_data.retrieved_data}} + + + + {{text://input.payload:query}} + + dest_expression: user_data.llm_input:messages.0.content + - type: copy + source_expression: static:user + dest_expression: user_data.llm_input:messages.0.role + input_selection: + source_expression: user_data.llm_input + + # Send response back to broker with completion and retrieved data + - component_name: send_response + component_module: broker_output + component_config: + <<: *broker_connection + payload_encoding: utf-8 + payload_format: json + copy_user_properties: true + input_transforms: + - type: copy + source_expression: previous:content + dest_expression: user_data.output:payload.response + - type: copy + source_expression: input.payload:query + dest_expression: user_data.output:payload.query + - type: copy + source_expression: user_data.retrieved_data + dest_expression: user_data.output:payload.retrieved_data + - type: copy + source_expression: template:{{text://input.topic}}/response + dest_expression: user_data.output:topic + input_selection: + source_expression: user_data.output diff --git a/examples/milvus_store.yaml b/examples/milvus_store.yaml index c1b707c9..50c98adb 100644 --- a/examples/milvus_store.yaml +++ b/examples/milvus_store.yaml @@ -51,10 +51,10 @@ flows: invoke: module: platform function: system - dest_expression: user_data.vector_input:metadata.system + dest_expression: user_data.vector_input:metadatas.system - type: copy source_value: username - dest_expression: user_data.vector_input:metadata.user + dest_expression: user_data.vector_input:metadatas.user - type: copy source_expression: input.payload:text dest_expression: user_data.vector_input:texts diff --git a/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py index 
f5543ccc..5288fb11 100644 --- a/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_base.py @@ -78,8 +78,10 @@ def init(self): self.vector_store_info["config"], vector_store_class ) except Exception: # pylint: disable=broad-except - del self.vector_store_info["config"]["embeddings"] - del self.vector_store_info["config"]["embedding_function"] + if "embeddings" in self.vector_store_info["config"]: + del self.vector_store_info["config"]["embeddings"] + if "embedding_function" in self.vector_store_info["config"]: + del self.vector_store_info["config"]["embedding_function"] self.vector_store = vector_store_class.from_texts( [], self.embedding, **self.vector_store_info["config"] ) diff --git a/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_index.py b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_index.py index 9e41b541..79064eda 100644 --- a/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_index.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_index.py @@ -74,6 +74,18 @@ "type": "object", }, }, + "ids": { + "type": "array", + "items": { + "type": "string", + }, + "description": "The ID of the text to add to the index. 
required for 'delete' action", + }, + "action": { + "type": "string", + "default": "add", + "description": "The action to perform on the index from one of 'add', 'delete'", + }, }, "required": ["texts"], }, @@ -116,12 +128,35 @@ def invoke(self, message, data): # Get the metadatas if they exist metadatas = data.get("metadatas", None) - args = [texts] if metadatas is not None: if not isinstance(metadatas, list): metadatas = [metadatas] - args.append(metadatas) + # Get the ids if they exist + ids = data.get("ids", None) + if ids is not None: + if not isinstance(ids, list): + ids = [ids] + + action = data.get("action", "add") + match action: + case "add": + return self.add_data(texts, metadatas, ids) + case "delete": + return self.delete_data(ids) + case _: + raise ValueError("Invalid action: {}".format(action)) + + def add_data(self, texts, metadatas=None, ids=None): # Add the texts to the vector store - self.vector_store.add_texts(*args) + args = [texts] + if metadatas is not None: + args.append(metadatas) + self.vector_store.add_texts(*args, ids=ids) + return {"result": "OK"} + + def delete_data(self, ids): + if not ids: + raise ValueError("No IDs provided to delete") + self.vector_store.delete(ids) return {"result": "OK"} diff --git a/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_search.py b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_search.py index ef179d93..1b974eb0 100644 --- a/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_search.py +++ b/src/solace_ai_connector/components/general/langchain/langchain_vector_store_embedding_search.py @@ -59,6 +59,7 @@ "name": "max_results", "required": True, "description": "The maximum number of results to return", + "default": 3, }, { "name": "combine_context_from_same_source", @@ -92,7 +93,7 @@ def __init__(self, **kwargs): def invoke(self, message, data): text = data["text"] - k = 
self.get_config("max_results") + k = self.get_config("max_results", 3) combine_context_from_same_source = self.get_config( "combine_context_from_same_source" ) From 3c9b8ba006880b9dccc12456305c81b018089091 Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Wed, 4 Sep 2024 20:19:41 -0400 Subject: [PATCH 08/26] chore: Refactor make_history_start_with_user_message method (#32) Fix the method to not trim the first entry if it is a "system" role --- .../openai/openai_chat_model_with_history.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/solace_ai_connector/components/general/openai/openai_chat_model_with_history.py b/src/solace_ai_connector/components/general/openai/openai_chat_model_with_history.py index cca2e7b1..5fde36fa 100644 --- a/src/solace_ai_connector/components/general/openai/openai_chat_model_with_history.py +++ b/src/solace_ai_connector/components/general/openai/openai_chat_model_with_history.py @@ -132,11 +132,20 @@ def clear_history_but_keep_depth(self, session_id: str, depth: int, history): def make_history_start_with_user_message(self, session_id, history): if session_id in history: - while ( - history[session_id]["messages"] - and history[session_id]["messages"][0]["role"] != "user" - ): - history[session_id]["messages"].pop(0) + messages = history[session_id]["messages"] + if messages: + if messages[0]["role"] == "system": + # Start from the second message if the first is "system" + start_index = 1 + else: + # Start from the first message otherwise + start_index = 0 + + while ( + start_index < len(messages) + and messages[start_index]["role"] != "user" + ): + messages.pop(start_index) def handle_timer_event(self, timer_data): if timer_data["timer_id"] == "history_cleanup": From fc26447bc8b5a1faaabae2e18537589b2560e806 Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Mon, 9 Sep 2024 09:59:10 -0400 Subject: [PATCH 09/26] Keep history depth needs to be a positive integer and test refactor (#33) * 
chore: Refactor clear_history_but_keep_depth method to handle negative depth values * chore: small change to how this is solved * chore: one more try * refactor: move utils_for_test_files.py to solace_ai_connector module * refactor: removed the orginal utils_for_test_files.py * refactor: update import statements in test files * refactor: add sys.path.append("src") to test files * refactor: standardize import order and sys.path.append in test files * refactor: a bit more test infrastructure changes * feat: allow component_module to accept module objects directly * feat: add types module import to utils.py * test: add static import and object config test * refactor: update test_static_import_and_object_config to use create_test_flows * refactor: Improve test structure and remove duplicate test case * fix: remove duplicate import of yaml module * refactor: Modify test config to use dict instead of YAML string * refactor: convert config_yaml from string to dictionary * refactor: update static import test to use pass_through component * test: Add delay component message passing test * feat: add test for delay component message processing * feat: Added a new test function (test_one_component) to make it very easy to just run some quick tests on a single input -> expected output tests on a single component * feat: added input_transforms to the test_one_component so that input transforms can be tested with it * chore: a bit of cleanup and new tests for test_one_component * chore: rename test_one_component because it was being picked up as a test by the pytest scanner * fix: fixed a typo --- src/solace_ai_connector/common/utils.py | 22 ++--- .../openai/openai_chat_model_with_history.py | 5 ++ src/solace_ai_connector/flow/flow.py | 3 - .../test_utils}/utils_for_test_files.py | 87 +++++++++++++++++-- tests/test_acks.py | 4 +- tests/test_aggregate.py | 5 +- tests/test_config_file.py | 51 ++++++++++- tests/test_error_flows.py | 6 +- tests/test_filter.py | 6 +- 
tests/test_flows.py | 5 +- tests/test_invoke.py | 18 ++-- tests/test_iterate.py | 6 +- tests/test_message_get_set_data.py | 4 +- tests/test_timer_input.py | 5 +- tests/test_transforms.py | 62 ++++++++++++- 15 files changed, 243 insertions(+), 46 deletions(-) rename {tests => src/solace_ai_connector/test_utils}/utils_for_test_files.py (62%) diff --git a/src/solace_ai_connector/common/utils.py b/src/solace_ai_connector/common/utils.py index 003e2ff7..4996c3b1 100644 --- a/src/solace_ai_connector/common/utils.py +++ b/src/solace_ai_connector/common/utils.py @@ -6,6 +6,7 @@ import re import builtins import subprocess +import types from .log import log @@ -94,8 +95,11 @@ def resolve_config_values(config, allow_source_expression=False): return config -def import_module(name, base_path=None, component_package=None): - """Import a module by name""" +def import_module(module, base_path=None, component_package=None): + """Import a module by name or return the module object if it's already imported""" + + if isinstance(module, types.ModuleType): + return module if component_package: install_package(component_package) @@ -104,14 +108,13 @@ def import_module(name, base_path=None, component_package=None): if base_path not in sys.path: sys.path.append(base_path) try: - module = importlib.import_module(name) - return module + return importlib.import_module(module) except ModuleNotFoundError as exc: # If the module does not have a path associated with it, try # importing it from the known prefixes - annoying that this # is necessary. It seems you can't dynamically import a module # that is listed in an __init__.py file :( - if "." not in name: + if "." 
not in module: for prefix_prefix in ["solace_ai_connector", "."]: for prefix in [ ".components", @@ -123,22 +126,21 @@ def import_module(name, base_path=None, component_package=None): ".transforms", ".common", ]: - full_name = f"{prefix_prefix}{prefix}.{name}" + full_name = f"{prefix_prefix}{prefix}.{module}" try: if full_name.startswith("."): - module = importlib.import_module( + return importlib.import_module( full_name, package=__package__ ) else: - module = importlib.import_module(full_name) - return module + return importlib.import_module(full_name) except ModuleNotFoundError: pass except Exception as e: raise ImportError( f"Module load error for {full_name}: {e}" ) from e - raise ModuleNotFoundError(f"Module '{name}' not found") from exc + raise ModuleNotFoundError(f"Module '{module}' not found") from exc def invoke_config(config, allow_source_expression=False): diff --git a/src/solace_ai_connector/components/general/openai/openai_chat_model_with_history.py b/src/solace_ai_connector/components/general/openai/openai_chat_model_with_history.py index 5fde36fa..ba7fe646 100644 --- a/src/solace_ai_connector/components/general/openai/openai_chat_model_with_history.py +++ b/src/solace_ai_connector/components/general/openai/openai_chat_model_with_history.py @@ -46,6 +46,11 @@ def __init__(self, **kwargs): def invoke(self, message, data): session_id = data.get("session_id") clear_history_but_keep_depth = data.get("clear_history_but_keep_depth") + try: + if clear_history_but_keep_depth is not None: + clear_history_but_keep_depth = max(0, int(clear_history_but_keep_depth)) + except (TypeError, ValueError): + clear_history_but_keep_depth = 0 messages = data.get("messages", []) with self.get_lock(self.history_key): diff --git a/src/solace_ai_connector/flow/flow.py b/src/solace_ai_connector/flow/flow.py index 9adfb27e..782c7ce8 100644 --- a/src/solace_ai_connector/flow/flow.py +++ b/src/solace_ai_connector/flow/flow.py @@ -90,10 +90,7 @@ def create_component_group(self, 
component, index): base_path = component.get("component_base_path", None) component_package = component.get("component_package", None) num_instances = component.get("num_instances", 1) - # component_config = component.get("component_config", {}) - # component_name = component.get("component_name", "") - # imported_module = import_from_directories(component_module) imported_module = import_module(component_module, base_path, component_package) try: diff --git a/tests/utils_for_test_files.py b/src/solace_ai_connector/test_utils/utils_for_test_files.py similarity index 62% rename from tests/utils_for_test_files.py rename to src/solace_ai_connector/test_utils/utils_for_test_files.py index 2bb9dfc9..fec9bad2 100644 --- a/tests/utils_for_test_files.py +++ b/src/solace_ai_connector/test_utils/utils_for_test_files.py @@ -1,8 +1,6 @@ -"""Collection of functions to be used in test files""" - +import os import queue import sys -import os import yaml sys.path.insert(0, os.path.abspath("src")) @@ -10,6 +8,7 @@ from solace_ai_connector.solace_ai_connector import SolaceAiConnector from solace_ai_connector.common.log import log from solace_ai_connector.common.event import Event, EventType +from solace_ai_connector.common.message import Message # from solace_ai_connector.common.message import Message @@ -61,12 +60,16 @@ def enqueue(self, message): self.next_component_queue.put(event) -def create_connector(config_yaml, event_handlers=None, error_queue=None): - """Create a connector from a config""" +def create_connector(config_or_yaml, event_handlers=None, error_queue=None): + """Create a connector from a config that can be an object or a yaml string""" + + config = config_or_yaml + if isinstance(config_or_yaml, str): + config = yaml.safe_load(config_or_yaml) # Create the connector connector = SolaceAiConnector( - yaml.safe_load(config_yaml), + config, event_handlers=event_handlers, error_queue=error_queue, ) @@ -76,9 +79,77 @@ def create_connector(config_yaml, event_handlers=None, 
error_queue=None): return connector -def create_test_flows(config_yaml, queue_timeout=None, error_queue=None, queue_size=0): +def run_component_test( + module_or_name, + validation_func, + component_config=None, + input_data=None, + input_messages=None, + input_selection=None, + input_transforms=None, +): + if not input_data and not input_messages: + raise ValueError("Either input_data or input_messages must be provided") + + if input_data and input_messages: + raise ValueError("Only one of input_data or input_messages can be provided") + + if input_data and not isinstance(input_data, list): + input_data = [input_data] + + if input_messages and not isinstance(input_messages, list): + input_messages = [input_messages] + + if not input_messages: + input_messages = [] + + if input_selection: + if isinstance(input_selection, str): + input_selection = {"source_expression": input_selection} + + connector = None + try: + connector, flows = create_test_flows( + { + "flows": [ + { + "name": "test_flow", + "components": [ + { + "component_name": "test_component", + "component_module": module_or_name, + "component_config": component_config or {}, + "input_selection": input_selection, + "input_transforms": input_transforms, + } + ], + } + ] + } + ) + + if input_data: + for data in input_data: + message = Message(payload=data) + message.set_previous(data) + input_messages.append(message) + + # Send each message through, one at a time + for message in input_messages: + send_message_to_flow(flows[0], message) + output_message = get_message_from_flow(flows[0]) + validation_func(output_message.get_previous(), output_message, message) + + finally: + if connector: + dispose_connector(connector) + + +def create_test_flows( + config_or_yaml, queue_timeout=None, error_queue=None, queue_size=0 +): # Create the connector - connector = create_connector(config_yaml, error_queue=error_queue) + connector = create_connector(config_or_yaml, error_queue=error_queue) flows = connector.get_flows() 
diff --git a/tests/test_acks.py b/tests/test_acks.py index c067fb56..bf0b1eae 100644 --- a/tests/test_acks.py +++ b/tests/test_acks.py @@ -1,11 +1,11 @@ """This file tests acks in a flow""" import sys -import queue sys.path.append("src") +import queue -from utils_for_test_files import ( # pylint: disable=wrong-import-position +from solace_ai_connector.test_utils.utils_for_test_files import ( # pylint: disable=wrong-import-position # create_connector, # create_and_run_component, dispose_connector, diff --git a/tests/test_aggregate.py b/tests/test_aggregate.py index a288410e..8826f6ab 100644 --- a/tests/test_aggregate.py +++ b/tests/test_aggregate.py @@ -1,8 +1,11 @@ """Some tests to verify the aggregate component works as expected""" +import sys + +sys.path.append("src") import time -from utils_for_test_files import ( +from solace_ai_connector.test_utils.utils_for_test_files import ( create_test_flows, dispose_connector, send_message_to_flow, diff --git a/tests/test_config_file.py b/tests/test_config_file.py index 5bd34f73..593bc0e9 100644 --- a/tests/test_config_file.py +++ b/tests/test_config_file.py @@ -1,19 +1,26 @@ """Test various things related to the configuration file""" import sys -import yaml import pytest +import yaml sys.path.append("src") -from utils_for_test_files import ( # pylint: disable=wrong-import-position +from solace_ai_connector.test_utils.utils_for_test_files import ( # pylint: disable=wrong-import-position create_connector, + create_test_flows, + dispose_connector, + send_message_to_flow, + get_message_from_flow, ) from solace_ai_connector.solace_ai_connector import ( # pylint: disable=wrong-import-position SolaceAiConnector, ) +from solace_ai_connector.common.message import Message +import solace_ai_connector.components.general.pass_through + # from solace_ai_connector.common.log import log @@ -143,6 +150,46 @@ def test_no_component_module(): assert str(e) == "component_module not provided in flow 0, component 0" +def 
test_static_import_and_object_config(): + """Test that we can statically import a module and pass an object for the config""" + + config = { + "log": {"log_file_level": "DEBUG", "log_file": "solace_ai_connector.log"}, + "flows": [ + { + "name": "test_flow", + "components": [ + { + "component_name": "delay1", + "component_module": solace_ai_connector.components.general.pass_through, + "component_config": {"delay": 0.1}, + "input_selection": {"source_expression": "input.payload"}, + } + ], + } + ], + } + connector = None + try: + connector, flows = create_test_flows(config) + + # Test pushing a simple message through the delay component + message = Message(payload={"text": "Hello, World!"}) + send_message_to_flow(flows[0], message) + + # Get the output message + output_message = get_message_from_flow(flows[0]) + + # Check that the output is correct + assert output_message.get_data("previous") == {"text": "Hello, World!"} + + except Exception as e: + pytest.fail(f"Test failed with exception: {e}") + finally: + if "connector" in locals(): + dispose_connector(connector) + + def test_bad_module(): """Test that the program exits if the component module is not found""" try: diff --git a/tests/test_error_flows.py b/tests/test_error_flows.py index b8ff4d72..8e7edfe6 100644 --- a/tests/test_error_flows.py +++ b/tests/test_error_flows.py @@ -2,11 +2,11 @@ import sys -# import queue - sys.path.append("src") -from utils_for_test_files import ( # pylint: disable=wrong-import-position +# import queue + +from solace_ai_connector.test_utils.utils_for_test_files import ( # pylint: disable=wrong-import-position create_test_flows, # create_and_run_component, dispose_connector, diff --git a/tests/test_filter.py b/tests/test_filter.py index b43b72f8..478cc94d 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,8 +1,12 @@ """Some tests to verify the filter component works as expected""" +import sys + +sys.path.append("src") + # import pytest -from utils_for_test_files 
import ( +from solace_ai_connector.test_utils.utils_for_test_files import ( create_test_flows, # create_connector, dispose_connector, diff --git a/tests/test_flows.py b/tests/test_flows.py index 6196ae1e..4687fda9 100644 --- a/tests/test_flows.py +++ b/tests/test_flows.py @@ -1,9 +1,12 @@ """This test file tests all things to do with the flows and the components that make up the flows""" +import sys + +sys.path.append("src") import pytest import time -from utils_for_test_files import ( +from solace_ai_connector.test_utils.utils_for_test_files import ( create_test_flows, create_connector, dispose_connector, diff --git a/tests/test_invoke.py b/tests/test_invoke.py index 58d0b771..fa0de0f3 100644 --- a/tests/test_invoke.py +++ b/tests/test_invoke.py @@ -5,13 +5,14 @@ sys.path.append("src") -from utils_for_test_files import ( # pylint: disable=wrong-import-position + +from solace_ai_connector.test_utils.utils_for_test_files import ( create_and_run_component, ) -from solace_ai_connector.common.utils import ( # pylint: disable=wrong-import-position +from solace_ai_connector.common.utils import ( resolve_config_values, ) -from solace_ai_connector.common.message import ( # pylint: disable=wrong-import-position +from solace_ai_connector.common.message import ( Message, ) @@ -1083,16 +1084,13 @@ def test_invoke_with_uuid_generator(): response = resolve_config_values( { "a": { - "invoke": { - "module": "invoke_functions", - "function": "uuid" - }, + "invoke": {"module": "invoke_functions", "function": "uuid"}, }, } - ) - + ) + # Check if the output is of type string assert type(response["a"]) == str # Check if the output is a valid UUID - assert len(response["a"]) == 36 \ No newline at end of file + assert len(response["a"]) == 36 diff --git a/tests/test_iterate.py b/tests/test_iterate.py index a33baccc..cffb7630 100644 --- a/tests/test_iterate.py +++ b/tests/test_iterate.py @@ -1,8 +1,12 @@ """Some tests to verify the iterate component works as expected""" +import sys + 
+sys.path.append("src") + # import pytest -from utils_for_test_files import ( +from solace_ai_connector.test_utils.utils_for_test_files import ( create_test_flows, # create_connector, dispose_connector, diff --git a/tests/test_message_get_set_data.py b/tests/test_message_get_set_data.py index f2258d1f..33622eda 100644 --- a/tests/test_message_get_set_data.py +++ b/tests/test_message_get_set_data.py @@ -1,11 +1,11 @@ """This test fixture will test the get_data and set_data methods of the Message class""" +import sys +sys.path.append("src") import json import base64 -import sys import pytest -sys.path.append("src") from solace_ai_connector.common.message import Message # Create a few different messages to test with diff --git a/tests/test_timer_input.py b/tests/test_timer_input.py index 343a7d87..b8897e2f 100644 --- a/tests/test_timer_input.py +++ b/tests/test_timer_input.py @@ -1,9 +1,12 @@ """Test the timer input component""" +import sys + +sys.path.append("src") import time import pytest -from utils_for_test_files import ( +from solace_ai_connector.test_utils.utils_for_test_files import ( create_test_flows, create_connector, dispose_connector, diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 9b001505..2efe69cc 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -4,14 +4,16 @@ sys.path.append("src") -from utils_for_test_files import ( # pylint: disable=wrong-import-position +from solace_ai_connector.test_utils.utils_for_test_files import ( # pylint: disable=wrong-import-position create_connector, create_and_run_component, + run_component_test, # dispose_connector, ) from solace_ai_connector.common.message import ( # pylint: disable=wrong-import-position Message, ) +import solace_ai_connector.components.general.pass_through def test_basic_copy_transform(): @@ -44,6 +46,64 @@ def test_basic_copy_transform(): assert output_message.get_data("previous") == "Hello, World!" 
+def test_transform_with_run_component_test(): + """This test is actually testing the test infrastructure method: run_component_test""" + + def validation_func(output_data, output_message, _input_message): + assert output_data == "Hello, World!" + assert output_message.get_data("user_data.temp") == { + "payload": {"text": "Hello, World!", "greeting": "Static Greeting!"} + } + + run_component_test( + "pass_through", + validation_func, + input_data={"text": "Hello, World!"}, + input_transforms=[ + { + "type": "copy", + "source_expression": "input.payload", + "dest_expression": "user_data.temp:payload", + }, + { + "type": "copy", + "source_value": "Static Greeting!", + "dest_expression": "user_data.temp:payload.greeting", + }, + ], + input_selection={"source_expression": "user_data.temp:payload.text"}, + ) + + +def test_transform_with_run_component_test_with_static_import(): + """This test is actually testing the test infrastructure method: run_component_test""" + + def validation_func(output_data, output_message, _input_message): + assert output_data == "Hello, World!" 
+ assert output_message.get_data("user_data.temp") == { + "payload": {"text": "Hello, World!", "greeting": "Static Greeting!"} + } + + run_component_test( + solace_ai_connector.components.general.pass_through, + validation_func, + input_data={"text": "Hello, World!"}, + input_transforms=[ + { + "type": "copy", + "source_expression": "input.payload", + "dest_expression": "user_data.temp:payload", + }, + { + "type": "copy", + "source_value": "Static Greeting!", + "dest_expression": "user_data.temp:payload.greeting", + }, + ], + input_selection={"source_expression": "user_data.temp:payload.text"}, + ) + + def test_basic_map_transform(): """Test the basic map transform""" # Create a simple configuration From 113b9304cd4af05172c3a0a2d7c2189374c6d78a Mon Sep 17 00:00:00 2001 From: Cyrus Mobini <68962752+cyrus2281@users.noreply.github.com> Date: Wed, 11 Sep 2024 09:53:21 -0400 Subject: [PATCH 10/26] Fix for anthropic example (#35) --- config.yaml | 1 - examples/ack_test.yaml | 1 - examples/anthropic_bedrock.yaml | 16 ++++++---------- examples/error_handler.yaml | 1 - examples/request_reply.yaml | 1 - 5 files changed, 6 insertions(+), 14 deletions(-) diff --git a/config.yaml b/config.yaml index fcb46677..360e99da 100644 --- a/config.yaml +++ b/config.yaml @@ -9,7 +9,6 @@ log: shared_config: - broker_config: &broker_connection - broker_connection_share: ${SOLACE_BROKER_URL} broker_type: solace broker_url: ${SOLACE_BROKER_URL} broker_username: ${SOLACE_BROKER_USERNAME} diff --git a/examples/ack_test.yaml b/examples/ack_test.yaml index 08314aa9..41fb2eb5 100644 --- a/examples/ack_test.yaml +++ b/examples/ack_test.yaml @@ -15,7 +15,6 @@ log: shared_config: - broker_config: &broker_connection - broker_connection_share: ${SOLACE_BROKER_URL} broker_type: solace broker_url: ${SOLACE_BROKER_URL} broker_username: ${SOLACE_BROKER_USERNAME} diff --git a/examples/anthropic_bedrock.yaml b/examples/anthropic_bedrock.yaml index 7c35bb60..03a0c6c9 100644 --- 
a/examples/anthropic_bedrock.yaml +++ b/examples/anthropic_bedrock.yaml @@ -3,7 +3,10 @@ # sends a message to an Anthropic Bedrock model, and # sends the response back to the Solace broker # It will ask the model to write a dry joke about the input -# message. It takes the entire payload of the input message +# message. +# Send a message to the Solace broker topics `my/topic1` or `my/topic2` +# with a plain text payload. The model will respond with a dry joke to the +# same topic prefixed with `response/`. (e.g. `response/my/topic1`) # # Dependencies: # pip install langchain_aws langchain_community @@ -28,12 +31,12 @@ log: shared_config: - broker_config: &broker_connection - broker_connection_share: ${SOLACE_BROKER_URL} broker_type: solace broker_url: ${SOLACE_BROKER_URL} broker_username: ${SOLACE_BROKER_USERNAME} broker_password: ${SOLACE_BROKER_PASSWORD} broker_vpn: ${SOLACE_BROKER_VPN} + payload_encoding: utf-8 # List of flows flows: @@ -51,7 +54,6 @@ flows: qos: 1 - topic: my/topic2 qos: 1 - payload_encoding: utf-8 payload_format: text - component_name: llm @@ -81,13 +83,7 @@ flows: - component_name: solace_sw_broker component_module: broker_output component_config: - broker_connection_share: ${SOLACE_BROKER_URL} - broker_type: solace - broker_url: ${SOLACE_BROKER_URL} - broker_username: ${SOLACE_BROKER_USERNAME} - broker_password: ${SOLACE_BROKER_PASSWORD} - broker_vpn: ${SOLACE_BROKER_VPN} - payload_encoding: utf-8 + <<: *broker_connection payload_format: text input_transforms: - type: copy diff --git a/examples/error_handler.yaml b/examples/error_handler.yaml index a8c700e6..f2277949 100644 --- a/examples/error_handler.yaml +++ b/examples/error_handler.yaml @@ -25,7 +25,6 @@ log: shared_config: - broker_config: &broker_connection - broker_connection_share: ${SOLACE_BROKER_URL} broker_type: solace broker_url: ${SOLACE_BROKER_URL} broker_username: ${SOLACE_BROKER_USERNAME} diff --git a/examples/request_reply.yaml b/examples/request_reply.yaml index 
3cdae477..69e5834e 100644 --- a/examples/request_reply.yaml +++ b/examples/request_reply.yaml @@ -16,7 +16,6 @@ log: shared_config: - broker_config: &broker_connection - broker_connection_share: ${SOLACE_BROKER_URL} broker_type: solace broker_url: ${SOLACE_BROKER_URL} broker_username: ${SOLACE_BROKER_USERNAME} From 6cbe54a1d26832d00a8dd36827d9fc26d2677930 Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Fri, 13 Sep 2024 15:35:04 -0400 Subject: [PATCH 11/26] feat: add request_response_controller.py --- .../flow/request_response_controller.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/solace_ai_connector/flow/request_response_controller.py diff --git a/src/solace_ai_connector/flow/request_response_controller.py b/src/solace_ai_connector/flow/request_response_controller.py new file mode 100644 index 00000000..fae79723 --- /dev/null +++ b/src/solace_ai_connector/flow/request_response_controller.py @@ -0,0 +1,27 @@ +""" +This file will handle sending a message to a named flow and then +receiving the output message from that flow. It will also support the result +message being a streamed message that comes in multiple parts. + +Each component can optionally create multiple of these using the configuration: + +```yaml +- name: example_flow + components: + - component_name: example_component + component_module: custom_component + request_response_controllers: + - name: example_controller + flow_name: llm_flow + streaming: true + streaming_last_message_expression: input.payload:streaming.last_message + timeout_ms: 300000 +``` + +""" + +# +# - Create the request response flow manager class that will hold all the request response flows +# - Create the request response controller class that will hold the request response controller +# config and manage sending messages to the flow and getting the output messages. 
+# From 4f9f79c0539382fb0693e046d352b25bc5af3748 Mon Sep 17 00:00:00 2001 From: "Edward Funnekotter (aider)" Date: Fri, 13 Sep 2024 15:35:39 -0400 Subject: [PATCH 12/26] feat: implement RequestResponseFlowManager and RequestResponseController classes --- .../flow/request_response_controller.py | 69 +++++++++++++++++-- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/src/solace_ai_connector/flow/request_response_controller.py b/src/solace_ai_connector/flow/request_response_controller.py index fae79723..bf3a3e4b 100644 --- a/src/solace_ai_connector/flow/request_response_controller.py +++ b/src/solace_ai_connector/flow/request_response_controller.py @@ -20,8 +20,67 @@ """ -# -# - Create the request response flow manager class that will hold all the request response flows -# - Create the request response controller class that will hold the request response controller -# config and manage sending messages to the flow and getting the output messages. -# +import threading +import queue +import time +from typing import Dict, Any + +class RequestResponseFlowManager: + def __init__(self): + self.flows: Dict[str, Any] = {} + + def add_flow(self, flow_name: str, flow): + self.flows[flow_name] = flow + + def get_flow(self, flow_name: str): + return self.flows.get(flow_name) + +class RequestResponseController: + def __init__(self, config: Dict[str, Any], flow_manager: RequestResponseFlowManager): + self.config = config + self.flow_manager = flow_manager + self.flow_name = config['flow_name'] + self.streaming = config.get('streaming', False) + self.streaming_last_message_expression = config.get('streaming_last_message_expression') + self.timeout_ms = config.get('timeout_ms', 30000) + self.response_queue = queue.Queue() + + def send_message(self, message: Any): + flow = self.flow_manager.get_flow(self.flow_name) + if not flow: + raise ValueError(f"Flow {self.flow_name} not found") + + flow.send_message(message) + + def get_response(self): + try: + if self.streaming: + 
return self._get_streaming_response() + else: + return self.response_queue.get(timeout=self.timeout_ms / 1000) + except queue.Empty: + raise TimeoutError(f"Timeout waiting for response from flow {self.flow_name}") + + def _get_streaming_response(self): + responses = [] + start_time = time.time() + while True: + try: + response = self.response_queue.get(timeout=(start_time + self.timeout_ms / 1000 - time.time())) + responses.append(response) + if self.streaming_last_message_expression: + if self._is_last_message(response): + break + except queue.Empty: + if responses: + break + raise TimeoutError(f"Timeout waiting for streaming response from flow {self.flow_name}") + return responses + + def _is_last_message(self, message): + # Implement logic to check if this is the last message based on the streaming_last_message_expression + # This might involve parsing the expression and checking the message content + pass + + def handle_response(self, response): + self.response_queue.put(response) From 139ed7087a02e4b423dbfd83f799ef20b79076a8 Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Fri, 13 Sep 2024 15:42:25 -0400 Subject: [PATCH 13/26] style: format code with black and improve readability --- .../flow/request_response_controller.py | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/solace_ai_connector/flow/request_response_controller.py b/src/solace_ai_connector/flow/request_response_controller.py index bf3a3e4b..36a890e9 100644 --- a/src/solace_ai_connector/flow/request_response_controller.py +++ b/src/solace_ai_connector/flow/request_response_controller.py @@ -25,6 +25,7 @@ import time from typing import Dict, Any + class RequestResponseFlowManager: def __init__(self): self.flows: Dict[str, Any] = {} @@ -35,21 +36,26 @@ def add_flow(self, flow_name: str, flow): def get_flow(self, flow_name: str): return self.flows.get(flow_name) + class RequestResponseController: - def __init__(self, config: Dict[str, Any], flow_manager: 
RequestResponseFlowManager): + def __init__( + self, config: Dict[str, Any], flow_manager: RequestResponseFlowManager + ): self.config = config self.flow_manager = flow_manager - self.flow_name = config['flow_name'] - self.streaming = config.get('streaming', False) - self.streaming_last_message_expression = config.get('streaming_last_message_expression') - self.timeout_ms = config.get('timeout_ms', 30000) + self.flow_name = config["flow_name"] + self.streaming = config.get("streaming", False) + self.streaming_last_message_expression = config.get( + "streaming_last_message_expression" + ) + self.timeout_ms = config.get("timeout_ms", 30000) self.response_queue = queue.Queue() def send_message(self, message: Any): flow = self.flow_manager.get_flow(self.flow_name) if not flow: raise ValueError(f"Flow {self.flow_name} not found") - + flow.send_message(message) def get_response(self): @@ -59,14 +65,18 @@ def get_response(self): else: return self.response_queue.get(timeout=self.timeout_ms / 1000) except queue.Empty: - raise TimeoutError(f"Timeout waiting for response from flow {self.flow_name}") + raise TimeoutError( + f"Timeout waiting for response from flow {self.flow_name}" + ) def _get_streaming_response(self): responses = [] start_time = time.time() while True: try: - response = self.response_queue.get(timeout=(start_time + self.timeout_ms / 1000 - time.time())) + response = self.response_queue.get( + timeout=(start_time + self.timeout_ms / 1000 - time.time()) + ) responses.append(response) if self.streaming_last_message_expression: if self._is_last_message(response): @@ -74,7 +84,9 @@ def _get_streaming_response(self): except queue.Empty: if responses: break - raise TimeoutError(f"Timeout waiting for streaming response from flow {self.flow_name}") + raise TimeoutError( + f"Timeout waiting for streaming response from flow {self.flow_name}" + ) return responses def _is_last_message(self, message): From 1e5404993e8becd6a4bbc18ad58944b15ec7483b Mon Sep 17 00:00:00 2001 
From: "Edward Funnekotter (aider)" Date: Fri, 13 Sep 2024 15:42:27 -0400 Subject: [PATCH 14/26] feat: implement RequestResponseController for flow-based request-response handling --- .../components/component_base.py | 16 +++++++++++ src/solace_ai_connector/flow/flow.py | 28 +++++++++++++++++++ .../flow/request_response_controller.py | 23 +++++++++++---- .../solace_ai_connector.py | 20 ++++++++++++- 4 files changed, 80 insertions(+), 7 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index bd4c52c4..b229eae3 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -78,6 +78,22 @@ def run(self): self.stop_component() + def process_single_event(self, event): + try: + if self.trace_queue: + self.trace_event(event) + return self.process_event(event) + except Exception as e: + log.error( + "%sComponent has encountered an error: %s\n%s", + self.log_identifier, + e, + traceback.format_exc(), + ) + if self.error_queue: + self.handle_error(e, event) + raise + def get_next_event(self): # Check if there is a get_next_message defined by a # component that inherits from this class - this is diff --git a/src/solace_ai_connector/flow/flow.py b/src/solace_ai_connector/flow/flow.py index 782c7ce8..def61dd0 100644 --- a/src/solace_ai_connector/flow/flow.py +++ b/src/solace_ai_connector/flow/flow.py @@ -47,6 +47,7 @@ def __init__( trace_queue=None, flow_instance_index=0, connector=None, + for_request_response=False, ): self.flow_config = flow_config self.flow_index = flow_index @@ -64,8 +65,30 @@ def __init__( self.flow_lock_manager = Flow._lock_manager self.flow_kv_store = Flow._kv_store self.cache_service = connector.cache_service if connector else None + self.for_request_response = for_request_response self.create_components() + def create_components(self): + # Loop through the components and create them + for index, component 
in enumerate(self.flow_config.get("components", [])): + self.create_component_group(component, index) + + # Now loop through them again and set the next component + for index, component_group in enumerate(self.component_groups): + if index < len(self.component_groups) - 1: + for component in component_group: + component.set_next_component(self.component_groups[index + 1][0]) + + # For request-response flows, don't create threads + if not self.for_request_response: + # Now one more time to create threads and run them + for index, component_group in enumerate(self.component_groups): + for component in component_group: + thread = component.create_thread_and_run() + self.threads.append(thread) + + self.flow_input_queue = self.component_groups[0][0].get_input_queue() + def create_components(self): # Loop through the components and create them for index, component in enumerate(self.flow_config.get("components", [])): @@ -124,6 +147,11 @@ def create_component_group(self, component, index): ) sibling_component = component_instance + # Set up RequestResponseController if specified + request_response_controllers = component.get("request_response_controllers", []) + for controller_config in request_response_controllers: + self.connector.create_request_response_controller(component_instance, controller_config) + # Add the component to the list component_group.append(component_instance) diff --git a/src/solace_ai_connector/flow/request_response_controller.py b/src/solace_ai_connector/flow/request_response_controller.py index 36a890e9..e54d75e2 100644 --- a/src/solace_ai_connector/flow/request_response_controller.py +++ b/src/solace_ai_connector/flow/request_response_controller.py @@ -39,10 +39,10 @@ def get_flow(self, flow_name: str): class RequestResponseController: def __init__( - self, config: Dict[str, Any], flow_manager: RequestResponseFlowManager + self, config: Dict[str, Any], connector: 'SolaceAiConnector' ): self.config = config - self.flow_manager = flow_manager + 
self.connector = connector self.flow_name = config["flow_name"] self.streaming = config.get("streaming", False) self.streaming_last_message_expression = config.get( @@ -50,13 +50,24 @@ def __init__( ) self.timeout_ms = config.get("timeout_ms", 30000) self.response_queue = queue.Queue() + self.flow_instance = self.connector.create_flow_instance(self.flow_name) + self.input_queue = self.flow_instance.get_flow_input_queue() + self.setup_response_queue() + + def setup_response_queue(self): + last_component = self.flow_instance.component_groups[-1][-1] + last_component.set_next_component(self) def send_message(self, message: Any): - flow = self.flow_manager.get_flow(self.flow_name) - if not flow: - raise ValueError(f"Flow {self.flow_name} not found") + if not self.input_queue: + raise ValueError(f"Input queue for flow {self.flow_name} not found") + + event = Event(EventType.MESSAGE, message) + self.input_queue.put(event) - flow.send_message(message) + def enqueue(self, event): + if event.event_type == EventType.MESSAGE: + self.response_queue.put(event.data) def get_response(self): try: diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index f41f50f4..882e6665 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -32,6 +32,15 @@ def __init__(self, config, event_handlers=None, error_queue=None): self.instance_name = self.config.get("instance_name", "solace_ai_connector") self.timer_manager = TimerManager(self.stop_signal) self.cache_service = self.setup_cache_service() + self.request_response_controllers = {} + + def create_request_response_controller(self, component, controller_config): + controller = RequestResponseController(controller_config, self) + self.request_response_controllers[component] = controller + return controller + + def get_request_response_controller(self, component): + return self.request_response_controllers.get(component) def 
run(self): """Run the Solace AI Event Connector""" @@ -63,7 +72,7 @@ def create_flows(self): self.flow_input_queues[flow.get("name")] = flow_input_queue self.flows.append(flow_instance) - def create_flow(self, flow: dict, index: int, flow_instance_index: int): + def create_flow(self, flow: dict, index: int, flow_instance_index: int, for_request_response=False): """Create a single flow""" return Flow( @@ -75,8 +84,17 @@ def create_flow(self, flow: dict, index: int, flow_instance_index: int): instance_name=self.instance_name, trace_queue=self.trace_queue, connector=self, + for_request_response=for_request_response ) + def create_flow_instance(self, flow_name: str): + """Create a new instance of a flow for request-response""" + for flow in self.config.get("flows", []): + if flow.get("name") == flow_name: + new_flow = self.create_flow(flow, -1, -1, for_request_response=True) + return new_flow + raise ValueError(f"Flow '{flow_name}' not found") + def send_message_to_flow(self, flow_name, message): """Send a message to a flow""" flow_input_queue = self.flow_input_queues.get(flow_name) From e16b0caa1e561553c165d2c111ef6cc7934afdc3 Mon Sep 17 00:00:00 2001 From: "Edward Funnekotter (aider)" Date: Fri, 13 Sep 2024 15:44:06 -0400 Subject: [PATCH 15/26] feat: implement RequestResponseController for handling request-response patterns --- src/solace_ai_connector/flow/request_response_controller.py | 3 +++ src/solace_ai_connector/solace_ai_connector.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/solace_ai_connector/flow/request_response_controller.py b/src/solace_ai_connector/flow/request_response_controller.py index e54d75e2..2cdbf2a9 100644 --- a/src/solace_ai_connector/flow/request_response_controller.py +++ b/src/solace_ai_connector/flow/request_response_controller.py @@ -37,6 +37,9 @@ def get_flow(self, flow_name: str): return self.flows.get(flow_name) +from typing import Dict, Any +from ..common.event import Event, EventType + class RequestResponseController: 
def __init__( self, config: Dict[str, Any], connector: 'SolaceAiConnector' diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index 882e6665..2c8eb79d 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -13,6 +13,8 @@ from .services.cache_service import CacheService, create_storage_backend +from .flow.request_response_controller import RequestResponseController + class SolaceAiConnector: """Solace AI Connector""" From 776622ffb2c8497faf26982edd9891412de47ca8 Mon Sep 17 00:00:00 2001 From: "Edward Funnekotter (aider)" Date: Fri, 13 Sep 2024 15:49:40 -0400 Subject: [PATCH 16/26] fix: import SolaceAiConnector for type checking --- src/solace_ai_connector/flow/request_response_controller.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/solace_ai_connector/flow/request_response_controller.py b/src/solace_ai_connector/flow/request_response_controller.py index 2cdbf2a9..31971fa9 100644 --- a/src/solace_ai_connector/flow/request_response_controller.py +++ b/src/solace_ai_connector/flow/request_response_controller.py @@ -37,9 +37,12 @@ def get_flow(self, flow_name: str): return self.flows.get(flow_name) -from typing import Dict, Any +from typing import Dict, Any, TYPE_CHECKING from ..common.event import Event, EventType +if TYPE_CHECKING: + from ..solace_ai_connector import SolaceAiConnector + class RequestResponseController: def __init__( self, config: Dict[str, Any], connector: 'SolaceAiConnector' From e90b1366851f3d34ce3aec37c73a56e7f9973346 Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Fri, 13 Sep 2024 16:04:11 -0400 Subject: [PATCH 17/26] refactor: restructure Flow class and improve code organization --- src/solace_ai_connector/flow/flow.py | 27 +++++-------------- .../solace_ai_connector.py | 13 ++++++--- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/src/solace_ai_connector/flow/flow.py 
b/src/solace_ai_connector/flow/flow.py index def61dd0..283f460a 100644 --- a/src/solace_ai_connector/flow/flow.py +++ b/src/solace_ai_connector/flow/flow.py @@ -89,25 +89,6 @@ def create_components(self): self.flow_input_queue = self.component_groups[0][0].get_input_queue() - def create_components(self): - # Loop through the components and create them - for index, component in enumerate(self.flow_config.get("components", [])): - self.create_component_group(component, index) - - # Now loop through them again and set the next component - for index, component_group in enumerate(self.component_groups): - if index < len(self.component_groups) - 1: - for component in component_group: - component.set_next_component(self.component_groups[index + 1][0]) - - # Now one more time to create threads and run them - for index, component_group in enumerate(self.component_groups): - for component in component_group: - thread = component.create_thread_and_run() - self.threads.append(thread) - - self.flow_input_queue = self.component_groups[0][0].get_input_queue() - def create_component_group(self, component, index): component_module = component.get("component_module", "") base_path = component.get("component_base_path", None) @@ -148,9 +129,13 @@ def create_component_group(self, component, index): sibling_component = component_instance # Set up RequestResponseController if specified - request_response_controllers = component.get("request_response_controllers", []) + request_response_controllers = component.get( + "request_response_controllers", [] + ) for controller_config in request_response_controllers: - self.connector.create_request_response_controller(component_instance, controller_config) + self.connector.create_request_response_controller( + component_instance, controller_config + ) # Add the component to the list component_group.append(component_instance) diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index 
2c8eb79d..aa101b97 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -11,10 +11,9 @@ from .flow.timer_manager import TimerManager from .common.event import Event, EventType from .services.cache_service import CacheService, create_storage_backend - - from .flow.request_response_controller import RequestResponseController + class SolaceAiConnector: """Solace AI Connector""" @@ -74,7 +73,13 @@ def create_flows(self): self.flow_input_queues[flow.get("name")] = flow_input_queue self.flows.append(flow_instance) - def create_flow(self, flow: dict, index: int, flow_instance_index: int, for_request_response=False): + def create_flow( + self, + flow: dict, + index: int, + flow_instance_index: int, + for_request_response=False, + ): """Create a single flow""" return Flow( @@ -86,7 +91,7 @@ def create_flow(self, flow: dict, index: int, flow_instance_index: int, for_requ instance_name=self.instance_name, trace_queue=self.trace_queue, connector=self, - for_request_response=for_request_response + for_request_response=for_request_response, ) def create_flow_instance(self, flow_name: str): From e9ab2f5dbcce6b875cc19f7a860af2094dfd68d5 Mon Sep 17 00:00:00 2001 From: "Edward Funnekotter (aider)" Date: Fri, 13 Sep 2024 16:04:13 -0400 Subject: [PATCH 18/26] feat: implement multiple named RequestResponseControllers per component --- src/solace_ai_connector/components/component_base.py | 4 ++++ src/solace_ai_connector/flow/flow.py | 10 ++++------ src/solace_ai_connector/solace_ai_connector.py | 7 ++----- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index b229eae3..e0bfdc38 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -38,6 +38,8 @@ def __init__(self, module_info, **kwargs): 
resolve_config_values(self.component_config) + self.request_response_controllers = {} + self.next_component = None self.thread = None self.queue_timeout_ms = DEFAULT_QUEUE_TIMEOUT_MS @@ -381,3 +383,5 @@ def cleanup(self): self.input_queue.get_nowait() except queue.Empty: break + def get_request_response_controller(self, name): + return self.request_response_controllers.get(name) diff --git a/src/solace_ai_connector/flow/flow.py b/src/solace_ai_connector/flow/flow.py index 283f460a..171543a9 100644 --- a/src/solace_ai_connector/flow/flow.py +++ b/src/solace_ai_connector/flow/flow.py @@ -128,13 +128,11 @@ def create_component_group(self, component, index): ) sibling_component = component_instance - # Set up RequestResponseController if specified - request_response_controllers = component.get( - "request_response_controllers", [] - ) - for controller_config in request_response_controllers: + # Set up RequestResponseControllers if specified + request_response_controllers = component.get("request_response_controllers", {}) + for controller_name, controller_config in request_response_controllers.items(): self.connector.create_request_response_controller( - component_instance, controller_config + component_instance, controller_name, controller_config ) # Add the component to the list diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index aa101b97..93c13de7 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -35,14 +35,11 @@ def __init__(self, config, event_handlers=None, error_queue=None): self.cache_service = self.setup_cache_service() self.request_response_controllers = {} - def create_request_response_controller(self, component, controller_config): + def create_request_response_controller(self, component, controller_name, controller_config): controller = RequestResponseController(controller_config, self) - 
self.request_response_controllers[component] = controller + component.request_response_controllers[controller_name] = controller return controller - def get_request_response_controller(self, component): - return self.request_response_controllers.get(component) - def run(self): """Run the Solace AI Event Connector""" log.debug("Starting Solace AI Event Connector") From 11b345ba533cfdd5f158861c6ecae25181f46ac2 Mon Sep 17 00:00:00 2001 From: "Edward Funnekotter (aider)" Date: Fri, 13 Sep 2024 16:07:42 -0400 Subject: [PATCH 19/26] refactor: initialize request-response controllers in ComponentBase --- src/solace_ai_connector/components/component_base.py | 10 ++++++++++ src/solace_ai_connector/flow/flow.py | 7 ------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index e0bfdc38..ec0d1bf3 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -39,6 +39,7 @@ def __init__(self, module_info, **kwargs): resolve_config_values(self.component_config) self.request_response_controllers = {} + self.initialize_request_response_controllers() self.next_component = None self.thread = None @@ -383,5 +384,14 @@ def cleanup(self): self.input_queue.get_nowait() except queue.Empty: break + + def initialize_request_response_controllers(self): + if self.connector: + request_response_controllers = self.config.get("request_response_controllers", {}) + for controller_name, controller_config in request_response_controllers.items(): + self.connector.create_request_response_controller( + self, controller_name, controller_config + ) + def get_request_response_controller(self, name): return self.request_response_controllers.get(name) diff --git a/src/solace_ai_connector/flow/flow.py b/src/solace_ai_connector/flow/flow.py index 171543a9..b591128d 100644 --- a/src/solace_ai_connector/flow/flow.py +++ 
b/src/solace_ai_connector/flow/flow.py @@ -128,13 +128,6 @@ def create_component_group(self, component, index): ) sibling_component = component_instance - # Set up RequestResponseControllers if specified - request_response_controllers = component.get("request_response_controllers", {}) - for controller_name, controller_config in request_response_controllers.items(): - self.connector.create_request_response_controller( - component_instance, controller_name, controller_config - ) - # Add the component to the list component_group.append(component_instance) From 9f5817acf68866b87c1b48bec88339973e1e1cf6 Mon Sep 17 00:00:00 2001 From: "Edward Funnekotter (aider)" Date: Sun, 15 Sep 2024 16:04:33 -0400 Subject: [PATCH 20/26] test: add request_response_controller functionality tests --- tests/test_request_response_controller.py | 180 ++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 tests/test_request_response_controller.py diff --git a/tests/test_request_response_controller.py b/tests/test_request_response_controller.py new file mode 100644 index 00000000..31ec39b1 --- /dev/null +++ b/tests/test_request_response_controller.py @@ -0,0 +1,180 @@ +import sys +import pytest +from unittest.mock import MagicMock + +sys.path.append("src") + +from solace_ai_connector.test_utils.utils_for_test_files import ( + create_test_flows, + dispose_connector, + send_message_to_flow, + get_message_from_flow, +) +from solace_ai_connector.common.message import Message +from solace_ai_connector.flow.request_response_controller import RequestResponseController + + +def test_request_response_controller_basic(): + """Test basic functionality of the RequestResponseController""" + config_yaml = """ +log: + log_file_level: DEBUG + log_file: solace_ai_connector.log +flows: + - name: request_flow + components: + - component_name: requester + component_module: pass_through + request_response_controllers: + test_controller: + flow_name: response_flow + timeout_ms: 5000 + - name: 
response_flow + components: + - component_name: responder + component_module: pass_through +""" + connector, flows = create_test_flows(config_yaml) + request_flow, response_flow = flows + + try: + # Mock the send_message_to_flow method of the connector + connector.send_message_to_flow = MagicMock() + + # Get the RequestResponseController from the requester component + requester_component = request_flow['flow'].component_groups[0][0] + controller = requester_component.get_request_response_controller("test_controller") + + assert controller is not None, "RequestResponseController not found" + + # Test sending a message + request_data = { + "payload": {"test": "data"}, + "topic": "test/topic", + "user_properties": {} + } + response = controller.send_message(request_data) + + # Check that send_message_to_flow was called with the correct arguments + connector.send_message_to_flow.assert_called_once() + call_args = connector.send_message_to_flow.call_args + assert call_args[0][0] == "response_flow" + sent_message = call_args[0][1] + assert sent_message.get_payload() == {"test": "data"} + assert sent_message.get_topic() == "test/topic" + + # Simulate a response + response_message = Message(payload={"response": "data"}) + send_message_to_flow(response_flow, response_message) + + # Check the response + assert response == {"response": "data"} + + finally: + dispose_connector(connector) + + +def test_request_response_controller_timeout(): + """Test timeout functionality of the RequestResponseController""" + config_yaml = """ +log: + log_file_level: DEBUG + log_file: solace_ai_connector.log +flows: + - name: request_flow + components: + - component_name: requester + component_module: pass_through + request_response_controllers: + test_controller: + flow_name: response_flow + timeout_ms: 100 # Very short timeout for testing + - name: response_flow + components: + - component_name: responder + component_module: pass_through +""" + connector, flows = 
create_test_flows(config_yaml) + request_flow = flows[0] + + try: + # Get the RequestResponseController from the requester component + requester_component = request_flow['flow'].component_groups[0][0] + controller = requester_component.get_request_response_controller("test_controller") + + assert controller is not None, "RequestResponseController not found" + + # Test sending a message + request_data = { + "payload": {"test": "data"}, + "topic": "test/topic", + "user_properties": {} + } + + with pytest.raises(TimeoutError): + controller.send_message(request_data) + + finally: + dispose_connector(connector) + + +def test_multiple_request_response_controllers(): + """Test multiple RequestResponseControllers in a single component""" + config_yaml = """ +log: + log_file_level: DEBUG + log_file: solace_ai_connector.log +flows: + - name: request_flow + components: + - component_name: requester + component_module: pass_through + request_response_controllers: + controller1: + flow_name: response_flow1 + timeout_ms: 5000 + controller2: + flow_name: response_flow2 + timeout_ms: 5000 + - name: response_flow1 + components: + - component_name: responder1 + component_module: pass_through + - name: response_flow2 + components: + - component_name: responder2 + component_module: pass_through +""" + connector, flows = create_test_flows(config_yaml) + request_flow, response_flow1, response_flow2 = flows + + try: + # Mock the send_message_to_flow method of the connector + connector.send_message_to_flow = MagicMock() + + # Get the RequestResponseControllers from the requester component + requester_component = request_flow['flow'].component_groups[0][0] + controller1 = requester_component.get_request_response_controller("controller1") + controller2 = requester_component.get_request_response_controller("controller2") + + assert controller1 is not None, "RequestResponseController 1 not found" + assert controller2 is not None, "RequestResponseController 2 not found" + + # Test sending 
messages to both controllers + request_data = { + "payload": {"test": "data"}, + "topic": "test/topic", + "user_properties": {} + } + + controller1.send_message(request_data) + controller2.send_message(request_data) + + # Check that send_message_to_flow was called twice with different flow names + assert connector.send_message_to_flow.call_count == 2 + call_args_list = connector.send_message_to_flow.call_args_list + assert call_args_list[0][0][0] == "response_flow1" + assert call_args_list[1][0][0] == "response_flow2" + + finally: + dispose_connector(connector) From 03e8292c859e914d5ca45c37d6cf21ad77fa704f Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Mon, 16 Sep 2024 08:45:55 -0400 Subject: [PATCH 21/26] feat: finished implementation and added some tests --- .../components/component_base.py | 59 ++-- .../components/general/delay.py | 3 +- .../general/for_testing/handler_callback.py | 67 ++++ src/solace_ai_connector/flow/flow.py | 26 +- .../flow/request_response_controller.py | 151 ++++---- .../solace_ai_connector.py | 33 +- .../test_utils/utils_for_test_files.py | 2 + tests/test_request_response_controller.py | 331 ++++++++++-------- 8 files changed, 405 insertions(+), 267 deletions(-) create mode 100644 src/solace_ai_connector/components/general/for_testing/handler_callback.py diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index ec0d1bf3..86dd1a30 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -10,6 +10,7 @@ from ..common.message import Message from ..common.trace_message import TraceMessage from ..common.event import Event, EventType +from ..flow.request_response_controller import RequestResponseController DEFAULT_QUEUE_TIMEOUT_MS = 200 DEFAULT_QUEUE_MAX_DEPTH = 5 @@ -39,7 +40,6 @@ def __init__(self, module_info, **kwargs): resolve_config_values(self.component_config) 
self.request_response_controllers = {} - self.initialize_request_response_controllers() self.next_component = None self.thread = None @@ -61,6 +61,10 @@ def create_thread_and_run(self): return self.thread def run(self): + # Init the request response controllers here so that we know + # the connector is fully initialized and all flows are created + self.initialize_request_response_controllers() + while not self.stop_signal.is_set(): event = None try: @@ -81,22 +85,6 @@ def run(self): self.stop_component() - def process_single_event(self, event): - try: - if self.trace_queue: - self.trace_event(event) - return self.process_event(event) - except Exception as e: - log.error( - "%sComponent has encountered an error: %s\n%s", - self.log_identifier, - e, - traceback.format_exc(), - ) - if self.error_queue: - self.handle_error(e, event) - raise - def get_next_event(self): # Check if there is a get_next_message defined by a # component that inherits from this class - this is @@ -233,7 +221,11 @@ def get_config(self, key=None, default=None): val = self.component_config.get(key, None) if val is None: val = self.config.get(key, default) - if callable(val): + if callable(val) and key not in [ + "invoke_handler", + "get_next_event_handler", + "send_message_handler", + ]: if self.current_message is None: raise ValueError( f"Component {self.log_identifier} is trying to use an `invoke` config " @@ -386,12 +378,33 @@ def cleanup(self): break def initialize_request_response_controllers(self): - if self.connector: - request_response_controllers = self.config.get("request_response_controllers", {}) - for controller_name, controller_config in request_response_controllers.items(): - self.connector.create_request_response_controller( - self, controller_name, controller_config + request_response_controllers_config = self.config.get( + "request_response_controllers", [] + ) + if request_response_controllers_config: + for rrc_config in request_response_controllers_config: + name = 
rrc_config.get("name") + if not name: + raise ValueError( + f"Request Response Controller in component {self.name} does not have a name" + ) + + rrc = RequestResponseController( + config=rrc_config, connector=self.connector ) + if not rrc: + raise ValueError( + f"Request Response Controller failed to initialize in component {self.name}" + ) + + self.request_response_controllers[name] = rrc + def get_request_response_controller(self, name): return self.request_response_controllers.get(name) + + def send_request_response_message(self, rrc_name, message, data): + rrc = self.get_request_response_controller(rrc_name) + if rrc: + return rrc.send_message(message, data) + return None diff --git a/src/solace_ai_connector/components/general/delay.py b/src/solace_ai_connector/components/general/delay.py index d4a05d03..8d8aaf02 100644 --- a/src/solace_ai_connector/components/general/delay.py +++ b/src/solace_ai_connector/components/general/delay.py @@ -38,5 +38,6 @@ def __init__(self, **kwargs): super().__init__(info, **kwargs) def invoke(self, message, data): - sleep(self.get_config("delay")) + delay = self.get_config("delay") + sleep(delay) return deepcopy(data) diff --git a/src/solace_ai_connector/components/general/for_testing/handler_callback.py b/src/solace_ai_connector/components/general/for_testing/handler_callback.py new file mode 100644 index 00000000..12d0ea72 --- /dev/null +++ b/src/solace_ai_connector/components/general/for_testing/handler_callback.py @@ -0,0 +1,67 @@ +"""This test component allows a tester to configure callback handlers for + get_next_event, send_message and invoke methods""" + +from ...component_base import ComponentBase + + +info = { + "class_name": "HandlerCallback", + "description": ( + "This test component allows a tester to configure callback handlers for " + "get_next_event, send_message and invoke methods" + ), + "config_parameters": [ + { + "name": "get_next_event_handler", + "required": False, + "description": "The callback handler for 
the get_next_event method", + "type": "function", + }, + { + "name": "send_message_handler", + "required": False, + "description": "The callback handler for the send_message method", + "type": "function", + }, + { + "name": "invoke_handler", + "required": False, + "description": "The callback handler for the invoke method", + "type": "function", + }, + ], + "input_schema": { + "type": "object", + "properties": {}, + }, + "output_schema": { + "type": "object", + "properties": {}, + }, +} + + +class HandlerCallback(ComponentBase): + def __init__(self, **kwargs): + super().__init__(info, **kwargs) + self.get_next_event_handler = self.get_config("get_next_event_handler") + self.send_message_handler = self.get_config("send_message_handler") + self.invoke_handler = self.get_config("invoke_handler") + + def get_next_event(self): + if self.get_next_event_handler: + return self.get_next_event_handler(self) + else: + return super().get_next_event() + + def send_message(self, message): + if self.send_message_handler: + return self.send_message_handler(self, message) + else: + return super().send_message(message) + + def invoke(self, message, data): + if self.invoke_handler: + return self.invoke_handler(self, message, data) + else: + return super().invoke(message, data) diff --git a/src/solace_ai_connector/flow/flow.py b/src/solace_ai_connector/flow/flow.py index b591128d..ea5091c0 100644 --- a/src/solace_ai_connector/flow/flow.py +++ b/src/solace_ai_connector/flow/flow.py @@ -47,7 +47,6 @@ def __init__( trace_queue=None, flow_instance_index=0, connector=None, - for_request_response=False, ): self.flow_config = flow_config self.flow_index = flow_index @@ -65,9 +64,11 @@ def __init__( self.flow_lock_manager = Flow._lock_manager self.flow_kv_store = Flow._kv_store self.cache_service = connector.cache_service if connector else None - self.for_request_response = for_request_response self.create_components() + def get_input_queue(self): + return self.flow_input_queue + def 
create_components(self): # Loop through the components and create them for index, component in enumerate(self.flow_config.get("components", [])): @@ -79,16 +80,15 @@ def create_components(self): for component in component_group: component.set_next_component(self.component_groups[index + 1][0]) - # For request-response flows, don't create threads - if not self.for_request_response: - # Now one more time to create threads and run them - for index, component_group in enumerate(self.component_groups): - for component in component_group: - thread = component.create_thread_and_run() - self.threads.append(thread) - self.flow_input_queue = self.component_groups[0][0].get_input_queue() + def run(self): + # Now one more time to create threads and run them + for _index, component_group in enumerate(self.component_groups): + for component in component_group: + thread = component.create_thread_and_run() + self.threads.append(thread) + def create_component_group(self, component, index): component_module = component.get("component_module", "") base_path = component.get("component_base_path", None) @@ -137,6 +137,12 @@ def create_component_group(self, component, index): def get_flow_input_queue(self): return self.flow_input_queue + # This will set the next component in all the components in the + # last component group + def set_next_component(self, component): + for comp in self.component_groups[-1]: + comp.set_next_component(component) + def wait_for_threads(self): for thread in self.threads: thread.join() diff --git a/src/solace_ai_connector/flow/request_response_controller.py b/src/solace_ai_connector/flow/request_response_controller.py index 31971fa9..67978ac9 100644 --- a/src/solace_ai_connector/flow/request_response_controller.py +++ b/src/solace_ai_connector/flow/request_response_controller.py @@ -20,96 +20,111 @@ """ -import threading import queue import time from typing import Dict, Any +from ..common.message import Message +from ..common.event import Event, EventType 
-class RequestResponseFlowManager: - def __init__(self): - self.flows: Dict[str, Any] = {} - - def add_flow(self, flow_name: str, flow): - self.flows[flow_name] = flow - - def get_flow(self, flow_name: str): - return self.flows.get(flow_name) +# This is a very basic component which will be stitched onto the final component in the flow +class RequestResponseControllerOuputComponent: + def __init__(self, controller): + self.controller = controller -from typing import Dict, Any, TYPE_CHECKING -from ..common.event import Event, EventType + def enqueue(self, event): + self.controller.enqueue_response(event) -if TYPE_CHECKING: - from ..solace_ai_connector import SolaceAiConnector +# This is the main class that will be used to send messages to a flow and receive the response class RequestResponseController: - def __init__( - self, config: Dict[str, Any], connector: 'SolaceAiConnector' - ): + def __init__(self, config: Dict[str, Any], connector: "SolaceAiConnector"): self.config = config self.connector = connector - self.flow_name = config["flow_name"] + self.flow_name = config.get("flow_name") self.streaming = config.get("streaming", False) self.streaming_last_message_expression = config.get( "streaming_last_message_expression" ) - self.timeout_ms = config.get("timeout_ms", 30000) - self.response_queue = queue.Queue() - self.flow_instance = self.connector.create_flow_instance(self.flow_name) - self.input_queue = self.flow_instance.get_flow_input_queue() - self.setup_response_queue() + self.timeout_s = config.get("timeout_ms", 30000) / 1000 + self.input_queue = None + self.response_queue = None + self.enqueue_time = None + self.request_outstanding = False + + flow = connector.get_flow(self.flow_name) + + if not flow: + raise ValueError(f"Flow {self.flow_name} not found") + + self.setup_queues(flow) - def setup_response_queue(self): - last_component = self.flow_instance.component_groups[-1][-1] - last_component.set_next_component(self) + def setup_queues(self, flow): + # 
Input queue to send the message to the flow + self.input_queue = flow.get_input_queue() + + # Response queue to receive the response from the flow + self.response_queue = queue.Queue() + rrcComponent = RequestResponseControllerOuputComponent(self) + flow.set_next_component(rrcComponent) + + def send_message(self, message: Message, data: Any): + # Make a new message, but copy the data from the original message + payload = message.get_payload() + topic = message.get_topic() + user_properties = message.get_user_properties() + new_message = Message( + payload=payload, topic=topic, user_properties=user_properties + ) + new_message.set_previous(data) - def send_message(self, message: Any): if not self.input_queue: raise ValueError(f"Input queue for flow {self.flow_name} not found") - event = Event(EventType.MESSAGE, message) + event = Event(EventType.MESSAGE, new_message) + self.enqueue_time = time.time() + self.request_outstanding = True self.input_queue.put(event) + return self.response_iterator - def enqueue(self, event): - if event.event_type == EventType.MESSAGE: - self.response_queue.put(event.data) - - def get_response(self): - try: + def response_iterator(self): + while True: + now = time.time() + elapsed_time = now - self.enqueue_time + remaining_timeout = self.timeout_s - elapsed_time if self.streaming: - return self._get_streaming_response() + # If we are in streaming mode, we will return individual messages + # until we receive the last message. 
Use the expression to determine + # if this is the last message + while True: + try: + event = self.response_queue.get(timeout=remaining_timeout) + if event.event_type == EventType.MESSAGE: + message = event.data + yield message, message.get_previous() + if self.streaming_last_message_expression: + last_message = message.get_data( + self.streaming_last_message_expression + ) + if last_message: + return + except queue.Empty: + if (time.time() - self.enqueue_time) > self.timeout_s: + raise TimeoutError("Timeout waiting for response") + else: - return self.response_queue.get(timeout=self.timeout_ms / 1000) - except queue.Empty: - raise TimeoutError( - f"Timeout waiting for response from flow {self.flow_name}" - ) - - def _get_streaming_response(self): - responses = [] - start_time = time.time() - while True: - try: - response = self.response_queue.get( - timeout=(start_time + self.timeout_ms / 1000 - time.time()) - ) - responses.append(response) - if self.streaming_last_message_expression: - if self._is_last_message(response): - break - except queue.Empty: - if responses: - break - raise TimeoutError( - f"Timeout waiting for streaming response from flow {self.flow_name}" - ) - return responses - - def _is_last_message(self, message): - # Implement logic to check if this is the last message based on the streaming_last_message_expression - # This might involve parsing the expression and checking the message content - pass - - def handle_response(self, response): - self.response_queue.put(response) + # If we are not in streaming mode, we will return a single message + # and then stop the iterator + try: + event = self.response_queue.get(timeout=remaining_timeout) + if event.event_type == EventType.MESSAGE: + message = event.data + yield message, message.get_previous() + return + except queue.Empty: + if (time.time() - self.enqueue_time) > self.timeout_s: + raise TimeoutError("Timeout waiting for response") + + def enqueue_response(self, event): + 
self.response_queue.put(event) diff --git a/src/solace_ai_connector/solace_ai_connector.py b/src/solace_ai_connector/solace_ai_connector.py index 93c13de7..7c4a6881 100644 --- a/src/solace_ai_connector/solace_ai_connector.py +++ b/src/solace_ai_connector/solace_ai_connector.py @@ -11,7 +11,6 @@ from .flow.timer_manager import TimerManager from .common.event import Event, EventType from .services.cache_service import CacheService, create_storage_backend -from .flow.request_response_controller import RequestResponseController class SolaceAiConnector: @@ -33,12 +32,6 @@ def __init__(self, config, event_handlers=None, error_queue=None): self.instance_name = self.config.get("instance_name", "solace_ai_connector") self.timer_manager = TimerManager(self.stop_signal) self.cache_service = self.setup_cache_service() - self.request_response_controllers = {} - - def create_request_response_controller(self, component, controller_name, controller_config): - controller = RequestResponseController(controller_config, self) - component.request_response_controllers[controller_name] = controller - return controller def run(self): """Run the Solace AI Event Connector""" @@ -69,14 +62,10 @@ def create_flows(self): flow_input_queue = flow_instance.get_flow_input_queue() self.flow_input_queues[flow.get("name")] = flow_input_queue self.flows.append(flow_instance) + for flow in self.flows: + flow.run() - def create_flow( - self, - flow: dict, - index: int, - flow_instance_index: int, - for_request_response=False, - ): + def create_flow(self, flow: dict, index: int, flow_instance_index: int): """Create a single flow""" return Flow( @@ -88,17 +77,8 @@ def create_flow( instance_name=self.instance_name, trace_queue=self.trace_queue, connector=self, - for_request_response=for_request_response, ) - def create_flow_instance(self, flow_name: str): - """Create a new instance of a flow for request-response""" - for flow in self.config.get("flows", []): - if flow.get("name") == flow_name: - new_flow = 
self.create_flow(flow, -1, -1, for_request_response=True) - return new_flow - raise ValueError(f"Flow '{flow_name}' not found") - def send_message_to_flow(self, flow_name, message): """Send a message to a flow""" flow_input_queue = self.flow_input_queues.get(flow_name) @@ -224,6 +204,13 @@ def get_flows(self): """Return the flows""" return self.flows + def get_flow(self, flow_name): + """Return a specific flow by name""" + for flow in self.flows: + if flow.name == flow_name: + return flow + return None + def setup_cache_service(self): """Setup the cache service""" cache_config = self.config.get("cache", {}) diff --git a/src/solace_ai_connector/test_utils/utils_for_test_files.py b/src/solace_ai_connector/test_utils/utils_for_test_files.py index fec9bad2..15a64927 100644 --- a/src/solace_ai_connector/test_utils/utils_for_test_files.py +++ b/src/solace_ai_connector/test_utils/utils_for_test_files.py @@ -156,6 +156,8 @@ def create_test_flows( # For each of the flows, add the input and output components flow_info = [] for flow in flows: + if flow.flow_config.get("test_ignore", False): + continue input_component = TestInputComponent( flow.component_groups[0][0].get_input_queue() ) diff --git a/tests/test_request_response_controller.py b/tests/test_request_response_controller.py index 31ec39b1..fa401ae1 100644 --- a/tests/test_request_response_controller.py +++ b/tests/test_request_response_controller.py @@ -1,6 +1,5 @@ import sys import pytest -from unittest.mock import MagicMock sys.path.append("src") @@ -11,170 +10,218 @@ get_message_from_flow, ) from solace_ai_connector.common.message import Message -from solace_ai_connector.flow.request_response_controller import RequestResponseController +from solace_ai_connector.flow.request_response_controller import ( + RequestResponseController, +) def test_request_response_controller_basic(): """Test basic functionality of the RequestResponseController""" - config_yaml = """ -log: - log_file_level: DEBUG - log_file: 
solace_ai_connector.log -flows: - - name: request_flow - components: - - component_name: requester - component_module: pass_through - request_response_controllers: - test_controller: - flow_name: response_flow - timeout_ms: 5000 - - name: response_flow - components: - - component_name: responder - component_module: pass_through -""" - connector, flows = create_test_flows(config_yaml) - request_flow, response_flow = flows + + def test_invoke_handler(component, message, data): + # Call the request_response_flow + data_iter = component.send_request_response_message( + "test_controller", message, {"test": "data"} + ) + + # Just a single message with no streaming + for message, data in data_iter(): + assert message.get_data("previous") == {"test": "data"} + assert message.get_data("input.payload") == {"text": "Hello, World!"} + + return "done" + + config = { + "flows": [ + { + "name": "test_flow", + "components": [ + { + "component_name": "requester", + "component_module": "handler_callback", + "component_config": { + "invoke_handler": test_invoke_handler, + }, + "request_response_controllers": [ + { + "name": "test_controller", + "flow_name": "request_response_flow", + "timeout_ms": 500000, + } + ], + } + ], + }, + { + "name": "request_response_flow", + "test_ignore": True, + "components": [ + {"component_name": "responder", "component_module": "pass_through"} + ], + }, + ] + } + connector, flows = create_test_flows(config) + + test_flow = flows[0] try: - # Mock the send_message_to_flow method of the connector - connector.send_message_to_flow = MagicMock() - - # Get the RequestResponseController from the requester component - requester_component = request_flow['flow'].component_groups[0][0] - controller = requester_component.get_request_response_controller("test_controller") - - assert controller is not None, "RequestResponseController not found" - - # Test sending a message - request_data = { - "payload": {"test": "data"}, - "topic": "test/topic", - "user_properties": 
{} - } - response = controller.send_message(request_data) - - # Check that send_message_to_flow was called with the correct arguments - connector.send_message_to_flow.assert_called_once() - call_args = connector.send_message_to_flow.call_args - assert call_args[0][0] == "response_flow" - sent_message = call_args[0][1] - assert sent_message.get_payload() == {"test": "data"} - assert sent_message.get_topic() == "test/topic" - - # Simulate a response - response_message = Message(payload={"response": "data"}) - send_message_to_flow(response_flow, response_message) - - # Check the response - assert response == {"response": "data"} + + # Send a message to the input flow + send_message_to_flow(test_flow, Message(payload={"text": "Hello, World!"})) + + # Get the output message + output_message = get_message_from_flow(test_flow) + + assert output_message.get_data("previous") == "done" finally: dispose_connector(connector) -def test_request_response_controller_timeout(): - """Test timeout functionality of the RequestResponseController""" - config_yaml = """ -log: - log_file_level: DEBUG - log_file: solace_ai_connector.log -flows: - - name: request_flow - components: - - component_name: requester - component_module: pass_through - request_response_controllers: - test_controller: - flow_name: response_flow - timeout_ms: 100 # Very short timeout for testing - - name: response_flow - components: - - component_name: responder - component_module: pass_through -""" - connector, flows = create_test_flows(config_yaml) - request_flow = flows[0] +# Test simple streaming request response +# Use the iterate component to break a single message into multiple messages +def test_request_response_controller_streaming(): + """Test streaming functionality of the RequestResponseController""" + + def test_invoke_handler(component, message, data): + # Call the request_response_flow + data_iter = component.send_request_response_message( + "test_controller", + message, + [ + {"test": "data1", 
"streaming": {"last_message": False}}, + {"test": "data2", "streaming": {"last_message": False}}, + {"test": "data3", "streaming": {"last_message": True}}, + ], + ) + + # Expecting 3 messages + results = [] + for message, data in data_iter(): + results.append(data.get("test")) + + assert results == ["data1", "data2", "data3"] + return "done" + + config = { + "flows": [ + { + "name": "test_flow", + "components": [ + { + "component_name": "requester", + "component_module": "handler_callback", + "component_config": { + "invoke_handler": test_invoke_handler, + }, + "request_response_controllers": [ + { + "name": "test_controller", + "flow_name": "request_response_flow", + "streaming": True, + "streaming_last_message_expression": "previous:streaming.last_message", + "timeout_ms": 500000, + } + ], + } + ], + }, + { + "name": "request_response_flow", + "test_ignore": True, + "components": [ + {"component_name": "responder", "component_module": "iterate"} + ], + }, + ] + } + connector, flows = create_test_flows(config) + + test_flow = flows[0] try: - # Get the RequestResponseController from the requester component - requester_component = request_flow['flow'].component_groups[0][0] - controller = requester_component.get_request_response_controller("test_controller") - assert controller is not None, "RequestResponseController not found" + # Send a message to the input flow + send_message_to_flow(test_flow, Message(payload={"text": "Hello, World!"})) - # Test sending a message - request_data = { - "payload": {"test": "data"}, - "topic": "test/topic", - "user_properties": {} - } + # Get the output message + output_message = get_message_from_flow(test_flow) - with pytest.raises(TimeoutError): - controller.send_message(request_data) + assert output_message.get_data("previous") == "done" finally: dispose_connector(connector) -def test_multiple_request_response_controllers(): - """Test multiple RequestResponseControllers in a single component""" - config_yaml = """ -log: - 
log_file_level: DEBUG - log_file: solace_ai_connector.log -flows: - - name: request_flow - components: - - component_name: requester - component_module: pass_through - request_response_controllers: - controller1: - flow_name: response_flow1 - timeout_ms: 5000 - controller2: - flow_name: response_flow2 - timeout_ms: 5000 - - name: response_flow1 - components: - - component_name: responder1 - component_module: pass_through - - name: response_flow2 - components: - - component_name: responder2 - component_module: pass_through -""" - connector, flows = create_test_flows(config_yaml) - request_flow, response_flow1, response_flow2 = flows +# Test the timeout functionality +def test_request_response_controller_timeout(): + """Test timeout functionality of the RequestResponseController""" + + def test_invoke_handler(component, message, data): + # Call the request_response_flow + data_iter = component.send_request_response_message( + "test_controller", message, {"test": "data"} + ) + + # This will timeout + try: + for message, data in data_iter(): + assert message.get_data("previous") == {"test": "data"} + assert message.get_data("input.payload") == {"text": "Hello, World!"} + except TimeoutError: + return "timeout" + return "done" + + config = { + "flows": [ + { + "name": "test_flow", + "components": [ + { + "component_name": "requester", + "component_module": "handler_callback", + "component_config": { + "invoke_handler": test_invoke_handler, + }, + "request_response_controllers": [ + { + "name": "test_controller", + "flow_name": "request_response_flow", + "timeout_ms": 1000, + } + ], + } + ], + }, + { + "name": "request_response_flow", + "test_ignore": True, + "components": [ + { + "component_name": "responder", + "component_module": "delay", + "component_config": { + "delay": 5, + }, + } + ], + }, + ] + } + connector, flows = create_test_flows(config) + + test_flow = flows[0] try: - # Mock the send_message_to_flow method of the connector - connector.send_message_to_flow 
= MagicMock() - - # Get the RequestResponseControllers from the requester component - requester_component = request_flow['flow'].component_groups[0][0] - controller1 = requester_component.get_request_response_controller("controller1") - controller2 = requester_component.get_request_response_controller("controller2") - - assert controller1 is not None, "RequestResponseController 1 not found" - assert controller2 is not None, "RequestResponseController 2 not found" - - # Test sending messages to both controllers - request_data = { - "payload": {"test": "data"}, - "topic": "test/topic", - "user_properties": {} - } - - controller1.send_message(request_data) - controller2.send_message(request_data) - - # Check that send_message_to_flow was called twice with different flow names - assert connector.send_message_to_flow.call_count == 2 - call_args_list = connector.send_message_to_flow.call_args_list - assert call_args_list[0][0][0] == "response_flow1" - assert call_args_list[1][0][0] == "response_flow2" + + # Send a message to the input flow + send_message_to_flow(test_flow, Message(payload={"text": "Hello, World!"})) + + # Get the output message + output_message = get_message_from_flow(test_flow) + + assert output_message.get_data("previous") == "timeout" finally: dispose_connector(connector) From 9943637bc054e6ca172a58dbdab10c8226df9bb3 Mon Sep 17 00:00:00 2001 From: "Edward Funnekotter (aider)" Date: Mon, 16 Sep 2024 08:53:01 -0400 Subject: [PATCH 22/26] refactor: rename RequestResponseController to RequestResponseFlowController --- .../components/component_base.py | 38 +++++++++---------- .../flow/request_response_controller.py | 2 +- tests/test_request_response_controller.py | 10 ++--- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 86dd1a30..c029b530 100644 --- a/src/solace_ai_connector/components/component_base.py +++ 
b/src/solace_ai_connector/components/component_base.py @@ -39,7 +39,7 @@ def __init__(self, module_info, **kwargs): resolve_config_values(self.component_config) - self.request_response_controllers = {} + self.request_response_flow_controllers = {} self.next_component = None self.thread = None @@ -377,34 +377,34 @@ def cleanup(self): except queue.Empty: break - def initialize_request_response_controllers(self): - request_response_controllers_config = self.config.get( - "request_response_controllers", [] + def initialize_request_response_flow_controllers(self): + request_response_flow_controllers_config = self.config.get( + "request_response_flow_controllers", [] ) - if request_response_controllers_config: - for rrc_config in request_response_controllers_config: - name = rrc_config.get("name") + if request_response_flow_controllers_config: + for rrfc_config in request_response_flow_controllers_config: + name = rrfc_config.get("name") if not name: raise ValueError( - f"Request Response Controller in component {self.name} does not have a name" + f"Request Response Flow Controller in component {self.name} does not have a name" ) - rrc = RequestResponseController( - config=rrc_config, connector=self.connector + rrfc = RequestResponseFlowController( + config=rrfc_config, connector=self.connector ) - if not rrc: + if not rrfc: raise ValueError( - f"Request Response Controller failed to initialize in component {self.name}" + f"Request Response Flow Controller failed to initialize in component {self.name}" ) - self.request_response_controllers[name] = rrc + self.request_response_flow_controllers[name] = rrfc - def get_request_response_controller(self, name): - return self.request_response_controllers.get(name) + def get_request_response_flow_controller(self, name): + return self.request_response_flow_controllers.get(name) - def send_request_response_message(self, rrc_name, message, data): - rrc = self.get_request_response_controller(rrc_name) - if rrc: - return 
rrc.send_message(message, data) + def send_request_response_flow_message(self, rrfc_name, message, data): + rrfc = self.get_request_response_flow_controller(rrfc_name) + if rrfc: + return rrfc.send_message(message, data) return None diff --git a/src/solace_ai_connector/flow/request_response_controller.py b/src/solace_ai_connector/flow/request_response_controller.py index 67978ac9..ef0aae21 100644 --- a/src/solace_ai_connector/flow/request_response_controller.py +++ b/src/solace_ai_connector/flow/request_response_controller.py @@ -38,7 +38,7 @@ def enqueue(self, event): # This is the main class that will be used to send messages to a flow and receive the response -class RequestResponseController: +class RequestResponseFlowController: def __init__(self, config: Dict[str, Any], connector: "SolaceAiConnector"): self.config = config self.connector = connector diff --git a/tests/test_request_response_controller.py b/tests/test_request_response_controller.py index fa401ae1..13436030 100644 --- a/tests/test_request_response_controller.py +++ b/tests/test_request_response_controller.py @@ -11,16 +11,16 @@ ) from solace_ai_connector.common.message import Message from solace_ai_connector.flow.request_response_controller import ( - RequestResponseController, + RequestResponseFlowController, ) -def test_request_response_controller_basic(): - """Test basic functionality of the RequestResponseController""" +def test_request_response_flow_controller_basic(): + """Test basic functionality of the RequestResponseFlowController""" def test_invoke_handler(component, message, data): # Call the request_response_flow - data_iter = component.send_request_response_message( + data_iter = component.send_request_response_flow_message( "test_controller", message, {"test": "data"} ) @@ -42,7 +42,7 @@ def test_invoke_handler(component, message, data): "component_config": { "invoke_handler": test_invoke_handler, }, - "request_response_controllers": [ + "request_response_flow_controllers": [ { 
"name": "test_controller", "flow_name": "request_response_flow", From 49796c873333e2c2abdf6a516758338c6a06ec19 Mon Sep 17 00:00:00 2001 From: "Edward Funnekotter (aider)" Date: Mon, 16 Sep 2024 08:53:42 -0400 Subject: [PATCH 23/26] refactor: rename RequestResponseController to RequestResponseFlowController --- src/solace_ai_connector/components/component_base.py | 2 +- src/solace_ai_connector/flow/request_response_controller.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index c029b530..34eab10b 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -10,7 +10,7 @@ from ..common.message import Message from ..common.trace_message import TraceMessage from ..common.event import Event, EventType -from ..flow.request_response_controller import RequestResponseController +from ..flow.request_response_controller import RequestResponseFlowController DEFAULT_QUEUE_TIMEOUT_MS = 200 DEFAULT_QUEUE_MAX_DEPTH = 5 diff --git a/src/solace_ai_connector/flow/request_response_controller.py b/src/solace_ai_connector/flow/request_response_controller.py index ef0aae21..99731655 100644 --- a/src/solace_ai_connector/flow/request_response_controller.py +++ b/src/solace_ai_connector/flow/request_response_controller.py @@ -22,7 +22,10 @@ import queue import time -from typing import Dict, Any +from typing import Dict, Any, TYPE_CHECKING + +if TYPE_CHECKING: + from ..solace_ai_connector import SolaceAiConnector from ..common.message import Message from ..common.event import Event, EventType From 3db7fa9e05f6065720a939b550089097abba1d9d Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Mon, 16 Sep 2024 09:07:56 -0400 Subject: [PATCH 24/26] refactor: some name changes --- .../components/component_base.py | 4 ++-- ...er.py => request_response_flow_controller.py} | 7 ++----- 
tests/test_request_response_controller.py | 16 ++++++++-------- 3 files changed, 12 insertions(+), 15 deletions(-) rename src/solace_ai_connector/flow/{request_response_controller.py => request_response_flow_controller.py} (96%) diff --git a/src/solace_ai_connector/components/component_base.py b/src/solace_ai_connector/components/component_base.py index 34eab10b..e20ecae9 100644 --- a/src/solace_ai_connector/components/component_base.py +++ b/src/solace_ai_connector/components/component_base.py @@ -10,7 +10,7 @@ from ..common.message import Message from ..common.trace_message import TraceMessage from ..common.event import Event, EventType -from ..flow.request_response_controller import RequestResponseFlowController +from ..flow.request_response_flow_controller import RequestResponseFlowController DEFAULT_QUEUE_TIMEOUT_MS = 200 DEFAULT_QUEUE_MAX_DEPTH = 5 @@ -63,7 +63,7 @@ def create_thread_and_run(self): def run(self): # Init the request response controllers here so that we know # the connector is fully initialized and all flows are created - self.initialize_request_response_controllers() + self.initialize_request_response_flow_controllers() while not self.stop_signal.is_set(): event = None diff --git a/src/solace_ai_connector/flow/request_response_controller.py b/src/solace_ai_connector/flow/request_response_flow_controller.py similarity index 96% rename from src/solace_ai_connector/flow/request_response_controller.py rename to src/solace_ai_connector/flow/request_response_flow_controller.py index 99731655..d81d7118 100644 --- a/src/solace_ai_connector/flow/request_response_controller.py +++ b/src/solace_ai_connector/flow/request_response_flow_controller.py @@ -22,10 +22,7 @@ import queue import time -from typing import Dict, Any, TYPE_CHECKING - -if TYPE_CHECKING: - from ..solace_ai_connector import SolaceAiConnector +from typing import Dict, Any from ..common.message import Message from ..common.event import Event, EventType @@ -42,7 +39,7 @@ def enqueue(self, 
event): # This is the main class that will be used to send messages to a flow and receive the response class RequestResponseFlowController: - def __init__(self, config: Dict[str, Any], connector: "SolaceAiConnector"): + def __init__(self, config: Dict[str, Any], connector): self.config = config self.connector = connector self.flow_name = config.get("flow_name") diff --git a/tests/test_request_response_controller.py b/tests/test_request_response_controller.py index 13436030..da3063aa 100644 --- a/tests/test_request_response_controller.py +++ b/tests/test_request_response_controller.py @@ -1,5 +1,4 @@ import sys -import pytest sys.path.append("src") @@ -10,22 +9,19 @@ get_message_from_flow, ) from solace_ai_connector.common.message import Message -from solace_ai_connector.flow.request_response_controller import ( - RequestResponseFlowController, -) def test_request_response_flow_controller_basic(): """Test basic functionality of the RequestResponseFlowController""" - def test_invoke_handler(component, message, data): + def test_invoke_handler(component, message, _data): # Call the request_response_flow data_iter = component.send_request_response_flow_message( "test_controller", message, {"test": "data"} ) # Just a single message with no streaming - for message, data in data_iter(): + for message, _data in data_iter(): assert message.get_data("previous") == {"test": "data"} assert message.get_data("input.payload") == {"text": "Hello, World!"} @@ -86,7 +82,7 @@ def test_request_response_controller_streaming(): def test_invoke_handler(component, message, data): # Call the request_response_flow - data_iter = component.send_request_response_message( + data_iter = component.send_request_response_flow_message( "test_controller", message, [ @@ -150,6 +146,10 @@ def test_invoke_handler(component, message, data): assert output_message.get_data("previous") == "done" + except Exception as e: + print(e) + assert False + finally: dispose_connector(connector) @@ -160,7 +160,7 @@ 
def test_request_response_controller_timeout(): def test_invoke_handler(component, message, data): # Call the request_response_flow - data_iter = component.send_request_response_message( + data_iter = component.send_request_response_flow_message( "test_controller", message, {"test": "data"} ) From a6c0c5dda1f0a4a51959c8589dbd6dc0dd166002 Mon Sep 17 00:00:00 2001 From: "Edward Funnekotter (aider)" Date: Mon, 16 Sep 2024 09:08:28 -0400 Subject: [PATCH 25/26] fix: update test function names for RequestResponseFlowController --- tests/test_request_response_controller.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_request_response_controller.py b/tests/test_request_response_controller.py index da3063aa..d8cc3a1d 100644 --- a/tests/test_request_response_controller.py +++ b/tests/test_request_response_controller.py @@ -77,8 +77,8 @@ def test_invoke_handler(component, message, _data): # Test simple streaming request response # Use the iterate component to break a single message into multiple messages -def test_request_response_controller_streaming(): - """Test streaming functionality of the RequestResponseController""" +def test_request_response_flow_controller_streaming(): + """Test streaming functionality of the RequestResponseFlowController""" def test_invoke_handler(component, message, data): # Call the request_response_flow @@ -155,8 +155,8 @@ def test_invoke_handler(component, message, data): # Test the timeout functionality -def test_request_response_controller_timeout(): - """Test timeout functionality of the RequestResponseController""" +def test_request_response_flow_controller_timeout(): + """Test timeout functionality of the RequestResponseFlowController""" def test_invoke_handler(component, message, data): # Call the request_response_flow From b836da7dc9d067fd70a7ae627464e0b431071d9a Mon Sep 17 00:00:00 2001 From: Edward Funnekotter Date: Mon, 16 Sep 2024 09:10:46 -0400 Subject: [PATCH 26/26] refactor: more name changes 
--- .../flow/request_response_flow_controller.py | 2 +- tests/test_request_response_controller.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/solace_ai_connector/flow/request_response_flow_controller.py b/src/solace_ai_connector/flow/request_response_flow_controller.py index d81d7118..373f2ec7 100644 --- a/src/solace_ai_connector/flow/request_response_flow_controller.py +++ b/src/solace_ai_connector/flow/request_response_flow_controller.py @@ -10,7 +10,7 @@ components: - component_name: example_component component_module: custom_component - request_response_controllers: + request_response_flow_controllers: - name: example_controller flow_name: llm_flow streaming: true diff --git a/tests/test_request_response_controller.py b/tests/test_request_response_controller.py index d8cc3a1d..5719badd 100644 --- a/tests/test_request_response_controller.py +++ b/tests/test_request_response_controller.py @@ -111,7 +111,7 @@ def test_invoke_handler(component, message, data): "component_config": { "invoke_handler": test_invoke_handler, }, - "request_response_controllers": [ + "request_response_flow_controllers": [ { "name": "test_controller", "flow_name": "request_response_flow", @@ -184,7 +184,7 @@ def test_invoke_handler(component, message, data): "component_config": { "invoke_handler": test_invoke_handler, }, - "request_response_controllers": [ + "request_response_flow_controllers": [ { "name": "test_controller", "flow_name": "request_response_flow",