diff --git a/.github/workflows/plugin-tests.yml b/.github/workflows/plugin-tests.yml
index 622c4c1..2245e01 100644
--- a/.github/workflows/plugin-tests.yml
+++ b/.github/workflows/plugin-tests.yml
@@ -59,6 +59,15 @@ jobs:
           sudo -E -u postgres script/start_test_db.rb
           sudo -u postgres psql -c "CREATE ROLE $PGUSER LOGIN SUPERUSER PASSWORD '$PGPASSWORD';"
 
+      - name: Install pg_embeddings
+        run: |
+          sudo apt-get update
+          sudo apt-get -y install -y postgresql-server-dev-13
+          git clone https://github.com/neondatabase/pg_embedding.git
+          cd pg_embedding
+          make PG_CONFIG=/usr/lib/postgresql/13/bin/pg_config
+          make PG_CONFIG=/usr/lib/postgresql/13/bin/pg_config install
+
       - name: Bundler cache
         uses: actions/cache@v3
         with:
diff --git a/app/jobs/regular/chatbot_post_embedding_delete_job.rb b/app/jobs/regular/chatbot_post_embedding_delete_job.rb
new file mode 100644
index 0000000..a09c8ef
--- /dev/null
+++ b/app/jobs/regular/chatbot_post_embedding_delete_job.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+# Job is triggered on a Post destruction.
+# Removes the stored embedding (if any) for the destroyed Post.
+class ::Jobs::ChatbotPostEmbeddingDeleteJob < Jobs::Base
+  sidekiq_options retry: false
+
+  # @param opts [Hash] job arguments; :id is the id of the destroyed Post
+  def execute(opts)
+    post_id = opts[:id]
+
+    ::DiscourseChatbot.progress_debug_message("101. Deleting a Post Embedding for Post id: #{post_id}")
+
+    # A Post may have no embedding row, so a missing record is not an error.
+    ::DiscourseChatbot::PostEmbedding.find_by(post_id: post_id)&.destroy!
+  rescue => e
+    # Retries are disabled for this job, so the failure is only recorded.
+    Rails.logger.error("OpenAIBot Post Embedding: There was a problem deleting the embedding for Post id #{post_id}: #{e}")
+  end
+end
diff --git a/app/jobs/regular/chatbot_post_embedding_job.rb b/app/jobs/regular/chatbot_post_embedding_job.rb
new file mode 100644
index 0000000..226abc5
--- /dev/null
+++ b/app/jobs/regular/chatbot_post_embedding_job.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+# Job is triggered on an update to a Post.
+class ::Jobs::ChatbotPostEmbeddingJob < Jobs::Base
+  sidekiq_options retry: 5, dead: false
+
+  # Creates or updates the embedding of the Post whose id is opts[:id].
+  def execute(opts)
+    post_id = opts[:id]
+
+    ::DiscourseChatbot.progress_debug_message("100. Creating/updating a Post Embedding for Post id: #{post_id}")
+
+    ::DiscourseChatbot::PostEmbeddingProcess.new.upsert_embedding(post_id)
+  rescue => e
+    Rails.logger.error("OpenAIBot Post Embedding: There was a problem, but will retry til limit: #{e}")
+    # Re-raise after logging: a swallowed error looks like success, so the
+    # retries configured above would never happen.
+    raise
+  end
+end
diff --git a/app/models/embedding.rb b/app/models/embedding.rb
new file mode 100644
index 0000000..a2ef3b4
--- /dev/null
+++ b/app/models/embedding.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+class ::DiscourseChatbot::PostEmbedding < ActiveRecord::Base
+  self.table_name = 'chatbot_post_embeddings'
+
+  validates :post_id, presence: true, uniqueness: true
+end
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index 801f920..168dcd7 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -47,6 +47,95 @@ en:
       title: "The subject of this conversation is %{topic_title}"
       first_post: "The first thing someone said was %{username} who said %{raw}"
       post: "%{username} said %{raw}"
+      function:
+        calculator:
+          description: |
+            Useful for getting the result of a math expression. It is a general purpose calculator. It works with Ruby expressions.
+
+            You can retrieve the current date from it too and using the core Ruby Time method to calculate dates.
+
+            The input to this tool should be a valid mathematical expression that could be executed by the base Ruby programming language with no extensions.
+
+            Be certain to prefix any functions with 'Math.'
+ + Usage: + Action Input: 1 + 1 + Action Input: 3 * 2 / 4 + Action Input: 9 - 7 + Action Input: Time.now - 2 * 24 * 60 * 60 + Action Input: Math.cbrt(13) + Math.cbrt(12) + Action Input: Math.sqrt(8) + Action Input: (4.1 + 2.3) / (2.0 - 5.6) * 3 + parameters: + input: the mathematical expression you need to process and get the answer to. Make sure it is Ruby compatible. + error: "'%{parameter}' is an invalid mathematical expression, make sure if you are trying to calculate dates use Ruby Time class" + forum_search: + description: | + Search the local forum for information that may help you answer the question. Especially useful when the forum specialises in the subject matter of the query. + Searching the local forum is preferable to searching google or the internet and should be considered higher priority. It is quicker and cheaper. + + Input should be a search query. You can optionally also specify the number of posts you wish returned from your query. + + Outputs text from the Post and a url link to it you can provide the user. When presenting the url in your reply, do not embed in an anchor, just write the straight link. + parameters: + query: "search query for looking up information on the forum" + number_of_posts: "specify the number of posts you want returned from your query" + answer_summary: "The top %{number_of_posts} posts on the forum related to this query are, best match first:\n\n" + answer: "Number %{rank}: the post is at this web address: %{url}, it was written by '%{username}' on %{date} and the text is '%{raw}'.\n\n" + error: "'%{query}': my search for this on the forum failed." + google_search: + description: | + A wrapper around Google Search. + + Useful for when you need to answer questions about current events. + Always one of the first options when you need to find information on internet. + + Input should be a search query. 
+ parameters: + query: "search query for looking up information on the internet" + error: "%{query}: my search for this on the internet failed." + news: + description: | + A wrapper around the News API. + + Useful for when you need to answer questions about current events in the news, current events or affairs. + + Input should be a search query and a date from which to search news, so if the request is today, the search should be for todays date + parameters: + query: "query string for searching current news and events" + start_date: "start date from which to search for news in format YYYY-MM-DD" + answer: "The latest news about this is: " + error: "ERROR: Had trouble retrieving the news!" + stock_data: + description: | + An API for MarketStack stock data. You need to call it using the stock ticker. You can optionally also provide a specific date. + parameters: + ticker: "ticker for share or stock query" + date: "date for data in format YYYY-MM-DD" + answer: "Ticker %{ticker} had a day close of %{close} on %{date}, with a high of %{high} and a low of %{low}" + error: "ERROR: Had trouble retrieving information from Market Stack for stock market information!" + wikipedia: + description: | + A wrapper around Wikipedia. + + Useful for when you need to answer general questions about + people, places, companies, facts, historical events, or other subjects. + + Input should be a search query + parameters: + query: "query string for wikipedia search" + answer: "The relevant wikipedia page has the following summary: '%{summary}' and the article can be found at this url link: %{url}" + error: "ERROR: Had trouble retrieving information from Wikipedia!" + agent: + handle_function_call: + answer: "The answer is %{result}." 
+ call_function: + error: "There was something wrong with your function arguments" + final_thought_answer: + opener: "To answer the question I will use these step by step instructions.\n\n" + thought_declaration: "I will use the %{function_name} function to calculate the answer with arguments %{arguments}.\n\n" + final_thought: "%{thoughts} Based on the above, I will now answer the question, this message will only be seen by me so answer with the assumption with that the user has not seen this message." + errors: general: "Sorry, I'm not well right now. Lets talk some other time. Meanwhile, please ask the admin to check the logs, thank you!" retries: "I've tried working out a response for you several times, but ultimately failed. Please contact the admin if this persists, thank you!" diff --git a/config/settings.yml b/config/settings.yml index 6c38e55..a9d5f1e 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -66,11 +66,13 @@ plugins: default: gpt-3.5-turbo choices: - gpt-3.5-turbo - - gpt-3.5-turbo-16k - gpt-3.5-turbo-0613 + - gpt-3.5-turbo-16k + - gpt-3.5-turbo-16k-0613 - gpt-4 - - gpt-4-32k - gpt-4-0613 + - gpt-4-32k + - gpt-4-32k-0613 chatbot_reply_job_time_delay: client: false default: 3 diff --git a/db/migrate/20230820010101_enable_embedding_extension.rb b/db/migrate/20230820010101_enable_embedding_extension.rb new file mode 100644 index 0000000..a57666c --- /dev/null +++ b/db/migrate/20230820010101_enable_embedding_extension.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +class EnableEmbeddingExtension < ActiveRecord::Migration[7.0] + def change + begin + enable_extension :embedding + rescue Exception => e + if DB.query_single("SELECT 1 FROM pg_available_extensions WHERE name = 'embedding';").empty? + STDERR.puts "----------------------------DISCOURSE CHATBOT ERROR----------------------------------" + STDERR.puts " Discourse Chatbot now requires the embedding extension on the PostgreSQL database." 
+ STDERR.puts " Run a `./launcher rebuild app` to fix it on a standard install." + STDERR.puts " Alternatively, you can remove Discourse Chatbot to rebuild." + STDERR.puts "----------------------------DISCOURSE CHATBOT ERROR----------------------------------" + end + raise e + end + end +end diff --git a/db/migrate/20230820010103_create_chatbot_embeddings_table.rb b/db/migrate/20230820010103_create_chatbot_embeddings_table.rb new file mode 100644 index 0000000..37bc53e --- /dev/null +++ b/db/migrate/20230820010103_create_chatbot_embeddings_table.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class CreateChatbotEmbeddingsTable < ActiveRecord::Migration[7.0] + def change + create_table :chatbot_embeddings do |t| + t.integer :post_id, null: false, index: { unique: true }, foreign_key: true + t.column :embedding, "real[]", null: false + t.timestamps + end + end +end diff --git a/db/migrate/20230820010105_create_chatbot_embeddings_index.rb b/db/migrate/20230820010105_create_chatbot_embeddings_index.rb new file mode 100644 index 0000000..47d5d85 --- /dev/null +++ b/db/migrate/20230820010105_create_chatbot_embeddings_index.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class CreateChatbotEmbeddingsIndex < ActiveRecord::Migration[7.0] + def up + execute <<-SQL + CREATE INDEX hnsw_index_on_chatbot_embeddings ON chatbot_embeddings USING hnsw(embedding) + WITH (dims=1536, m=64, efconstruction=64, efsearch=64); + SQL + end + + def down + execute <<-SQL + DROP INDEX hnsw_index_on_chatbot_embeddings; + SQL + end +end diff --git a/db/migrate/20230826010101_rename_chatbot_embeddings_table.rb b/db/migrate/20230826010101_rename_chatbot_embeddings_table.rb new file mode 100644 index 0000000..983900a --- /dev/null +++ b/db/migrate/20230826010101_rename_chatbot_embeddings_table.rb @@ -0,0 +1,13 @@ + +# frozen_string_literal: true + +class RenameChatbotEmbeddingsTable < ActiveRecord::Migration[7.0] + def change + begin + Migration::SafeMigrate.disable! 
+ rename_table :chatbot_embeddings, :chatbot_post_embeddings + ensure + Migration::SafeMigrate.enable! + end + end +end diff --git a/db/migrate/20230826010103_rename_chatbot_embeddings_index.rb b/db/migrate/20230826010103_rename_chatbot_embeddings_index.rb new file mode 100644 index 0000000..0e3763f --- /dev/null +++ b/db/migrate/20230826010103_rename_chatbot_embeddings_index.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +class RenameChatbotEmbeddingsIndex < ActiveRecord::Migration[7.0] + def change + rename_index :chatbot_post_embeddings, 'hnsw_index_on_chatbot_embeddings', 'hnsw_index_on_chatbot_post_embeddings' + end +end diff --git a/lib/discourse_chatbot/bots/open_ai_agent.rb b/lib/discourse_chatbot/bots/open_ai_agent.rb index 8b63c0e..2e6c7ba 100644 --- a/lib/discourse_chatbot/bots/open_ai_agent.rb +++ b/lib/discourse_chatbot/bots/open_ai_agent.rb @@ -3,19 +3,19 @@ module ::DiscourseChatbot - class OpenAIAgent < Bot + class OpenAIAgent < OpenAIBotBase def initialize super - @model_name = SiteSetting.chatbot_open_ai_model_custom ? SiteSetting.chatbot_open_ai_model_custom_name : SiteSetting.chatbot_open_ai_model - calculator_function = ::DiscourseChatbot::CalculatorFunction.new wikipedia_function = ::DiscourseChatbot::WikipediaFunction.new news_function = ::DiscourseChatbot::NewsFunction.new google_search_function = ::DiscourseChatbot::GoogleSearchFunction.new + forum_search_function = ::DiscourseChatbot::ForumSearchFunction.new stock_data_function = ::DiscourseChatbot::StockDataFunction.new - functions = [calculator_function, wikipedia_function] + + functions = [calculator_function, wikipedia_function, forum_search_function] functions << news_function if !SiteSetting.chatbot_news_api_token.blank? functions << google_search_function if !SiteSetting.chatbot_serp_api_key.blank? 
@@ -106,7 +106,7 @@ def handle_function_call(res) func_name = first_message["function_call"]["name"] args_str = first_message["function_call"]["arguments"] result = call_function(func_name, args_str) - res_msg = { 'role' => 'assistant', 'content' => "The answer is #{result}." } + res_msg = { 'role' => 'assistant', 'content' => I18n.t("chatbot.prompt.agent.handle_function_call.answer", result: result) } @internal_thoughts << res_msg end @@ -121,24 +121,26 @@ def call_function(func_name, args_str) func = @func_mapping[func_name] res = func.process(args) res - rescue - "There was something wrong with your function arguments" + rescue + I18n.t("chatbot.prompt.agent.call_function.error") end end def final_thought_answer - thoughts = "To answer the question I will use these step by step instructions.\n\n" + thoughts = I18n.t("chatbot.prompt.agent.final_thought_answer.opener") @internal_thoughts.each do |thought| if thought.key?('function_call') - thoughts += "I will use the #{thought['function_call']['name']} function to calculate the answer with arguments #{thought['function_call']['arguments']}.\n\n" + thoughts += I18n.t("chatbot.prompt.agent.final_thought_answer.thought_declaration", function_name: thought['function_call']['name'], arguments: thought['function_call']['arguments']) else thoughts += "#{thought['content']}\n\n" end end + final_thought = { 'role' => 'assistant', - 'content' => "#{thoughts} Based on the above, I will now answer the question, this message will only be seen by me so answer with the assumption with that the user has not seen this message." 
+ 'content' => I18n.t("chatbot.prompt.agent.final_thought_answer.final_thought", thoughts: thoughts) } + final_thought end diff --git a/lib/discourse_chatbot/bots/open_ai_bot.rb b/lib/discourse_chatbot/bots/open_ai_bot.rb index f44f30b..fc75aba 100644 --- a/lib/discourse_chatbot/bots/open_ai_bot.rb +++ b/lib/discourse_chatbot/bots/open_ai_bot.rb @@ -3,7 +3,7 @@ module ::DiscourseChatbot - class OpenAIBot < Bot + class OpenAIBot < OpenAIBotBase def initialize super @@ -13,11 +13,9 @@ def get_response(prompt) system_message = { "role": "system", "content": I18n.t("chatbot.prompt.system.basic") } prompt.unshift(system_message) - model_name = SiteSetting.chatbot_open_ai_model_custom ? SiteSetting.chatbot_open_ai_model_custom_name : SiteSetting.chatbot_open_ai_model - response = @client.chat( parameters: { - model: model_name, + model: @model_name, messages: prompt, max_tokens: SiteSetting.chatbot_max_response_tokens, temperature: SiteSetting.chatbot_request_temperature / 100.0, diff --git a/lib/discourse_chatbot/bots/open_ai_bot_base.rb b/lib/discourse_chatbot/bots/open_ai_bot_base.rb new file mode 100644 index 0000000..f329be0 --- /dev/null +++ b/lib/discourse_chatbot/bots/open_ai_bot_base.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true +require "openai" + +module ::DiscourseChatbot + + class OpenAIBotBase < Bot + def initialize + ::OpenAI.configure do |config| + config.access_token = SiteSetting.chatbot_open_ai_token + end + if !SiteSetting.chatbot_open_ai_model_custom_url.blank? + ::OpenAI.configure do |config| + config.uri_base = SiteSetting.chatbot_open_ai_model_custom_url + end + end + if SiteSetting.chatbot_open_ai_model_custom_api_type == "azure" + ::OpenAI.configure do |config| + config.api_type = :azure + config.api_version = SiteSetting.chatbot_open_ai_model_custom_api_version + end + end + @client = ::OpenAI::Client.new + @model_name = SiteSetting.chatbot_open_ai_model_custom ? 
SiteSetting.chatbot_open_ai_model_custom_name : SiteSetting.chatbot_open_ai_model + end + + def get_response(prompt) + raise "Overwrite me!" + end + + end +end diff --git a/lib/discourse_chatbot/functions/calculator_function.rb b/lib/discourse_chatbot/functions/calculator_function.rb index 0dbdf2b..626001b 100644 --- a/lib/discourse_chatbot/functions/calculator_function.rb +++ b/lib/discourse_chatbot/functions/calculator_function.rb @@ -10,28 +10,12 @@ def name end def description - <<~EOS - Useful for getting the result of a math expression. It is a general purpose calculator. It works with Ruby expressions. - - You can retrieve the current date from it too and using the core Ruby Time method to calculate dates. - - The input to this tool should be a valid mathematical expression that could be executed by the base Ruby programming language with no extensions. - - Be certain to prefix any functions with 'Math.' - Usage: - Action Input: 1 + 1 - Action Input: 3 * 2 / 4 - Action Input: 9 - 7 - Action Input: Time.now - 2 * 24 * 60 * 60 - Action Input: Math.cbrt(13) + Math.cbrt(12) - Action Input: Math.sqrt(8) - Action Input: (4.1 + 2.3) / (2.0 - 5.6) * 3" - EOS + I18n.t("chatbot.prompt.function.calculator.description") end def parameters [ - { name: "input", type: String, description: "the mathematical expression you need to process and get the answer to. Make sure it is Ruby compatible." 
} ,
+        { name: "input", type: String, description: I18n.t("chatbot.prompt.function.calculator.parameters.input") } ,
       ]
     end
 
@@ -45,7 +29,7 @@ def process(args)
 
         SafeRuby.eval(args[parameters[0][:name]], timeout: 5)
       rescue
-        "\"#{args[parameters[0][:name]]}\" is an invalid mathematical expression, make sure if you are trying to calculate dates use Ruby Time class"
+        I18n.t("chatbot.prompt.function.calculator.error", parameter: args[parameters[0][:name]])
       end
     end
   end
diff --git a/lib/discourse_chatbot/functions/forum_search_function.rb b/lib/discourse_chatbot/functions/forum_search_function.rb
new file mode 100644
index 0000000..ab89e52
--- /dev/null
+++ b/lib/discourse_chatbot/functions/forum_search_function.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+require_relative '../function'
+
+module DiscourseChatbot
+  # Chatbot function that answers a query with the best matching Posts from
+  # the local forum, found through PostEmbeddingProcess#semantic_search.
+  class ForumSearchFunction < Function
+    DEFAULT_NUMBER_OF_POSTS = 3
+    MAX_NUMBER_OF_POSTS = 10
+
+    def name
+      'local_forum_search'
+    end
+
+    def description
+      I18n.t("chatbot.prompt.function.forum_search.description")
+    end
+
+    def parameters
+      [
+        { name: "query", type: String, description: I18n.t("chatbot.prompt.function.forum_search.parameters.query") } ,
+        { name: "number_of_posts", type: Integer, description: I18n.t("chatbot.prompt.function.forum_search.parameters.number_of_posts") }
+      ]
+    end
+
+    def required
+      ['query']
+    end
+
+    # Runs a semantic search for the query and formats the best matches.
+    #
+    # @param args [Hash] function arguments keyed by parameter name
+    # @return [String] ranked summary of the matching Posts, or the
+    #   localised error message if anything fails
+    def process(args)
+      super(args)
+      query = args[parameters[0][:name]]
+      requested = args[parameters[1][:name]]
+      # Coerce and bound the requested count so that a String, zero or
+      # negative value cannot break the comparison or the slice below.
+      number_of_posts = (requested.blank? ? DEFAULT_NUMBER_OF_POSTS : requested.to_i).clamp(1, MAX_NUMBER_OF_POSTS)
+
+      post_embedding = ::DiscourseChatbot::PostEmbeddingProcess.new
+      results = post_embedding.semantic_search(query)
+
+      top_results = results.first(number_of_posts)
+
+      response = I18n.t("chatbot.prompt.function.forum_search.answer_summary", number_of_posts: number_of_posts)
+
+      top_results.each_with_index do |result, index|
+        current_post = ::Post.find(result.to_i)
+        url = "https://#{Discourse.current_hostname}/t/slug/#{current_post.topic_id}/#{current_post.post_number}"
+        raw = current_post.raw
+        username = User.find(current_post.user_id).username
+        date = current_post.created_at.to_date
+        response += I18n.t("chatbot.prompt.function.forum_search.answer", url: url, username: username, date: date, raw: raw, rank: index + 1)
+      end
+      response
+    rescue
+      I18n.t("chatbot.prompt.function.forum_search.error", query: args[parameters[0][:name]])
+    end
+  end
+end
diff --git a/lib/discourse_chatbot/functions/google_search_function.rb b/lib/discourse_chatbot/functions/google_search_function.rb
index 25743fb..d8b1aaf 100644
--- a/lib/discourse_chatbot/functions/google_search_function.rb
+++ b/lib/discourse_chatbot/functions/google_search_function.rb
@@ -11,19 +11,12 @@ def name
     end
 
     def description
-      <<~EOS
-        A wrapper around Google Search.
-
-        Useful for when you need to answer questions about current events.
-        Always one of the first options when you need to find information on internet.
-
-        Input should be a search query.
-      EOS
+      I18n.t("chatbot.prompt.function.google_search.description")
     end
 
     def parameters
       [
-        { name: "query", type: String, description: "search query for looking up information on the internet" } ,
+        { name: "query", type: String, description: I18n.t("chatbot.prompt.function.google_search.parameters.query") } ,
       ]
     end
 
@@ -42,7 +35,7 @@ def process(args)
         hash_results.dig(:answer_box, :snippet) ||
           hash_results.dig(:organic_results, 0, :snippet)
       rescue
-        "\"#{args[parameters[0][:name]]}\": my search for this on the internet failed."
+ I18n.t("chatbot.prompt.function.google_search.error", query: args[parameters[0][:name]]) end end end diff --git a/lib/discourse_chatbot/functions/news_function.rb b/lib/discourse_chatbot/functions/news_function.rb index f08865a..8a707d2 100644 --- a/lib/discourse_chatbot/functions/news_function.rb +++ b/lib/discourse_chatbot/functions/news_function.rb @@ -10,19 +10,13 @@ def name end def description - <<~EOS - A wrapper around the News API. - - Useful for when you need to answer questions about current events in the news, current events or affairs. - - Input should be a search query and a date from which to search news, so if the request is today, the search should be for todays date' - EOS + I18n.t("chatbot.prompt.function.news.description") end def parameters [ - { name: 'query', type: String, description: "query string for searching current news and events" }, - { name: 'start_date', type: String, description: "start date from which to search for news in format YYYY-MM-DD" } + { name: 'query', type: String, description: I18n.t("chatbot.prompt.function.news.parameters.query") }, + { name: 'start_date', type: String, description: I18n.t("chatbot.prompt.function.news.parameters.start_date") } ] end @@ -57,13 +51,13 @@ def process(args) all_articles = response_body["articles"] - news = "The latest news about this is: " + news = I18n.t("chatbot.prompt.function.news.answer") all_articles.each do |a| news += "#{a["title"]}. " end news rescue - "ERROR: Had trouble retrieving the news!" + I18n.t("chatbot.prompt.function.news.error") end end end diff --git a/lib/discourse_chatbot/functions/stock_data_function.rb b/lib/discourse_chatbot/functions/stock_data_function.rb index 4633559..17f5f45 100644 --- a/lib/discourse_chatbot/functions/stock_data_function.rb +++ b/lib/discourse_chatbot/functions/stock_data_function.rb @@ -14,15 +14,13 @@ def name end def description - <<~EOS - An API for MarketStack stock data. You need to call it using the stock ticker. 
You can optionally also provide a specific date. - EOS + I18n.t("chatbot.prompt.function.stock_data.description") end def parameters [ - { name: 'ticker', type: String, description: "ticker for share or stock query" }, - { name: 'date', type: String, description: "date for data in format YYYY-MM-DD" } + { name: 'ticker', type: String, description: I18n.t("chatbot.prompt.function.stock_data.parameters.ticker") }, + { name: 'date', type: String, description: I18n.t("chatbot.prompt.function.stock_data.parameters.date") } ] end @@ -58,9 +56,9 @@ def process(args) stock_data = api_response['data'][0] - "Ticker #{stock_data['symbol']} had a day close of #{stock_data['close'].to_s} on #{stock_data['date'].to_s}, with a high of #{stock_data['high'].to_s} and a low of #{stock_data['low'].to_s}" + I18n.t("chatbot.prompt.function.stock_data.answer", ticker: stock_data['symbol'], close: stock_data['close'].to_s, date: stock_data['date'].to_s, high: stock_data['high'].to_s, low: stock_data['low'].to_s) rescue - "ERROR: Had trouble retrieving information from Market Stack for stock market information!" + I18n.t("chatbot.prompt.function.stock_data.error") end end end diff --git a/lib/discourse_chatbot/functions/wikipedia_function.rb b/lib/discourse_chatbot/functions/wikipedia_function.rb index 3678d2a..35653c3 100644 --- a/lib/discourse_chatbot/functions/wikipedia_function.rb +++ b/lib/discourse_chatbot/functions/wikipedia_function.rb @@ -12,19 +12,12 @@ def name end def description - <<~EOS - A wrapper around Wikipedia. - - Useful for when you need to answer general questions about - people, places, companies, facts, historical events, or other subjects. 
- - Input should be a search query - EOS + I18n.t("chatbot.prompt.function.wikipedia.description") end def parameters [ - { name: 'query', type: String, description: "query string for wikipedia search" } + { name: 'query', type: String, description: I18n.t("chatbot.prompt.function.wikipedia.parameters.query") } ] end @@ -38,9 +31,9 @@ def process(args) page = ::Wikipedia.find(args[parameters[0][:name]]) - page.summary + I18n.t("chatbot.prompt.function.wikipedia.answer", summary: page.summary, url: page.fullurl) rescue - "ERROR: Had trouble retrieving information from Wikipedia!" + I18n.t("chatbot.prompt.function.wikipedia.error") end end end diff --git a/lib/discourse_chatbot/post_embedding_process.rb b/lib/discourse_chatbot/post_embedding_process.rb new file mode 100644 index 0000000..abbf05c --- /dev/null +++ b/lib/discourse_chatbot/post_embedding_process.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true +require "openai" + +module ::DiscourseChatbot + + class PostEmbeddingProcess + + def initialize + ::OpenAI.configure do |config| + config.access_token = SiteSetting.chatbot_open_ai_token + end + if !SiteSetting.chatbot_open_ai_model_custom_url.blank? + ::OpenAI.configure do |config| + config.uri_base = SiteSetting.chatbot_open_ai_model_custom_url + end + end + if SiteSetting.chatbot_open_ai_model_custom_api_type == "azure" + ::OpenAI.configure do |config| + config.api_type = :azure + config.api_version = SiteSetting.chatbot_open_ai_model_custom_api_version + end + end + @model_name = ::DiscourseChatbot::EMBEDDING_MODEL + @client = ::OpenAI::Client.new + end + + def upsert_embedding(post_id) + benchmark_user = User.where(trust_level: 1, active: true, admin: false, suspended_at: nil).last + if benchmark_user.nil? + raise StandardError, "No benchmark user exists for Post embedding suitability check, please add a basic user" + end + benchmark_user_guardian = Guardian.new(benchmark_user) + + post = ::Post.find_by(id: post_id) + + return if post.nil? 
+
+      # Only Posts the benchmark user is allowed to see get an embedding.
+      if benchmark_user_guardian.can_see?(post)
+        response = @client.embeddings(
+          parameters: {
+            model: @model_name,
+            input: post.raw[0...::DiscourseChatbot::EMBEDDING_CHAR_LIMIT]
+          }
+        )
+
+        embedding_vector = response.dig("data", 0, "embedding")
+
+        # Insert a new row, or overwrite the existing one for this Post.
+        ::DiscourseChatbot::PostEmbedding.upsert({ post_id: post_id, embedding: embedding_vector }, on_duplicate: :update, unique_by: :post_id)
+      end
+    end
+
+    # Embeds the query and returns the post ids of up to ten stored embeddings,
+    # ordered by ascending `<->` distance to it. Database errors are re-raised.
+    def semantic_search(query)
+      response = @client.embeddings(
+        parameters: {
+          model: @model_name,
+          input: query[0...::DiscourseChatbot::EMBEDDING_CHAR_LIMIT]
+        }
+      )
+
+      query_vector = response.dig("data", 0, "embedding")
+
+      DB.query(<<~SQL, query_embedding: query_vector, limit: 10).map(
+        SELECT
+          post_id
+        FROM
+          chatbot_post_embeddings
+        ORDER BY
+          embedding::real[] <-> array[:query_embedding]
+        LIMIT :limit
+      SQL
+        &:post_id
+      )
+    rescue PG::Error => e
+      Rails.logger.error("Error #{e} querying embeddings for search #{query}")
+      # The failure is logged above; re-raise the original database error
+      # for the caller to handle.
+      raise
+    end
+  end
+end
diff --git a/lib/tasks/chatbot.rake b/lib/tasks/chatbot.rake
new file mode 100644
index 0000000..057c2fd
--- /dev/null
+++ b/lib/tasks/chatbot.rake
@@ -0,0 +1,83 @@
+# frozen_string_literal: true
+desc "Update embeddings for each post"
+task "chatbot:refresh_embeddings", %i[missing_only delay] => :environment do |_, args|
+  ENV["RAILS_DB"] ?
refresh_embeddings(args) : refresh_embeddings_all_sites(args) +end + +desc "Refresh embeddings for all posts matching string/regex and optionally delay the loop" +task "chatbot:refresh_embeddings_match", %i[pattern type delay] => [:environment] do |_, args| + args.with_defaults(type: "string") + pattern = args[:pattern] + type = args[:type]&.downcase + delay = args[:delay]&.to_i + + if !pattern + puts "ERROR: Expecting rake chatbot:refresh_embeddings_match[pattern,type,delay]" + exit 1 + elsif delay && delay < 1 + puts "ERROR: delay parameter should be an integer and greater than 0" + exit 1 + elsif type != "string" && type != "regex" + puts "ERROR: Expecting rake chatbot:refresh_embeddings_match[pattern,type] where type is string or regex" + exit 1 + end + + search = Post.raw_match(pattern, type) + + refreshed = 0 + total = search.count + + search.find_each do |post| + post_embedding = ::DiscourseChatbot::PostEmbeddingProcess.new + post_embedding.upsert_embedding(post.id) + print_status(refreshed += 1, total) + sleep(delay) if delay + end + + puts "", "#{refreshed} posts done!", "" +end + +def refresh_embeddings_all_sites(args) + RailsMultisite::ConnectionManagement.each_connection { |db| refresh_embeddings(args) } +end + +def refresh_embeddings(args) + puts "-" * 50 + puts "Refreshing embeddings for posts for '#{RailsMultisite::ConnectionManagement.current_db}'" + puts "-" * 50 + + missing_only = args[:missing_only]&.to_i + delay = args[:delay]&.to_i + + puts "for missing only" if !missing_only.to_i.zero? + puts "with a delay of #{delay} second(s) between API calls" if !delay.to_i.zero? + puts "-" * 50 + + if delay && delay < 1 + puts "ERROR: delay parameter should be an integer and greater than 0" + exit 1 + end + + begin + total = Post.count + refreshed = 0 + batch = 1000 + + (0..(total - 1).abs).step(batch) do |i| + Post + .order(id: :desc) + .offset(i) + .limit(batch) + .each do |post| + if !missing_only.to_i.zero? 
&& ::DiscourseChatbot::PostEmbedding.find_by(post_id: post.id).nil? || missing_only.to_i.zero?
+            post_embedding = ::DiscourseChatbot::PostEmbeddingProcess.new
+            post_embedding.upsert_embedding(post.id)
+            sleep(delay) if delay
+          end
+          print_status(refreshed += 1, total)
+        end
+    end
+  end
+
+  puts "", "#{refreshed} posts done!", "-" * 50
+end
diff --git a/plugin.rb b/plugin.rb
index 1149086..b7646e8 100644
--- a/plugin.rb
+++ b/plugin.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 # name: discourse-chatbot
 # about: a plugin that allows you to have a conversation with a configurable chatbot in Discourse Chat, Topics and Private Messages
-# version: 0.30
+# version: 0.31
 # authors: merefield
 # url: https://github.com/merefield/discourse-chatbot
 
@@ -23,6 +23,8 @@ module ::DiscourseChatbot
   CHATBOT_QUERIES_CUSTOM_FIELD = "chatbot_queries"
   POST_TYPES_REGULAR_ONLY = [1]
   POST_TYPES_INC_WHISPERS = [1, 4]
+  EMBEDDING_MODEL = "text-embedding-ada-002".freeze
+  EMBEDDING_CHAR_LIMIT = 32000
 
   def progress_debug_message(message)
     if SiteSetting.chatbot_enable_verbose_console_response_progress_logging
@@ -48,9 +50,14 @@ def progress_debug_message(message)
 
   %w(
     ../lib/discourse_chatbot/event_evaluation.rb
+    ../app/models/embedding.rb
+    ../lib/discourse_chatbot/post_embedding_process.rb
+    ../app/jobs/regular/chatbot_post_embedding_job.rb
+    ../app/jobs/regular/chatbot_post_embedding_delete_job.rb
     ../lib/discourse_chatbot/message/message_evaluation.rb
     ../lib/discourse_chatbot/post/post_evaluation.rb
     ../lib/discourse_chatbot/bot.rb
+    ../lib/discourse_chatbot/bots/open_ai_bot_base.rb
     ../lib/discourse_chatbot/bots/open_ai_bot.rb
     ../lib/discourse_chatbot/bots/open_ai_agent.rb
     ../lib/discourse_chatbot/function.rb
@@ -58,6 +65,7 @@ def progress_debug_message(message)
     ../lib/discourse_chatbot/functions/news_function.rb
     ../lib/discourse_chatbot/functions/wikipedia_function.rb
     ../lib/discourse_chatbot/functions/google_search_function.rb
+    ../lib/discourse_chatbot/functions/forum_search_function.rb
     ../lib/discourse_chatbot/functions/stock_data_function.rb
     ../lib/discourse_chatbot/functions/parser.rb
     ../lib/discourse_chatbot/prompt_utils.rb
@@ -77,19 +85,53 @@ def progress_debug_message(message)
   DiscourseEvent.on(:post_created) do |*params|
     post, opts, user = params
 
-    if SiteSetting.chatbot_enabled && (post.post_type == 1 || post.post_type == 4 && SiteSetting.chatbot_can_trigger_from_whisper)
-      ::DiscourseChatbot.progress_debug_message("1. trigger")
+    if SiteSetting.chatbot_enabled
+      if post.post_type == 1
+        job_class = ::Jobs::ChatbotPostEmbeddingJob
+        job_class.perform_async(post.as_json)
+      end
 
-      bot_username = SiteSetting.chatbot_bot_user
-      bot_user = User.find_by(username: bot_username)
+      if (post.post_type == 1 || post.post_type == 4 && SiteSetting.chatbot_can_trigger_from_whisper)
+        ::DiscourseChatbot.progress_debug_message("1. trigger")
 
-      if bot_user && (user.id != bot_user.id)
-        event_evaluation = ::DiscourseChatbot::PostEvaluation.new
-        event_evaluation.on_submission(post)
+        bot_username = SiteSetting.chatbot_bot_user
+        bot_user = User.find_by(username: bot_username)
+
+        if bot_user && (user.id != bot_user.id)
+          event_evaluation = ::DiscourseChatbot::PostEvaluation.new
+          event_evaluation.on_submission(post)
+        end
       end
     end
   end
 
+  DiscourseEvent.on(:post_edited) do |*params|
+    post, opts = params
+
+    if SiteSetting.chatbot_enabled && post.post_type == 1
+      job_class = ::Jobs::ChatbotPostEmbeddingJob
+      job_class.perform_async(post.as_json)
+    end
+  end
+
+  DiscourseEvent.on(:post_recovered) do |*params|
+    post, opts = params
+
+    if SiteSetting.chatbot_enabled && post.post_type == 1
+      job_class = ::Jobs::ChatbotPostEmbeddingJob
+      job_class.perform_async(post.as_json)
+    end
+  end
+
+  DiscourseEvent.on(:post_destroyed) do |*params|
+    post, opts, user = params
+
+    if SiteSetting.chatbot_enabled && post.post_type == 1
+      job_class = ::Jobs::ChatbotPostEmbeddingDeleteJob
+      job_class.perform_async(post.as_json)
+    end
+  end
+
   DiscourseEvent.on(:chat_message_created) do |*params|
     chat_message, chat_channel, user = params
 
@@ -105,5 +147,4 @@ def progress_debug_message(message)
       end
     end
   end
-
 end