Skip to content

Commit

Permalink
IMPROVE: legalise URLs found in raw contents of Posts
Browse files Browse the repository at this point in the history
  • Loading branch information
merefield committed Jun 11, 2024
1 parent 70fef97 commit 1947198
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 7 deletions.
5 changes: 2 additions & 3 deletions lib/discourse_chatbot/bots/open_ai_bot_rag.rb
Original file line number Diff line number Diff line change
Expand Up @@ -269,13 +269,12 @@ def call_function(func_name, args_str, opts)
def legal_urls?(res, post_ids_found, topic_ids_found)
return true if res.blank?

post_url_regex = %r{\/t/[^/]+/(\d+)/(\d+)}
topic_url_regex = %r{\/t/[^/]+/(\d+)(?!\d|\/)}
post_url_regex = ::DiscourseChatbot::POST_URL_REGEX
topic_url_regex = ::DiscourseChatbot::TOPIC_URL_REGEX

topic_ids_in_text = res.scan(topic_url_regex).flatten
post_combos_in_text = res.scan(post_url_regex)


topic_ids_in_text.each do |topic_id_in_text|
if !topic_ids_found.include?(topic_id_in_text.to_i)
return false
Expand Down
33 changes: 33 additions & 0 deletions lib/discourse_chatbot/functions/forum_search_function.rb
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ def process(args)
break if post.nil?
next if post.deleted_at || !accepted_post_types.include?(post.post_type)
response += I18n.t("chatbot.prompt.function.forum_search.answer.topic.each.post", post_number: post_number, username: post.user.username, date: post.created_at, raw: post.raw)

topic_ids_in_raw_urls_found, post_ids_in_raw_urls_found = find_post_and_topic_ids_from_raw_urls(post.raw)

topic_ids_found = topic_ids_found | topic_ids_in_raw_urls_found
post_ids_found = post_ids_found | post_ids_in_raw_urls_found

post_ids_found << post.id
post_number += 1
end
Expand All @@ -115,6 +121,12 @@ def process(args)
username = User.find(current_post.user_id).username
date = current_post.created_at.to_date
response += I18n.t("chatbot.prompt.function.forum_search.answer.post.each", url: url, username: username, date: date, raw: raw, score: score, rank: index + 1)

topic_ids_in_raw_urls_found, post_ids_in_raw_urls_found = find_post_and_topic_ids_from_raw_urls(raw)

topic_ids_found = topic_ids_found | topic_ids_in_raw_urls_found
post_ids_found = post_ids_found | post_ids_in_raw_urls_found

post_ids_found << current_post.id
end
end
Expand All @@ -124,5 +136,26 @@ def process(args)
{ result: I18n.t("chatbot.prompt.function.forum_search.error", query: args[parameters[0][:name]]), topic_ids_found: [], post_ids_found: [] }
end
end

# Collects the topic ids and post ids referenced by forum URLs inside +raw+.
#
# Topic links (matches of ::DiscourseChatbot::TOPIC_URL_REGEX) contribute the
# topic id captured from the URL. Post links (matches of
# ::DiscourseChatbot::POST_URL_REGEX) capture a topic id and a post number,
# which are resolved with ::Post.find_by; each post that is found contributes
# its id and its topic id.
#
# @param raw [String, nil] raw post text to scan; nil is treated as empty text
# @return [Array] a two-element array: the topic ids found, then the post ids found
def find_post_and_topic_ids_from_raw_urls(raw)
  text = raw.to_s

  topic_ids_found = text.scan(::DiscourseChatbot::TOPIC_URL_REGEX).flatten.map(&:to_i)
  post_ids_found = []

  text.scan(::DiscourseChatbot::POST_URL_REGEX).each do |topic_id_in_text, post_number_in_text|
    post = ::Post.find_by(topic_id: topic_id_in_text.to_i, post_number: post_number_in_text.to_i)

    # find_by returns nil when the link does not resolve to a post; skip it
    # rather than raising NoMethodError and failing the whole lookup.
    next if post.nil?

    post_ids_found << post.id
    topic_ids_found << post.topic_id
  end

  [topic_ids_found, post_ids_found]
end
end
end
5 changes: 4 additions & 1 deletion plugin.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true
# name: discourse-chatbot
# about: a plugin that allows you to have a conversation with a configurable chatbot in Discourse Chat, Topics and Private Messages
# version: 0.9.29
# version: 0.9.30
# authors: merefield
# url: https://github.com/merefield/discourse-chatbot

Expand Down Expand Up @@ -34,6 +34,9 @@ module ::DiscourseChatbot

EMBEDDING_PROCESS_POSTS_CHUNK = 300

TOPIC_URL_REGEX = %r{\/t/[^/]+/(\d+)(?!\d|\/)}
POST_URL_REGEX = %r{\/t/[^/]+/(\d+)/(\d+)(?!\d|\/)}

def progress_debug_message(message)
puts "Chatbot: #{message}" if SiteSetting.chatbot_enable_verbose_console_logging
Rails.logger.info("Chatbot: #{message}") if SiteSetting.chatbot_enable_verbose_rails_logging
Expand Down
11 changes: 8 additions & 3 deletions spec/lib/functions/forum_search_function_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
let(:post_3) { Fabricate(:post, topic: topic_1, raw: "on the plain", post_number: 3) }
let(:post_4) { Fabricate(:post, topic: topic_1, raw: "or so they say!", post_number: 4) }
let(:topic_2) { Fabricate(:topic, title: "weather in northern Europe") }
let(:post_5) { Fabricate(:post, topic: topic_2, raw: "rains everywhere", post_number: 1) }
let(:post_5) { Fabricate(:post, topic: topic_2, raw: "rains everywhere https://example.com/t/slug/#{post_2.topic_id}/#{post_2.post_number} ", post_number: 1) }
let(:topic_3) { Fabricate(:topic, title: "nothing to do with the weather")}
let(:post_6) { Fabricate(:post, topic: topic_3, raw: "cars go fast", post_number: 1) }

Expand Down Expand Up @@ -42,10 +42,11 @@
expect(topic_1).not_to be_nil
expect(topic_2).not_to be_nil
expect(topic_3).not_to be_nil
expect(subject.process(args)[:topic_ids_found]).to eq([])
expect(subject.process(args)[:topic_ids_found]).to eq([post_2.topic_id])
expect(subject.process(args)[:post_ids_found]).to include(post_5.id)
expect(subject.process(args)[:post_ids_found]).to include(post_3.id)
expect(subject.process(args)[:post_ids_found]).not_to include(post_2.id)
expect(subject.process(args)[:post_ids_found]).to include(post_2.id)
expect(subject.process(args)[:post_ids_found]).not_to include(post_4.id)
expect(subject.process(args)[:result]).to include(post_3.raw)
end

Expand All @@ -71,4 +72,8 @@
expect(subject.process(args)[:result]).not_to include(topic_3.title)
expect(subject.process(args)[:result]).not_to include(post_4.raw)
end

it "finds urls with a post id" do
# post_5's raw text embeds a /t/slug/<topic_id>/<post_number> link built from
# post_2, so the helper should return post_2's topic id and post id, each
# wrapped in its own array (topic ids first, then post ids).
expect(subject.find_post_and_topic_ids_from_raw_urls(post_5.raw)).to eq([[post_2.topic_id], [post_2.id]])
end
end

0 comments on commit 1947198

Please sign in to comment.