# bot.py
import os
import sys
import logging
import re
import openai
import zulip
from dotenv import load_dotenv
import tiktoken
import sqlite3
import datetime
# Load the .env file
load_dotenv()
# Set up logging
LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
logging.basicConfig(level=LOGLEVEL)
# Keep the SQLite database under data/; migrate an old top-level data.db if present
if not os.path.exists('data'):
    os.makedirs('data')
if os.path.isfile('data.db'):
    os.rename('data.db', 'data/data.db')
db_file = 'data.db' if os.path.isfile('data.db') else 'data/data.db'
conn = sqlite3.connect(db_file)
cur = conn.cursor()
# Set up GPT-3 API key
openai.api_key = os.environ['OPENAI_API_KEY']
# Set up Zulip client
client = zulip.Client(config_file=".zuliprc")
PERMISSIONS_SET_CONTEXT = os.environ['PERMISSIONS_SET_CONTEXT']
DEFAULT_MODEL_NAME = os.environ['DEFAULT_MODEL_NAME']
BOT_NAME = os.environ['BOT_NAME']
VERSION = "1.2.0"
contexts = {}

def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
    """Returns the number of tokens used by a list of messages."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    # note: future models may deviate from this
    if model.startswith("gpt-3") or model.startswith("gpt-4"):
        num_tokens = 0
        for message in messages:
            # every message follows <im_start>{role/name}\n{content}<im_end>\n
            num_tokens += 4
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":  # if there's a name, the role is omitted
                    num_tokens += -1  # role is always required and always 1 token
        num_tokens += 2  # every reply is primed with <im_start>assistant
        return num_tokens
    else:
        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
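
# Send a reply back over Zulip: as a private message for PMs, otherwise to the
# originating stream and topic.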
def send_reply(reply, message):
    if message['type'] == 'private':
        response = {
            'type': 'private',
            'to': message['sender_email'],
            'content': reply,
        }
    else:
        response = {
            'type': 'stream',
            'to': message['display_recipient'],
            'subject': message['subject'],
            'content': reply,
        }
    client.send_message(response)
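
# Call the OpenAI chat completion endpoint and return the assistant's reply,
# or a generic error message if the request fails.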
def get_gpt_response(messages, model=DEFAULT_MODEL_NAME):
    try:
        response = openai.ChatCompletion.create(
            model=model,
            messages=messages,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        logging.error(e)
        return "OpenAI API error. Please try again later."

def print_help(msg):
    # return multiline string with help message
    help_message = """# ChatGPT Assistant

This is a chatbot assistant that uses OpenAI's ChatGPT API to generate responses to your messages.

## How to use

To use the bot, simply mention it in a message, e.g. @**{bot}** hello!. The bot will then generate a response and send it back to you.
You can also write a private message to the bot without mentioning it.

## Subcommands

Subcommands are words starting with an exclamation mark, e.g. `!new`.
You can use the following subcommands to control the bot:

### General:
- `!help` - show this help message

### Context:
- `!topic` - use context from the current topic (default behaviour; subcommand not implemented/needed)
- `!stream` - use context from the current stream
- `!new` - start a new conversation with no previous context (by default the bot uses context from the previous conversation, which may affect the generated response)
- `!contexts` - list all available contexts (e.g. `!cicada`, `!frankie`) and their values

Example of a custom-defined context: `!cicada` - add system context for Cicada; this may provide more accurate responses

### Model (default depends on server settings):
- `!gpt3` - use GPT-3.5 Turbo (4K tokens, up to 2.5K for input)
- `!gpt4` - use GPT-4 (8K tokens, up to 6K for input)

### Global settings:
- `!set` - (not implemented yet) show current settings
- `!set context <name> <value>` - upsert a context like `!cicada`. Example: `!set context cicada Cicada is a business wallet`
- `!unset context <name>` - delete a context

### User settings (not implemented yet):
- `!me` - show your current settings
- `!me model gpt3` - set your default model to GPT-3.5 Turbo
- `!me model gpt4` - set your default model to GPT-4

## Example usage

- `@{bot} !gpt4 !stream Can you summarise previous messages?` - use GPT-4 and context from the current stream
- `@{bot} !new I have a question...` - start a new conversation using GPT-3.5 Turbo and no context (previous messages will be ignored)

Bot version: {version}
""".format(bot=BOT_NAME, version=VERSION)
    send_reply(help_message, msg)
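
# Extract subcommands: words starting with an exclamation mark, lowercased with the "!" stripped.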
def get_subcommands(content):
    content_chunks = content.strip().split()
    subcommands = [word.lower().replace("!", "")
                   for word in content_chunks if word.startswith("!")]
    return subcommands
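
# Strip the recognised subcommands from the message content before it is sent to the model.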
def remove_subcommands(content, subcommands):
    for subcommand in subcommands:
        content = re.sub(f"!{subcommand} ", "", content,
                         flags=re.IGNORECASE).strip()
        content = re.sub(f"!{subcommand}", "", content,
                         flags=re.IGNORECASE).strip()
    return content
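
# Build the chat history from earlier Zulip messages (the same PM thread, or the same
# stream/topic unless !stream is given), inserting them between the system prompt and
# the latest user message until the model's input token limit is reached.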
def with_previous_messages(client, msg, messages, subcommands, token_limit, append_after_index):
    if msg['type'] == 'private':
        query = {
            'anchor': msg['id'],
            'num_before': 100,  # adjust this value as needed
            'num_after': 0,
            'apply_markdown': False,
            'include_anchor': False,
            'narrow': [{'operand': msg['sender_email'], 'operator': 'pm-with'}],
        }
    else:
        narrow = [
            {'operand': msg['display_recipient'], 'operator': 'stream'},
        ]
        # filter to topic by default
        if "stream" not in subcommands:
            narrow.append({'operand': msg['subject'], 'operator': 'topic'})
        query = {
            'anchor': msg['id'],
            'num_before': 100,  # adjust this value as needed
            'num_after': 0,
            'apply_markdown': False,
            'include_anchor': False,
            'narrow': narrow,
        }
    previous_messages = client.get_messages(query)['messages']
    previous_messages.reverse()
    new_messages = messages.copy()
    for msg in previous_messages:
        content = msg['content'].strip()
        # remove mentions of the bot
        content = re.sub(r"@\*\*{bot}\*\*".format(bot=BOT_NAME), "", content)
        content = content.strip()
        # get subcommands (words starting with exclamation mark)
        subcommands = get_subcommands(content)
        # don't remove in previous messages for now, as it breaks with some code blocks
        # content = remove_subcommands(content, subcommands)
        if client.email == msg['sender_email']:
            role = "assistant"
        else:
            role = "user"
        new_messages.insert(append_after_index, {
            "role": role, "content": content.strip()})
        tokens = num_tokens_from_messages(messages=new_messages)
        if tokens > token_limit:
            # remove the message that pushed us over the limit
            new_messages = new_messages[:append_after_index] + \
                new_messages[append_after_index + 1:]
            break
    return new_messages
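
# Check whether the sender of the message is a Zulip organization administrator.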
def is_admin(client, msg):
    member = client.get_user_by_id(msg['sender_id'])
    return member.get("user", {}).get("is_admin")
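
# Insert a named context or update its value if it already exists, then refresh the in-memory cache.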
def upsert_context(context_name, context_value):
    context_exists = cur.execute(
        "SELECT * FROM contexts WHERE name = ?", (context_name,)).fetchone()
    if context_exists:
        cur.execute("UPDATE contexts SET value = ? WHERE name = ?",
                    (context_value, context_name))
    else:
        cur.execute("INSERT INTO contexts (name, value) VALUES (?, ?)",
                    (context_name, context_value))
    conn.commit()
    refetch_contexts()

def delete_context(context_name):
    cur.execute("DELETE FROM contexts WHERE name = ?", (context_name,))
    conn.commit()
    refetch_contexts()

def refetch_contexts():
    global contexts
    contexts = cur.execute("SELECT * FROM contexts").fetchall()
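
# Handle `!set context <name> <value>`: check permissions, reject reserved names, then upsert the context.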
def process_set_subcommands(client, msg, messages, subcommands, content):
    content_chunks = content.strip().split()
    command = content_chunks[0].lower()
    if command == "context":
        if PERMISSIONS_SET_CONTEXT == "admin" and not is_admin(client, msg):
            send_reply("Sorry, only admins can un/set contexts", msg)
            return
        context_name = content_chunks[1].lower()
        disabled_contexts = ["topic", "stream", "new", "help",
                             "contexts", "gpt3", "gpt4", "set", "unset", "me", "admin", "stats"]
        if context_name in disabled_contexts:
            send_reply(f"Sorry, you can't set context for {context_name}", msg)
            return
        context_value = " ".join(content_chunks[2:])
        upsert_context(context_name, context_value)
        send_reply(f"I have set !{context_name} to: {context_value}", msg)
def process_unset_subcommands(client, msg, messages, subcommands, content):
    content_chunks = content.strip().split()
    command = content_chunks[0].lower()
    if command == "context":
        if PERMISSIONS_SET_CONTEXT == "admin" and not is_admin(client, msg):
            send_reply("Sorry, only admins can un/set contexts", msg)
            return
        context_name = content_chunks[1].lower()
        delete_context(context_name)
        send_reply(f"I have unset !{context_name}", msg)
def handle_message(event):
    global contexts
    logging.debug("Handling event type: {type}".format(type=event['type']))
    if event['type'] != 'message':
        return
    msg = event['message']
    content = msg['content'].strip()
    if msg['sender_email'] == client.email:
        logging.debug("Ignoring message sent by myself")
        return
    if msg['type'] != 'private' and not re.search(r"@\*\*{bot}\*\*".format(bot=BOT_NAME), content) and not re.search("@{bot}".format(bot=BOT_NAME), content):
        logging.debug(
            "Ignoring message not mentioning the bot or sent in private")
        return
    # get subcommands (words starting with exclamation mark)
    subcommands = get_subcommands(content)
    current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    logging.info("%s Prompt from %s with subcommands: %s is: %s",
                 str(current_time), str(msg['sender_email']), ", ".join(subcommands), content)
    # first get rid of the command or mention trigger
    content = re.sub(r"@\*\*{bot}\*\*".format(bot=BOT_NAME), "", content)
    content = re.sub("@{bot}".format(bot=BOT_NAME), "", content)
    content = content.strip()
    content = remove_subcommands(content, subcommands)
    if subcommands and "help" in subcommands:
        print_help(msg)
        return
    model_tokens = {
        # input limit for GPT-3.5 Turbo (context 4k, prompt 2.5k, response 1.5k)
        'gpt-3.5-turbo': 2500,
        'gpt-3.5-turbo-0301': 2500,
        # input limit for GPT-4 (context 8k, prompt 6k, response 2k)
        'gpt-4': 6000,
        'gpt-4-0314': 6000,
        'gpt-4-0613': 6000,
    }
    model = DEFAULT_MODEL_NAME or 'gpt-3.5-turbo'
    # available_models = ['gpt-3.5-turbo', 'gpt-3.5-turbo-0301', 'gpt4']
    # TODO get default model from settings or check !settings
    if "gpt3" in subcommands:
        model = 'gpt-3.5-turbo'
    elif "gpt4" in subcommands:
        model = 'gpt-4'
    token_limit = model_tokens[model]
    messages = [
        {"role": "system", "content": os.environ['BOT_ROLE']},
        {"role": "user", "content": f"{content}"},
    ]
    context_names = [context[0] for context in contexts]
    context_map = {context[0]: context[1] for context in contexts}
    if "contexts" in subcommands:
        help_message = "Available contexts:\n"
        for context_name, context_value in contexts:
            help_message += f"- `!{context_name}`: {context_value}\n"
        send_reply(help_message, msg)
        return
    if "me" in subcommands:
        send_reply("This functionality is not implemented yet.", msg)
        return
    if "set" in subcommands:
        process_set_subcommands(client, msg, messages, subcommands, content)
        return
    if "unset" in subcommands:
        process_unset_subcommands(client, msg, messages, subcommands, content)
        return
    # new messages items will be appended after this index
    # as we add custom role: system messages here
    # and then add history messages later too between system and latest user message
    append_after_index = 1
    # iterate context_names and check if any of them is in subcommands
    for context_name in context_names:
        if context_name in subcommands:
            context_value = context_map[context_name]
            messages.insert(append_after_index, {
                "role": "system", "content": f"{context_value}"})
            append_after_index += 1
    if not subcommands or "new" not in subcommands:
        messages = with_previous_messages(
            client, msg, messages, subcommands, token_limit, append_after_index)
    response = get_gpt_response(messages, model=model)
    send_reply(response, msg)
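
# Initialise the database and context cache, verify the Zulip credentials, then start the event loop.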
def main():
    global contexts
    logging.info("Initiate DB...")
    cur.execute("CREATE TABLE IF NOT EXISTS contexts(name PRIMARY KEY, value)")
    refetch_contexts()
    logging.info("Contexts")
    logging.info(contexts)
    result = client.get_profile()
    logging.debug(result)
    if result.get('code') == 'UNAUTHORIZED':
        logging.error("Invalid API key")
        sys.exit(1)
    logging.info("Starting the GPT Zulip bot named: {bot}".format(bot=BOT_NAME))
    client.call_on_each_event(handle_message, event_types=['message'])

if __name__ == "__main__":
    main()