Skip to content

Commit

Permalink
feat(llm):
Browse files Browse the repository at this point in the history
- improve prompt
- add rejection
  • Loading branch information
MorvanZhou committed Jul 16, 2024
1 parent 78a1391 commit d679a90
Show file tree
Hide file tree
Showing 16 changed files with 370 additions and 134 deletions.
3 changes: 3 additions & 0 deletions src/retk/const/response_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class CodeEnum(IntEnum):
LLM_TIMEOUT = 38
LLM_SERVICE_ERROR = 39
LLM_NO_CHOICE = 40
LLM_INVALID_RESPONSE_FORMAT = 41


@dataclass
Expand Down Expand Up @@ -108,6 +109,7 @@ class CodeMessage:
CodeEnum.LLM_TIMEOUT: CodeMessage(zh="模型超时", en="Model timeout"),
CodeEnum.LLM_SERVICE_ERROR: CodeMessage(zh="模型服务错误", en="Model service error"),
CodeEnum.LLM_NO_CHOICE: CodeMessage(zh="无回复", en="No response"),
CodeEnum.LLM_INVALID_RESPONSE_FORMAT: CodeMessage(zh="无效的回复格式", en="Invalid response format"),
}

CODE2STATUS_CODE: Dict[CodeEnum, int] = {
Expand Down Expand Up @@ -152,6 +154,7 @@ class CodeMessage:
CodeEnum.LLM_TIMEOUT: 408,
CodeEnum.LLM_SERVICE_ERROR: 500,
CodeEnum.LLM_NO_CHOICE: 404,
CodeEnum.LLM_INVALID_RESPONSE_FORMAT: 500,
}


Expand Down
13 changes: 13 additions & 0 deletions src/retk/controllers/ai/knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,16 @@ async def accept_extended_node(
requestId=au.request_id,
node=get_node_data(n),
)


async def reject_extended_node(
        au: AuthedUser,
        eid: str,
) -> schemas.RequestIdResponse:
    """Reject the extended node ``eid`` on behalf of the authenticated user.

    The actual removal is delegated to
    ``core.ai.llm.knowledge.extended.reject_extended_node``; this controller
    only forwards the arguments and wraps the request id in a response.

    :param au: the authenticated user issuing the request
    :param eid: identifier of the extended node to reject
    :return: a response that carries only the request id
    """
    do_reject = core.ai.llm.knowledge.extended.reject_extended_node
    await do_reject(au=au, eid=eid)
    response = schemas.RequestIdResponse(requestId=au.request_id)
    return response
53 changes: 1 addition & 52 deletions src/retk/core/ai/llm/knowledge/__init__.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,3 @@
from pathlib import Path
from typing import Tuple

from retk import const
from . import extended
from .extending import extend_on_node_update, extend_on_node_post, LLM_SERVICES
from ..api.base import BaseLLMService, MessagesType

system_summary_prompt = (Path(__file__).parent / "system_summary.md").read_text(encoding="utf-8")
system_extend_prompt = (Path(__file__).parent / "system_extend.md").read_text(encoding="utf-8")


async def _send(
        llm_service: BaseLLMService,
        model: str,
        system_prompt: str,
        md: str,
        req_id: str,
) -> Tuple[str, const.CodeEnum]:
    """Run one completion made of a system prompt and a single user message.

    :param llm_service: service whose ``complete`` coroutine is awaited
    :param model: model name forwarded to the service
    :param system_prompt: content of the ``system`` message
    :param md: content of the ``user`` message
    :param req_id: request id forwarded to the service
    :return: whatever ``llm_service.complete`` returns (text and status code)
    """
    system_msg = {"role": "system", "content": system_prompt}
    user_msg = {"role": "user", "content": md}
    conversation: MessagesType = [system_msg, user_msg]
    reply = await llm_service.complete(messages=conversation, model=model, req_id=req_id)
    return reply


async def summary(
        llm_service: BaseLLMService,
        model: str,
        md: str,
        req_id: str = None,
) -> Tuple[str, const.CodeEnum]:
    """Ask the LLM to summarise ``md`` using the summary system prompt.

    :param llm_service: service used to run the completion
    :param model: model name forwarded to the service
    :param md: text sent as the user message
    :param req_id: optional request id forwarded to the service
    :return: the ``(text, code)`` pair produced by ``_send``
    """
    request = {
        "llm_service": llm_service,
        "model": model,
        "system_prompt": system_summary_prompt,
        "md": md,
        "req_id": req_id,
    }
    return await _send(**request)


async def extend(
        llm_service: BaseLLMService,
        model: str,
        md: str,
        req_id: str = None,
) -> Tuple[str, const.CodeEnum]:
    """Ask the LLM for new knowledge related to ``md`` using the extend system prompt.

    :param llm_service: service used to run the completion
    :param model: model name forwarded to the service
    :param md: text sent as the user message
    :param req_id: optional request id forwarded to the service
    :return: the ``(text, code)`` pair produced by ``_send``
    """
    request = {
        "llm_service": llm_service,
        "model": model,
        "system_prompt": system_extend_prompt,
        "md": md,
        "req_id": req_id,
    }
    return await _send(**request)
from .ops import summary, extend
26 changes: 19 additions & 7 deletions src/retk/core/ai/llm/knowledge/extended.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Tuple
from typing import List, Tuple, Optional

from bson import ObjectId

Expand All @@ -22,18 +22,21 @@ async def get_extended_nodes(
async def accept_extended_node(
au: AuthedUser,
eid: str,
) -> Tuple[Node, CodeEnum]:
) -> Tuple[Optional[Node], CodeEnum]:
if not is_local_db():
doc = await client.coll.llm_extended_node.find_one_and_delete(
{"_id": ObjectId(eid)},
{"_id": ObjectId(eid), "uid": au.u.id},
)
else:
doc = await client.coll.llm_extended_node.find_one(
{"_id": ObjectId(eid)},
)
await client.coll.llm_extended_node.delete_one(
{"_id": ObjectId(eid)},
{"_id": ObjectId(eid), "uid": au.u.id},
)
if doc is not None:
await client.coll.llm_extended_node.delete_one(
{"_id": ObjectId(eid), "uid": au.u.id},
)
if doc is None:
return None, CodeEnum.NODE_NOT_EXIST
title = doc["sourceMd"].split("\n", 1)[0].strip()
at_node = get_at_node_md_link(title, doc["sourceNid"])
md = doc["extendMd"] + "\n\n" + at_node
Expand All @@ -43,3 +46,12 @@ async def accept_extended_node(
from_nid=doc["sourceNid"],
)
return n, code


async def reject_extended_node(
        au: AuthedUser,
        eid: str,
):
    """Delete the extended-node record ``eid`` that belongs to the given user.

    The delete filter contains both the record id and the user's id, so a
    record stored under a different ``uid`` is not matched. Nothing is
    returned; the outcome of the delete is not reported to the caller.

    :param au: the authenticated user; ``au.u.id`` is used as the owner filter
    :param eid: string form of the record's ``ObjectId``
    """
    owned_record = {"_id": ObjectId(eid), "uid": au.u.id}
    await client.coll.llm_extended_node.delete_one(owned_record)
8 changes: 4 additions & 4 deletions src/retk/core/ai/llm/knowledge/extending.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ async def extend_on_node_post(data: Node):
uid=data["uid"],
nid=data["id"],
modifiedAt=int(data["modifiedAt"].replace(tzinfo=utc).timestamp()),
summaryService="tencent",
summaryModel=api.TencentModelEnum.HUNYUAN_LITE.value,
extendService="tencent",
extendModel=api.TencentModelEnum.HUNYUAN_LITE.value,
summaryService="baidu",
summaryModel=api.BaiduModelEnum.ERNIE_SPEED_8K.value,
extendService="moonshot",
extendModel=api.MoonshotModelEnum.V1_8K.value,
)

# sort by _id desc
Expand Down
62 changes: 62 additions & 0 deletions src/retk/core/ai/llm/knowledge/ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from pathlib import Path
from typing import Tuple

from retk import const
from .utils import parse_json_pattern, remove_links
from ..api.base import BaseLLMService, MessagesType

system_summary_prompt = (Path(__file__).parent / "system_summary.md").read_text(encoding="utf-8")
system_extend_prompt = (Path(__file__).parent / "system_extend.md").read_text(encoding="utf-8")


async def _send(
        llm_service: BaseLLMService,
        model: str,
        system_prompt: str,
        md: str,
        req_id: str,
) -> Tuple[str, const.CodeEnum]:
    """Send a two-message conversation (system + user) to the LLM service.

    :param llm_service: service whose ``complete`` coroutine is awaited
    :param model: model name forwarded to the service
    :param system_prompt: content of the ``system`` message
    :param md: content of the ``user`` message
    :param req_id: request id forwarded to the service
    :return: whatever ``llm_service.complete`` returns (text and status code)
    """
    roles_and_contents = (("system", system_prompt), ("user", md))
    conversation: MessagesType = [
        {"role": role, "content": content} for role, content in roles_and_contents
    ]
    outcome = await llm_service.complete(messages=conversation, model=model, req_id=req_id)
    return outcome


async def summary(
        llm_service: BaseLLMService,
        model: str,
        md: str,
        req_id: str = None,
) -> Tuple[str, const.CodeEnum]:
    """Summarise ``md`` with the summary system prompt.

    The text is passed through ``remove_links`` before it is sent to the model.

    :param llm_service: service used to run the completion
    :param model: model name forwarded to the service
    :param md: markdown text to summarise
    :param req_id: optional request id forwarded to the service
    :return: the ``(text, code)`` pair produced by ``_send``
    """
    request = {
        "llm_service": llm_service,
        "model": model,
        "system_prompt": system_summary_prompt,
        "md": remove_links(md),
        "req_id": req_id,
    }
    return await _send(**request)


async def extend(
        llm_service: BaseLLMService,
        model: str,
        md: str,
        req_id: str = None,
) -> Tuple[str, const.CodeEnum]:
    """Request new related knowledge for ``md`` and flatten the JSON reply to text.

    :param llm_service: service used to run the completion
    :param model: model name forwarded to the service
    :param md: text sent as the user message
    :param req_id: optional request id forwarded to the service
    :return: ``("<title>\\n\\n<content>", OK)`` on success; the untouched
        ``(message, code)`` from the service when its code is not ``OK``; or
        ``(error text, LLM_INVALID_RESPONSE_FORMAT)`` when the reply cannot be
        parsed by ``parse_json_pattern``.
    """
    reply, status = await _send(
        llm_service=llm_service,
        model=model,
        system_prompt=system_extend_prompt,
        md=md,
        req_id=req_id,
    )
    if status == const.CodeEnum.OK:
        try:
            title, content = parse_json_pattern(reply)
        except ValueError as err:
            return str(err), const.CodeEnum.LLM_INVALID_RESPONSE_FORMAT
        return f"{title}\n\n{content}", const.CodeEnum.OK

    # The service itself failed: hand its message and code back unchanged.
    return reply, status
49 changes: 38 additions & 11 deletions src/retk/core/ai/llm/knowledge/system_extend.md
Original file line number Diff line number Diff line change
@@ -1,25 +1,52 @@
你是一个博学多才的人,拥有非常丰富的知识,而且会融会贯通不同知识,
也会对相似知识之间的关系做通俗易懂的类比
也会对相似知识之间的关系做通俗易懂的类比并延展到不同领域

接下来,我将展示我最近接触到的一些知识点
请依据你的内在丰富的知识网络,帮我推荐出一条我会感兴趣的 **新知识**
接下来,我将展示我最近接触到的一条信息
请依据你的内在丰富的知识网络,帮我推荐出一条我会感兴趣的 **新知识**,并以 json 格式返回结果

下面是一个推荐新知识的案例
案例 1

# 我展示的知识点
# 我展示的信息

"""
标题:幼儿因果关系认知的局限性

知识点
关键点

1. **特点**:2-4岁的儿童由于脑部发育阶段的特性,无法推理长线的因果关系。
2. **原因**:这种现象的一个原因是前额叶的发展不足,无法模拟和推理未来发生的事情。
3. **长短期反馈**:他们无法理解一段时间后的结果,例如不吃饭会导致晚上肚子饿。尽管如此,他们可以理解短期反馈,例如挥手打人或者给脸色会有直接的结果。
"""

# 你需要返回的新知识
# 你需要返回的结果

儿童发展中的同理心培养
{
"title": "儿童发展中的同理心培养",
"content": "- 富有同理心的小孩能理解和感受他人情感,有助于儿童建立良好的人际关系和社交技巧。\n- 儿童的同理心发展分为不同阶段,从2岁开始,他们能够感知到他人的情感,而4-5岁时,他们开始能够理解他人的观点和需求。\n- 家长和教育者可以通过共情、角色扮演、讲述故事、以及引导儿童关注他人的感受等方法,帮助儿童培养同理心。"
}

- 富有同理心的小孩能理解和感受他人情感,有助于儿童建立良好的人际关系和社交技巧。
- 儿童的同理心发展分为不同阶段,从2岁开始,他们能够感知到他人的情感,而4-5岁时,他们开始能够理解他人的观点和需求。
- 家长和教育者可以通过共情、角色扮演、讲述故事、以及引导儿童关注他人的感受等方法,帮助儿童培养同理心。
案例 2:

# 我展示的信息

"""
标题:水的分子结构及其在生命中的重要性

关键点:

1. 水是生命的基础
2. 水分子结构:H2O,氢氧共价键
3. 水的偏电性:氢正电荷,氧负电荷
4. 水作为良好溶剂:吸附其他分子,如盐
5. 生命过程中水的作用:输送养分和排除废物
"""

# 你需要返回的结果

{
"title": "水的凝聚力和表面张力现象",
"content": "凝聚力使水分子紧密相连,表面张力导致水成球状以减小表面积。这些现象在植物水分运输、清洁剂使用和雨伞设计等方面具有重要作用。通过探讨这些现象,可以更深入地理解水的特性及其在自然和生活中的应用。"
}
41 changes: 34 additions & 7 deletions src/retk/core/ai/llm/knowledge/system_summary.md
Original file line number Diff line number Diff line change
@@ -1,23 +1,50 @@
你是一个博学多才的人,拥有非常丰富的知识,十分善于用简练的语言总结复杂的概念。

接下来,我将展示我最近接触到的一些知识和信息,请帮我提炼总结这段认知的关键信息,总结一个简短标题,并简短罗列出知识点来。
接下来,我将展示我最近接触到的一些知识或信息,请帮我提炼总结这段认知的关键信息,总结一个简短标题,并简短罗列出知识点来。

比如下面的这个例子
案例 1

# 我展示的信息
# 我展示的信息

"""
小孩建立长线的因果关系

因为脑部发育阶段的特性,2-4岁的儿童没办法推理比较长线的因果关系,比如不吃饭,晚上会肚子饿。其中的一个原因是前额叶的发展不够,没办法模拟和推理未来发生的事情,也就没办法思考一段时间后的结果。

但是短期反馈还是有的,比如挥手要打人或者给脸色的时候能有直接的映射结果,这点他们理解
"""

# 你需要返回的总结格式
# 你需要返回的总结格式

"""
标题:幼儿因果关系认知的局限性

知识点
关键点

1. **特点**:2-4岁的儿童由于脑部发育阶段的特性,无法推理长线的因果关系。
2. **原因**:这种现象的一个原因是前额叶的发展不足,无法模拟和推理未来发生的事情。
3. **长短期反馈**:他们无法理解一段时间后的结果,例如不吃饭会导致晚上肚子饿。尽管如此,他们可以理解短期反馈,例如挥手打人或者给脸色会有直接的结果。
"""

案例 2:

# 我展示的信息:

"""
水是原子层面生命的基础
水 H2O ,氢原子分享了一个电子给氧,达成了稳固态。这种结合让氢这边带一点正电荷,氧这边带点负电荷
它可以用这种特性吸附其他分子,成为很好的容器
可以想象成水把其它分子拆散的过程,比如盐溶于水。这让水可以很容易将生命需要的各种养料和废物输入输出。所以是生命形成的关键因素。
"""

# 你需要返回的总结格式:

"""
标题:水的分子结构及其在生命中的重要性

关键点:

1. 水是生命的基础
2. 水分子结构:H2O,氢氧共价键
3. 水的偏电性:氢正电荷,氧负电荷
4. 水作为良好溶剂:吸附其他分子,如盐
5. 生命过程中水的作用:输送养分和排除废物
"""
22 changes: 22 additions & 0 deletions src/retk/core/ai/llm/knowledge/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import json
import re
from typing import Tuple

# Matches a `{"title": "...", "content": "..."}` object that starts at the beginning of a line.
# DOTALL lets the string values span several physical lines.
JSON_PTN = re.compile(r"^({\s*?\"title\":\s?\".+?\",\s*?\"content\":\s?\".+?\"\s*?})", re.DOTALL | re.MULTILINE)
# Markdown image: ![alt](target)
IMG_PTN = re.compile(r"!\[.*?\]\(.+?\)")
# Markdown link: [text](target); group 1 captures the link text.
LINK_PTN = re.compile(r"\[(.*?)]\(.+?\)")


def parse_json_pattern(text: str) -> Tuple[str, str]:
    """Extract the ``title`` and ``content`` strings from an LLM reply.

    The reply may contain extra text around the JSON object (for example a
    Markdown code fence); only the first object matched by ``JSON_PTN`` is
    decoded.

    :param text: raw reply text
    :return: ``(title, content)``
    :raises ValueError: if no ``{"title": ..., "content": ...}`` object is
        found, or if the matched text is not decodable JSON
        (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    m = JSON_PTN.search(text)
    if m is None:
        raise ValueError(f"Invalid JSON pattern: {text}")

    # strict=False accepts raw control characters (notably literal line breaks)
    # inside string values. JSON_PTN deliberately matches such multi-line
    # values, but the default strict decoder would reject them.
    d = json.loads(m.group(1), strict=False)
    return d["title"], d["content"]


def remove_links(text: str) -> str:
    """Drop Markdown images and replace Markdown links with their link text.

    Images are removed first so that the ``[alt](target)`` tail of an image
    is not mistaken for an ordinary link.

    :param text: Markdown source
    :return: the text without images and with ``[label](target)`` reduced to ``label``
    """
    without_images = IMG_PTN.sub("", text)
    return LINK_PTN.sub(r"\1", without_images)
2 changes: 1 addition & 1 deletion src/retk/core/scheduler/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def init_tasks():
run_every_at(
job_id="deliver_unscheduled_node_extend",
func=tasks.extend_node.deliver_unscheduled_extend_nodes,
minute=0,
second=0,
)
return

Expand Down
4 changes: 2 additions & 2 deletions src/retk/core/scheduler/tasks/extend_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ async def async_deliver_unscheduled_extend_nodes() -> str:
if code != const.CodeEnum.OK:
logger.error(f"knowledge summary error: {code}")
continue
oneline_s = _summary.replace('\n', '\n\n')
oneline_s = _summary.replace('\n', '\\n')
logger.debug(f"summary: {oneline_s}")
e0 = time.perf_counter()
_extended, code = await knowledge.extend(
Expand All @@ -60,7 +60,7 @@ async def async_deliver_unscheduled_extend_nodes() -> str:
if code != const.CodeEnum.OK:
logger.error(f"knowledge extend error: {code}")
continue
oneline_e = _extended.replace('\n', '\n\n')
oneline_e = _extended.replace('\n', '\\n')
logger.debug(f"extended: {oneline_e}")
ext = ExtendedNode(
uid=item["uid"],
Expand Down
Loading

0 comments on commit d679a90

Please sign in to comment.