Skip to content

Commit

Permalink
change scheduler
Browse files Browse the repository at this point in the history
  • Loading branch information
shihaobai authored Feb 11, 2025
1 parent 5640f9e commit 7b22df4
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 18 deletions.
37 changes: 31 additions & 6 deletions lightllm/server/core/objs/req.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,38 @@ def post_init(
return


class ChunkedPrefillReq(NormalReq):
class SplitFuseReq(Req):
_pack_ = 4

def get_tuple_tokens(self, is_busy, router_max_new_token_len):
    """
    Compute the ``(a_len, b_len)`` token pair for this request.

    Args:
        is_busy: when truthy, the full ``max_new_tokens`` is used as the
            output budget.
        router_max_new_token_len: lower bound for the output budget used
            when the request is neither ``ignore_eos`` nor busy.

    Returns:
        A tuple ``(a_len, b_len)`` where ``a_len`` is the larger of
        ``input_len + shm_cur_output_len + 1`` and ``shm_cur_kv_len + 1``,
        and ``b_len`` is the clamped remaining-token figure plus
        ``ADDED_OUTPUT_LEN``.
    """
    generated = self.shm_cur_output_len
    params = self.sample_params

    if params.ignore_eos or is_busy:
        # Both cases use the full requested output length.
        output_budget = params.max_new_tokens
    else:
        # Otherwise take the larger of 1.1x the tokens produced so far and
        # the router-supplied length, capped by max_new_tokens.
        floor_len = max(int(1.1 * generated), router_max_new_token_len)
        output_budget = min(params.max_new_tokens, floor_len)

    a_len = max(self.input_len + generated + 1, self.shm_cur_kv_len + 1)

    # Tokens not yet covered by shm_cur_kv_len, counted in units of
    # splitfuse_block_size and rounded up.
    block = self.splitfuse_block_size
    uncached = self.input_len + generated - self.shm_cur_kv_len
    pending_chunks = (uncached + block - 1) // block

    remaining = pending_chunks + output_budget - generated - 1
    b_len = max(0, remaining) + ADDED_OUTPUT_LEN

    return (a_len, b_len)

def get_decode_need_tokens(self):
    """
    Implementation for the splitfuse scheduling mode.

    Returns ``input_len + shm_cur_output_len - shm_cur_kv_len``, capped at
    ``splitfuse_block_size``.
    """
    outstanding = self.input_len + self.shm_cur_output_len - self.shm_cur_kv_len
    return min(outstanding, self.splitfuse_block_size)

def get_first_router_need_tokens(self):
need_tokens = min(self.chunked_prefill_size, self.remaining_prefill_size)
return need_tokens

def update_remaining_prefill_size(self):
self.remaining_prefill_size = max(0, self.remaining_prefill_size - self.chunked_prefill_size)
return
return min(self.input_len + self.shm_cur_output_len, self.splitfuse_block_size)
12 changes: 0 additions & 12 deletions lightllm/server/router/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,6 @@ def filter_out_finished_req(self, shm_req_manager: ShmReqManager):
self.id_to_reqs = {req.request_id: req for req in self.reqs}
return

def filter_out_chunked_req(self):
    """
    Split ``self.reqs`` by whether ``remaining_prefill_size`` is positive.

    Requests with ``remaining_prefill_size > 0`` are removed from the batch
    and returned; all others stay in ``self.reqs``. ``self.id_to_reqs`` is
    rebuilt from the requests that stay.

    Returns:
        The list of removed requests, in their original order.
    """
    still_prefilling = []
    kept = []  # requests whose remaining_prefill_size is not positive
    for item in self.reqs:
        bucket = still_prefilling if item.remaining_prefill_size > 0 else kept
        bucket.append(item)

    self.reqs = kept
    self.id_to_reqs = {item.request_id: item for item in kept}
    return still_prefilling

def pop_req(self, req_id):
self.reqs = [req for req in self.reqs if req.request_id != req_id]
self.id_to_reqs.pop(req_id)
Expand Down

0 comments on commit 7b22df4

Please sign in to comment.