mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Feature] [PD] add simple router and refine splitwise deployment (#4709)
* add simple router and refine splitwise deployment * fix
This commit is contained in:
@@ -475,8 +475,10 @@ class PaddleDisWorkerProc:
|
||||
|
||||
# Execute model to generate token. The generated token will be written to the buffer.
|
||||
# These generated tokens can be obtained through get_output op.
|
||||
start_execute_time = time.time()
|
||||
self.worker.execute_model(req_dicts, num_running_requests)
|
||||
self.exist_prefill_task_signal.value[0] = self.worker.exist_prefill()
|
||||
logger.debug(f"execute model cost: {time.time()-start_execute_time:.5f} s")
|
||||
|
||||
def initialize_kv_cache(self) -> None:
|
||||
"""Profiles the peak memory usage of the model to determine how many
|
||||
|
||||
Reference in New Issue
Block a user