mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Perf] Support tensor transmission between work and engine with zero-copy to improve efficiency (#4839)
* feat(zmq): support tensor transmission with zero-copy for improved efficiency
* perf: zmq.send disable copy
* zmq recv data for debug
* convert logprobs tensor to cpu
This commit is contained in:
@@ -2735,7 +2735,7 @@ class GPUModelRunner(ModelRunnerBase):
|
||||
|
||||
logprobs_tensors = self.in_progress_prompt_logprobs.get(req_id)
|
||||
if not logprobs_tensors:
|
||||
- logprobs_tensors = LogprobsTensors.empty(num_prompt_tokens - 1, num_prompt_logprobs + 1)
+ logprobs_tensors = LogprobsTensors.empty_cpu(num_prompt_tokens - 1, num_prompt_logprobs + 1)
|
||||
self.in_progress_prompt_logprobs[req_id] = logprobs_tensors
|
||||
start_idx = request.prefill_start_index
|
||||
start_tok = start_idx + 1
|
||||
|
||||
Reference in New Issue
Block a user