mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Perf] Support tensor transmission between worker and engine with zero-copy to improve efficiency (#4839)
* feat(zmq): support tensor transmission with zero-copy for improved efficiency
* perf: disable copy in zmq.send
* zmq recv data for debug
* convert logprobs tensor to cpu
This commit is contained in:
@@ -334,7 +334,9 @@ class Sampler(nn.Layer):
|
||||
else:
|
||||
indices = token_ids
|
||||
top_logprobs = token_logprobs
|
||||
|
||||
indices = indices.cpu()
|
||||
top_logprobs = top_logprobs.cpu()
|
||||
token_ranks = token_ranks.cpu()
|
||||
return LogprobsTensors(indices, top_logprobs, token_ranks)
|
||||
|
||||
def forward_cuda(
|
||||
|
||||
Reference in New Issue
Block a user