[Perf] Support zero-copy tensor transmission between worker and engine to improve efficiency (#4839)

* feat(zmq): support tensor transmission with zero-copy for improved efficiency

* perf: disable copy in zmq.send

* zmq recv data for debug

* convert logprobs tensor to cpu
This commit is contained in:
SunLei
2025-11-11 15:43:11 +08:00
committed by GitHub
parent 8b61f01c68
commit 3098aee05f
8 changed files with 23 additions and 18 deletions

View File

@@ -135,6 +135,7 @@ class SamplingParams:
reasoning_max_tokens=None,
min_tokens=1,
logprobs=None,
prompt_logprobs=None,
bad_words=None,
guided_decoding=None,
bad_words_token_ids=None,
@@ -158,6 +159,7 @@ class SamplingParams:
reasoning_max_tokens=reasoning_max_tokens,
min_tokens=min_tokens,
logprobs=logprobs,
prompt_logprobs=prompt_logprobs,
bad_words=bad_words,
guided_decoding=guided_decoding,
bad_words_token_ids=bad_words_token_ids,