Mirror of https://github.com/PaddlePaddle/FastDeploy.git
(synced 2025-10-05 16:48:03 +08:00)
[Feature] Support return logprob of generated tokens (#2784)
* online chat support logprobs
* check xpu
* check vl_gpu_model_runner
* only cuda support logprob
* get_worker() check platform

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -720,7 +720,7 @@ class XPUModelRunner(ModelRunnerBase):

         # 4. Compute logits, Sample
         logits = self.model.compute_logits(hiddden_states)
-        sampled_token_ids = self.sampler(logits, self.sampling_metadata)
+        sampler_output = self.sampler(logits, self.sampling_metadata)

         # 5. Speculative decode
@@ -749,7 +749,7 @@ class XPUModelRunner(ModelRunnerBase):
             accept_tokens=None,
             accept_num=None,
         )
-        xpu_post_process(sampled_token_ids=sampled_token_ids,
+        xpu_post_process(sampled_token_ids=sampler_output.sampled_token_ids,
                          model_output=model_output_data)

         # 7. Updata 'infer_seed' and step_paddle()
Reference in New Issue
Block a user