mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 08:16:42 +08:00
@@ -431,8 +431,7 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
self.share_inputs["is_block_step"][idx : idx + 1] = False
|
||||
continue
|
||||
|
||||
if len(request.eos_token_ids) < self.parallel_config.eos_tokens_lens:
|
||||
request.eos_token_ids.append(request.eos_token_ids[0])
|
||||
assert len(request.eos_token_ids) == self.model_config.eos_tokens_lens
|
||||
self.share_inputs["eos_token_id"][:] = np.array(request.eos_token_ids, dtype="int64").reshape(-1, 1)
|
||||
|
||||
self.share_inputs["top_p"][idx : idx + 1] = request.get("top_p", 0.7)
|
||||
@@ -472,8 +471,7 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
idx = request.idx
|
||||
length = request.prompt_token_ids_len
|
||||
self.share_inputs["input_ids"][idx : idx + 1, :length] = np.array(request.prompt_token_ids)
|
||||
if len(request.eos_token_ids) < self.parallel_config.eos_tokens_lens:
|
||||
request.eos_token_ids.append(request.eos_token_ids[0])
|
||||
assert len(request.eos_token_ids) == self.model_config.eos_tokens_lens
|
||||
self.share_inputs["eos_token_id"][:] = np.array(request.eos_token_ids, dtype="int64").reshape(-1, 1)
|
||||
self.share_inputs["pre_ids"][idx : idx + 1] = -1
|
||||
self.share_inputs["top_p"][idx : idx + 1] = request.get("top_p", 0.7)
|
||||
@@ -543,10 +541,10 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
)
|
||||
self.share_inputs["input_ids"] = paddle.full(
|
||||
[max_num_seqs, self.parallel_config.max_model_len],
|
||||
self.parallel_config.pad_token_id,
|
||||
self.model_config.pad_token_id,
|
||||
dtype="int64",
|
||||
)
|
||||
self.share_inputs["eos_token_id"] = paddle.full([self.parallel_config.eos_tokens_lens, 1], 0, dtype="int64")
|
||||
self.share_inputs["eos_token_id"] = paddle.full([self.model_config.eos_tokens_lens, 1], 0, dtype="int64")
|
||||
self.share_inputs["top_p"] = paddle.full([max_num_seqs, 1], self.model_config.top_p, dtype="float32")
|
||||
self.share_inputs["top_k"] = paddle.full([max_num_seqs, 1], 0, dtype="int64")
|
||||
self.share_inputs["min_p"] = paddle.full([max_num_seqs, 1], 0.0, dtype="float32")
|
||||
@@ -815,7 +813,9 @@ class XPUModelRunner(ModelRunnerBase):
|
||||
for i in range(batch_size):
|
||||
idx = i
|
||||
self.share_inputs["input_ids"][idx : idx + 1, :input_length] = np.array([5] * input_length)
|
||||
self.share_inputs["eos_token_id"][:] = np.array([2], dtype="int64").reshape(-1, 1)
|
||||
self.share_inputs["eos_token_id"][:] = np.array(
|
||||
[2] * self.model_config.eos_tokens_lens, dtype="int64"
|
||||
).reshape(-1, 1)
|
||||
self.seq_lens_this_time_buffer[idx : idx + 1] = input_length
|
||||
self.share_inputs["step_seq_lens_encoder"][idx : idx + 1] = input_length
|
||||
self.share_inputs["seq_lens_encoder"][idx : idx + 1] = input_length
|
||||
|
Reference in New Issue
Block a user