mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
[Feature 2.0.2] support top_k_top_p sampling (#2789)
* support top_k_top_p sampling
* fix
* add api param
* add api para
* fix
* fix
* fix
* fix
* fix
* fix
* fix
* fix
* change func name
This commit is contained in:
@@ -13,10 +13,10 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import argparse
|
||||
|
||||
import numpy as np
|
||||
import paddle
|
||||
@@ -24,6 +24,9 @@ import paddle.distributed.fleet as fleet
|
||||
from paddleformers.transformers.model_utils import load_tp_checkpoint
|
||||
from safetensors import safe_open
|
||||
|
||||
from fastdeploy.config import (DeviceConfig, FDConfig, KVCacheConfig,
|
||||
LoadConfig, ModelConfig, MoEConfig, MoEPhase,
|
||||
ParallelConfig, SpeculativeConfig)
|
||||
from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer
|
||||
from fastdeploy.input.mm_processor import DataProcessor
|
||||
from fastdeploy.model_executor.layers.attention import get_attention_backend
|
||||
@@ -44,9 +47,6 @@ from fastdeploy.platforms import current_platform
|
||||
from fastdeploy.worker.forward_meta import ForwardMeta
|
||||
from fastdeploy.worker.utils import check_safetensors_model
|
||||
from fastdeploy.worker.vl_model_runner_base import VLModelRunnerBase
|
||||
from fastdeploy.config import (DeviceConfig, FDConfig, KVCacheConfig,
|
||||
LoadConfig, ModelConfig, MoEConfig,
|
||||
MoEPhase, ParallelConfig, SpeculativeConfig)
|
||||
|
||||
if current_platform.is_cuda() and current_platform.available():
|
||||
from fastdeploy.model_executor.layers.utils import (
|
||||
|
Reference in New Issue
Block a user