mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-26 12:31:27 +08:00
[XPU] Enable XPU V1 mode based on environment variable (#4213)
* Enable XPU V1 mode based on environment variable
* Add default param to xft_moe_fc_block_eb for latest xvllm compatibility; update run_ci_xpu to use latest xvllm
This commit is contained in:
@@ -72,6 +72,7 @@ void MoeExpertFFNImpl(xftblock::Tensor* ffn_in,
|
||||
is_padding_input ? token_num_info : nullptr,
|
||||
expert_num,
|
||||
1, // moe_topk
|
||||
0, // group_size
|
||||
ffn1_out_shape.size() == 2 ? xftblock::MoeFCInputMode::DENSE
|
||||
: xftblock::MoeFCInputMode::SPARSE);
|
||||
PD_CHECK(ret == 0);
|
||||
@@ -134,6 +135,7 @@ void MoeExpertFFNImpl(xftblock::Tensor* ffn_in,
|
||||
is_padding_input ? token_num_info : nullptr,
|
||||
expert_num,
|
||||
1, // moe_topk
|
||||
0, // group_size
|
||||
ffn1_out_shape.size() == 2
|
||||
? xftblock::MoeFCInputMode::DENSE
|
||||
: xftblock::MoeFCInputMode::SPARSE); // bias_mode
|
||||
|
@@ -424,7 +424,7 @@ class EngineArgs:
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
|
||||
if self.splitwise_role != "mixed" and self.cache_transfer_protocol != "rdma":
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
|
||||
- if not current_platform.is_cuda():
|
||||
+ if not current_platform.is_cuda() and not current_platform.is_xpu():
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
|
||||
if self.guided_decoding_backend != "off":
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
|
||||
|
@@ -778,7 +778,7 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
|
||||
if args.splitwise_role != "mixed" and args.cache_transfer_protocol != "rdma":
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
|
||||
- if not current_platform.is_cuda():
|
||||
+ if not current_platform.is_cuda() and not current_platform.is_xpu():
|
||||
logger.info("Set ENABLE_V1_KVCACHE_SCHEDULER to 0 due to not supported.")
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
|
||||
if parallel_config.guided_decoding_backend != "off":
|
||||
|
@@ -24,10 +24,7 @@ python -m pip install paddlepaddle-xpu -i https://www.paddlepaddle.org.cn/packag
|
||||
echo "build whl"
|
||||
bash custom_ops/xpu_ops/download_dependencies.sh develop
|
||||
export CLANG_PATH=$(pwd)/custom_ops/xpu_ops/third_party/xtdk
|
||||
- # export XVLLM_PATH=$(pwd)/custom_ops/xpu_ops/third_party/xvllm
|
||||
- # 由于xvllm更新导致编译报错暂时锁定xvllm版本
|
||||
- wget https://klx-sdk-release-public.su.bcebos.com/xinfer/daily/eb/20250921/output.tar.gz --no-proxy && tar xf output.tar.gz && mv output xvllm
|
||||
- export XVLLM_PATH=${PWD}/xvllm
|
||||
+ export XVLLM_PATH=$(pwd)/custom_ops/xpu_ops/third_party/xvllm
|
||||
bash build.sh || exit 1
|
||||
|
||||
echo "pip others"
|
||||
|
Reference in New Issue
Block a user