From f7e832efaf450fc3d063e9594a70482e39ed1824 Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 9 Dec 2025 11:51:00 +0800 Subject: [PATCH] [BugFix] fix mm cudagraph (#5266) * fix mm cudagraph * fix test_prompt_ids bug * update code * update ci code * update ci code * update ci code --- .github/workflows/_base_test.yml | 2 +- fastdeploy/config.py | 3 --- tests/ce/deploy/deploy.py | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/_base_test.yml b/.github/workflows/_base_test.yml index 60e650e91..d5dffb02d 100644 --- a/.github/workflows/_base_test.yml +++ b/.github/workflows/_base_test.yml @@ -209,7 +209,7 @@ jobs: export TEMPLATE=TOKEN_NORMAL curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \ -H "Content-Type: application/json" \ - -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-VL-28B-A3B-Thinking\", \"--reasoning-parser\": \"ernie-45-vl-thinking\", \"--tool-call-parser\": \"ernie-45-vl-thinking\", \"--tensor-parallel-size\": 1, \"--quantization\": \"wint4\", \"--max-model-len\": 131072, \"--max-num-seqs\": 32}" + -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-VL-28B-A3B-Thinking\", \"--reasoning-parser\": \"ernie-45-vl-thinking\", \"--tool-call-parser\": \"ernie-45-vl-thinking\", \"--tensor-parallel-size\": 1, \"--quantization\": \"wint4\", \"--max-model-len\": 131072, \"--max-num-seqs\": 32, \"--no-enable-prefix-caching\": true}" check_service 90 python -m pytest -sv test_prompt_ids.py || TEST_EXIT_CODE=1 diff --git a/fastdeploy/config.py b/fastdeploy/config.py index f1eb23852..11867a798 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1747,9 +1747,6 @@ class FDConfig: else: # It will hang when real batch_size < tp_size self.graph_opt_config.filter_capture_size(tp_size=self.parallel_config.tensor_parallel_size) - if self.model_config.enable_mm and self.graph_opt_config.use_cudagraph: - self.cache_config.enable_prefix_caching = False - logger.info("Multi-modal models do not support prefix caching when using CUDAGraph!") if self.scheduler_config.splitwise_role == "mixed": self._disable_sequence_parallel_moe_if_needed("Mixed") diff --git a/tests/ce/deploy/deploy.py b/tests/ce/deploy/deploy.py index be6a4f0bf..856a7b594 100644 --- a/tests/ce/deploy/deploy.py +++ b/tests/ce/deploy/deploy.py @@ -89,7 +89,7 @@ def build_command(config): # 添加配置参数 for key, value in config.items(): - if "--enable" in key: + if "--enable" in key or "--no-enable" in key: value = bool(value if isinstance(value, bool) else eval(value)) if value: cmd.append(key)