diff --git a/tests/xpu_ci/test_ep4tp1_online.py b/tests/xpu_ci/test_ep4tp1_online.py index 8acb5da3a..c248b24e2 100644 --- a/tests/xpu_ci/test_ep4tp1_online.py +++ b/tests/xpu_ci/test_ep4tp1_online.py @@ -63,6 +63,7 @@ def test_ep4tp1_online(xpu_env): "--tensor-parallel-size", "1", "--enable-expert-parallel", + "--enable-prefix-caching", "--data-parallel-size", "4", "--max-model-len", diff --git a/tests/xpu_ci/test_ep4tp4_all2all.py b/tests/xpu_ci/test_ep4tp4_all2all.py index e2fbbf227..8c5a81e0b 100644 --- a/tests/xpu_ci/test_ep4tp4_all2all.py +++ b/tests/xpu_ci/test_ep4tp4_all2all.py @@ -65,6 +65,7 @@ def test_ep4tp4_all2all(xpu_env): "--tensor-parallel-size", "4", "--enable-expert-parallel", + "--enable-prefix-caching", "--data-parallel-size", "1", "--max-model-len", diff --git a/tests/xpu_ci/test_ep4tp4_online.py b/tests/xpu_ci/test_ep4tp4_online.py index 3850b4efb..6f64016be 100644 --- a/tests/xpu_ci/test_ep4tp4_online.py +++ b/tests/xpu_ci/test_ep4tp4_online.py @@ -64,6 +64,7 @@ def test_ep4tp4_online(xpu_env): "--tensor-parallel-size", "4", "--enable-expert-parallel", + "--enable-prefix-caching", "--data-parallel-size", "1", "--max-model-len",