Mirror of https://github.com/PaddlePaddle/FastDeploy.git
dummy import fd (#5192)
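The change wraps the platform-specific prefix-cache op imports in a try/except block: if importing the compiled GPU/XPU ops fails, every exported op symbol is set to None instead of the ImportError propagating, so a plain "import fastdeploy" (a dummy import) still succeeds. The hunk covers the first 77 lines of the old file and the first 107 of the new one; the module as it reads after the commit: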
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import paddle

from fastdeploy.platforms import current_platform

# Import the platform-specific prefix-cache ops; if the compiled extension is
# unavailable, fall back to dummy (None) symbols so importing fastdeploy still works.
try:
    if current_platform.is_cuda():
        from fastdeploy.model_executor.ops.gpu import (
            cuda_host_alloc,
            cuda_host_free,
            get_data_ptr_ipc,
            get_output_kv_signal,
            ipc_sent_key_value_cache_by_remote_ptr,
            ipc_sent_key_value_cache_by_remote_ptr_block_sync,
            set_data_ipc,
            share_external_data,
            swap_cache_all_layers,
            unset_data_ipc,
        )

        memory_allocated = paddle.device.cuda.memory_allocated

        def get_peer_mem_addr(*args, **kwargs):
            raise RuntimeError("get_peer_mem_addr is not needed on CUDA!")

    elif current_platform.is_xpu():
        from fastdeploy.model_executor.ops.xpu import (
            cuda_host_alloc,
            cuda_host_free,
            get_output_kv_signal,
            get_peer_mem_addr,
            set_data_ipc,
            share_external_data,
            swap_cache_all_layers,
        )

        unset_data_ipc = None
        memory_allocated = paddle.device.xpu.memory_allocated

        def get_data_ptr_ipc(*args, **kwargs):
            raise RuntimeError("XPU get_data_ptr_ipc UNIMPLEMENTED!")

        def ipc_sent_key_value_cache_by_remote_ptr(*args, **kwargs):
            raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr UNIMPLEMENTED")

        def ipc_sent_key_value_cache_by_remote_ptr_block_sync(*args, **kwargs):
            raise RuntimeError("XPU ipc_sent_key_value_cache_by_remote_ptr_block_sync UNIMPLEMENTED")

    else:
        raise RuntimeError("Prefix cache ops are only supported on CUDA or XPU platforms")

    def set_device(device):
        if current_platform.is_cuda():
            paddle.set_device(f"gpu:{device}")
        elif current_platform.is_xpu():
            paddle.set_device(f"xpu:{device}")
        else:
            raise RuntimeError("No supported platform")

    def share_external_data_(cache, cache_name, cache_shape, use_ipc):
        if current_platform.is_cuda():
            cache = share_external_data(cache, cache_name, cache_shape)
        elif current_platform.is_xpu():
            cache = share_external_data(cache, cache_name, cache_shape, use_ipc)
        else:
            raise RuntimeError("No supported platform")
        return cache

    def get_all_visible_devices():
        if current_platform.is_xpu():
            return "XPU_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"
        else:
            return "CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"

except:
    # The compiled ops could not be imported: expose dummy symbols instead of
    # letting the ImportError propagate out of "import fastdeploy".
    cuda_host_alloc = None
    cuda_host_free = None
    set_data_ipc = None
    share_external_data_ = None
    swap_cache_all_layers = None
    unset_data_ipc = None
    set_device = None
    memory_allocated = None
    get_output_kv_signal = None
    get_data_ptr_ipc = None
    ipc_sent_key_value_cache_by_remote_ptr = None
    ipc_sent_key_value_cache_by_remote_ptr_block_sync = None
    get_peer_mem_addr = None
    get_all_visible_devices = None


__all__ = [
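The hunk ends at the opening of __all__. For comparison, a minimal self-contained sketch of the same fallback pattern; native_ext and fused_copy below are made-up placeholders, not FastDeploy APIs:

# Illustrative sketch only; "native_ext" and "fused_copy" are placeholders.
try:
    from native_ext import fused_copy  # stands in for the compiled op imports above
except Exception:
    fused_copy = None  # dummy symbol: the package still imports cleanly

def copy_blocks(src, dst):
    # Callers probe the dummy symbol rather than catching an error at import time.
    if fused_copy is None:
        raise RuntimeError("compiled ops unavailable; prefix-cache copy is disabled")
    return fused_copy(src, dst)

The except arm in the commit does the same thing for every exported op, which presumably lets consumers check for None and disable the feature instead of failing when fastdeploy is imported.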