FastDeploy/fastdeploy/cache_manager/ops.py

import paddle

from fastdeploy.platforms import current_platform

if current_platform.is_cuda():
    from fastdeploy.model_executor.ops.gpu import (
        cuda_host_alloc,
        cuda_host_free,
        set_data_ipc,
        share_external_data,
        swap_cache_all_layers,
        unset_data_ipc,
    )

    memory_allocated = paddle.device.cuda.memory_allocated
elif current_platform.is_xpu():
    from fastdeploy.model_executor.ops.xpu import (
        cuda_host_alloc,
        cuda_host_free,
        set_data_ipc,
        share_external_data,
        swap_cache_all_layers,
    )

    unset_data_ipc = None
    memory_allocated = paddle.device.xpu.memory_allocated

else:
    raise RuntimeError("Prefix cache ops only supported CUDA nor XPU platform ")


def set_device(device):
    if current_platform.is_cuda():
        paddle.set_device(f"gpu:{device}")
    elif current_platform.is_xpu():
        paddle.set_device(f"xpu:{device}")
    else:
        raise RuntimeError("No supported platform")


def share_external_data_(cache, cache_name, cache_shape, use_ipc):
    if current_platform.is_cuda():
        cache = share_external_data(cache, cache_name, cache_shape)
    elif current_platform.is_xpu():
        cache = share_external_data(cache, cache_name, cache_shape, use_ipc)
    else:
        raise RuntimeError("No supported platform")
    return cache


__all__ = [
    "cuda_host_alloc",
    "cuda_host_free",
    "set_data_ipc",
    "share_external_data_",
    "swap_cache_all_layers",
    "unset_data_ipc",  # XPU是 None
    "set_device",
    "memory_allocated",
]