[BugFix] Set default OMP_NUM_THREADS=3 and fix extra GPU memory usage in DeepSeek (#5219)

* fix bug * update * update * update * fix copy * update
2025-12-24 13:28:13 +08:00 · 2025-11-28 14:22:04 +08:00
parent 7dc06cac6e
commit 1539fd6056
6 changed files with 29 additions and 16 deletions
--- a/fastdeploy/model_executor/layers/utils.py
+++ b/fastdeploy/model_executor/layers/utils.py
@@ -141,7 +141,10 @@ def get_tensor(input: Union[paddle.Tensor, np.ndarray, str], model_path=None) ->

    if isinstance(input, paddle.Tensor):
        if input.place.is_cpu_place():
-            return input.to(paddle.device.get_device())
+            if current_platform.is_cuda():
+                return input.cuda()
+            else:
+                return input.to(paddle.device.get_device())
        return input
    elif isinstance(input, np.ndarray):
        return paddle.to_tensor(input)