diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py
index 23030d6a8..7c6eb5941 100644
--- a/fastdeploy/envs.py
+++ b/fastdeploy/envs.py
@@ -153,8 +153,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "FD_MODEL_USE_SAFETENSORS": lambda: int(os.getenv("FD_MODEL_USE_SAFETENSORS", "1")) == 1,
     "FD_MODEL_USE_OFFLINE_QUANT": lambda: int(os.getenv("FD_MODEL_USE_OFFLINE_QUANT", "1")) == 1,
     "FD_MOE_QUANT_TYPE": lambda: os.getenv("FD_MOE_QUANT_TYPE", "w4a8"),
+    # Access key (AK) for the BOS storage that holds features during multimodal inference
     "ENCODE_FEATURE_BOS_AK": lambda: os.getenv("ENCODE_FEATURE_BOS_AK"),
+    # Secret key (SK) for the BOS storage that holds features during multimodal inference
     "ENCODE_FEATURE_BOS_SK": lambda: os.getenv("ENCODE_FEATURE_BOS_SK"),
+    # BOS endpoint for feature storage; defaults to the previously hard-coded "bj.bcebos.com"
+    "ENCODE_FEATURE_ENDPOINT": lambda: os.getenv("ENCODE_FEATURE_ENDPOINT", "bj.bcebos.com"),
     # Enable offline perf test mode for PD disaggregation
     "FD_OFFLINE_PERF_TEST_FOR_PD": lambda: int(os.getenv("FD_OFFLINE_PERF_TEST_FOR_PD", "0")),
     "FD_ENABLE_E2W_TENSOR_CONVERT": lambda: int(os.getenv("FD_ENABLE_E2W_TENSOR_CONVERT", "0")),
diff --git a/fastdeploy/utils.py b/fastdeploy/utils.py
index 17838db6e..492f23b0e 100644
--- a/fastdeploy/utils.py
+++ b/fastdeploy/utils.py
@@ -972,6 +972,7 @@ def init_bos_client():
     from baidubce.services.bos.bos_client import BosClient
 
     cfg = BceClientConfiguration(
-        credentials=BceCredentials(envs.ENCODE_FEATURE_BOS_AK, envs.ENCODE_FEATURE_BOS_SK), endpoint="bj.bcebos.com"
+        credentials=BceCredentials(envs.ENCODE_FEATURE_BOS_AK, envs.ENCODE_FEATURE_BOS_SK),
+        endpoint=envs.ENCODE_FEATURE_ENDPOINT,
     )
     return BosClient(cfg)