diff --git a/fastdeploy/cache_manager/cache_transfer_manager.py b/fastdeploy/cache_manager/cache_transfer_manager.py
index c9f062201..0aeed80af 100644
--- a/fastdeploy/cache_manager/cache_transfer_manager.py
+++ b/fastdeploy/cache_manager/cache_transfer_manager.py
@@ -382,4 +382,5 @@
 if __name__ == "__main__":
     args = parse_args()
     logger = get_logger("cache_transfer_manager", "cache_transfer_manager.log")
+    paddle.set_device(f"gpu:{args.device_id}")
     main()
diff --git a/fastdeploy/engine/config.py b/fastdeploy/engine/config.py
index 035cea96c..ef7d11bd8 100644
--- a/fastdeploy/engine/config.py
+++ b/fastdeploy/engine/config.py
@@ -293,10 +293,11 @@ class Config:
         )
 
         if not self.cache_config.enable_chunked_prefill:
-            assert self.max_num_batched_tokens >= self.max_model_len, (
-                f"max_num_batched_tokens: {self.max_num_batched_tokens} "
-                f"should be larger than or equal to max_model_len: {self.max_model_len}"
-            )
+            if not int(os.getenv("FD_ENABLE_INTERNAL_ADAPTER", "0")):
+                assert self.max_num_batched_tokens >= self.max_model_len, (
+                    f"max_num_batched_tokens: {self.max_num_batched_tokens} "
+                    f"should be larger than or equal to max_model_len: {self.max_model_len}"
+                )
         else:
             assert self.max_num_batched_tokens >= self.cache_config.block_size, (
                 f"max_num_batched_tokens: {self.max_num_batched_tokens} "