[BugFix] fix total_block_num init error in worker_process (#4553)

* fix total_block_num init error in worker_process

* fix req and token client

* fix req and token client

* fix xpu ci

* fix xpu ci
This commit is contained in:
RichardWooSJTU
2025-10-29 11:42:12 +08:00
committed by GitHub
parent 14e7d88ea4
commit 0dde936e93
2 changed files with 12 additions and 4 deletions

View File

@@ -480,7 +480,7 @@ def parse_args():
help="model dir",
)
parser.add_argument("-mbs", "--max_num_seqs", type=int, default=34, help="max batch size")
parser.add_argument("--total_block_num", type=int, default=2000)
parser.add_argument("--num_gpu_blocks_override", type=int, default=None)
parser.add_argument("--block_size", type=int, default=64)
parser.add_argument("--pod_ip", type=str, default="127.0.0.1")
parser.add_argument("--engine_worker_queue_port", type=str, default="9923")
@@ -715,6 +715,7 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
parallel_config = ParallelConfig(vars(args))
cache_config = CacheConfig(vars(args))
scheduler_config = SchedulerConfig(vars(args))
parallel_config.tensor_parallel_rank = local_rank % parallel_config.tensor_parallel_size
parallel_config.data_parallel_rank = local_rank // parallel_config.tensor_parallel_size
# config for EP