[Feature] [PD] add simple router and refine splitwise deployment (#4709)

* add simple router and refine splitwise deployment

* fix
This commit is contained in:
Juncai
2025-11-06 14:56:02 +08:00
committed by GitHub
parent 831266da7a
commit 08ca0f6aea
39 changed files with 2397 additions and 171 deletions

View File

@@ -352,7 +352,7 @@ async def benchmark(
ignore_eos=ignore_eos,
debug=debug,
extra_body=extra_body,
-response_format=response_format
+response_format=response_format,
)
print("test_input:", test_input)
@@ -384,7 +384,7 @@ async def benchmark(
logprobs=logprobs,
ignore_eos=ignore_eos,
extra_body=extra_body,
-response_format=response_format
+response_format=response_format,
)
profile_output = await request_func(request_func_input=profile_input)
if profile_output.success:
@@ -444,7 +444,7 @@ async def benchmark(
debug=debug,
ignore_eos=ignore_eos,
extra_body=extra_body,
-response_format=response_format
+response_format=response_format,
)
tasks.append(asyncio.create_task(limited_request_func(request_func_input=request_func_input, pbar=pbar)))
outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
@@ -460,7 +460,7 @@ async def benchmark(
api_url=base_url + "/stop_profile",
output_len=test_output_len,
logprobs=logprobs,
-response_format=response_format
+response_format=response_format,
)
profile_output = await request_func(request_func_input=profile_input)
if profile_output.success: