mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-25 01:20:43 +08:00
[Feature] Set prefix caching as default (#3814)
* Set prefix caching as default
* Set prefix caching as default
* Set prefix caching as default
* skip dynamic load scene
* fix kill bug
* fix kill bug
* fix kill bug
* fix
* fix
* fix ci
This commit is contained in:
@@ -30,8 +30,8 @@ def test_normal_schedule():
|
||||
max_num_seqs=max_num_seqs, config=fd_config, tensor_parallel_size=8, splitwise_role="mixed"
|
||||
)
|
||||
req1 = Request.from_dict({"request_id": "req1", "prompt_token_ids": [1] * 3199, "prompt_token_ids_len": 3199})
|
||||
-req2 = Request.from_dict({"request_id": "req2", "prompt_token_ids": [1] * 3201, "prompt_token_ids_len": 3201})
|
||||
-req3 = Request.from_dict({"request_id": "req3", "prompt_token_ids": [1] * 3200, "prompt_token_ids_len": 3200})
|
||||
+req2 = Request.from_dict({"request_id": "req2", "prompt_token_ids": [2] * 3201, "prompt_token_ids_len": 3201})
|
||||
+req3 = Request.from_dict({"request_id": "req3", "prompt_token_ids": [3] * 3200, "prompt_token_ids_len": 3200})
|
||||
resource_manager_v1.add_request(req1)
|
||||
resource_manager_v1.add_request(req2)
|
||||
resource_manager_v1.add_request(req3)
|
||||
@@ -93,7 +93,7 @@ def test_preempted_request():
|
||||
max_num_seqs=max_num_seqs, config=fd_config, tensor_parallel_size=8, splitwise_role="mixed"
|
||||
)
|
||||
req1 = Request.from_dict({"request_id": "req1", "prompt_token_ids": [1] * 3200, "prompt_token_ids_len": 3200})
|
||||
-req2 = Request.from_dict({"request_id": "req2", "prompt_token_ids": [1] * 3200, "prompt_token_ids_len": 3200})
|
||||
+req2 = Request.from_dict({"request_id": "req2", "prompt_token_ids": [2] * 3200, "prompt_token_ids_len": 3200})
|
||||
resource_manager_v1.add_request(req1)
|
||||
resource_manager_v1.add_request(req2)
|
||||
# step 1
|
||||
|
||||
Reference in New Issue
Block a user