mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[Feature] Support block scheduler v1 for FD (#2928)
* Support FD block scheduler v1
* Support FD block scheduler v1
* Support FD block scheduler v1
* Fix according to copilot review
* Fix according to review
* Remove is_dummy
* Fix bug when real_bsz=1
* Fix infer first token cost time

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -18,6 +18,7 @@ from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import asdict, dataclass, fields
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
@@ -27,6 +28,19 @@ from fastdeploy.utils import data_processor_logger
|
||||
from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
|
||||
class RequestStatus(Enum):
    """
    Status values a request can carry.

    A newly constructed Request starts out as ``WAITING``.
    """

    WAITING = 0
    RUNNING = 1
    PREEMPTED = 2
    FINISHED = 3
|
||||
|
||||
|
||||
class RequestType(Enum):
    """
    Task-type values a request can carry.

    A newly constructed Request starts out with ``PREFILL``.
    """

    PREFILL = 0
    DECODE = 1
    PREEMPTED = 2
|
||||
|
||||
|
||||
@dataclass
|
||||
class Request:
|
||||
def __init__(
|
||||
@@ -93,6 +107,15 @@ class Request:
|
||||
self.enable_thinking = enable_thinking
|
||||
self.trace_carrier = trace_carrier
|
||||
|
||||
# token num
|
||||
self.block_tables = []
|
||||
self.output_token_ids = []
|
||||
self.num_computed_tokens = 0
|
||||
# status
|
||||
self.status = RequestStatus.WAITING
|
||||
self.task_type = RequestType.PREFILL
|
||||
self.idx = None
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, d: dict):
|
||||
data_processor_logger.debug(f"{d}")
|
||||
@@ -125,6 +148,21 @@ class Request:
|
||||
trace_carrier=d.get("trace_carrier", {}),
|
||||
)
|
||||
|
||||
@property
def num_total_tokens(self):
    """
    Token count of the request: the prompt length plus the number of
    tokens generated so far.
    """
    prompt_len = self.prompt_token_ids_len
    generated_len = len(self.output_token_ids)
    return prompt_len + generated_len
|
||||
|
||||
def __eq__(self, other):
    """
    Equality operator.

    Two requests are equal when their request_id values are equal; any
    object that is not a Request compares unequal.
    """
    # NOTE(review): no __hash__ is visible in this excerpt. A class that
    # defines __eq__ without __hash__ is unhashable, so confirm instances
    # are never used as dict keys or set members.
    return isinstance(other, Request) and self.request_id == other.request_id
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""convert Request into a serializable dict"""
|
||||
data = {
|
||||
|
Reference in New Issue
Block a user