[Feature] Support block scheduler v1 for FD (#2928)

* Support FD block scheduler v1

* Support FD block scheduler v1

* Support FD block scheduler v1

* Fix according to copilot review

* Fix according to review

* Remove is_dummy

* Fix bug when real_bsz=1

* Fix infer first token cost time

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
chenjian
2025-07-23 20:31:31 +08:00
committed by GitHub
parent ca0f71bd39
commit 85a78d695d
16 changed files with 898 additions and 40 deletions

View File

@@ -18,6 +18,7 @@ from __future__ import annotations
import time
from dataclasses import asdict, dataclass, fields
from enum import Enum
from typing import Any, Dict, Optional, Union
import numpy as np
@@ -27,6 +28,19 @@ from fastdeploy.utils import data_processor_logger
from fastdeploy.worker.output import LogprobsLists
class RequestStatus(Enum):
    """
    Status values stored in a Request's ``status`` attribute.

    ``Request.__init__`` assigns ``WAITING`` as the initial status. Which
    component performs the other transitions is not visible in this file.
    NOTE(review): presumably the block scheduler moves requests between
    these states -- confirm against the scheduler code.
    """

    WAITING = 0  # initial status assigned in Request.__init__
    RUNNING = 1
    PREEMPTED = 2
    FINISHED = 3
class RequestType(Enum):
    """
    Task kinds stored in a Request's ``task_type`` attribute.

    ``Request.__init__`` assigns ``PREFILL`` as the initial task type.
    """

    PREFILL = 0  # initial task type assigned in Request.__init__
    DECODE = 1
    # Shares its name and value with RequestStatus.PREEMPTED, but it is a
    # member of a different enum, so the two never compare equal.
    PREEMPTED = 2
@dataclass
class Request:
def __init__(
@@ -93,6 +107,15 @@ class Request:
self.enable_thinking = enable_thinking
self.trace_carrier = trace_carrier
# token num
self.block_tables = []
self.output_token_ids = []
self.num_computed_tokens = 0
# status
self.status = RequestStatus.WAITING
self.task_type = RequestType.PREFILL
self.idx = None
@classmethod
def from_dict(cls, d: dict):
data_processor_logger.debug(f"{d}")
@@ -125,6 +148,21 @@ class Request:
trace_carrier=d.get("trace_carrier", {}),
)
@property
def num_total_tokens(self):
    """
    Total number of tokens of the request: the prompt length plus the
    number of tokens generated so far.

    Returns:
        ``self.prompt_token_ids_len + len(self.output_token_ids)``.
    """
    prompt_count = self.prompt_token_ids_len
    generated_count = len(self.output_token_ids)
    return prompt_count + generated_count
def __eq__(self, other):
    """
    Compare two requests by their ``request_id``.

    Args:
        other: Object to compare against.

    Returns:
        ``True``/``False`` depending on whether ``other`` is a Request with
        the same ``request_id``; ``NotImplemented`` when ``other`` is not a
        Request.
    """
    if not isinstance(other, Request):
        # Returning NotImplemented (rather than False) lets Python try the
        # reflected comparison on ``other``. For ordinary objects the final
        # result of ``request == other`` is still False.
        return NotImplemented
    # NOTE(review): no __hash__ is visible in this chunk. A class that
    # defines __eq__ without __hash__ is unhashable -- confirm that is the
    # intended behavior for Request.
    return self.request_id == other.request_id
def to_dict(self) -> dict:
"""convert Request into a serializable dict"""
data = {