fix t2i (#4163)
Some checks failed
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
CE Compile Job / ce_job_pre_check (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled

Co-authored-by: Yuanle Liu <yuanlehome@163.com>
This commit is contained in:
RichardWooSJTU
2025-09-19 18:07:13 +08:00
committed by GitHub
parent cc6e14d2ec
commit 91912cc2e1
5 changed files with 8 additions and 2 deletions

View File

@@ -890,7 +890,9 @@ class CacheConfig:
self.kv_cache_ratio = 1.0 self.kv_cache_ratio = 1.0
else: else:
self.kv_cache_ratio = 0.75 self.kv_cache_ratio = 0.75
self.enc_dec_block_num = 0 if current_platform.is_iluvatar() or current_platform.is_maca() else 2 self.enc_dec_block_num = (
0 if current_platform.is_iluvatar() or current_platform.is_maca() else envs.FD_ENC_DEC_BLOCK_NUM
)
self.prealloc_dec_block_slot_num_threshold = 12 self.prealloc_dec_block_slot_num_threshold = 12
self.cache_dtype = "bfloat16" self.cache_dtype = "bfloat16"
self.model_cfg = None self.model_cfg = None

View File

@@ -704,7 +704,7 @@ class EngineArgs:
cache_group.add_argument( cache_group.add_argument(
"--prealloc-dec-block-slot-num-threshold", "--prealloc-dec-block-slot-num-threshold",
type=int, type=int,
default=12, default=EngineArgs.prealloc_dec_block_slot_num_threshold,
help="Number of token slot threadshold to allocate next blocks for decoding.", help="Number of token slot threadshold to allocate next blocks for decoding.",
) )

View File

@@ -304,6 +304,7 @@ class CompletionOutput:
"index": self.index, "index": self.index,
"send_idx": self.send_idx, "send_idx": self.send_idx,
"token_ids": self.token_ids, "token_ids": self.token_ids,
"decode_type": self.decode_type,
"logprob": self.logprob, "logprob": self.logprob,
"top_logprobs": self.top_logprobs, "top_logprobs": self.top_logprobs,
"logprobs": self.logprobs, "logprobs": self.logprobs,

View File

@@ -82,6 +82,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
"EXPORTER_OTLP_HEADERS": lambda: os.getenv("EXPORTER_OTLP_HEADERS"), "EXPORTER_OTLP_HEADERS": lambda: os.getenv("EXPORTER_OTLP_HEADERS"),
# enable kv cache block scheduler v1 (no need for kv_cache_ratio) # enable kv cache block scheduler v1 (no need for kv_cache_ratio)
"ENABLE_V1_KVCACHE_SCHEDULER": lambda: int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "1")), "ENABLE_V1_KVCACHE_SCHEDULER": lambda: int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "1")),
# set prealloc block num for decoder
"FD_ENC_DEC_BLOCK_NUM": lambda: int(os.getenv("FD_ENC_DEC_BLOCK_NUM", "2")),
# Whether to use PLUGINS. # Whether to use PLUGINS.
"FD_PLUGINS": lambda: None if "FD_PLUGINS" not in os.environ else os.environ["FD_PLUGINS"].split(","), "FD_PLUGINS": lambda: None if "FD_PLUGINS" not in os.environ else os.environ["FD_PLUGINS"].split(","),
# set trace attribute job_id. # set trace attribute job_id.

View File

@@ -306,6 +306,7 @@ class LocalScheduler:
if response.request_id not in self.responses: if response.request_id not in self.responses:
self.responses[response.request_id] = [response] self.responses[response.request_id] = [response]
continue continue
scheduler_logger.debug(f"append response {response.raw}")
self.responses[response.request_id].append(response) self.responses[response.request_id].append(response)
self.responses_not_empty.notify_all() self.responses_not_empty.notify_all()