mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
fix t2i (#4163)
Some checks failed
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
CE Compile Job / ce_job_pre_check (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Some checks failed
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled
CE Compile Job / ce_job_pre_check (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Co-authored-by: Yuanle Liu <yuanlehome@163.com>
This commit is contained in:
@@ -890,7 +890,9 @@ class CacheConfig:
|
|||||||
self.kv_cache_ratio = 1.0
|
self.kv_cache_ratio = 1.0
|
||||||
else:
|
else:
|
||||||
self.kv_cache_ratio = 0.75
|
self.kv_cache_ratio = 0.75
|
||||||
self.enc_dec_block_num = 0 if current_platform.is_iluvatar() or current_platform.is_maca() else 2
|
self.enc_dec_block_num = (
|
||||||
|
0 if current_platform.is_iluvatar() or current_platform.is_maca() else envs.FD_ENC_DEC_BLOCK_NUM
|
||||||
|
)
|
||||||
self.prealloc_dec_block_slot_num_threshold = 12
|
self.prealloc_dec_block_slot_num_threshold = 12
|
||||||
self.cache_dtype = "bfloat16"
|
self.cache_dtype = "bfloat16"
|
||||||
self.model_cfg = None
|
self.model_cfg = None
|
||||||
|
@@ -704,7 +704,7 @@ class EngineArgs:
|
|||||||
cache_group.add_argument(
|
cache_group.add_argument(
|
||||||
"--prealloc-dec-block-slot-num-threshold",
|
"--prealloc-dec-block-slot-num-threshold",
|
||||||
type=int,
|
type=int,
|
||||||
default=12,
|
default=EngineArgs.prealloc_dec_block_slot_num_threshold,
|
||||||
help="Number of token slot threadshold to allocate next blocks for decoding.",
|
help="Number of token slot threadshold to allocate next blocks for decoding.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -304,6 +304,7 @@ class CompletionOutput:
|
|||||||
"index": self.index,
|
"index": self.index,
|
||||||
"send_idx": self.send_idx,
|
"send_idx": self.send_idx,
|
||||||
"token_ids": self.token_ids,
|
"token_ids": self.token_ids,
|
||||||
|
"decode_type": self.decode_type,
|
||||||
"logprob": self.logprob,
|
"logprob": self.logprob,
|
||||||
"top_logprobs": self.top_logprobs,
|
"top_logprobs": self.top_logprobs,
|
||||||
"logprobs": self.logprobs,
|
"logprobs": self.logprobs,
|
||||||
|
@@ -82,6 +82,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
"EXPORTER_OTLP_HEADERS": lambda: os.getenv("EXPORTER_OTLP_HEADERS"),
|
"EXPORTER_OTLP_HEADERS": lambda: os.getenv("EXPORTER_OTLP_HEADERS"),
|
||||||
# enable kv cache block scheduler v1 (no need for kv_cache_ratio)
|
# enable kv cache block scheduler v1 (no need for kv_cache_ratio)
|
||||||
"ENABLE_V1_KVCACHE_SCHEDULER": lambda: int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "1")),
|
"ENABLE_V1_KVCACHE_SCHEDULER": lambda: int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "1")),
|
||||||
|
# set prealloc block num for decoder
|
||||||
|
"FD_ENC_DEC_BLOCK_NUM": lambda: int(os.getenv("FD_ENC_DEC_BLOCK_NUM", "2")),
|
||||||
# Whether to use PLUGINS.
|
# Whether to use PLUGINS.
|
||||||
"FD_PLUGINS": lambda: None if "FD_PLUGINS" not in os.environ else os.environ["FD_PLUGINS"].split(","),
|
"FD_PLUGINS": lambda: None if "FD_PLUGINS" not in os.environ else os.environ["FD_PLUGINS"].split(","),
|
||||||
# set trace attribute job_id.
|
# set trace attribute job_id.
|
||||||
|
@@ -306,6 +306,7 @@ class LocalScheduler:
|
|||||||
if response.request_id not in self.responses:
|
if response.request_id not in self.responses:
|
||||||
self.responses[response.request_id] = [response]
|
self.responses[response.request_id] = [response]
|
||||||
continue
|
continue
|
||||||
|
scheduler_logger.debug(f"append response {response.raw}")
|
||||||
self.responses[response.request_id].append(response)
|
self.responses[response.request_id].append(response)
|
||||||
self.responses_not_empty.notify_all()
|
self.responses_not_empty.notify_all()
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user