diff --git a/custom_ops/xpu_ops/src/plugin/src/kernel/kunlun3cpp/mtp_kernel/speculate_update_repeat_times.xpu b/custom_ops/xpu_ops/src/plugin/src/kernel/kunlun3cpp/mtp_kernel/speculate_update_repeat_times.xpu
index ce3898fb2..4f42fd69f 100644
--- a/custom_ops/xpu_ops/src/plugin/src/kernel/kunlun3cpp/mtp_kernel/speculate_update_repeat_times.xpu
+++ b/custom_ops/xpu_ops/src/plugin/src/kernel/kunlun3cpp/mtp_kernel/speculate_update_repeat_times.xpu
@@ -190,7 +190,7 @@ __device__ void speculate_update_repeat_times_optimized(
             buffer_ptr_pre_ids.toggle();
         }
     }
-    // each core loads all the needed pre_ids into lm without mfence inbetween
+    // each core loads all the needed pre_ids into lm without mfence in between
     // according to the index recorded by previous iteration
     else {
         int cnt = -1;
diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py
index 648617423..4375452b2 100644
--- a/fastdeploy/engine/common_engine.py
+++ b/fastdeploy/engine/common_engine.py
@@ -514,7 +514,7 @@ class EngineService:
             main_process_metrics.num_requests_waiting.dec(len(tasks))
             main_process_metrics.num_requests_running.inc(len(tasks))
         except Exception as e:
-            err_msg = f"Error happend while insert task to engine: {e}, {traceback.format_exc()!s}."
+            err_msg = f"Error happened while insert task to engine: {e}, {traceback.format_exc()!s}."
             llm_logger.error(err_msg)

     def _scheduler_task_to_worker_v1(self):
@@ -569,7 +569,7 @@ class EngineService:
                     time.sleep(0.005)

         except Exception as e:
-            err_msg = "Error happend while insert task to engine: {}, {}.".format(e, str(traceback.format_exc()))
+            err_msg = "Error happened while insert task to engine: {}, {}.".format(e, str(traceback.format_exc()))
             llm_logger.error(err_msg)

     def start_zmq_service(self, api_server_pid=None):
@@ -651,7 +651,7 @@ class EngineService:
                     self.zmq_server.send_multipart(request_id, [error_result])
             except Exception as e:
                 llm_logger.error(
-                    f"Error happend while receiving new request from zmq, details={e}, "
+                    f"Error happened while receiving new request from zmq, details={e}, "
                     f"traceback={traceback.format_exc()}"
                 )

@@ -669,7 +669,7 @@ class EngineService:
                 self.zmq_server.send_multipart(request_id, contents)

             except Exception as e:
-                llm_logger.error(f"Unexcepted error happend: {e}, {traceback.format_exc()!s}")
+                llm_logger.error(f"Unexcepted error happened: {e}, {traceback.format_exc()!s}")

     def split_mode_get_tasks(self):
         """
diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py
index 00f24d998..9109cc7b6 100644
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -563,7 +563,7 @@ class LLMEngine:
         try:
             req_id = self._format_and_add_data(prompts)
         except Exception as e:
-            llm_logger.error(f"Error happend while adding request, details={e}, {str(traceback.format_exc())}")
+            llm_logger.error(f"Error happened while adding request, details={e}, {str(traceback.format_exc())}")
             raise EngineError(str(e), error_code=400)

         # Get the result of the current request
diff --git a/fastdeploy/entrypoints/engine_client.py b/fastdeploy/entrypoints/engine_client.py
index fa23aaaee..777689c73 100644
--- a/fastdeploy/entrypoints/engine_client.py
+++ b/fastdeploy/entrypoints/engine_client.py
@@ -204,8 +204,8 @@ class EngineClient:
             f"preprocess time cost {preprocess_cost_time}"
         )

-        self.vaild_parameters(task)
-        api_server_logger.debug(f"Recieve task: {task}")
+        self.valid_parameters(task)
+        api_server_logger.debug(f"Receive task: {task}")
         try:
             if not self.enable_mm:
                 self.zmq_client.send_json(task)
@@ -215,7 +215,7 @@ class EngineClient:
             api_server_logger.error(f"zmq_client send task error: {e}, {str(traceback.format_exc())}")
             raise EngineError(str(e), error_code=400)

-    def vaild_parameters(self, data):
+    def valid_parameters(self, data):
         """
         Validate stream options
         """
diff --git a/fastdeploy/entrypoints/llm.py b/fastdeploy/entrypoints/llm.py
index d69068b6f..68a4d3a64 100644
--- a/fastdeploy/entrypoints/llm.py
+++ b/fastdeploy/entrypoints/llm.py
@@ -125,7 +125,7 @@ class LLM:
                     continue
                 self.req_output[request_id].add(result)
         except Exception as e:
-            llm_logger.error(f"Unexcepted error happend: {e}, {traceback.format_exc()!s}")
+            llm_logger.error(f"Unexcepted error happened: {e}, {traceback.format_exc()!s}")

     def generate(
         self,
diff --git a/fastdeploy/model_executor/layers/attention/moba_attention_backend.py b/fastdeploy/model_executor/layers/attention/moba_attention_backend.py
index 7ddba90d1..024e97ee2 100644
--- a/fastdeploy/model_executor/layers/attention/moba_attention_backend.py
+++ b/fastdeploy/model_executor/layers/attention/moba_attention_backend.py
@@ -124,7 +124,7 @@ class MobaAttentionBackend(AttentionBackend):
         kv_cache_quant_type: str = None,
     ):
         """
-        Caculate kv cache shape
+        Calculate kv cache shape
         """
         if kv_cache_quant_type is not None and kv_cache_quant_type == "int4_zp":
             return (
diff --git a/fastdeploy/model_executor/layers/lm_head.py b/fastdeploy/model_executor/layers/lm_head.py
index a62e46d61..3af96bbe5 100644
--- a/fastdeploy/model_executor/layers/lm_head.py
+++ b/fastdeploy/model_executor/layers/lm_head.py
@@ -56,7 +56,7 @@ class ParallelLMHead(nn.Layer):
             embedding_dim (int): size of hidden state.
             prefix (str): The name of current layer. Defaults to "".
             with_bias (bool): whether to have bias. Default: False.
-            dtype (str): The dtype of weight. Defalut: None.
+            dtype (str): The dtype of weight. Default: None.
         """
         super(ParallelLMHead, self).__init__()
         self.weight_key: str = prefix + ".weight"
diff --git a/fastdeploy/model_executor/layers/sample/sampler.py b/fastdeploy/model_executor/layers/sample/sampler.py
index f8fd1755a..51ae0aec4 100644
--- a/fastdeploy/model_executor/layers/sample/sampler.py
+++ b/fastdeploy/model_executor/layers/sample/sampler.py
@@ -364,7 +364,7 @@ class Sampler(nn.Layer):
         )
         if sampling_metadata.enable_early_stop:
             # will set the stop batch in stop_flags
-            assert sampling_metadata.stop_flags is not None, "need stop_flags for eary stop"
+            assert sampling_metadata.stop_flags is not None, "need stop_flags for early stop"
             self.early_stopper.process(probs, next_tokens, sampling_metadata.stop_flags)

         sampler_output = SamplerOutput(
diff --git a/fastdeploy/model_executor/ops/triton_ops/triton_utils.py b/fastdeploy/model_executor/ops/triton_ops/triton_utils.py
index c6ebd2742..2a2a00d0d 100644
--- a/fastdeploy/model_executor/ops/triton_ops/triton_utils.py
+++ b/fastdeploy/model_executor/ops/triton_ops/triton_utils.py
@@ -683,7 +683,7 @@ class KernelInterface:
         op_dict = {"op_name": op_name, "reset_zero_when_tune": ""}
         op_dict["triton_kernel_args"] = ",".join(modified_arg_exclude_constexpr)
         op_dict["key"] = ",".join(self.key_args)
-        # when tunning, we need to reset the out to zero.
+        # when tuning, we need to reset the out to zero.
if "reset_zero_when_tune" in other_config.keys(): op_dict["reset_zero_when_tune"] = other_config["reset_zero_when_tune"] diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py index 8245e5657..8915b62ab 100644 --- a/fastdeploy/output/token_processor.py +++ b/fastdeploy/output/token_processor.py @@ -178,7 +178,7 @@ class TokenProcessor: ) except Exception as e: - print(f"Recieve message error: {e}") + print(f"Receive message error: {e}") continue else: is_blocking = True diff --git a/fastdeploy/rl/dynamic_weight_manager.py b/fastdeploy/rl/dynamic_weight_manager.py index e687c707e..f47928e6b 100644 --- a/fastdeploy/rl/dynamic_weight_manager.py +++ b/fastdeploy/rl/dynamic_weight_manager.py @@ -105,7 +105,7 @@ class DynamicWeightManager: def clear_parameters(self, pid: int = 0) -> None: """Clear all model parameters and free memory.""" - logger.info("start clear paramaters") + logger.info("start clear parameters") paddle.device.cuda.empty_cache() for param in self.model.state_dict().values(): param._clear_data() diff --git a/fastdeploy/rl/rollout_model.py b/fastdeploy/rl/rollout_model.py index f6c390120..3282e4548 100644 --- a/fastdeploy/rl/rollout_model.py +++ b/fastdeploy/rl/rollout_model.py @@ -146,7 +146,7 @@ class Ernie4_5_MoeForCausalLMRL(Ernie4_5_MoeForCausalLM, BaseRLModel): return "Ernie4_5_MoeForCausalLMRL" def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]: - """Generate mapping between inference and training parameter for RL(donot delete!).""" + """Generate mapping between inference and training parameter for RL(do not delete!).""" if self._mappings_built: return self.infer_to_train_mapping @@ -225,7 +225,7 @@ class Ernie4_5_VLMoeForConditionalGenerationRL(Ernie4_5_VLMoeForConditionalGener return "Ernie4_5_VLMoeForConditionalGenerationRL" def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]: - """Generate mapping between inference and training parameter for RL(donot delete!).""" + """Generate mapping between inference and training parameter for RL(do not delete!).""" if self._mappings_built: return self.infer_to_train_mapping @@ -331,7 +331,7 @@ class Qwen2ForCausalLMRL(Qwen2ForCausalLM, BaseRLModel): return "Qwen2ForCausalLMRL" def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]: - """Generate mapping between inference and training parameter for RL(donot delete!).""" + """Generate mapping between inference and training parameter for RL(do not delete!).""" if self._mappings_built: return self.infer_to_train_mapping @@ -380,7 +380,7 @@ class Qwen3MoeForCausalLMRL(Qwen3MoeForCausalLM, BaseRLModel): return "Qwen3MoeForCausalLMRL" def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]: - """Generate mapping between inference and training parameter for RL(donot delete!).""" + """Generate mapping between inference and training parameter for RL(do not delete!).""" if self._mappings_built: return self.infer_to_train_mapping diff --git a/fastdeploy/scheduler/global_scheduler.py b/fastdeploy/scheduler/global_scheduler.py index f3962992c..db1f85c83 100644 --- a/fastdeploy/scheduler/global_scheduler.py +++ b/fastdeploy/scheduler/global_scheduler.py @@ -648,7 +648,7 @@ class GlobalScheduler: stolen_responses[response_queue_name].append(response.serialize()) continue - scheduler_logger.error(f"Scheduler has recieved a non-existent response from engine: {[response]}") + scheduler_logger.error(f"Scheduler has received a non-existent response from engine: 
{[response]}") with self.mutex: for request_id, responses in local_responses.items(): diff --git a/fastdeploy/worker/dcu_worker.py b/fastdeploy/worker/dcu_worker.py index 0945f512f..c87a27c29 100644 --- a/fastdeploy/worker/dcu_worker.py +++ b/fastdeploy/worker/dcu_worker.py @@ -49,7 +49,7 @@ class DcuWorker(GpuWorker): """ self.max_chips_per_node = 8 if self.device_config.device_type == "cuda" and paddle.device.is_compiled_with_cuda(): - # Set evironment variable + # Set environment variable self.device_ids = self.parallel_config.device_ids.split(",") self.device = f"gpu:{self.local_rank % self.max_chips_per_node}" paddle.device.set_device(self.device)