# Flat mapping of settings, one key per line so the file parses as a
# nine-key mapping instead of a single malformed scalar.
# NOTE(review): the key names suggest an inference-serving configuration for
# a multimodal model; confirm each meaning against the consuming tool's docs.

# Booleans use canonical lowercase `true` so every YAML loader reads them as
# booleans rather than strings.
enable_mm: true

# NOTE(review): presumably sequence-length and concurrency limits - confirm.
max_model_len: 32768
max_num_seqs: 128

# NOTE(review): presumably fractions in the range 0-1 - confirm valid range.
gpu_memory_utilization: 0.9
kv_cache_ratio: 0.71

tensor_parallel_size: 1

enable_chunked_prefill: true
# NOTE(review): presumably only meaningful alongside enable_chunked_prefill;
# verify against the consumer.
max_num_batched_tokens: 384

# Plain string identifier; contains no YAML-special characters, so it is left
# unquoted.
reasoning_parser: ernie-45-vl