---
# Serving configuration for one inference-engine instance.
# One key per line: a flat block mapping, as YAML requires (a plain scalar
# may not contain ": ", so these pairs cannot share a single line).
# NOTE(review): the per-key notes below are inferred from key names and
# values only — confirm against the consuming engine's documentation.

# Sequence-length and batching limits.
max_model_len: 32768
max_num_seqs: 16

# Memory budgeting; both values are fractions (0.9 = 90%).
gpu_memory_utilization: 0.9
kv_cache_ratio: 0.9

# Parallelism; presumably one RDMA port per rank (4 ranks, 4 ports below).
tensor_parallel_size: 4

# Role of this instance; "prefill" suggests a prefill/decode split
# deployment — TODO confirm the set of accepted values.
splitwise_role: prefill
enable_prefix_caching: true

# Local queue ports (integers).
cache_queue_port: 55664
engine_worker_queue_port: 6677

# Cache-transfer settings. These are comma-separated lists / port numbers
# kept as quoted strings exactly as in the original file.
cache_transfer_protocol: "rdma,ipc"
rdma_comm_ports: "7675,7676,7677,7678"
pd_comm_port: "2333"

# Weight quantization scheme identifier.
quantization: wint4