【Inference Optimize】DeepSeek-V3-model MLA Optimize (#3886)

* support MLA chunk_size auto search & cuda_graph
This commit is contained in:
AIbin
2025-09-11 10:46:09 +08:00
committed by GitHub
parent 637d96c6ae
commit a7392a0ff9
23 changed files with 375 additions and 310 deletions

View File

@@ -563,3 +563,11 @@ inline int GetSMVersion() {
return sm_version;
}
inline bool GetMlaUseTensorcore() {
static const bool flags_mla_use_tensorcore = get_mla_use_tensorcore();
static const bool enable_mla_tensorcore = GetSMVersion() >= 90 ? true : false;
const bool mla_use_tensorcore =
flags_mla_use_tensorcore && enable_mla_tensorcore;
return mla_use_tensorcore;
}