mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
【Inference Optimize】DeepSeek-V3-model MLA Optimize (#3886)
* support MLA chunk_size auto search & cuda_graph
This commit is contained in:
@@ -563,3 +563,11 @@ inline int GetSMVersion() {
|
||||
return sm_version;
|
||||
|
||||
}
|
||||
|
||||
inline bool GetMlaUseTensorcore() {
|
||||
static const bool flags_mla_use_tensorcore = get_mla_use_tensorcore();
|
||||
static const bool enable_mla_tensorcore = GetSMVersion() >= 90 ? true : false;
|
||||
const bool mla_use_tensorcore =
|
||||
flags_mla_use_tensorcore && enable_mla_tensorcore;
|
||||
return mla_use_tensorcore;
|
||||
}
|
||||
|
Reference in New Issue
Block a user