mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-21 15:49:31 +08:00
[MTP] optimize mtp infer speed (#2840)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
This commit is contained in:
@@ -266,18 +266,6 @@ void SpeculateVerify(
|
||||
seed++;
|
||||
offset++;
|
||||
|
||||
auto err = cudaDeviceSynchronize();
|
||||
if (err != 0) {
|
||||
printf("err %d\n", err);
|
||||
}
|
||||
|
||||
err = cudaGetLastError();
|
||||
|
||||
if (err != 0) {
|
||||
printf("err %d\n", err);
|
||||
}
|
||||
|
||||
// printf("inited curand\n");
|
||||
bool use_topk = false;
|
||||
char *env_var = getenv("SPECULATE_VERIFY_USE_TOPK");
|
||||
if (env_var) {
|
||||
|
Reference in New Issue
Block a user