mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[BugFix][Metax] Fix metax compile issue in get_block_shape_and_split_kv_block (#5000)
* fix metax compile
* fix
This commit is contained in:
@@ -15,9 +15,9 @@
|
||||
#include "helper.h"
|
||||
#include "paddle/extension.h"
|
||||
#ifndef PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU
|
||||
#include "paddle/phi/backends/gpu/cuda/cuda_graph_with_memory_pool.h"
|
||||
#include "paddle/phi/core/memory/memcpy.h"
|
||||
#endif
|
||||
#include "paddle/phi/backends/gpu/cuda/cuda_graph_with_memory_pool.h"
|
||||
#include "utils.cuh"
|
||||
|
||||
template <int THREADBLOCK_SIZE>
|
||||
@@ -290,10 +290,11 @@ void GetBlockShapeAndSplitKVBlock(
|
||||
bsz);
|
||||
// Note (sunxin): Skip capturing the DtoH copy (it's time-consuming); CPU data
|
||||
// is only for branching in attention.
|
||||
if (!phi::backends::gpu::IsCUDAGraphCapturing()) {
|
||||
#ifndef PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU
|
||||
if (!phi::backends::gpu::IsCUDAGraphCapturing())
|
||||
#endif
|
||||
max_len_tensor_cpu.copy_(
|
||||
max_len_tensor_gpu, max_len_tensor_cpu.place(), false);
|
||||
}
|
||||
|
||||
auto max_len_cpu_ptr = max_len_tensor_cpu.data<int>();
|
||||
int max_len_this_time = max_len_cpu_ptr[0];
|
||||
@@ -404,10 +405,11 @@ void GetBlockShapeAndSplitKVBlock(
|
||||
group_size);
|
||||
// Note (sunxin): Skip capturing the DtoH copy (it's time-consuming); CPU
|
||||
// data is only for branching in attention.
|
||||
if (!phi::backends::gpu::IsCUDAGraphCapturing()) {
|
||||
#ifndef PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU
|
||||
if (!phi::backends::gpu::IsCUDAGraphCapturing())
|
||||
#endif
|
||||
decoder_num_blocks_cpu.copy_(
|
||||
decoder_num_blocks_device, decoder_num_blocks_cpu.place(), false);
|
||||
}
|
||||
PADDLE_ENFORCE_GPU_SUCCESS(cudaMemsetAsync(
|
||||
decoder_chunk_size_device.data<int>(), 64, sizeof(int32_t), stream));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user