CUDA内存拷贝

Love The Way You Lie 2022-08-08 03:29 242阅读 0赞

1、cudaMemcpy()<—> cudaMalloc() //线性内存拷贝

  1. 1 //线性内存拷贝
  2. 2 cudaMalloc((void**)&dev_A, data_size);
  3. 3 cudaMemcpy(dev_A, host_A, data_size, cudaMemcpyHostToDevice);

2、cudaMemcpy2D()<—>cudaMallocPitch() //二维线性内存(按pitch对齐)的拷贝

  1. cudaError_t cudaMemcpy2D(
  2. void * dst,
  3. size_t dpitch,
  4. const void * src,
  5. size_t spitch,
  6. size_t width,
  7. size_t height,
  8. enum cudaMemcpyKind kind
  9. )

例:

  1. 1 cudaMallocPitch((void**)&devPtr, &pitch, width * sizeof(float), height);
  2. 2 cudaMemcpy2D( void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind )

3、cudaMemcpy2DToArray()<—>cudaMallocArray() //(二维)线性内存到2维数组的拷贝

  1. 1 cudaError_t cudaMemcpy2DToArray (
  2. 2 struct cudaArray * dst,
  3. 3 size_t wOffset,
  4. 4 size_t hOffset,
  5. 5 const void * src,
  6. 6 size_t spitch,
  7. 7 size_t width,
  8. 8 size_t height,
  9. 9 enum cudaMemcpyKind kind
  10. 10 )

例:

  1. 1 void mv(float *y, float *A, float *x, int m, int n) // host wrapper: uploads A (m rows of n floats) and x (n floats), launches mv_kernel, copies m floats back into y; presumably y = A * x, but the kernel is not shown here
  2. 2 {
  3. 3 int blkNum = (m >> 4) + ((m & 15) ? 1 : 0); // ceil(m / 16)
  4. 4 int height = blkNum << 4; // m rounded up to a multiple of 16
  5. 5 int width = (n & 255) ? (((n >> 8) + 1) << 8) : n; // n rounded up to a multiple of 256 (unchanged if it already is one)
  6. 6 dim3 threads(16, 16); // 16 x 16 = 256 threads per block
  7. 7 dim3 grid(blkNum, 1); // blkNum blocks along x, 1 along y
  8. 8 cudaArray *d_A;
  9. 9 float *d_x, *d_y;
  10. 10
  11. 11 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>(); // each array element is a float4
  12. 12 cudaMallocArray(&d_A, &channelDesc, width >> 2, height); // width / 4 float4 elements per row, height rows
  13. 13 cudaMemcpy2DToArray(d_A, 0, 0, A, n * sizeof(float), n * sizeof(float), m, cudaMemcpyHostToDevice); // copies m rows of n * sizeof(float) bytes (source pitch n * sizeof(float)) to array offset (0, 0); the padding up to width/height is not written by this call
  14. 14 cudaBindTextureToArray(texRefA, d_A); // texRefA is declared outside this snippet
  15. 15 cudaMalloc((void **) &d_x, n * sizeof(float));
  16. 16 cudaMalloc((void **) &d_y, m * sizeof(float));
  17. 17
  18. 18 cudaMemcpy(d_x, x, n * sizeof(float), cudaMemcpyHostToDevice);
  19. 19 mv_kernel<<< grid, threads >>>(d_y, d_A, d_x, m, n); // mv_kernel is defined outside this snippet; NOTE(review): no cudaGetLastError() follows the launch and no CUDA return code in this function is checked
  20. 20 cudaMemcpy(y, d_y, m * sizeof(float), cudaMemcpyDeviceToHost); // copy the m results back into host buffer y
  21. 21
  22. 22 cudaFree(d_y);
  23. 23 cudaFree(d_x);
  24. 24 cudaUnbindTexture(texRefA); // unbind before the array it refers to is freed
  25. 25 cudaFreeArray(d_A);
  26. 26 }

4、cudaMemcpyToArray()<—>cudaMallocArray() //(1维)线性内存到2维数组的拷贝

  1. 1 cudaError_t cudaMemcpyToArray(
  2. 2 struct cudaArray * dst,
  3. 3 size_t wOffset,
  4. 4 size_t hOffset,
  5. 5 const void * src,
  6. 6 size_t count,
  7. 7 enum cudaMemcpyKind kind
  8. 8 )

例:

  1. 1 void initCudaTexture(float *h_volume, float2 *velocity) // allocates a 128 x 128 single-channel float array, fills it from h_volume and binds it to tex; velocity is not used in this body
  2. 2 {
  3. 3 cudaChannelFormatDesc desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); // one 32-bit float channel (x = 32 bits; y, z, w = 0)
  4. 4
  5. 5 cutilSafeCall(cudaMallocArray(&d_volumeArray, &desc, 128, 128)); // now error-checked like the bind call below; d_volumeArray is declared outside this snippet
  6. 6
  7. 7 cutilSafeCall(cudaMemcpyToArray(d_volumeArray, 0, 0, h_volume, sizeof(float)*128*128, cudaMemcpyDeviceToDevice)); // copies 128 * 128 floats to array offset (0, 0); NOTE(review): the name h_volume suggests a host pointer, which would need cudaMemcpyHostToDevice - confirm against the caller
  8. 8
  9. 9 tex.normalized = true; // tex is declared outside this snippet; use normalized texture coordinates
  10. 10 tex.filterMode = cudaFilterModeLinear; // linear filtering between texels
  11. 11 tex.addressMode[0] = cudaAddressModeWrap; // wrap addressing in the first dimension
  12. 12 tex.addressMode[1] = cudaAddressModeWrap; // wrap addressing in the second dimension
  13. 13
  14. 14 cutilSafeCall(cudaBindTextureToArray(tex, d_volumeArray));
  15. 15
  16. 16 }

5、cudaMemcpy3D()<—>cudaMalloc3DArray() //(1维)线性内存到3维数组的拷贝

  1. 1 cudaError_t cudaMemcpy3D(const struct cudaMemcpy3DParms * p)
  2. 2
  3. 3 struct cudaExtent {
  4. 4 size_t width;
  5. 5 size_t height;
  6. 6 size_t depth;
  7. 7 };
  8. 8 struct cudaExtent make_cudaExtent(size_t w, size_t h, size_t d);
  9. 9
  10. 10 struct cudaPos {
  11. 11 size_t x;
  12. 12 size_t y;
  13. 13 size_t z;
  14. 14 };
  15. 15 struct cudaPos make_cudaPos(size_t x, size_t y, size_t z);
  16. 16
  17. 17 struct cudaMemcpy3DParms {
  18. 18 struct cudaArray *srcArray;
  19. 19 struct cudaPos srcPos;
  20. 20 struct cudaPitchedPtr srcPtr;
  21. 21 struct cudaArray *dstArray;
  22. 22 struct cudaPos dstPos;
  23. 23 struct cudaPitchedPtr dstPtr;
  24. 24 struct cudaExtent extent;
  25. 25 enum cudaMemcpyKind kind;
  26. 26 };

例:

  1. 1 void initCudaTexture(const uchar *h_volume, cudaExtent volumeSize) // allocates a 3D array of extent volumeSize, copies h_volume into it host-to-device and binds it to tex
  2. 2 {
  3. 3 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>(); // one uchar per array element
  4. 4
  5. 5 cutilSafeCall(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize)); // d_volumeArray is declared outside this snippet
  6. 6
  7. 7 cudaMemcpy3DParms copyParams = {
  8. 0}; // zero-initialize the struct before setting the fields used below
  9. 8 copyParams.srcPtr = make_cudaPitchedPtr((void*)h_volume, volumeSize.width*sizeof(uchar), volumeSize.width, volumeSize.height); // source rows are tightly packed: pitch = width * sizeof(uchar) bytes
  10. 9 copyParams.dstArray = d_volumeArray;
  11. 10 copyParams.extent = volumeSize; // copy the whole volume
  12. 11 copyParams.kind = cudaMemcpyHostToDevice;
  13. 12 cutilSafeCall(cudaMemcpy3D(&copyParams));
  14. 13
  15. 14 tex.normalized = true; // tex is declared outside this snippet; use normalized texture coordinates
  16. 15 tex.filterMode = cudaFilterModeLinear; // linear filtering between texels
  17. 16 tex.addressMode[0] = cudaAddressModeWrap; // wrap addressing in all three dimensions
  18. 17 tex.addressMode[1] = cudaAddressModeWrap;
  19. 18 tex.addressMode[2] = cudaAddressModeWrap;
  20. 19
  21. 20 cutilSafeCall(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
  22. 21 }

6、cudaMemcpyToSymbol() //拷贝到常量存储器(__constant__)或 __device__ 全局变量

  1. 1 __constant__ float constData[256]; // 256 floats in constant memory
  2. 2 float data[256]; // host-side buffer of the same size
  3. 3 cudaMemcpyToSymbol(constData, data, sizeof(data)); // copy sizeof(data) bytes from data into the symbol constData
  4. 4 cudaMemcpyFromSymbol(data, constData, sizeof(data)); // copy the same number of bytes back from constData into data
  5. 5 __device__ float devData; float value = 3.14f; // a __device__ global variable and the host value to store in it
  6. 6 cudaMemcpyToSymbol(devData, &value, sizeof(float)); // write one float into devData
  7. 7 __device__ float* devPointer; float* ptr; // a __device__ global pointer and a host variable that receives the cudaMalloc result
  8. 8 cudaMalloc(&ptr, 256 * sizeof(float));
  9. 9 cudaMemcpyToSymbol(devPointer, &ptr, sizeof(ptr)); // copies the pointer value itself (sizeof(ptr) bytes), not the 256 floats it points to

发表评论

表情:
评论列表 (有 0 条评论,242人围观)

还没有评论,来说两句吧...

相关阅读