-
Notifications
You must be signed in to change notification settings - Fork 1
/
matrix_cuda.h
51 lines (40 loc) · 1.28 KB
/
matrix_cuda.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#ifndef _MATRIX_CUDA_H_
#define _MATRIX_CUDA_H_
// Internal Includes
#include "matrix.h"
// Library Includes
#include "cutil_inline.h"
// Standard Includes
#include <iostream>
// Allocate a device matrix of same size as M.
inline Matrix AllocateDeviceMatrix(const Matrix M)
{
Matrix Mdevice = M;
int size = M.width * M.height * sizeof(float);
cudaError_t cu_err;
cu_err = cudaMalloc((void**)&Mdevice.elements, size);
if (cudaSuccess != cu_err)
{
std::cout << "Unable to cudaMalloc!" << std::endl;
exit(0);
}
cudaMemset(&Mdevice.elements, 0, size); //initializes values to zero
return Mdevice;
}
// Copy a host matrix to a device matrix.
inline void CopyToDeviceMatrix(Matrix Mdevice, const Matrix Mhost)
{
int size = Mhost.width * Mhost.height * sizeof(float);
Mdevice.height = Mhost.height;
Mdevice.width = Mhost.width;
Mdevice.pitch = Mhost.pitch;
Mdevice.quantization = Mhost.quantization;
cutilSafeCall( cudaMemcpy(Mdevice.elements, Mhost.elements, size, cudaMemcpyHostToDevice));
}
// Copy a device matrix to a host matrix.
inline void CopyFromDeviceMatrix(Matrix Mhost, const Matrix Mdevice)
{
int size = Mdevice.width * Mdevice.height * sizeof(float);
cutilSafeCall(cudaMemcpy(Mhost.elements, Mdevice.elements, size, cudaMemcpyDeviceToHost));
}
#endif // _MATRIX_CUDA_H_