#include <limits.h>

#include "mex.h"
#include "gpu/mxGPUArray.h"

/* Number of threads per block used for the kernel launch. */
#define BLOCKDIM 256

/*
 * Element-wise addition kernel: C[i] = A[i] + B[i] for 0 <= i < N.
 *
 * NOTE: despite its name, this kernel does not multiply by two; it adds the
 * two input arrays. The name is kept so existing references keep working.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element. Threads whose
 * global index is >= N do nothing, so the grid may overshoot N.
 *
 * A, B : device pointers to at least N readable floats each.
 * C    : device pointer to at least N writable floats.
 * N    : number of elements to process.
 */
void __global__ TimesTwo(float const * const A,
                         float const * const B,
                         float * const C,
                         int const N)
{
    int const tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (tid < N) {
        C[tid] = A[tid] + B[tid];
    }
}

/*
 * MEX gateway: C = f(A, B), where A and B are real single-precision gpuArrays
 * with the same number of elements. The result is a gpuArray with the
 * dimensions of A holding A + B element-wise.
 *
 * Raises "parallel:gpu:mexGPUExample:InvalidInput" when the inputs are not two
 * real single gpuArrays of equal element count (or are too large to index
 * with an int), and "parallel:gpu:mexGPUExample:KernelLaunchFailed" when the
 * CUDA launch reports an error.
 */
void mexFunction(int nlhs, mxArray *plhs[],
                 int nrhs, mxArray const *prhs[])
{
    mxGPUArray const *A;
    mxGPUArray const *B;
    mxGPUArray *C;
    float const *d_A;
    float const *d_B;
    float *d_C;
    mwSize const *dims;
    mwSize numel;
    int N;

    char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
    char const * const errMsg = "Invalid input to MEX file.";
    char const * const launchErrId =
        "parallel:gpu:mexGPUExample:KernelLaunchFailed";

    (void)nlhs;

    /* Initialize the MathWorks GPU API before any other mxGPU call. */
    mxInitGPU();

    /* Check that the parameters are exactly two GPU arrays. */
    if ((nrhs != 2) || !(mxIsGPUArray(prhs[0])) || !(mxIsGPUArray(prhs[1]))) {
        mexErrMsgIdAndTxt(errId, errMsg);
    }

    A = mxGPUCreateFromMxArray(prhs[0]);
    B = mxGPUCreateFromMxArray(prhs[1]);

    numel = mxGPUGetNumberOfElements(A);

    /*
     * Check that both inputs are real single-precision arrays with the same
     * number of elements, and that the count fits the kernel's int index.
     * Complex data is rejected because the kernel only processes one float
     * per element; mismatched sizes would read past the end of the smaller
     * buffer.
     */
    if ((mxGPUGetClassID(A) != mxSINGLE_CLASS) ||
        (mxGPUGetClassID(B) != mxSINGLE_CLASS) ||
        (mxGPUGetComplexity(A) != mxREAL) ||
        (mxGPUGetComplexity(B) != mxREAL) ||
        (mxGPUGetNumberOfElements(B) != numel) ||
        (numel > (mwSize)INT_MAX)) {
        mxGPUDestroyGPUArray(A);
        mxGPUDestroyGPUArray(B);
        mexErrMsgIdAndTxt(errId, errMsg);
    }

    d_A = (float const *)(mxGPUGetDataReadOnly(A));
    d_B = (float const *)(mxGPUGetDataReadOnly(B));

    /* The output has the same shape and type as A; the kernel fills it. */
    dims = mxGPUGetDimensions(A);
    C = mxGPUCreateGPUArray(mxGPUGetNumberOfDimensions(A),
                            dims,
                            mxGPUGetClassID(A),
                            mxGPUGetComplexity(A),
                            MX_GPU_DO_NOT_INITIALIZE);
    /* The dimensions array is owned by the caller and must be freed. */
    mxFree((void *)dims);
    d_C = (float *)(mxGPUGetData(C));

    N = (int)numel;

    /* A grid of zero blocks is an invalid launch, so skip empty inputs. */
    if (N > 0) {
        /* Ceil-divide in mwSize so N + BLOCKDIM - 1 cannot overflow int. */
        unsigned int const numBlocks =
            (unsigned int)((numel + BLOCKDIM - 1) / BLOCKDIM);
        cudaError_t launchStatus;

        TimesTwo<<<numBlocks, BLOCKDIM>>>(d_A, d_B, d_C, N);

        /* Launches do not return a status; query it explicitly. */
        launchStatus = cudaGetLastError();
        if (launchStatus != cudaSuccess) {
            mxGPUDestroyGPUArray(A);
            mxGPUDestroyGPUArray(B);
            mxGPUDestroyGPUArray(C);
            mexErrMsgIdAndTxt(launchErrId, "CUDA kernel launch failed: %s",
                              cudaGetErrorString(launchStatus));
        }
    }

    /* Wrap the result for return to MATLAB as a gpuArray. */
    plhs[0] = mxGPUCreateMxArrayOnGPU(C);

    /* Release the mxGPUArray handles created in this function. */
    mxGPUDestroyGPUArray(A);
    mxGPUDestroyGPUArray(B);
    mxGPUDestroyGPUArray(C);
}