#include "cuda_runtime.h" #include "device_launch_parameters.h" #include #include using namespace std; #define N 100 #define BLOCKDIM 10 __host__ void addOnGPU(int* a, int* b, int* c); __host__ void addOnCPU(int* a, int* b, int* c); __global__ void addKernel(int* a, int* b, int* c); int main(int argc, char** argv) { int i; int* a = new int[N]; int* b = new int[N]; int* c = new int[N]; for (i = 0; i> >(dev_a, dev_b, dev_c); cout << cudaGetErrorString(cudaGetLastError()) << endl; cudaDeviceSynchronize(); cout << "Kernel Excution time: " << clock() - start_kernel << endl; cudaMemcpy(c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost); cudaFree(dev_a); cudaFree(dev_b); cudaFree(dev_c); cout << "Full GPU time: " << clock() - start_gpu_full << endl; int correct = 1; for (int i = 0; i