1. 测试代码 “sample.cu”
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>
using namespace std;
// Kernel: reads the first int behind `a` and `b` and stores their sum in the
// first int behind `c`. No thread or block index is consulted, so every
// launched thread performs the same single-element store.
__global__ void Add(int *a, int *b, int *c){
    const int lhs = *a;
    const int rhs = *b;
    *c = lhs + rhs;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Adds two host integers on the GPU with the Add kernel and prints the sum.
// Returns 0 on success and 1 if any CUDA call fails; all three device
// buffers are released on every path.
int main(void) {
    // Host-side operands and result.
    const int a = 5;
    const int b = 9;
    int addResult = 0;

    // Start from NULL so the cleanup below is safe even when an allocation
    // fails part-way through.
    int *devicePointerA = NULL;
    int *devicePointerB = NULL;
    int *devicePointerC = NULL;

    // Allocate one int on the device for each operand and for the result.
    // The && chain stops at the first failing call.
    bool ok = cudaSucceeded(cudaMalloc(&devicePointerA, sizeof(int)), "cudaMalloc(A)")
           && cudaSucceeded(cudaMalloc(&devicePointerB, sizeof(int)), "cudaMalloc(B)")
           && cudaSucceeded(cudaMalloc(&devicePointerC, sizeof(int)), "cudaMalloc(C)");

    // Copy both operands from host memory to device memory.
    ok = ok
      && cudaSucceeded(cudaMemcpy(devicePointerA, &a, sizeof(int), cudaMemcpyHostToDevice),
                       "cudaMemcpy(A, host->device)")
      && cudaSucceeded(cudaMemcpy(devicePointerB, &b, sizeof(int), cudaMemcpyHostToDevice),
                       "cudaMemcpy(B, host->device)");

    if (ok) {
        // One block of one thread is enough for a single addition.
        Add<<<1, 1>>>(devicePointerA, devicePointerB, devicePointerC);
        // A kernel launch returns no status; launch errors are read back here.
        ok = cudaSucceeded(cudaGetLastError(), "Add kernel launch");
    }

    // Blocking copy of the result back to the host. It waits for the kernel,
    // so errors raised while the kernel ran are reported by this call.
    ok = ok
      && cudaSucceeded(cudaMemcpy(&addResult, devicePointerC, sizeof(int), cudaMemcpyDeviceToHost),
                       "cudaMemcpy(C, device->host)");

    if (ok) {
        printf("The add result is %d \n", addResult);
    }

    // Release all three device buffers (the original leaked devicePointerC).
    // cudaFree on a NULL pointer performs no operation.
    cudaFree(devicePointerA);
    cudaFree(devicePointerB);
    cudaFree(devicePointerC);

    return ok ? 0 : 1;
}
2. 客户端(Laptop with GeForce 940MX inside)上测试
nvcc sample.cu -o sample
./sample
3. 服务器端(Server with Tesla P100 inside)上测试
nvcc sample.cu -o sample
./sample
注:nvcc编译器的使用语法类似gcc/g++
参考
http://blog.csdn.net/canhui_wang/article/details/51584862