/* * Copyright 1993-2007 NVIDIA Corporation. All rights reserved. * * NOTICE TO USER: * * This source code is subject to NVIDIA ownership rights under U.S. and * international Copyright laws. Users and possessors of this source code * are hereby granted a nonexclusive, royalty-free license to use this code * in individual and commercial software. * * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE * OR PERFORMANCE OF THIS SOURCE CODE. * * U.S. Government End Users. This source code is a "commercial item" as * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of * "commercial computer software" and "commercial computer software * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) * and is provided to the U.S. Government only as a commercial end item. * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the * source code with only those rights set forth herein. * * Any use of this source code in individual and commercial software must * include, in the user documentation and internal comments to the code, * the above Disclaimer and U.S. Government End Users Notice. */ /* Template project which demonstrates the basics on how to setup a project * example application. * Host code. 
*/

// includes, system
// NOTE(review): the header names were missing from the #include directives in
// the reviewed source. The names below are reconstructed from what this file
// uses (malloc/free, printf) -- confirm against the original project.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// includes, project
// NOTE(review): presumably the header that provides the CUT_* and
// CUDA_SAFE_CALL macros used below -- confirm.
#include <cutil.h>

////////////////////////////////////////////////////////////////////////////////
// declaration, forward
extern "C" void runTest( int argc, char** argv);

extern "C" void computeGold( float* reference, float* idata,
                             const unsigned int len);

// Route every shared-memory access through the bank checker macro.
#define SDATA( index)      CUT_BANK_CHECKER(sdata, index)

////////////////////////////////////////////////////////////////////////////////
//! Simple test kernel for device functionality
//!
//! Each thread stages one element of the input in shared memory, multiplies
//! it by the number of threads in the block and writes it back out.
//!
//! Launch requirements:
//!  - dynamic shared memory of at least blockDim.x * sizeof(float) bytes
//!    (third launch parameter);
//!  - g_idata and g_odata each hold at least blockDim.x floats;
//!  - only threadIdx.x is used for indexing, so every block of a multi-block
//!    launch would read and write the same blockDim.x elements.
//!
//! @param g_idata  input data in global memory
//! @param g_odata  output data in global memory
////////////////////////////////////////////////////////////////////////////////
__global__ void
testKernel( float* g_idata, float* g_odata)
{
    // shared memory
    // the size is determined by the host application
    extern __shared__ float sdata[];

    // access thread id
    const unsigned int tid = threadIdx.x;
    // access number of threads in this block
    const unsigned int num_threads = blockDim.x;

    // read in input data from global memory
    // use the bank checker macro to check for bank conflicts during host
    // emulation
    SDATA(tid) = g_idata[tid];
    __syncthreads();

    // perform some computations
    SDATA(tid) = (float) num_threads * SDATA( tid);
    __syncthreads();

    // write data to global memory
    g_odata[tid] = SDATA(tid);
}

////////////////////////////////////////////////////////////////////////////////
//! 
//! Run a simple test for CUDA
//!
//! Fills a host buffer with the values 0 .. num_threads-1, copies it to the
//! device, runs testKernel on a single block and copies the result back.
//! When the "regression" command line flag is present the result is written
//! to ./data/regression.dat; otherwise it is compared against the output of
//! computeGold and "Test PASSED" / "Test FAILED" is printed.
//!
//! A failed host allocation prints a message to stderr and terminates the
//! process with EXIT_FAILURE.
//!
//! @param argc  argument count, forwarded to CUT_DEVICE_INIT and
//!              cutCheckCmdLineFlag
//! @param argv  argument vector, forwarded to CUT_DEVICE_INIT and
//!              cutCheckCmdLineFlag
////////////////////////////////////////////////////////////////////////////////
void
runTest( int argc, char** argv)
{
    printf("RUNTEST\n");

    CUT_DEVICE_INIT(argc, argv);

    // the timer covers allocation, both transfers and the kernel launch
    unsigned int timer = 0;
    CUT_SAFE_CALL( cutCreateTimer( &timer));
    CUT_SAFE_CALL( cutStartTimer( timer));

    const unsigned int num_threads = 32;
    const unsigned int mem_size = sizeof( float) * num_threads;

    // allocate host memory
    float* h_idata = (float*) malloc( mem_size);
    if( NULL == h_idata)
    {
        fprintf( stderr, "Host allocation of input buffer failed (%u bytes)\n",
                 mem_size);
        exit( EXIT_FAILURE);
    }
    // initialize the memory
    for( unsigned int i = 0; i < num_threads; ++i)
    {
        h_idata[i] = (float) i;
    }

    // allocate device memory
    float* d_idata;
    CUDA_SAFE_CALL( cudaMalloc( (void**) &d_idata, mem_size));
    // copy host memory to device
    CUDA_SAFE_CALL( cudaMemcpy( d_idata, h_idata, mem_size,
                                cudaMemcpyHostToDevice) );

    // allocate device memory for result
    float* d_odata;
    CUDA_SAFE_CALL( cudaMalloc( (void**) &d_odata, mem_size));

    // setup execution parameters
    dim3  grid( 1, 1, 1);
    dim3  threads( num_threads, 1, 1);

    // execute the kernel; mem_size is the dynamic shared memory size, one
    // float per thread
    testKernel<<< grid, threads, mem_size >>>( d_idata, d_odata);

    // check if kernel execution generated an error
    CUT_CHECK_ERROR("Kernel execution failed");

    // allocate mem for the result on host side
    float* h_odata = (float*) malloc( mem_size);
    if( NULL == h_odata)
    {
        fprintf( stderr, "Host allocation of output buffer failed (%u bytes)\n",
                 mem_size);
        exit( EXIT_FAILURE);
    }
    // copy result from device to host
    CUDA_SAFE_CALL( cudaMemcpy( h_odata, d_odata, mem_size,
                                cudaMemcpyDeviceToHost) );

    CUT_SAFE_CALL( cutStopTimer( timer));
    printf( "Processing time: %f (ms)\n", cutGetTimerValue( timer));
    CUT_SAFE_CALL( cutDeleteTimer( timer));

    // compute reference solution
    float* reference = (float*) malloc( mem_size);
    if( NULL == reference)
    {
        fprintf( stderr, "Host allocation of reference buffer failed (%u bytes)\n",
                 mem_size);
        exit( EXIT_FAILURE);
    }
    computeGold( reference, h_idata, num_threads);

    // check result
    if( cutCheckCmdLineFlag( argc, (const char**) argv, "regression"))
    {
        // write file for regression test
        CUT_SAFE_CALL( cutWriteFilef( "./data/regression.dat",
                                      h_odata, num_threads, 0.0f));
    }
    else
    {
        // custom output handling when no regression test running
        // in this case check if the result is equivalent to the expected solution
        CUTBoolean res = cutComparef( reference, h_odata, num_threads);
        printf( "Test %s\n", (1 == res) ? "PASSED" : "FAILED");
    }

    // cleanup memory
    free( h_idata);
    free( h_odata);
    free( reference);
    CUDA_SAFE_CALL(cudaFree(d_idata));
    CUDA_SAFE_CALL(cudaFree(d_odata));
}