Skip to content

Instantly share code, notes, and snippets.

@Dounm
Created March 18, 2021 11:01
Show Gist options
  • Save Dounm/bafb9d42feb18fd697df7103537e0b2b to your computer and use it in GitHub Desktop.
Save Dounm/bafb9d42feb18fd697df7103537e0b2b to your computer and use it in GitHub Desktop.
Test if cudaMemcpyAsync(H2D, different_streams) is sequential
#include <stdio.h>
#define NUM_STREAMS 4
/*
 * Copies iBytes bytes from fSrc to fDest by splitting the range into
 * NUM_STREAMS contiguous chunks and issuing one cudaMemcpyAsync per chunk,
 * each on its own freshly created stream.
 *
 * Parameters:
 *   fDest      - destination buffer, at least iBytes bytes large.
 *   fSrc       - source buffer, at least iBytes bytes large.
 *   iBytes     - total number of BYTES (not floats) to copy; must be >= 0.
 *   eDirection - copy direction, forwarded unchanged to the CUDA copy calls.
 *
 * Returns:
 *   cudaSuccess if the whole copy succeeded (or iBytes == 0),
 *   cudaErrorInvalidValue if iBytes is negative,
 *   otherwise the first error reported by a copy, synchronize or destroy call.
 *
 * Behavior notes:
 *   - If any stream cannot be created, the streams created so far are
 *     destroyed and the copy is done with a single blocking cudaMemcpy.
 *   - Every stream is synchronized before it is destroyed, so the data has
 *     been transferred by the time this function returns.
 */
cudaError_t memcpyUsingStreams (float *fDest,
                                float *fSrc,
                                int iBytes,
                                cudaMemcpyKind eDirection)
{
    cudaStream_t aStreams[NUM_STREAMS];
    int iCreated = 0;
    int iIndex = 0;
    cudaError_t cuError = cudaSuccess;   /* first error seen, returned to caller */
    cudaError_t cuStatus = cudaSuccess;  /* status of the most recent call */
    /* Offsets are computed in bytes, so do the pointer arithmetic on char
       pointers; adding a byte offset to a float* would overshoot by 4x. */
    char *pDest = (char *) fDest;
    const char *pSrc = (const char *) fSrc;
    size_t uChunk = 0;
    size_t uOffset = 0;
    size_t uCount = 0;

    if (iBytes < 0)
    {
        return cudaErrorInvalidValue;
    }
    if (iBytes == 0)
    {
        return cudaSuccess;
    }

    /* Create the streams, stopping at the first failure so that we know
       exactly how many valid handles exist. */
    for (iCreated = 0; iCreated < NUM_STREAMS; iCreated++)
    {
        cuError = cudaStreamCreate(&aStreams[iCreated]);
        if (cuError != cudaSuccess)
        {
            break;
        }
    }

    if (iCreated < NUM_STREAMS)
    {
        /* Not all streams are available: release the ones we did get and
           fall back to a single blocking copy of the whole range. */
        for (iIndex = 0; iIndex < iCreated; iIndex++)
        {
            (void) cudaStreamDestroy(aStreams[iIndex]);
        }
        return cudaMemcpy(fDest, fSrc, (size_t) iBytes, eDirection);
    }

    /* Chunk i covers bytes [i * uChunk, (i + 1) * uChunk); the last chunk
       additionally takes the remainder of the integer division. */
    uChunk = (size_t) iBytes / NUM_STREAMS;
    for (iIndex = 0; iIndex < NUM_STREAMS; iIndex++)
    {
        uOffset = (size_t) iIndex * uChunk;
        uCount = (iIndex == NUM_STREAMS - 1) ? ((size_t) iBytes - uOffset)
                                             : uChunk;
        if (uCount == 0)
        {
            continue;   /* iBytes < NUM_STREAMS: nothing for this stream */
        }
        cuStatus = cudaMemcpyAsync(pDest + uOffset, pSrc + uOffset, uCount,
                                   eDirection, aStreams[iIndex]);
        if (cuError == cudaSuccess)
        {
            cuError = cuStatus;
        }
    }

    /* Wait for each stream's copy to finish, then release the stream. Only
       the first error is kept so that a later successful call cannot hide
       an earlier failure. */
    for (iIndex = 0; iIndex < NUM_STREAMS; iIndex++)
    {
        cuStatus = cudaStreamSynchronize(aStreams[iIndex]);
        if (cuError == cudaSuccess)
        {
            cuError = cuStatus;
        }
        cuStatus = cudaStreamDestroy(aStreams[iIndex]);
        if (cuError == cudaSuccess)
        {
            cuError = cuStatus;
        }
    }

    return cuError;
}
/*
 * Test driver: allocates a pinned host buffer and a device buffer of the
 * same size, fills the first 256*256*256 host elements with their own index,
 * and copies the whole host buffer to the device via memcpyUsingStreams.
 *
 * Returns 0 on success and 1 if any CUDA call reported an error (the error
 * string is printed to stderr).
 */
int main()
{
    /* Number of float ELEMENTS in each buffer (2^28 floats = 1 GiB). */
    const size_t numFloats = (size_t) 256 * 256 * 256 * 16;
    const size_t numBytes = sizeof (float) * numFloats;
    /* Only this many leading elements are written before the copy; the rest
       of the host buffer is transferred with whatever contents it has. */
    const int numInitialized = 256 * 256 * 256;
    float *hdata = NULL;
    float *ddata = NULL;
    cudaError_t cuError = cudaSuccess;
    cudaError_t cuSync = cudaSuccess;
    int index;

    cuError = cudaMallocHost ((void **) &hdata, numBytes);
    if (cuError != cudaSuccess)
    {
        fprintf (stderr, "cudaMallocHost failed: %s\n",
                 cudaGetErrorString (cuError));
        return 1;
    }

    cuError = cudaMalloc ((void **) &ddata, numBytes);
    if (cuError != cudaSuccess)
    {
        fprintf (stderr, "cudaMalloc failed: %s\n",
                 cudaGetErrorString (cuError));
        cudaFreeHost (hdata);
        return 1;
    }

    /* Same values the nested i/j/k loops produced: element n holds n. */
    for (index = 0; index < numInitialized; index++)
    {
        hdata[index] = (float) index;
    }

    /* numBytes is 2^30 here, which fits in the int byte-count parameter. */
    cuError = memcpyUsingStreams (ddata, hdata, (int) numBytes,
                                  cudaMemcpyHostToDevice);
    if (cuError != cudaSuccess)
    {
        fprintf (stderr, "memcpyUsingStreams failed: %s\n",
                 cudaGetErrorString (cuError));
    }

    /* Make sure any copies still in flight have finished (and surface any
       asynchronous error) before the buffers are released. */
    cuSync = cudaDeviceSynchronize ();
    if (cuSync != cudaSuccess)
    {
        fprintf (stderr, "cudaDeviceSynchronize failed: %s\n",
                 cudaGetErrorString (cuSync));
        if (cuError == cudaSuccess)
        {
            cuError = cuSync;
        }
    }

    cudaFree (ddata);
    cudaFreeHost (hdata);
    return (cuError == cudaSuccess) ? 0 : 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment