Skip to content

Instantly share code, notes, and snippets.

@nattoheaven
Last active August 29, 2015 13:57
Show Gist options
  • Save nattoheaven/9369884 to your computer and use it in GitHub Desktop.
Save nattoheaven/9369884 to your computer and use it in GitHub Desktop.
Test for HSA Example
#include <cstddef>
#include <ctime>
#include <iostream>
#include <string>
#include <vector>
#define _mm_malloc(a, b) _aligned_malloc(a, b)
#include <CL/cl.h>
// OpenCL C source of the "triad" kernel. Each work-item reads its global id
// in dimension 0 into i and performs c[i] += a[i] * b[i].
const char triad_kernel[] =
  // Signature: the accumulated buffer first, then the two read-only inputs.
  "__kernel void\ntriad("
  "__global float *c,\n"
  "__global const float *a,\n"
  "__global const float *b)\n"
  // Body: one multiply-add per work-item.
  "{\n"
  "size_t i;\ni = get_global_id(0);\n"
  "c[i] += a[i] * b[i];\n"
  "}\n";
int
main()
{
cl_int clerr;
cl_uint nplatforms;
clerr = clGetPlatformIDs(0, 0, &nplatforms);
cl_platform_id *platforms = new cl_platform_id[nplatforms];
clerr = clGetPlatformIDs(nplatforms, platforms, 0);
for (cl_uint i = 0; i < nplatforms; ++i) {
std::cout << "PLATFORM:\t" << i << std::endl;
cl_platform_info param_names[] = {
CL_PLATFORM_PROFILE,
CL_PLATFORM_VERSION,
CL_PLATFORM_NAME,
CL_PLATFORM_VENDOR,
CL_PLATFORM_EXTENSIONS,
};
const size_t nparam_names = sizeof(param_names) / sizeof(cl_platform_info);
for (size_t j = 0; j < nparam_names; ++j) {
size_t param_value_size;
clerr = clGetPlatformInfo(platforms[i],
param_names[j],
0,
0,
&param_value_size);
char *param_value = new char[param_value_size];
clerr = clGetPlatformInfo(platforms[i],
param_names[j],
param_value_size,
param_value,
0);
std::cout << "\t" << param_value << std::endl;
delete[] param_value;
}
cl_uint ndevices;
clerr = clGetDeviceIDs(platforms[i],
CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU,
0,
0,
&ndevices);
cl_device_id *devices = new cl_device_id[ndevices];
clerr = clGetDeviceIDs(platforms[i],
CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU,
ndevices,
devices,
0);
for (cl_uint j = 0; j < ndevices; ++j) {
cl_device_type type;
clerr = clGetDeviceInfo(devices[j],
CL_DEVICE_TYPE,
sizeof(type),
&type,
0);
std::cout << "\tDEVICE:\t" << j << ":\t";
switch (type) {
case CL_DEVICE_TYPE_CPU:
std::cout << "CPU" << std::endl;
break;
case CL_DEVICE_TYPE_GPU:
std::cout << "GPU" << std::endl;
break;
default:
std::cout << "UNKNOWN" << std::endl;
break;
}
}
cl_context_properties context_properties[] = {
CL_CONTEXT_PLATFORM,
(cl_context_properties) platforms[i],
0
};
cl_context context = clCreateContext(context_properties,
ndevices,
devices,
0,
0,
&clerr);
const char *program_sources[] = { triad_kernel };
const size_t program_lengths[] = { sizeof(triad_kernel) };
cl_program program = clCreateProgramWithSource(context,
1,
program_sources,
program_lengths,
&clerr);
clerr = clBuildProgram(program,
ndevices,
devices,
0,
0,
0);
cl_command_queue *queues = new cl_command_queue[ndevices];
for (cl_uint j = 0; j < ndevices; ++j) {
size_t param_value_size;
clerr = clGetProgramBuildInfo(program,
devices[j],
CL_PROGRAM_BUILD_LOG,
0,
0,
&param_value_size);
char *param_value = new char[param_value_size];
clerr = clGetProgramBuildInfo(program,
devices[j],
CL_PROGRAM_BUILD_LOG,
param_value_size,
param_value,
0);
std::cout << param_value << std::endl;
delete[] param_value;
queues[j] = clCreateCommandQueue(context,
devices[j],
0,
&clerr);
}
const size_t n = 0x04000000;
const size_t alignment = 4096;
float *a =
reinterpret_cast<float *>(_mm_malloc(n * sizeof(float), alignment));
float *b =
reinterpret_cast<float *>(_mm_malloc(n * sizeof(float), alignment));
float *c =
reinterpret_cast<float *>(_mm_malloc(n * sizeof(float), alignment));
for (ptrdiff_t j = 0; j < n; ++j) {
a[j] = 1.0f;
b[j] = 2.0f;
c[j] = 0.0f;
}
cl_mem cl_a = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
n * sizeof(float),
a,
&clerr);
cl_mem cl_b = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
n * sizeof(float),
b,
&clerr);
cl_mem cl_c = clCreateBuffer(context,
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
n * sizeof(float),
c,
&clerr);
cl_kernel kernel = clCreateKernel(program,
"triad",
&clerr);
clerr = clSetKernelArg(kernel,
0,
sizeof(cl_c),
&cl_c);
clerr = clSetKernelArg(kernel,
1,
sizeof(cl_a),
&cl_a);
clerr = clSetKernelArg(kernel,
2,
sizeof(cl_b),
&cl_b);
const int niter = 300;
unsigned long long *times = new unsigned long long[ndevices + 1];
for (cl_uint j = 0; j < ndevices + 1; ++j) {
times[j] = 0;
}
time_t time0;
time0 = time(0);
for (int iter = 0; iter < niter; ++iter) {
for (cl_uint j = 0; j < ndevices; ++j) {
time_t time1 = time0;
cl_event event;
clerr = clEnqueueNDRangeKernel(queues[j],
kernel,
1,
0,
&n,
0,
0,
0,
&event);
clerr = clWaitForEvents(1,
&event);
time0 = time(0);
times[j] += time0 - time1;
}
{
time_t time1 = time0;
#pragma omp parallel for
for (ptrdiff_t j = 0; j < n; ++j) {
c[j] += a[j] * b[j];
}
time0 = time(0);
times[ndevices] += time0 - time1;
}
}
std::cout << "Inter-Device Accesses" << std::endl;
for (cl_uint j = 0; j < ndevices + 1; ++j) {
std::cout << times[j] << "\tseconds" << std::endl;
double gflops = 3.0e-9 * n * niter / times[j];
std::cout << gflops << "\tGFLOPS" << std::endl;
double gbs = 3.0e-9 * n * niter * sizeof(float) / times[j];
std::cout << gbs << "\tGB/s" << std::endl;
}
for (cl_uint j = 0; j < ndevices + 1; ++j) {
times[j] = 0;
}
time0 = time(0);
for (cl_uint j = 0; j < ndevices; ++j) {
for (int iter = 0; iter < niter; ++iter) {
time_t time1 = time0;
cl_event event;
clerr = clEnqueueNDRangeKernel(queues[j],
kernel,
1,
0,
&n,
0,
0,
0,
&event);
clerr = clWaitForEvents(1,
&event);
time0 = time(0);
times[j] += time0 - time1;
}
}
{
for (int iter = 0; iter < niter; ++iter) {
time_t time1 = time0;
#pragma omp parallel for
for (ptrdiff_t j = 0; j < n; ++j) {
c[j] += a[j] * b[j];
}
time0 = time(0);
times[ndevices] += time0 - time1;
}
}
std::cout << "Intra-Device Accesses" << std::endl;
for (cl_uint j = 0; j < ndevices + 1; ++j) {
std::cout << times[j] << "\tseconds" << std::endl;
double gflops = 3.0e-9 * n * niter / times[j];
std::cout << gflops << "\tGFLOPS" << std::endl;
double gbs = 3.0e-9 * n * niter * sizeof(float) / times[j];
std::cout << gbs << "\tGB/s" << std::endl;
}
double sum = 0.0;
for (ptrdiff_t j = 0; j < n; ++j) {
sum += c[j];
}
std::cout << sum << std::endl;
std::cout << 2.0 * n * niter * (ndevices + 1) * 2 << std::endl;
}
std::string wait;
std::cin >> wait;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment