Created
April 23, 2019 14:13
-
-
Save mcleary/5915b184ada922d6739710d1ad54e575 to your computer and use it in GitHub Desktop.
Very simple OpenCL application.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdlib>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#define CL_HPP_TARGET_OPENCL_VERSION 200
#define CL_HPP_ENABLE_EXCEPTIONS
#include <CL/cl2.hpp>

using namespace std;
int main() { | |
std::string kernelsSrc{ R"CLC( | |
kernel void add( | |
global float* restrict a, | |
global float* restrict b, | |
global float* restrict c, | |
size_t N, | |
float A, | |
float B, | |
float C | |
) | |
{ | |
const size_t i = get_global_id(0); | |
if (i < N) | |
{ | |
// Do some stupid calculations | |
for (int t = 0; t < 50; ++t) | |
c[i] = A * sin(a[i]) + B * cos(b[i]) + sqrt(A * cos(a[i]) * B * sin(b[i])); | |
c[i] /= C * tan(c[i]) + 1; | |
} | |
} | |
)CLC" }; | |
vector<cl::Platform> platforms; | |
cl::Platform::get(&platforms); | |
const size_t platformIndexToUse = 0; | |
const size_t deviceIndexToUse = 0; | |
cl::Platform platform; | |
cl::Device device; | |
for (size_t platIndex = 0; platIndex < platforms.size(); ++platIndex) | |
{ | |
const cl::Platform& plat = platforms.at(platIndex); | |
cout << "[" << platIndex << "]: " << plat.getInfo<CL_PLATFORM_NAME>() << endl; | |
vector<cl::Device> devices; | |
plat.getDevices(CL_DEVICE_TYPE_ALL, &devices); | |
for (size_t devIndex = 0; devIndex < devices.size(); ++devIndex) | |
{ | |
const cl::Device& dev = devices.at(devIndex); | |
cout << "\t[" << devIndex << "]: " << dev.getInfo<CL_DEVICE_NAME>() << endl; | |
} | |
if (platformIndexToUse == platIndex) | |
{ | |
platform = plat; | |
device = devices.at(deviceIndexToUse); | |
} | |
} | |
cout << endl; | |
cout << "Running on " << device.getInfo<CL_DEVICE_NAME>() << endl; | |
cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE; | |
cl::Context context{ device }; | |
cl::CommandQueue queue{ context, props }; | |
cl::Program program{ context, kernelsSrc }; | |
try | |
{ | |
program.build(); | |
} | |
catch (const cl::Error& err) | |
{ | |
cout << err.what() << endl; | |
for (auto p : program.getBuildInfo<CL_PROGRAM_BUILD_LOG>()) | |
{ | |
cout << p.second << endl; | |
} | |
exit(1); | |
} | |
float cA = 1234; | |
float cB = 4321; | |
float cC = 5678; | |
auto addKernel = cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, size_t, float, float, float>(program, "add"); | |
// size_t N = 100'000'000; | |
size_t N = 10'000; | |
cout << "Initializing ... " << flush; | |
vector<float> A(N, 123); | |
vector<float> B(N, 111); | |
vector<float> C(N); | |
cout << "done" << endl; | |
const size_t bufSize = A.size() * sizeof(float); | |
cl::Buffer bufA{ context, CL_MEM_READ_ONLY, bufSize }; | |
cl::Buffer bufB{ context, CL_MEM_READ_ONLY, bufSize }; | |
cl::Buffer bufC{ context, CL_MEM_WRITE_ONLY, bufSize }; | |
cl::copy(queue, begin(A), end(A), bufA); | |
cl::copy(queue, begin(B), end(B), bufB); | |
cout << "Copying ... "; | |
queue.finish(); | |
cout << "done" << endl; | |
for (int i = 0; i < 5; ++i) | |
{ | |
cl::Event e = addKernel(cl::EnqueueArgs{ queue, cl::NDRange{A.size()} }, bufA, bufB, bufC, A.size(), cA, cB, cC); | |
e.wait(); | |
cout << "Queued : " << e.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() << endl; | |
cout << "Submit : " << e.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>() << endl; | |
cout << "Start : " << e.getProfilingInfo<CL_PROFILING_COMMAND_START>() << endl; | |
cout << "End : " << e.getProfilingInfo<CL_PROFILING_COMMAND_END>() << endl; | |
cout << endl; | |
} | |
cout << "Copying back ..." << flush; | |
cl::copy(queue, bufC, begin(C), end(C)); | |
cout << "done" << endl; | |
cout << endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment