-
-
Save ddemidov/499e37801a326bd17a04feac1843ba0b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <vector> | |
#include <array> | |
#include <iostream> | |
#include <chrono> | |
#include <cassert> | |
#include <cmath> | |
#include <omp.h> | |
template< | |
class TContainerType, | |
class TIteratorType=decltype(std::declval<TContainerType>().begin()), | |
int TMaxThreads=128 | |
> | |
class BlockPartition | |
{ | |
public: | |
/** @param it_begin - iterator pointing at the beginning of the container | |
* @param it_end - iterator pointing to the end of the container | |
* @param Nchunks - number of threads to be used in the loop (must be lower than TMaxThreads) | |
*/ | |
BlockPartition(TIteratorType it_begin, | |
TIteratorType it_end, | |
int Nchunks = omp_get_max_threads()) | |
{ | |
//KRATOS_ERROR_IF(Nchunks < 1) << "Number of chunks must be > 0 (and not " << Nchunks << ")" << std::endl; | |
const std::ptrdiff_t size_container = it_end-it_begin; | |
if (size_container == 0) { | |
mNchunks = Nchunks; | |
} else { | |
// in case the container is smaller than the number of chunks | |
mNchunks = std::min(static_cast<int>(size_container), Nchunks); | |
} | |
const std::ptrdiff_t block_partition_size = size_container / mNchunks; | |
mBlockPartition[0] = it_begin; | |
mBlockPartition[mNchunks] = it_end; | |
for (int i=1; i<mNchunks; i++) { | |
mBlockPartition[i] = mBlockPartition[i-1] + block_partition_size; | |
} | |
} | |
/** @param rData - the continer to be iterated upon | |
* @param Nchunks - number of threads to be used in the loop (must be lower than TMaxThreads) | |
*/ | |
BlockPartition(TContainerType &&rData, int Nchunks = omp_get_max_threads()) | |
: BlockPartition(rData.begin(), rData.end(), Nchunks) | |
{} | |
virtual ~BlockPartition() = default; | |
/** @brief simple iteration loop. f called on every entry in rData | |
* @param f - must be a unary function accepting as input TContainerType::value_type& | |
*/ | |
template <class TUnaryFunction> | |
inline void for_each(TUnaryFunction&& f) | |
{ | |
#pragma omp parallel for | |
for (int i=0; i<mNchunks; ++i) { | |
for (auto it = mBlockPartition[i]; it != mBlockPartition[i+1]; ++it) { | |
f(*it); //note that we pass the value to the function, not the iterator | |
} | |
} | |
} | |
/** @brief loop allowing reductions. f called on every entry in rData | |
* the function f needs to return the values to be used by the reducer | |
* @param TReducer template parameter specifying the reduction operation to be done | |
* @param f - must be a unary function accepting as input TContainerType::value_type& | |
*/ | |
template <class TReducer, class TUnaryFunction> | |
inline typename TReducer::value_type for_each(TUnaryFunction &&f) | |
{ | |
TReducer global_reducer; | |
#pragma omp parallel for | |
for (int i=0; i<mNchunks; ++i) { | |
TReducer local_reducer; | |
for (auto it = mBlockPartition[i]; it != mBlockPartition[i+1]; ++it) { | |
local_reducer.LocalReduce(f(*it)); | |
} | |
global_reducer.ThreadSafeReduce(local_reducer); | |
} | |
return global_reducer.GetValue(); | |
} | |
/** @brief loop with thread local storage (TLS). f called on every entry in rData | |
* @param TThreadLocalStorage template parameter specifying the thread local storage | |
* @param f - must be a function accepting as input TContainerType::value_type& and the thread local storage | |
*/ | |
template <class TThreadLocalStorage, class TFunction> | |
inline void for_each(const TThreadLocalStorage& rThreadLocalStoragePrototype, TFunction &&f) | |
{ | |
static_assert(std::is_copy_constructible<TThreadLocalStorage>::value, "TThreadLocalStorage must be copy constructible!"); | |
#pragma omp parallel | |
{ | |
// copy the prototype to create the thread local storage | |
TThreadLocalStorage thread_local_storage(rThreadLocalStoragePrototype); | |
#pragma omp for | |
for(int i=0; i<mNchunks; ++i){ | |
for (auto it = mBlockPartition[i]; it != mBlockPartition[i+1]; ++it){ | |
f(*it, thread_local_storage); // note that we pass the value to the function, not the iterator | |
} | |
} | |
} | |
} | |
/** @brief loop with thread local storage (TLS) allowing reductions. f called on every entry in rData | |
* the function f needs to return the values to be used by the reducer | |
* @param TReducer template parameter specifying the reduction operation to be done | |
* @param TThreadLocalStorage template parameter specifying the thread local storage | |
* @param f - must be a function accepting as input TContainerType::value_type& and the thread local storage | |
*/ | |
template <class TReducer, class TThreadLocalStorage, class TFunction> | |
inline typename TReducer::value_type for_each(const TThreadLocalStorage& rThreadLocalStoragePrototype, TFunction &&f) | |
{ | |
static_assert(std::is_copy_constructible<TThreadLocalStorage>::value, "TThreadLocalStorage must be copy constructible!"); | |
TReducer global_reducer; | |
#pragma omp parallel | |
{ | |
// copy the prototype to create the thread local storage | |
TThreadLocalStorage thread_local_storage(rThreadLocalStoragePrototype); | |
#pragma omp for | |
for (int i=0; i<mNchunks; ++i) { | |
TReducer local_reducer; | |
for (auto it = mBlockPartition[i]; it != mBlockPartition[i+1]; ++it) { | |
local_reducer.LocalReduce(f(*it, thread_local_storage)); | |
} | |
global_reducer.ThreadSafeReduce(local_reducer); | |
} | |
} | |
return global_reducer.GetValue(); | |
} | |
private: | |
int mNchunks; | |
std::array<TIteratorType, TMaxThreads> mBlockPartition; | |
}; | |
/** @brief simplified version of the basic loop (without reduction) to enable template type deduction | |
* @param v - containers to be looped upon | |
* @param func - must be a unary function accepting as input TContainerType::value_type& | |
*/ | |
template <class TContainerType, class TFunctionType> | |
void block_for_each(TContainerType &&v, TFunctionType &&func) | |
{ | |
BlockPartition<TContainerType>(v.begin(), v.end()).for_each(std::forward<TFunctionType>(func)); | |
} | |
int main() { | |
std::vector<double> a{1.0,2.0,3.0}; | |
const auto& const_a = a; | |
block_for_each(a, [&](double& item) | |
{ | |
std::cout << item << std::endl; | |
}); | |
block_for_each(const_a, [&](const double& item) | |
{ | |
std::cout << item << std::endl; | |
}); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment