8template <
class HT,
size_t HN,
class DPTR>
14template <
class HT,
size_t HN,
class DPTR>
18T
dotProd_acc(
const T* a,
const T* b,
size_t nelem,
int queue);
21void dotProd_acc(T* ans,
const T* a,
const T* b,
size_t nelem,
int queue);
void reduceSum2_acc(HT(&h_ans)[HN], DPTR v, size_t nelem, int queue)
void reduceSumOnDevice_acc(T *, const T *, size_t, int)
T dotProd_acc(const T *a, const T *b, size_t nelem, int queue)
void scaleArray_acc(T *dst, T scal, size_t nelem, int queue)
T reduceSum_acc(const T *gpu_a, size_t nelem, int queue)
void reduceSum2OnDevice_acc(HT(&)[HN], DPTR, size_t, int)