template <
class HT,
size_t HN,
class DPTR>
void reduceSum2_cu(HT (&h_ans)[HN], DPTR v,
size_t nelem,
int queue);
template <
class HT,
size_t HN,
class DPTR>
void dotProd_cu(T* ans,
const T* a,
const T* b,
size_t nelem,
int queue);
void dotProd_cu(T *ans, const T *a, const T *b, size_t nelem, int queue)
void reduceSum2_cu(HT(&h_ans)[HN], DPTR v, size_t nelem, int queue)
void reduceSumOnDevice_cu(T *, const T *, size_t, int)
void scaleArray_cu(T *dst, T scal, size_t nelem, int queue)
void reduceSum2OnDevice_cu(HT(&)[HN], DPTR, size_t, int)
T reduceSum_cu(const T *a, size_t nelem, int queue)