2#include "math/parallelacc.h"
3#include "math/parallelcu.h"
4#include "tool/externfunc.h"
17 return TINKER_FCALL2(acc1, cu1,
reduceSum, gpu_a, nelem, queue);
27template <
class HT,
size_t HN,
class DPTR>
28void reduceSum2(HT (&h_ans)[HN], DPTR v,
size_t nelem,
int queue)
31 TINKER_FCALL2(acc1, cu1,
reduceSum2, h_ans, v, nelem, queue);
69template <
class HT,
size_t HN,
class DPTR>
82T
dotProd(
const T* a,
const T* b,
size_t nelem,
int queue)
85 return TINKER_FCALL1(acc1, cu0,
dotProd, a, b, nelem, queue);
91void dotProd(T* ans,
const T* a,
const T* b,
size_t nelem,
int queue)
94 TINKER_FCALL2(acc1, cu1,
dotProd, ans, a, b, nelem, queue);
105 TINKER_FCALL2(acc1, cu1,
scaleArray, dst, scal, nelem, queue);
void reduceSumOnDevice(T *dp_ans, const T *a, size_t nelem, int queue)
Sum over all of the elements of a 1D array. This routine will save the result on the device memory i...
Definition: parallel.h:46
T dotProd(const T *a, const T *b, size_t nelem, int queue)
Dot product of two linear arrays.
Definition: parallel.h:82
T reduceSum(const T *gpu_a, size_t nelem, int queue)
Sum over all of the elements of a 1D array.
Definition: parallel.h:14
void reduceSum2(HT(&h_ans)[HN], DPTR v, size_t nelem, int queue)
Sum over all of the elements of a 2D array.
Definition: parallel.h:28
void reduceSum2OnDevice(HT(&dref)[HN], DPTR v, size_t nelem, int queue)
Sum over all of the elements of a 2D array. This routine will save the result on the device memory in...
Definition: parallel.h:70
void scaleArray(T *dst, T scal, size_t nelem, int queue)
Multiply all of the elements in a 1D array by a scalar.
Definition: parallel.h:102