Tinker9 70bd052 (Thu Nov 9 12:11:35 2023 -0800)
Loading...
Searching...
No Matches
parallel.h
1#pragma once
2#include "math/parallelacc.h"
3#include "math/parallelcu.h"
4#include "tool/externfunc.h"
5#include "tool/macro.h"
6
7namespace tinker {
13template <class T>
14T reduceSum(const T* gpu_a, size_t nelem, int queue)
15{
16 // TINKER_FVOID2(acc1, cu1, T, reduceSum, const T*, size_t, int);
17 return TINKER_FCALL2(acc1, cu1, reduceSum, gpu_a, nelem, queue);
18}
19
27template <class HT, size_t HN, class DPTR>
28void reduceSum2(HT (&h_ans)[HN], DPTR v, size_t nelem, int queue)
29{
30 // TINKER_FVOID2(acc1, cu1, HT (&)[HN], DPTR, size_t, int);
31 TINKER_FCALL2(acc1, cu1, reduceSum2, h_ans, v, nelem, queue);
32}
33
45template <class T>
46void reduceSumOnDevice(T* dp_ans, const T* a, size_t nelem, int queue)
47{
48 // TINKER_FVOID2(acc1, cu1, reduceSumOnDevice, T*, const T*, size_t, int);
49 TINKER_FCALL2(acc1, cu1, reduceSumOnDevice, dp_ans, a, nelem, queue);
50}
51
69template <class HT, size_t HN, class DPTR>
70void reduceSum2OnDevice(HT (&dref)[HN], DPTR v, size_t nelem, int queue)
71{
72 // TINKER_FVOID2(cu, 1, acc, 1 reduceSum2OnDevice, ...);
73 TINKER_FCALL2(acc1, cu1, reduceSum2OnDevice, dref, v, nelem, queue);
74}
75
81template <class T>
82T dotProd(const T* a, const T* b, size_t nelem, int queue)
83{
84 // TINKER_FVOID1(acc1, cu0, T, dotProd, const T*, const T*, size_t, int);
85 return TINKER_FCALL1(acc1, cu0, dotProd, a, b, nelem, queue);
86}
87
90template <class T>
91void dotProd(T* ans, const T* a, const T* b, size_t nelem, int queue)
92{
93 // TINKER_FVOID2(acc1, cu1, dotProd, T*, const T*, const T*, size_t, int);
94 TINKER_FCALL2(acc1, cu1, dotProd, ans, a, b, nelem, queue);
95}
96
101template <class T>
102void scaleArray(T* dst, T scal, size_t nelem, int queue)
103{
104 // TINKER_FVOID2(acc1, cu1, scaleArray, T*, T, size_t, int);
105 TINKER_FCALL2(acc1, cu1, scaleArray, dst, scal, nelem, queue);
106}
107}
void reduceSumOnDevice(T *dp_ans, const T *a, size_t nelem, int queue)
Sum over all of the elements of a 1D array. This routine will save the result on the device memory i...
Definition: parallel.h:46
T dotProd(const T *a, const T *b, size_t nelem, int queue)
Dot product of two linear arrays.
Definition: parallel.h:82
T reduceSum(const T *gpu_a, size_t nelem, int queue)
Sum over all of the elements of a 1D array.
Definition: parallel.h:14
void reduceSum2(HT(&h_ans)[HN], DPTR v, size_t nelem, int queue)
Sum over all of the elements of a 2D array.
Definition: parallel.h:28
void reduceSum2OnDevice(HT(&dref)[HN], DPTR v, size_t nelem, int queue)
Sum over all of the elements of a 2D array. This routine will save the result on the device memory in...
Definition: parallel.h:70
void scaleArray(T *dst, T scal, size_t nelem, int queue)
Multiply all of the elements in a 1D array by a scalar.
Definition: parallel.h:102
Definition: testrt.h:9