Tinker9 70bd052 (Thu Nov 9 12:11:35 2023 -0800)
Loading...
Searching...
No Matches
sinhc.h
1#pragma once
2#include "math/libfunc.h"
3#include "seq/seq.h"
4#include "tool/macro.h"
5
6// sinhc on host
7namespace tinker {
10template <class T>
11T sinhc(T x)
12{
13 T y = std::fabs(x);
14 if (y <= (T)1.0e-8)
15 return (T)1;
16 else
17 return std::sinh(y) / y;
18}
19}
20
21// sinhc on device
22namespace tinker {
23inline namespace v1 {
24#pragma acc routine seq
25template <int N>
27inline void fsinhcCheck()
28{
29 static_assert(1 <= N && N <= 7, "1 <= N <= 7 is violated.");
30}
31
32#pragma acc routine seq
33template <int N, class T>
35inline T fsinhcPade44(T x2)
36{
37 fsinhcCheck<N>();
38 constexpr int M = N - 1;
39 // Pade coefficients
40 // clang-format off
41 constexpr T c[][2][3] = {
42 {{1./1 , 53./396 , 551./166320 },{1.,-13./396 , 5./11088 }},
43 {{1./3 ,211./8580 ,2647./6486480 },{1.,-15./572 ,17./61776 }},
44 {{1./15 ,271./81900 , 7./171600 },{1.,-17./780 ,19./102960}},
45 {{1./105 ,113./321300 ,1889./551350800 },{1.,-19./1020, 7./53040 }},
46 {{1./945 , 83./2686068 ,7789./31426995600 },{1.,-21./1292,23./232560}},
47 {{1./10395 ,499./215675460 ,3461./219988969200},{1.,-23./1596,25./325584}},
48 {{1./135135,197./1305404100, 409./459976935600},{1.,-25./1932, 3./48944 }}};
49 // clang-format on
50 return (c[M][0][0] + x2 * (c[M][0][1] + x2 * c[M][0][2]))
51 / (c[M][1][0] + x2 * (c[M][1][1] + x2 * c[M][1][2]));
52}
53
54#pragma acc routine seq
55template <int N, class T>
57inline T fsinhcTaylor(T x2)
58{
59 fsinhcCheck<N>();
60 constexpr int M = N - 1;
61 // clang-format off
62 constexpr T c[][5] = {
63 {1. / 1 , 1. / 6 , 1. / 20, 1. / 42 , 1. / 72 },
64 {1. / 3 , 1. / 10, 1. / 28, 1. / 54 , 1. / 88 },
65 {1. / 15 , 1. / 14, 1. / 36, 1. / 66 , 1. / 104},
66 {1. / 105 , 1. / 18, 1. / 44, 1. / 78 , 1. / 120},
67 {1. / 945 , 1. / 22, 1. / 52, 1. / 90 , 1. / 136},
68 {1. / 10395 , 1. / 26, 1. / 60, 1. / 102, 1. / 152},
69 {1. / 135135, 1. / 30, 1. / 68, 1. / 114, 1. / 168}};
70 return c[M][0]*(1+x2*c[M][1]*(1+x2*c[M][2]*(1+x2*c[M][3]*(1+x2*c[M][4]))));
71 // clang-format on
72}
73
74#pragma acc routine seq
75template <int N, class T>
77inline T fsinhcSeries(T x2)
78{
79 return fsinhcTaylor<N>(x2);
80 // return fsinhcPade44<N>(x2);
81}
82
83#pragma acc routine seq
84template <class T>
86inline T fsinhcAnalyt7(T d, T d13, T y /* exp(-d) */, T z /* exp(+d) */)
87{
88 T cy, cz;
89 cy = d * (d * (d * (d * (d * (d + 21) + 210) + 1260) + 4725) + 10395)
90 + 10395;
91 cy = -cy;
92 cz = d * (d * (d * (d * (d * (d - 21) + 210) - 1260) + 4725) - 10395)
93 + 10395;
94 return (cy * y + cz * z) / (2 * d13);
95}
96
97#pragma acc routine seq
98template <class T>
100inline T fsinhcAnalyt6(T d, T d11, T y /* exp(-d) */, T z /* exp(+d) */)
101{
102 T cy, cz;
103 cy = d * (d * (d * (d * (d + 15) + 105) + 420) + 945) + 945;
104 cz = d * (d * (d * (d * (d - 15) + 105) - 420) + 945) - 945;
105 return (cy * y + cz * z) / (2 * d11);
106}
107
108#pragma acc routine seq
109template <class T>
111inline T fsinhcAnalyt5(T d, T d9, T y /* exp(-d) */, T z /* exp(+d) */)
112{
113 T cy, cz;
114 cy = d * (d * (d * (d + 10) + 45) + 105) + 105;
115 cy = -cy;
116 cz = d * (d * (d * (d - 10) + 45) - 105) + 105;
117 return (cy * y + cz * z) / (2 * d9);
118}
119
120#pragma acc routine seq
121template <class T>
123inline T fsinhcAnalyt4(T d, T d7, T y /* exp(-d) */, T z /* exp(+d) */)
124{
125 T cy, cz;
126 cy = d * (d * (d + 6) + 15) + 15;
127 cz = d * (d * (d - 6) + 15) - 15;
128 return (cy * y + cz * z) / (2 * d7);
129}
130
131#pragma acc routine seq
132template <class T>
134inline T fsinhcAnalyt3(T d, T d5, T y /* exp(-d) */, T z /* exp(+d) */)
135{
136 T cy, cz;
137 cy = (d + 3) * d + 3;
138 cy = -cy;
139 cz = (d - 3) * d + 3;
140 return (cy * y + cz * z) / (2 * d5);
141}
142
143#pragma acc routine seq
144template <class T>
146inline T fsinhcAnalyt2(T d, T d3, T y /* exp(-d) */, T z /* exp(+d) */)
147{
148 T cy, cz;
149 cy = d + 1;
150 cz = d - 1;
151 return (cy * y + cz * z) / (2 * d3);
152}
153
154#pragma acc routine seq
155template <class T>
157inline T fsinhcAnalyt1(T d, T y /* exp(-d) */, T z /* exp(+d) */)
158{
159 T cy, cz;
160 cy = -1;
161 cz = 1;
162 return (cy * y + cz * z) / (2 * d);
163}
164
165#pragma acc routine seq
166template <int N, class T>
168inline void fsinhcImpl(T d, T& restrict f1d, T& restrict f2d, T& restrict f3d,
169 T& restrict f4d, T& restrict f5d, T& restrict f6d, T& restrict f7d)
170{
171 fsinhcCheck<N>();
172 constexpr int M = N - 1;
173
174 // (x, Approx. |Analyt - Taylor|)
175 //
176 // f1 (0.90, 1e-8) (0.35, 1e-12) (0.28, 1e-13)
177 // f2 (1.15, 1e-8) (0.45, 1e-12) (0.37, 1e-13)
178 // f3 (1.5, 1e-8) (0.6, 1e-12) (0.48, 1e-13)
179 // f4 (2.0, 1e-8) (0.8, 1e-12) (0.64, 1e-13)
180 // f5 (2.7, 1e-8) (1.1, 1e-12) (0.87, 1e-13)
181 // f6 (3.7, 1e-8) (1.5, 1e-12) (1.16, 1e-13)
182 // f7 (5.0, 1e-8) (2.0, 1e-12) (1.60, 1e-13)
183
184 // double epsd[] = {0.35, 0.45, 0.60, 0.80, 1.10, 1.50, 2.00};
185 // double epsd[] = {0.28, 0.37, 0.48, 0.64, 0.87, 1.16, 1.60};
186 // float epsfl[] = {0.90, 1.15, 1.50, 2.00, 2.70, 3.70, 5.00};
187
188 // (x, Approx. relative error)
189 //
190 // f1 (0.92, 1e-8) (0.28, 1e-13)
191 // f2 (1.06, 1e-8) (0.33, 1e-13)
192 // f3 (1.19, 1e-8) (0.37, 1e-13)
193 // f4 (1.30, 1e-8) (0.40, 1e-13)
194 // f5 (1.40, 1e-8) (0.43, 1e-13)
195 // f6 (1.49, 1e-8) (0.46, 1e-13)
196 // f7 (1.58, 1e-8) (0.49, 1e-13)
197
198 double epsd[] = {0.28, 0.33, 0.37, 0.40, 0.43, 0.46, 0.49};
199 float epsfl[] = {0.92, 1.06, 1.19, 1.30, 1.40, 1.49, 1.58};
200 T absd, eps, expmd, exppd;
201 if CONSTEXPR (sizeof(T) == sizeof(float)) {
202 absd = fabsf(d), eps = epsfl[M];
203 expmd = expf(-d), exppd = expf(+d);
204 } else {
205 absd = fabs(d), eps = epsd[M];
206 expmd = exp(-d), exppd = exp(+d);
207 }
208
209 T d2, d4;
210 T d3, d5, d7, d9, d11, d13;
211 if CONSTEXPR (N >= 1) {
212 d2 = d * d;
213 if (absd > eps) {
214 f1d = fsinhcAnalyt1(d, expmd, exppd);
215 } else {
216 f1d = fsinhcSeries<1>(d2);
217 }
218 }
219 if CONSTEXPR (N >= 2) {
220 d3 = d * d2;
221 if (absd > eps) {
222 f2d = fsinhcAnalyt2(d, d3, expmd, exppd);
223 } else {
224 f2d = fsinhcSeries<2>(d2);
225 }
226 }
227 if CONSTEXPR (N >= 3) {
228 d5 = d2 * d3;
229 if (absd > eps) {
230 f3d = fsinhcAnalyt3(d, d5, expmd, exppd);
231 } else {
232 f3d = fsinhcSeries<3>(d2);
233 }
234 }
235 if CONSTEXPR (N >= 4) {
236 d4 = d2 * d2;
237 d7 = d3 * d4;
238 if (absd > eps) {
239 f4d = fsinhcAnalyt4(d, d7, expmd, exppd);
240 } else {
241 f4d = fsinhcSeries<4>(d2);
242 }
243 }
244 if CONSTEXPR (N >= 5) {
245 d9 = d3 * d3 * d3;
246 if (absd > eps) {
247 f5d = fsinhcAnalyt5(d, d9, expmd, exppd);
248 } else {
249 f5d = fsinhcSeries<5>(d2);
250 }
251 }
252 if CONSTEXPR (N >= 6) {
253 d11 = d3 * d4 * d4;
254 if (absd > eps) {
255 f6d = fsinhcAnalyt6(d, d11, expmd, exppd);
256 } else {
257 f6d = fsinhcSeries<6>(d2);
258 }
259 }
260 if CONSTEXPR (N >= 7) {
261 d13 = d3 * d3 * d3 * d4;
262 if (absd > eps) {
263 f7d = fsinhcAnalyt7(d, d13, expmd, exppd);
264 } else {
265 f7d = fsinhcSeries<7>(d2);
266 }
267 }
268}
269}
270
273#pragma acc routine seq
274template <class T>
276inline void fsinhc1(T d, T& restrict f1d)
277{
278 T f2d, f3d, f4d, f5d, f6d, f7d;
279 fsinhcImpl<1, T>(d, f1d, f2d, f3d, f4d, f5d, f6d, f7d);
280}
281
282#pragma acc routine seq
283template <class T>
285inline void fsinhc2(T d, T& restrict f1d, T& restrict f2d)
286{
287 T f3d, f4d, f5d, f6d, f7d;
288 fsinhcImpl<2, T>(d, f1d, f2d, f3d, f4d, f5d, f6d, f7d);
289}
290
291#pragma acc routine seq
292template <class T>
294inline void fsinhc3(T d, T& restrict f1d, T& restrict f2d, T& restrict f3d)
295{
296 T f4d, f5d, f6d, f7d;
297 fsinhcImpl<3, T>(d, f1d, f2d, f3d, f4d, f5d, f6d, f7d);
298}
299
300#pragma acc routine seq
301template <class T>
303inline void fsinhc4(T d, T& restrict f1d, T& restrict f2d, T& restrict f3d,
304 T& restrict f4d)
305{
306 T f5d, f6d, f7d;
307 fsinhcImpl<4, T>(d, f1d, f2d, f3d, f4d, f5d, f6d, f7d);
308}
309
310#pragma acc routine seq
311template <class T>
313inline void fsinhc5(T d, T& restrict f1d, T& restrict f2d, T& restrict f3d,
314 T& restrict f4d, T& restrict f5d)
315{
316 T f6d, f7d;
317 fsinhcImpl<5, T>(d, f1d, f2d, f3d, f4d, f5d, f6d, f7d);
318}
319
320#pragma acc routine seq
321template <class T>
323inline void fsinhc6(T d, T& restrict f1d, T& restrict f2d, T& restrict f3d,
324 T& restrict f4d, T& restrict f5d, T& restrict f6d)
325{
326 T f7d;
327 fsinhcImpl<6, T>(d, f1d, f2d, f3d, f4d, f5d, f6d, f7d);
328}
329
330#pragma acc routine seq
331template <class T>
333inline void fsinhc7(T d, T& restrict f1d, T& restrict f2d, T& restrict f3d,
334 T& restrict f4d, T& restrict f5d, T& restrict f6d, T& restrict f7d)
335{
336 fsinhcImpl<7, T>(d, f1d, f2d, f3d, f4d, f5d, f6d, f7d);
337}
339}
#define SEQ_CUDA
Definition: acc/seqdef.h:12
#define restrict
Definition: macro.h:51
#define CONSTEXPR
Definition: macro.h:61
real * z
Current coordinates used in energy evaluation and neighbor lists.
real * x
Number of the trajectory frames.
real * y
Current coordinates used in energy evaluation and neighbor lists.
__device__ void fsinhc5(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d, T &__restrict__ f4d, T &__restrict__ f5d)
Definition: sinhc.h:313
__device__ void fsinhc2(T d, T &__restrict__ f1d, T &__restrict__ f2d)
Definition: sinhc.h:285
__device__ void fsinhc7(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d, T &__restrict__ f4d, T &__restrict__ f5d, T &__restrict__ f6d, T &__restrict__ f7d)
Definition: sinhc.h:333
__device__ void fsinhc1(T d, T &__restrict__ f1d)
Definition: sinhc.h:276
__device__ void fsinhc3(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d)
Definition: sinhc.h:294
T sinhc(T x)
Definition: sinhc.h:11
__device__ void fsinhc4(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d, T &__restrict__ f4d)
Definition: sinhc.h:303
__device__ void fsinhc6(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d, T &__restrict__ f4d, T &__restrict__ f5d, T &__restrict__ f6d)
Definition: sinhc.h:323
__device__ T fsinhcTaylor(T x2)
Definition: sinhc.h:57
__device__ T fsinhcPade44(T x2)
Definition: sinhc.h:35
__device__ T fsinhcSeries(T x2)
Definition: sinhc.h:77
__device__ T fsinhcAnalyt1(T d, T y, T z)
Definition: sinhc.h:157
__device__ T fsinhcAnalyt7(T d, T d13, T y, T z)
Definition: sinhc.h:86
__device__ void fsinhcImpl(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d, T &__restrict__ f4d, T &__restrict__ f5d, T &__restrict__ f6d, T &__restrict__ f7d)
Definition: sinhc.h:168
__device__ T fsinhcAnalyt4(T d, T d7, T y, T z)
Definition: sinhc.h:123
__device__ T fsinhcAnalyt6(T d, T d11, T y, T z)
Definition: sinhc.h:100
__device__ void fsinhcCheck()
Definition: sinhc.h:27
__device__ T fsinhcAnalyt3(T d, T d5, T y, T z)
Definition: sinhc.h:134
__device__ T fsinhcAnalyt5(T d, T d9, T y, T z)
Definition: sinhc.h:111
__device__ T fsinhcAnalyt2(T d, T d3, T y, T z)
Definition: sinhc.h:146
Definition: testrt.h:9