Tinker9 70bd052 (Thu Nov 9 12:11:35 2023 -0800)
Loading...
Searching...
No Matches
damp_hippo.h
1#pragma once
2#include "math/sinhc.h"
3#include "seq/seq.h"
4
5namespace tinker {
6#pragma acc routine seq
7template <int DirOrder, int MutOrder>
9inline void damp_gordon1(real* restrict dmpij, real* restrict dmpi,
10 real* restrict dmpj, real r, real ai, real aj)
11{
12 const real div3 = 1 / ((real)3);
13 const real div6 = 1 / ((real)6);
14 const real div15 = 1 / ((real)15);
15 const real div30 = 1 / ((real)30);
16 const real div105 = 1 / ((real)105);
17 const real div945 = 1 / ((real)945);
18
19 real a, expi, b, expk;
20 a = ai * r, b = aj * r;
21 expi = REAL_EXP(-a), expk = REAL_EXP(-b);
22
23 // dmpi, dmpj
24 real a2, a3, a4, a5, b2, b3, b4, b5;
25
26 if CONSTEXPR (DirOrder >= 1) {
27 dmpi[0] = 1 - (1 + 0.5f * a) * expi;
28 dmpj[0] = 1 - (1 + 0.5f * b) * expk;
29 }
30 if CONSTEXPR (DirOrder >= 3) {
31 a2 = a * a, b2 = b * b;
32 dmpi[1] = 1 - (1 + a + 0.5f * a2) * expi;
33 dmpj[1] = 1 - (1 + b + 0.5f * b2) * expk;
34 }
35 if CONSTEXPR (DirOrder >= 5) {
36 a3 = a * a2, b3 = b * b2;
37 dmpi[2] = 1 - (1 + a + 0.5f * a2 + a3 * div6) * expi;
38 dmpj[2] = 1 - (1 + b + 0.5f * b2 + b3 * div6) * expk;
39 }
40 if CONSTEXPR (DirOrder >= 7) {
41 a4 = a2 * a2, b4 = b2 * b2;
42 dmpi[3] = 1 - (1 + a + 0.5f * a2 + a3 * div6 + a4 * div30) * expi;
43 dmpj[3] = 1 - (1 + b + 0.5f * b2 + b3 * div6 + b4 * div30) * expk;
44 }
45 if CONSTEXPR (DirOrder >= 9) {
46 a5 = a2 * a3, b5 = b2 * b3;
47 dmpi[4] = 1
48 - (1 + a + 0.5f * a2 + a3 * div6 + (4 * a4 + 0.5f * a5) * div105)
49 * expi;
50 dmpj[4] = 1
51 - (1 + b + 0.5f * b2 + b3 * div6 + (4 * b4 + 0.5f * b5) * div105)
52 * expk;
53 }
54
55 // dmpij
56 if CONSTEXPR (MutOrder >= 1) {
57 real c = (b + a) / 2, d = (b - a) / 2;
58 real expmc = REAL_EXP(-c);
59 real t = (ai + aj) * r;
60 real x = a / t, y = 1 - x;
61 real ec = expmc / 16, ea = expi / 32, eb = expk / 32;
62
63 real c2 = c * c, c3 = c * c * c;
64 real d2 = d * d;
65 real c2d2 = c2 * d2;
66
67 real f1d, f2d, f3d, f4d, f5d, f6d, f7d;
68 if CONSTEXPR (MutOrder >= 11)
69 fsinhc7(d, f1d, f2d, f3d, f4d, f5d, f6d, f7d);
70 else if CONSTEXPR (MutOrder >= 9)
71 fsinhc6(d, f1d, f2d, f3d, f4d, f5d, f6d);
72 else if CONSTEXPR (MutOrder >= 7)
73 fsinhc5(d, f1d, f2d, f3d, f4d, f5d);
74 else if CONSTEXPR (MutOrder >= 5)
75 fsinhc4(d, f1d, f2d, f3d, f4d);
76 else if CONSTEXPR (MutOrder >= 3)
77 fsinhc3(d, f1d, f2d, f3d);
78 else
79 fsinhc2(d, f1d, f2d);
80
81#define TINKER_GORDON1_L00(X) (4 * ((X) * ((X) * (2 * (X)-3) - 3) + 6))
82#define TINKER_GORDON1_L01(X) ((X) * (4 * ((X)-3) * (X) + 11) - 2)
83
84#define TINKER_GORDON1_M0(a) (1)
85#define TINKER_GORDON1_M1(a) ((a) + 1)
86#define TINKER_GORDON1_M2(a) ((a) * ((a) + 3) + 3)
87#define TINKER_GORDON1_M3(a) ((a) * ((a) * ((a) + 6) + 15) + 15)
88#define TINKER_GORDON1_M4(a) ((a) * ((a) * ((a) * ((a) + 10) + 45) + 105) + 105)
89#define TINKER_GORDON1_M5(a) \
90 ((a) * ((a) * ((a) * ((a) * ((a) + 15) + 105) + 420) + 945) + 945)
91
92 real l00x, l01x, l00y, l01y;
93 if CONSTEXPR (MutOrder >= 1) {
94 real k01, k02;
95 k01 = 3 * c * (c + 3);
96 k02 = c3;
97 l00x = TINKER_GORDON1_L00(x), l01x = TINKER_GORDON1_L01(x) * t;
98 l00y = TINKER_GORDON1_L00(y), l01y = TINKER_GORDON1_L01(y) * t;
99 dmpij[0] = 1
100 - ((k01 * f1d + k02 * f2d) * ec + (l00x + l01x) * ea
101 + (l00y + l01y) * eb);
102 }
103 if CONSTEXPR (MutOrder >= 3) {
104 real k11, k12, k13, l10x, l11x, l10y, l11y;
105 k11 = 3 * c2 * (c + 2);
106 k12 = c * ((c - 2) * c2 - 3 * (c + 3) * d2);
107 k13 = -c3 * d2;
108 l10x = TINKER_GORDON1_M1(a) * l00x,
109 l11x = a * TINKER_GORDON1_M0(a) * l01x;
110 l10y = TINKER_GORDON1_M1(b) * l00y,
111 l11y = b * TINKER_GORDON1_M0(b) * l01y;
112 dmpij[1] = 1
113 - ((k11 * f1d + k12 * f2d + k13 * f3d) * ec + (l10x + l11x) * ea
114 + (l10y + l11y) * eb);
115 }
116 if CONSTEXPR (MutOrder >= 5) {
117 real k21, k22, k23, k24, l20x, l21x, l20y, l21y;
118 k21 = 3 * c2 * (c * (c + 2) + 2);
119 k22 = c2 * ((c - 3) * c2 - 6 * (c + 2) * d2);
120 k23 = 3 * (c + 3) * d2 - 2 * (c - 2) * c2;
121 k24 = c2d2;
122 l20x = TINKER_GORDON1_M2(a) * l00x,
123 l21x = a * TINKER_GORDON1_M1(a) * l01x;
124 l20y = TINKER_GORDON1_M2(b) * l00y,
125 l21y = b * TINKER_GORDON1_M1(b) * l01y;
126 dmpij[2] = 1
127 - div3
128 * ((k21 * f1d + k22 * f2d + c * d2 * (k23 * f3d + k24 * f4d))
129 * ec
130 + (l20x + l21x) * ea + (l20y + l21y) * eb);
131 }
132 if CONSTEXPR (MutOrder >= 7) {
133 real k31, k32, k33, k34, k35, l30x, l31x, l30y, l31y;
134 k31 = 3 * c2 * (c * (c * (c + 3) + 6) + 6);
135 k32 = c2 * (c2 * ((c - 3) * c - 3) - 9 * (c * (c + 2) + 2) * d2);
136 k33 = c2 * (9 * (c + 2) * d2 - 3 * (c - 3) * c2);
137 k34 = 3 * (c - 2) * c3 - 3 * c * (c + 3) * d2;
138 k35 = -c3 * d2;
139 l30x = TINKER_GORDON1_M3(a) * l00x,
140 l31x = a * TINKER_GORDON1_M2(a) * l01x;
141 l30y = TINKER_GORDON1_M3(b) * l00y,
142 l31y = b * TINKER_GORDON1_M2(b) * l01y;
143 dmpij[3] = 1
144 - div15
145 * ((k31 * f1d + k32 * f2d
146 + d2 * (k33 * f3d + d2 * (k34 * f4d + k35 * f5d)))
147 * ec
148 + (l30x + l31x) * ea + (l30y + l31y) * eb);
149 }
150 if CONSTEXPR (MutOrder >= 9) {
151 real k41, k42, k43, k44, k45, k46, l40x, l41x, l40y, l41y;
152 k41 = 3 * c2 * (c * (c * (c * (c + 5) + 15) + 30) + 30);
153 k42 = c2
154 * (c2 * (c * ((c - 2) * c - 9) - 9)
155 - 12 * (c * (c * (c + 3) + 6) + 6) * d2);
156 k43 = c2 * (18 * (c * (c + 2) + 2) * d2 - 4 * c2 * ((c - 3) * c - 3));
157 k44 = c2 * (6 * (c - 3) * c2 - 12 * (c + 2) * d2);
158 k45 = c * (3 * (c + 3) * d2 - 4 * (c - 2) * c2);
159 k46 = c3 * d2;
160 l40x = TINKER_GORDON1_M4(a) * l00x,
161 l41x = a * TINKER_GORDON1_M3(a) * l01x;
162 l40y = TINKER_GORDON1_M4(b) * l00y,
163 l41y = b * TINKER_GORDON1_M3(b) * l01y;
164 dmpij[4] = 1
165 - div105
166 * ((k41 * f1d + k42 * f2d
167 + d2
168 * (k43 * f3d
169 + d2 * (k44 * f4d + d2 * (k45 * f5d + k46 * f6d))))
170 * ec
171 + (l40x + l41x) * ea + (l40y + l41y) * eb);
172 }
173 if CONSTEXPR (MutOrder >= 11) {
174 real k51, k52, k53, k54, k55, k56, k57, l50x, l51x, l50y, l51y;
175 k51 = 3 * c2 * (c * (c * (c * (c * (c + 8) + 35) + 105) + 210) + 210);
176 k52 = c2
177 * (c2 * (c * (c3 - 15 * c - 45) - 45)
178 - 15 * (c * (c * (c * (c + 5) + 15) + 30) + 30) * d2);
179 k53 = c2
180 * (5 * (c * (9 - (c - 2) * c) + 9) * c2
181 + 30 * (c * (c * (c + 3) + 6) + 6) * d2);
182 k54 = c2 * (10 * c2 * ((c - 3) * c - 3) - 30 * (c * (c + 2) + 2) * d2);
183 k55 = c2 * (15 * (c + 2) * d2 - 10 * (c - 3) * c2);
184 k56 = c * (5 * (c - 2) * c2 - 3 * (c + 3) * d2);
185 k57 = -c3 * d2;
186 l50x = TINKER_GORDON1_M5(a) * l00x,
187 l51x = a * TINKER_GORDON1_M4(a) * l01x;
188 l50y = TINKER_GORDON1_M5(b) * l00y,
189 l51y = b * TINKER_GORDON1_M4(b) * l01y;
190 dmpij[5] = 1
191 - div945
192 * ((k51 * f1d + k52 * f2d
193 + d2
194 * (k53 * f3d
195 + d2
196 * (k54 * f4d
197 + d2
198 * (k55 * f5d
199 + d2 * (k56 * f6d + k57 * f7d)))))
200 * ec
201 + (l50x + l51x) * ea + (l50y + l51y) * eb);
202 }
203
204#undef TINKER_GORDON1_L00
205#undef TINKER_GORDON1_L01
206#undef TINKER_GORDON1_M0
207#undef TINKER_GORDON1_M1
208#undef TINKER_GORDON1_M2
209#undef TINKER_GORDON1_M3
210#undef TINKER_GORDON1_M4
211#undef TINKER_GORDON1_M5
212 }
213}
214
216#pragma acc routine seq
217template <int ORDER>
220 real* restrict dmpk, real r, real alphai, real alphak)
221{
222#if TINKER_REAL_SIZE == 8
223 real eps = 0.001f;
224#elif TINKER_REAL_SIZE == 4
225 real eps = 0.05f;
226#endif
227
228 real diff = REAL_ABS(alphai - alphak);
229
230 if (diff < eps) alphai = 0.5f * (alphai + alphak);
231
232 real dampi = alphai * r;
233 real dampk = alphak * r;
234 real expi = REAL_EXP(-dampi);
235 real expk = REAL_EXP(-dampk);
236
237 real dampi2 = dampi * dampi;
238 real dampi3 = dampi * dampi2;
239 real dampi4 = dampi2 * dampi2;
240 real dampi5 = dampi2 * dampi3;
241
242 // divisions
243 const real div3 = 1 / ((real)3);
244 const real div6 = 1 / ((real)6);
245 const real div15 = 1 / ((real)15);
246 const real div21 = 1 / ((real)21);
247 const real div30 = 1 / ((real)30);
248 const real div105 = 1 / ((real)105);
249
250 // GORDON1
251 // core-valence
252 dmpi[0] = (1 + 0.5f * dampi) * expi;
253 dmpi[1] = (1 + dampi + 0.5f * dampi2) * expi;
254 dmpi[2] = (1 + dampi + 0.5f * dampi2 + dampi3 * div6) * expi;
255 dmpi[3] = (1 + dampi + 0.5f * dampi2 + dampi3 * div6 + dampi4 * div30)
256 * expi;
257 dmpi[4] = (1 + dampi + 0.5f * dampi2 + dampi3 * div6
258 + (4 * dampi4 + 0.5f * dampi5) * div105)
259 * expi;
260
261 if (diff < eps) {
262 dmpk[0] = dmpi[0];
263 dmpk[1] = dmpi[1];
264 dmpk[2] = dmpi[2];
265 dmpk[3] = dmpi[3];
266 dmpk[4] = dmpi[4];
267
268 // valence-valence
269 real dampi6 = dampi3 * dampi3;
270 real dampi7 = dampi3 * dampi4;
271 const real div5 = 1 / ((real)5);
272 const real div16 = 1 / ((real)16);
273 const real div24 = 1 / ((real)24);
274 const real div42 = 1 / ((real)42);
275 const real div48 = 1 / ((real)48);
276 const real div120 = 1 / ((real)120);
277 const real div144 = 1 / ((real)144);
278
279 dmpik[0] = (1 + (11 * dampi + 3 * dampi2) * div16 + dampi3 * div48)
280 * expi;
281 dmpik[1] = (1 + dampi + 0.5f * dampi2 + (7 * dampi3 + dampi4) * div48)
282 * expi;
283 dmpik[2] = (1 + dampi + 0.5f * dampi2 + dampi3 * div6 + dampi4 * div24
284 + dampi5 * div144)
285 * expi;
286 dmpik[3] = (1 + dampi + 0.5f * dampi2 + dampi3 * div6 + dampi4 * div24
287 + (dampi5 + dampi6 * div6) * div120)
288 * expi;
289 dmpik[4] = ((1 + dampi + 0.5f * dampi2 + dampi3 * div6)
290 + ((dampi4 + dampi5 * div5)
291 + 0.1f * (dampi6 * div3 + dampi7 * div21))
292 * div24)
293 * expi;
294 if CONSTEXPR (ORDER > 9) {
295 real dampi8 = dampi4 * dampi4;
296 const real div378 = 1 / ((real)378);
297 dmpik[5] = (1 + dampi + 0.5f * dampi2 + dampi3 * div6 + dampi4 * div24
298 + (dampi5 + dampi6 * div6 + dampi7 * div42
299 + dampi8 * div378)
300 * div120)
301 * expi;
302 }
303 } else {
304 real dampk2 = dampk * dampk;
305 real dampk3 = dampk * dampk2;
306 real dampk4 = dampk2 * dampk2;
307 real dampk5 = dampk2 * dampk3;
308
309 const real div5 = 1 / ((real)5);
310 const real div7 = 1 / ((real)7);
311
312 dmpk[0] = (1 + 0.5f * dampk) * expk;
313 dmpk[1] = (1 + dampk + 0.5f * dampk2) * expk;
314 dmpk[2] = (1 + dampk + 0.5f * dampk2 + dampk3 * div6) * expk;
315 dmpk[3] = (1 + dampk + 0.5f * dampk2 + dampk3 * div6 + dampk4 * div30)
316 * expk;
317 dmpk[4] = (1 + dampk + 0.5f * dampk2 + dampk3 * div6
318 + (4 * dampk4 + 0.5f * dampk5) * div105)
319 * expk;
320
321 // valence-valence
322 real alphai2 = alphai * alphai;
323 real alphak2 = alphak * alphak;
324 real alphaik = ((alphak + alphai) * (alphak - alphai));
325 real termi = alphak2 / alphaik;
326 real termk = -alphai2 / alphaik;
327 real termi2 = termi * termi;
328 real termk2 = termk * termk;
329
330 dmpik[0] = termi2 * (1 + 2 * termk + 0.5f * dampi) * expi
331 + termk2 * (1 + 2 * termi + 0.5f * dampk) * expk;
332 dmpik[1] = termi2 * (1 + dampi + 0.5f * dampi2) * expi
333 + termk2 * (1 + dampk + 0.5f * dampk2) * expk
334 + 2 * termi2 * termk * (1 + dampi) * expi
335 + 2 * termk2 * termi * (1 + dampk) * expk;
336 dmpik[2] = termi2 * (1 + dampi + 0.5f * dampi2 + dampi3 * div6) * expi
337 + termk2 * (1 + dampk + 0.5f * dampk2 + dampk3 * div6) * expk
338 + 2 * termi2 * termk * (1 + dampi + dampi2 * div3) * expi
339 + 2 * termk2 * termi * (1 + dampk + dampk2 * div3) * expk;
340 dmpik[3] = termi2
341 * (1 + dampi + 0.5f * dampi2 + dampi3 * div6 + dampi4 * div30)
342 * expi
343 + termk2 * (1 + dampk + 0.5f * dampk2 + dampk3 * div6 + dampk4 * div30)
344 * expk
345 + 2 * termi2 * termk * (1 + dampi + 2 * dampi2 * div5 + dampi3 * div15)
346 * expi
347 + 2 * termk2 * termi * (1 + dampk + 2 * dampk2 * div5 + dampk3 * div15)
348 * expk;
349 dmpik[4] = termi2
350 * (1 + dampi + 0.5f * dampi2 + dampi3 * div6
351 + (4 * dampi4 + 0.5f * dampi5) * div105)
352 * expi
353 + termk2
354 * (1 + dampk + 0.5f * dampk2 + dampk3 * div6
355 + (4 * dampk4 + 0.5f * dampk5) * div105)
356 * expk
357 + 2 * termi2 * termk
358 * (1 + dampi + 3 * dampi2 * div7 + 2 * dampi3 * div21
359 + dampi4 * div105)
360 * expi
361 + 2 * termk2 * termi
362 * (1 + dampk + 3 * dampk2 * div7 + 2 * dampk3 * div21
363 + dampk4 * div105)
364 * expk;
365
366 if CONSTEXPR (ORDER > 9) {
367 real dampi6 = dampi3 * dampi3;
368 real dampk6 = dampk3 * dampk3;
369 const real div945 = 1 / ((real)945);
370 const real div9 = 1 / ((real)9);
371 const real div63 = 1 / ((real)63);
372 const real div126 = 1 / ((real)126);
373 const real div315 = 1 / ((real)315);
374 const real div1890 = 1 / ((real)1890);
375
376 dmpik[5] = (1 + dampi + 0.5f * dampi2 + dampi3 * div6
377 + 5 * dampi4 * div126 + 2 * dampi5 * div315
378 + dampi6 * div1890)
379 * termi2 * expi
380 + (1 + dampk + 0.5f * dampk2 + dampk3 * div6 + 5 * dampk4 * div126
381 + 2 * dampk5 * div315 + dampk6 * div1890)
382 * termk2 * expk
383 + (1 + dampi + (4 * dampi2 + dampi3) * div9 + dampi4 * div63
384 + dampi5 * div945)
385 * 2 * termi2 * termk * expi
386 + (1 + dampk + (4 * dampk2 + dampk3) * div9 + dampk4 * div63
387 + dampk5 * div945)
388 * 2 * termk2 * termi * expk;
389 }
390 }
391}
392
393#pragma acc routine seq
394template <int order>
396inline void damp_pole_v2(real* restrict dmpij, real* restrict dmpi,
397 real* restrict dmpj, real r, real ai, real aj)
398{
399 damp_gordon1<9, order>(dmpij, dmpi, dmpj, r, ai, aj);
400}
401
403inline void damp_dir(real* restrict dmpi, real* restrict dmpk, real r,
404 real alphai, real alphak)
405{
406 damp_gordon1<7, 0>(nullptr, dmpi, dmpk, r, alphai, alphak);
407}
408
410inline void damp_mut(real* restrict dmpik, real r, real alphai, real alphak)
411{
412 damp_gordon1<0, 5>(dmpik, nullptr, nullptr, r, alphai, alphak);
413}
414
418#pragma acc routine seq
419template <int order>
421inline void damp_rep_deprecated(real* restrict dmpik, real r, real rinv,
422 real r2, real rr3, real rr5, real rr7, real rr9, real rr11, real dmpi,
423 real dmpk)
424{
425#if TINKER_REAL_SIZE == 8
426 real eps = 0.001f;
427#elif TINKER_REAL_SIZE == 4
428 real eps = 0.05f;
429#endif
430
431 // This makes sure that dmpi > dmpk, which solves numerical issues
432 // with atomi,atomk vs. atomk,atomi.
433 real dmpsmall = REAL_MIN(dmpi, dmpk);
434 real dmpbig = REAL_MAX(dmpi, dmpk);
435 dmpi = dmpsmall;
436 dmpk = dmpbig;
437
438 real diff = REAL_ABS(dmpi - dmpk);
439 if (diff < eps) dmpi = 0.5f * (dmpi + dmpk);
440
441 real r3 = r2 * r;
442 real r4 = r2 * r2;
443 real r5 = r3 * r2;
444 real dmpi2 = 0.5f * dmpi;
445 real dampi = dmpi2 * r;
446 real expi = REAL_EXP(-dampi);
447 real dmpi22 = dmpi2 * dmpi2;
448 real dmpi23 = dmpi22 * dmpi2;
449 real dmpi24 = dmpi22 * dmpi22;
450 real dmpi25 = dmpi23 * dmpi22;
451
452 // divisions
453 const real div3 = 1 / ((real)3);
454 const real div9 = 1 / ((real)9);
455 const real div945 = 1 / ((real)945);
456
457 real pre, s, ds, d2s, d3s, d4s, d5s;
458 if (diff < eps) {
459 real r6 = r3 * r3;
460 real r7 = r4 * r3;
461
462 real dmpi26 = dmpi23 * dmpi23;
463
464 pre = 2;
465 s = (r + dmpi2 * r2 + dmpi22 * r3 * div3) * expi;
466 ds = (dmpi22 * r3 + dmpi23 * r4) * expi * div3;
467 d2s = dmpi24 * expi * r5 * div9;
468 d3s = dmpi25 * expi * r6 / ((real)45);
469 d4s = (dmpi25 * r6 + dmpi26 * r7) * expi / ((real)315);
470
471 if CONSTEXPR (order > 9) {
472 real r8 = r4 * r4;
473 real dmpi27 = dmpi24 * dmpi23;
474 d5s = (dmpi25 * r6 + dmpi26 * r7 + dmpi27 * r8 * div3) * expi * div945;
475 }
476 } else {
477 // treat the case where alpha damping exponents are unequal
478
479 // divisions
480 real div5 = 1 / ((real)5);
481 real div7 = 1 / ((real)7);
482 real div15 = 1 / ((real)15);
483 real div21 = 1 / ((real)21);
484 real div63 = 1 / ((real)63);
485 real div105 = 1 / ((real)105);
486 real div189 = 1 / ((real)189);
487
488 real dmpk2 = 0.5f * dmpk;
489 real dampk = dmpk2 * r;
490 real expk = REAL_EXP(-dampk);
491 real dmpk22 = dmpk2 * dmpk2;
492 real dmpk23 = dmpk22 * dmpk2;
493 real dmpk24 = dmpk22 * dmpk22;
494 real dmpk25 = dmpk23 * dmpk22;
495
496 real term = 0.25f * (dmpi + dmpk) * (dmpi - dmpk);
497 real term1 = (dmpi + dmpk) * (dmpi - dmpk);
498 real tmp = 4 * (dmpi * dmpk) / term1;
499 pre = (128 * dmpi23 * dmpk23) / (term * term * term * term);
500
501 real coef1 = 16 / term1;
502
503 s = (dampi * expk) + (dampk * expi) + tmp * (expi - expk);
504 ds = (term * dmpk2 * r2 - 4 * (dmpk22 * r + dmpk2)) * dmpi2 * expk / term
505 + (term * dmpi2 * r2 + 4 * (dmpi22 * r + dmpi2)) * dmpk2 * expi / term;
506
507 d2s = ((dmpk2 * r2 + dmpk22 * r3) * div3
508 - coef1 * (dmpk23 * r2 * div3 + dmpk22 * r + dmpk2))
509 * dmpi2 * expk
510 + ((dmpi2 * r2 + dmpi22 * r3) * div3
511 + coef1 * (dmpi23 * r2 * div3 + dmpi22 * r + dmpi2))
512 * dmpk2 * expi;
513
514 d3s = ((dmpk23 * r4 * div3 + dmpk22 * r3 + dmpk2 * r2) * div5
515 - coef1
516 * (dmpk24 * r3 * div15 + (2 * div5) * dmpk23 * r2 + dmpk22 * r
517 + dmpk2))
518 * dmpi2 * expk
519 + ((dmpi23 * r4 * div3 + dmpi22 * r3 + dmpi2 * r2) * div5
520 + coef1
521 * (dmpi24 * r3 * div15 + (2 * div5) * dmpi23 * r2 + dmpi22 * r
522 + dmpi2))
523 * dmpk2 * expi;
524
525 d4s = ((dmpk24 * r5 * div15 + 2 * div5 * dmpk23 * r4 + dmpk22 * r3
526 + dmpk2 * r2)
527 * div7
528 - coef1
529 * (dmpk25 * r4 * div105 + 2 * div21 * dmpk24 * r3
530 + 3 * div7 * dmpk23 * r2 + dmpk22 * r + dmpk2))
531 * dmpi2 * expk
532 + ((dmpi24 * r5 * div15 + 2 * div5 * dmpi23 * r4 + dmpi22 * r3
533 + dmpi2 * r2)
534 * div7
535 + coef1
536 * (dmpi25 * r4 * div105 + 2 * div21 * dmpi24 * r3
537 + 3 * div7 * dmpi23 * r2 + dmpi22 * r + dmpi2))
538 * dmpk2 * expi;
539
540 if CONSTEXPR (order > 9) {
541 real r6 = r3 * r3;
542 real dmpi26 = dmpi23 * dmpi23;
543 real dmpk26 = dmpk23 * dmpk23;
544 d5s = (dmpk25 * r6 * div945 + 2 * div189 * dmpk24 * r5
545 + dmpk23 * r4 * div21 + dmpk22 * r3 * div9 + dmpk2 * r2 * div9
546 - coef1
547 * (dmpk26 * r5 * div945 + dmpk25 * r4 * div63
548 + dmpk24 * r3 * div9 + 4 * div9 * dmpk23 * r2
549 + dmpk22 * r + dmpk2))
550 * dmpi2 * expk
551 + (dmpi25 * r6 * div945 + 2 * div189 * dmpi24 * r5
552 + dmpi23 * r4 * div21 + dmpi22 * r3 * div9 + dmpi2 * r2 * div9
553 + coef1
554 * (dmpi26 * r5 * div945 + dmpi25 * r4 * div63
555 + dmpi24 * r3 * div9 + 4 * div9 * dmpi23 * r2
556 + dmpi22 * r + dmpi2))
557 * dmpk2 * expi;
558 }
559 }
560
561 // convert partial derivatives into full derivatives
562 s *= rinv;
563 ds *= rr3;
564 d2s *= rr5;
565 d3s *= rr7;
566 d4s *= rr9;
567
568 dmpik[0] = 0.5f * pre * s * s;
569 dmpik[1] = pre * s * ds;
570 dmpik[2] = pre * (s * d2s + ds * ds);
571 dmpik[3] = pre * (s * d3s + 3 * ds * d2s);
572 dmpik[4] = pre * (s * d4s + 4 * ds * d3s + 3 * d2s * d2s);
573
574 if CONSTEXPR (order > 9) {
575 d5s *= rr11;
576 dmpik[5] = pre * (s * d5s + 5 * ds * d4s + 10 * d2s * d3s);
577 }
578}
579
585#pragma acc routine seq
586template <int order>
588inline void damp_rep(real* restrict dmpik, real r, real rr1, real r2, real rr3,
589 real rr5, real rr7, real rr9, real rr11, real ai /* dmpi */,
590 real aj /* dmpk */)
591{
592 real pfac = 2 / (ai + aj);
593 pfac = pfac * pfac;
594 pfac = pfac * ai * aj;
595 pfac = pfac * pfac * pfac;
596 pfac *= r2;
597
598 real a = ai * r / 2, b = aj * r / 2;
599 real c = (a + b) / 2, d = (b - a) / 2;
600 real expmc = REAL_EXP(-c);
601
602 real c2 = c * c;
603 real c3 = c2 * c;
604 real c4 = c2 * c2;
605 real d2 = d * d;
606 real d4 = d2 * d2;
607 real c2d2 = (c * d) * (c * d);
608
609 real f1d, f2d, f3d, f4d, f5d, f6d, f7d;
610 if CONSTEXPR (order > 9)
611 fsinhc7(d, f1d, f2d, f3d, f4d, f5d, f6d, f7d);
612 else
613 fsinhc6(d, f1d, f2d, f3d, f4d, f5d, f6d);
614
615 real inv3 = 1. / 3, inv15 = 1. / 15, inv105 = 1. / 105, inv945 = 1. / 945;
616
617 // compute
618 real s;
619 s = f1d * (c + 1) + f2d * c2;
620 s *= rr1;
621 s *= expmc;
622 dmpik[0] = pfac * s * s;
623
624 real ds;
625 ds = f1d * c2 + f2d * ((c - 2) * c2 - (c + 1) * d2) - f3d * c2d2;
626 ds *= rr3;
627 ds *= expmc;
628 dmpik[1] = pfac * 2 * s * ds;
629
630 real d2s = 0;
631 d2s += f1d * c3 + f2d * c2 * ((c - 3) * c - 2 * d2);
632 d2s += d2 * (f3d * (2 * (2 - c) * c2 + (c + 1) * d2) + f4d * c2d2);
633 d2s *= rr5 * inv3;
634 d2s *= expmc;
635 dmpik[2] = pfac * 2 * (s * d2s + ds * ds);
636
637 real d3s = 0;
638 d3s += f1d * c3 * (c + 1) + f2d * c3 * (c * (c - 3) - 3 * (d2 + 1));
639 d3s -= d2
640 * (f3d * 3 * c2 * ((c - 3) * c - d2)
641 + d2 * (f4d * (3 * (2 - c) * c2 + (c + 1) * d2) + f5d * c2d2));
642 d3s *= rr7 * inv15;
643 d3s *= expmc;
644 dmpik[3] = pfac * 2 * (s * d3s + 3 * ds * d2s);
645
646 real d4s = 0;
647 d4s += f1d * c3 * (3 + c * (c + 3))
648 + f2d * c3 * (c3 - 9 * (c + 1) - 2 * c2 - 4 * (c + 1) * d2);
649 d4s += d2
650 * (f3d * 2 * c3 * (6 * (c + 1) - 2 * c2 + 3 * d2)
651 + d2
652 * (f4d * 2 * c2 * (3 * (c - 3) * c - 2 * d2)
653 + f5d * d2 * (4 * (2 - c) * c2 + (c + 1) * d2) + f6d * c2 * d4));
654 d4s *= rr9 * inv105;
655 d4s *= expmc;
656 dmpik[4] = pfac * 2 * (s * d4s + 4 * ds * d3s + 3 * d2s * d2s);
657
658 if CONSTEXPR (order > 9) {
659 real d5s = 0;
660 d5s += f1d * c3 * (15 + c * (15 + c * (c + 6)));
661 d5s += f2d * c3
662 * (c4 - 15 * c2 - 45 * (c + 1) - 5 * (3 + c * (c + 3)) * d2);
663 d5s -= d2
664 * (f3d * 5 * c3 * (c3 - 9 * (c + 1) - 2 * c2 - 2 * (c + 1) * d2)
665 + d2
666 * (f4d * 10 * c3 * (3 - (c - 3) * c + d2)
667 + d2
668 * (f5d * 5 * c2 * (2 * (c - 3) * c - d2)
669 + f6d * d2 * ((c + 1) * d2 - 5 * (c - 2) * c2)
670 + f7d * c2 * d4)));
671 d5s *= rr11 * inv945;
672 d5s *= expmc;
673 dmpik[5] = pfac * 2 * (s * d5s + 5 * ds * d4s + 10 * d2s * d3s);
674 }
675}
676}
#define SEQ_ROUTINE
Definition: acc/seqdef.h:7
#define SEQ_CUDA
Definition: acc/seqdef.h:12
EnergyBuffer ea
EnergyBuffer eb
#define restrict
Definition: macro.h:51
#define CONSTEXPR
Definition: macro.h:61
real * x
Number of the trajectory frames.
real * y
Current coordinates used in energy evaluation and neighbor lists.
__device__ void fsinhc5(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d, T &__restrict__ f4d, T &__restrict__ f5d)
Definition: sinhc.h:313
__device__ void fsinhc2(T d, T &__restrict__ f1d, T &__restrict__ f2d)
Definition: sinhc.h:285
__device__ void fsinhc7(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d, T &__restrict__ f4d, T &__restrict__ f5d, T &__restrict__ f6d, T &__restrict__ f7d)
Definition: sinhc.h:333
__device__ void fsinhc3(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d)
Definition: sinhc.h:294
__device__ void fsinhc4(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d, T &__restrict__ f4d)
Definition: sinhc.h:303
__device__ void fsinhc6(T d, T &__restrict__ f1d, T &__restrict__ f2d, T &__restrict__ f3d, T &__restrict__ f4d, T &__restrict__ f5d, T &__restrict__ f6d)
Definition: sinhc.h:323
float real
Definition: precision.h:80
Definition: testrt.h:9
EnergyBuffer ec
__device__ void damp_rep(real *__restrict__ dmpik, real r, real rr1, real r2, real rr3, real rr5, real rr7, real rr9, real rr11, real ai, real aj)
Definition: damp_hippo.h:588
__device__ void damp_mut(real *__restrict__ dmpik, real r, real alphai, real alphak)
Definition: damp_hippo.h:410
__device__ void damp_pole_deprecated(real *__restrict__ dmpik, real *__restrict__ dmpi, real *__restrict__ dmpk, real r, real alphai, real alphak)
deprecated
Definition: damp_hippo.h:219
__device__ void damp_gordon1(real *__restrict__ dmpij, real *__restrict__ dmpi, real *__restrict__ dmpj, real r, real ai, real aj)
Definition: damp_hippo.h:9
__device__ void damp_rep_deprecated(real *__restrict__ dmpik, real r, real rinv, real r2, real rr3, real rr5, real rr7, real rr9, real rr11, real dmpi, real dmpk)
Definition: damp_hippo.h:421
__device__ void damp_dir(real *__restrict__ dmpi, real *__restrict__ dmpk, real r, real alphai, real alphak)
Definition: damp_hippo.h:403
__device__ void damp_pole_v2(real *__restrict__ dmpij, real *__restrict__ dmpi, real *__restrict__ dmpj, real r, real ai, real aj)
Definition: damp_hippo.h:396