Tinker9 70bd052 (Thu Nov 9 12:11:35 2023 -0800)
Loading...
Searching...
No Matches
bsplgen.h
1#pragma once
2#include "math/libfunc.h"
3
4namespace tinker {
5#ifdef __CUDACC__
27template <int LEVEL, int bsorder>
28__device__
29void bsplgen(real w, real* restrict thetai, volatile real* restrict bsbuild_)
30#else
31#pragma acc routine seq
32template <int LEVEL>
33void bsplgen(real w, real* restrict thetai, int bsorder)
34#endif
35{
36#ifndef __CUDACC__
37 real bsbuild_[5 * 5];
38#endif
39
40// Fortran 2D array syntax
41#define bsbuild(j, i) bsbuild_[((i)-1) * bsorder + (j)-1]
42
43 // initialization to get to 2nd order recursion
44 bsbuild(2, 2) = w;
45 bsbuild(2, 1) = 1 - w;
46
47 // perform one pass to get to 3rd order recursion
48 bsbuild(3, 3) = 0.5f * w * bsbuild(2, 2);
49 bsbuild(3, 2) = 0.5f * ((1 + w) * bsbuild(2, 1) + (2 - w) * bsbuild(2, 2));
50 bsbuild(3, 1) = 0.5f * (1 - w) * bsbuild(2, 1);
51
52 // compute standard B-spline recursion to desired order
53 for (int i = 4; i <= bsorder; ++i) {
54 int k = i - 1;
55 real denom = REAL_RECIP(k);
56 bsbuild(i, i) = denom * w * bsbuild(k, k);
57 for (int j = 1; j <= i - 2; j++) {
58 bsbuild(i, i - j) = denom
59 * ((w + j) * bsbuild(k, i - j - 1)
60 + (i - j - w) * bsbuild(k, i - j));
61 }
62 bsbuild(i, 1) = denom * (1 - w) * bsbuild(k, 1);
63 }
64
65 // get coefficients for the B-spline first derivative
66 if CONSTEXPR (LEVEL >= 2) {
67 int k = bsorder - 1;
68 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
69 for (int i = bsorder - 1; i >= 2; --i) {
70 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
71 }
72 bsbuild(k, 1) = -bsbuild(k, 1);
73 }
74
75 // get coefficients for the B-spline second derivative
76 if CONSTEXPR (LEVEL >= 3) {
77 int k = bsorder - 2;
78 bsbuild(k, bsorder - 1) = bsbuild(k, bsorder - 2);
79 for (int i = bsorder - 2; i >= 2; --i) {
80 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
81 }
82 bsbuild(k, 1) = -bsbuild(k, 1);
83 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
84 for (int i = bsorder - 1; i >= 2; --i) {
85 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
86 }
87 bsbuild(k, 1) = -bsbuild(k, 1);
88 }
89
90 // get coefficients for the B-spline third derivative
91 if CONSTEXPR (LEVEL == 4) {
92 int k = bsorder - 3;
93 bsbuild(k, bsorder - 2) = bsbuild(k, bsorder - 3);
94 for (int i = bsorder - 3; i >= 2; --i) {
95 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
96 }
97 bsbuild(k, 1) = -bsbuild(k, 1);
98 bsbuild(k, bsorder - 1) = bsbuild(k, bsorder - 2);
99 for (int i = bsorder - 2; i >= 2; --i) {
100 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
101 }
102 bsbuild(k, 1) = -bsbuild(k, 1);
103 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
104 for (int i = bsorder - 1; i >= 2; --i)
105 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
106 bsbuild(k, 1) = -bsbuild(k, 1);
107 }
108
109 // copy coefficients from temporary to permanent storage
110 for (int i = 1; i <= bsorder; ++i) {
111 // keep this line to work on Tesla
112 #pragma acc loop seq
113 for (int j = 1; j <= LEVEL; ++j) {
114 thetai[4 * (i - 1) + (j - 1)] = bsbuild(bsorder - j + 1, i);
115 }
116 }
117#undef bsbuild
118}
119
120#ifdef __CUDACC__
121template <int LEVEL, int bsorder>
122__device__
123void bsplgen2(real w, real* restrict thetai, int k, int padded_n,
124 volatile real* restrict bsbuild_)
125{
126// Fortran 2D array syntax
127# define bsbuild(j, i) bsbuild_[((i)-1) * bsorder + (j)-1]
128
129 // initialization to get to 2nd order recursion
130 bsbuild(2, 2) = w;
131 bsbuild(2, 1) = 1 - w;
132
133 // perform one pass to get to 3rd order recursion
134 bsbuild(3, 3) = 0.5f * w * bsbuild(2, 2);
135 bsbuild(3, 2) = 0.5f * ((1 + w) * bsbuild(2, 1) + (2 - w) * bsbuild(2, 2));
136 bsbuild(3, 1) = 0.5f * (1 - w) * bsbuild(2, 1);
137
138 // compute standard B-spline recursion to desired order
139 for (int i = 4; i <= bsorder; ++i) {
140 int k = i - 1;
141 real denom = REAL_RECIP(k);
142 bsbuild(i, i) = denom * w * bsbuild(k, k);
143 for (int j = 1; j <= i - 2; j++) {
144 bsbuild(i, i - j) = denom
145 * ((w + j) * bsbuild(k, i - j - 1)
146 + (i - j - w) * bsbuild(k, i - j));
147 }
148 bsbuild(i, 1) = denom * (1 - w) * bsbuild(k, 1);
149 }
150
151 // get coefficients for the B-spline first derivative
152 if CONSTEXPR (LEVEL >= 2) {
153 int k = bsorder - 1;
154 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
155 for (int i = bsorder - 1; i >= 2; --i) {
156 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
157 }
158 bsbuild(k, 1) = -bsbuild(k, 1);
159 }
160
161 // get coefficients for the B-spline second derivative
162 if CONSTEXPR (LEVEL >= 3) {
163 int k = bsorder - 2;
164 bsbuild(k, bsorder - 1) = bsbuild(k, bsorder - 2);
165 for (int i = bsorder - 2; i >= 2; --i) {
166 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
167 }
168 bsbuild(k, 1) = -bsbuild(k, 1);
169 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
170 for (int i = bsorder - 1; i >= 2; --i) {
171 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
172 }
173 bsbuild(k, 1) = -bsbuild(k, 1);
174 }
175
176 // get coefficients for the B-spline third derivative
177 if CONSTEXPR (LEVEL == 4) {
178 int k = bsorder - 3;
179 bsbuild(k, bsorder - 2) = bsbuild(k, bsorder - 3);
180 for (int i = bsorder - 3; i >= 2; --i) {
181 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
182 }
183 bsbuild(k, 1) = -bsbuild(k, 1);
184 bsbuild(k, bsorder - 1) = bsbuild(k, bsorder - 2);
185 for (int i = bsorder - 2; i >= 2; --i) {
186 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
187 }
188 bsbuild(k, 1) = -bsbuild(k, 1);
189 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
190 for (int i = bsorder - 1; i >= 2; --i)
191 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
192 bsbuild(k, 1) = -bsbuild(k, 1);
193 }
194
195 // copy coefficients from temporary to permanent storage
196 for (int i = 1; i <= bsorder; ++i) {
197 for (int j = 1; j <= LEVEL; ++j) {
198 int offset = (4 * (i - 1) + (j - 1)) * padded_n + k;
199 thetai[offset] = bsbuild(bsorder - j + 1, i);
200 }
201 }
202# undef bsbuild
203}
204#endif
205}
#define restrict
Definition: macro.h:51
#define CONSTEXPR
Definition: macro.h:61
int padded_n
__device__ void bsplgen(real w, real *__restrict__ thetai, volatile real *__restrict__ bsbuild_)
B-spline coefficients and derivatives for a single PME atomic site along a particular direction....
Definition: bsplgen.h:29
float real
Definition: precision.h:80
Definition: testrt.h:9
__device__ void bsplgen2(real w, real *__restrict__ thetai, int k, int padded_n, volatile real *__restrict__ bsbuild_)
Definition: bsplgen.h:123