2#include "math/libfunc.h"
// bsplgen<LEVEL, bsorder> (fragment): fills a scratch table, addressed through
// the bsbuild(j, i) accessor, with values built recursively from w, then
// copies LEVEL entries per column of that table into thetai.
// NOTE(review): this listing has the original line numbers fused onto each
// line and omits several original lines (the function signature, braces, and
// the statements that assign k are not visible in this span) -- confirm every
// detail against the original header before relying on these notes.
27template <
int LEVEL,
int bsorder>
31#pragma acc routine seq
// 1-based accessor into the flat scratch array bsbuild_: element (j, i) is
// stored at index ((i)-1) * bsorder + (j)-1.
41#define bsbuild(j, i) bsbuild_[((i)-1) * bsorder + (j)-1]
// Row 2 of the table. NOTE(review): bsbuild(2, 2) is read below, but its
// assignment is not visible in this listing.
45 bsbuild(2, 1) = 1 - w;
// Row 3 is computed from the row-2 entries.
48 bsbuild(3, 3) = 0.5f * w * bsbuild(2, 2);
49 bsbuild(3, 2) = 0.5f * ((1 + w) * bsbuild(2, 1) + (2 - w) * bsbuild(2, 2));
50 bsbuild(3, 1) = 0.5f * (1 - w) * bsbuild(2, 1);
// Rows 4..bsorder: each row i is computed from row k and scaled by
// denom = REAL_RECIP(k).
// NOTE(review): the definition of k is not visible here -- presumably
// k = i - 1; confirm.
53 for (
int i = 4; i <= bsorder; ++i) {
55 real denom = REAL_RECIP(k);
56 bsbuild(i, i) = denom * w * bsbuild(k, k);
// Interior entries i-1 .. 2 of row i (j counts down from the top entry).
57 for (
int j = 1; j <= i - 2; j++) {
58 bsbuild(i, i - j) = denom
59 * ((w + j) * bsbuild(k, i - j - 1)
60 + (i - j - w) * bsbuild(k, i - j));
62 bsbuild(i, 1) = denom * (1 - w) * bsbuild(k, 1);
// Differencing pass over row k, elements 1..bsorder: the top element takes the
// value of its lower neighbour, each element i in bsorder-1..2 becomes
// (element i-1) - (element i), and element 1 is negated. The loop runs from
// high to low index so every read of element i-1 sees the value from before
// this pass.
// NOTE(review): the enclosing condition and the value assigned to k are not
// visible in this listing.
68 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
69 for (
int i = bsorder - 1; i >= 2; --i) {
70 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
72 bsbuild(k, 1) = -bsbuild(k, 1);
// Two consecutive differencing passes over row k: first over elements
// 1..bsorder-1, then over elements 1..bsorder.
// NOTE(review): the value of k for this section is not visible here.
78 bsbuild(k, bsorder - 1) = bsbuild(k, bsorder - 2);
79 for (
int i = bsorder - 2; i >= 2; --i) {
80 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
82 bsbuild(k, 1) = -bsbuild(k, 1);
83 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
84 for (
int i = bsorder - 1; i >= 2; --i) {
85 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
87 bsbuild(k, 1) = -bsbuild(k, 1);
// Three consecutive differencing passes over row k: over elements
// 1..bsorder-2, then 1..bsorder-1, then 1..bsorder.
// NOTE(review): the value of k for this section is not visible here.
93 bsbuild(k, bsorder - 2) = bsbuild(k, bsorder - 3);
94 for (
int i = bsorder - 3; i >= 2; --i) {
95 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
97 bsbuild(k, 1) = -bsbuild(k, 1);
98 bsbuild(k, bsorder - 1) = bsbuild(k, bsorder - 2);
99 for (
int i = bsorder - 2; i >= 2; --i) {
100 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
102 bsbuild(k, 1) = -bsbuild(k, 1);
103 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
104 for (
int i = bsorder - 1; i >= 2; --i)
105 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
106 bsbuild(k, 1) = -bsbuild(k, 1);
// Copy-out: for each column i in 1..bsorder and each j in 1..LEVEL,
// thetai[4 * (i - 1) + (j - 1)] receives bsbuild(bsorder - j + 1, i). The
// stride of 4 per column means j = 1 (row bsorder) lands in slot 0 of each
// group of four, j = 2 (row bsorder - 1) in slot 1, and so on.
110 for (
int i = 1; i <= bsorder; ++i) {
113 for (
int j = 1; j <= LEVEL; ++j) {
114 thetai[4 * (i - 1) + (j - 1)] = bsbuild(bsorder - j + 1, i);
// bsplgen2<LEVEL, bsorder> (fragment): builds the same scratch table as the
// visible recursion below describes (rows 2..bsorder derived from w, followed
// by differencing passes over row k) and writes LEVEL entries per column into
// thetai using an offset that is scaled by padded_n and shifted by k.
// NOTE(review): this listing has the original line numbers fused onto each
// line and omits several original lines (the function signature, braces, and
// the statements that assign k are not visible in this span) -- confirm every
// detail against the original header before relying on these notes.
121template <
int LEVEL,
int bsorder>
// 1-based accessor into the flat scratch array bsbuild_: element (j, i) is
// stored at index ((i)-1) * bsorder + (j)-1.
127# define bsbuild(j, i) bsbuild_[((i)-1) * bsorder + (j)-1]
// Row 2 of the table. NOTE(review): bsbuild(2, 2) is read below, but its
// assignment is not visible in this listing.
131 bsbuild(2, 1) = 1 - w;
// Row 3 is computed from the row-2 entries.
134 bsbuild(3, 3) = 0.5f * w * bsbuild(2, 2);
135 bsbuild(3, 2) = 0.5f * ((1 + w) * bsbuild(2, 1) + (2 - w) * bsbuild(2, 2));
136 bsbuild(3, 1) = 0.5f * (1 - w) * bsbuild(2, 1);
// Rows 4..bsorder: each row i is computed from row k and scaled by
// denom = REAL_RECIP(k).
// NOTE(review): the definition of k used inside this loop is not visible
// here -- presumably k = i - 1; confirm.
139 for (
int i = 4; i <= bsorder; ++i) {
141 real denom = REAL_RECIP(k);
142 bsbuild(i, i) = denom * w * bsbuild(k, k);
// Interior entries i-1 .. 2 of row i (j counts down from the top entry).
143 for (
int j = 1; j <= i - 2; j++) {
144 bsbuild(i, i - j) = denom
145 * ((w + j) * bsbuild(k, i - j - 1)
146 + (i - j - w) * bsbuild(k, i - j));
148 bsbuild(i, 1) = denom * (1 - w) * bsbuild(k, 1);
// Differencing pass over row k, elements 1..bsorder: the top element takes the
// value of its lower neighbour, each element i in bsorder-1..2 becomes
// (element i-1) - (element i), and element 1 is negated. The loop runs from
// high to low index so every read of element i-1 sees the value from before
// this pass.
// NOTE(review): the enclosing condition and the value assigned to k are not
// visible in this listing.
154 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
155 for (
int i = bsorder - 1; i >= 2; --i) {
156 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
158 bsbuild(k, 1) = -bsbuild(k, 1);
// Two consecutive differencing passes over row k: first over elements
// 1..bsorder-1, then over elements 1..bsorder.
// NOTE(review): the value of k for this section is not visible here.
164 bsbuild(k, bsorder - 1) = bsbuild(k, bsorder - 2);
165 for (
int i = bsorder - 2; i >= 2; --i) {
166 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
168 bsbuild(k, 1) = -bsbuild(k, 1);
169 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
170 for (
int i = bsorder - 1; i >= 2; --i) {
171 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
173 bsbuild(k, 1) = -bsbuild(k, 1);
// Three consecutive differencing passes over row k: over elements
// 1..bsorder-2, then 1..bsorder-1, then 1..bsorder.
// NOTE(review): the value of k for this section is not visible here.
179 bsbuild(k, bsorder - 2) = bsbuild(k, bsorder - 3);
180 for (
int i = bsorder - 3; i >= 2; --i) {
181 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
183 bsbuild(k, 1) = -bsbuild(k, 1);
184 bsbuild(k, bsorder - 1) = bsbuild(k, bsorder - 2);
185 for (
int i = bsorder - 2; i >= 2; --i) {
186 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
188 bsbuild(k, 1) = -bsbuild(k, 1);
189 bsbuild(k, bsorder) = bsbuild(k, bsorder - 1);
190 for (
int i = bsorder - 1; i >= 2; --i)
191 bsbuild(k, i) = bsbuild(k, i - 1) - bsbuild(k, i);
192 bsbuild(k, 1) = -bsbuild(k, 1);
// Copy-out: for each column i in 1..bsorder and each j in 1..LEVEL, the value
// bsbuild(bsorder - j + 1, i) is stored at
// thetai[(4 * (i - 1) + (j - 1)) * padded_n + k], so consecutive (i, j) slots
// are padded_n elements apart and k selects the position within each slot.
// NOTE(review): k is also used as the scratch-row index in the passes above;
// which value of k is in effect at this point is not visible in this
// listing -- confirm that the output offset uses the intended k.
196 for (
int i = 1; i <= bsorder; ++i) {
197 for (
int j = 1; j <= LEVEL; ++j) {
198 int offset = (4 * (i - 1) + (j - 1)) *
padded_n + k;
199 thetai[offset] = bsbuild(bsorder - j + 1, i);
#define restrict
Definition: macro.h:51
#define CONSTEXPR
Definition: macro.h:61
__device__ void bsplgen(real w, real *__restrict__ thetai, volatile real *__restrict__ bsbuild_)
B-spline coefficients and derivatives for a single PME atomic site along a particular direction....
Definition: bsplgen.h:29
float real
Definition: precision.h:80
__device__ void bsplgen2(real w, real *__restrict__ thetai, int k, int padded_n, volatile real *__restrict__ bsbuild_)
Definition: bsplgen.h:123