Krita Source Code Documentation
Loading...
Searching...
No Matches
nubasis.cpp
Go to the documentation of this file.
1
2// einspline: a library for creating and evaluating B-splines //
3// Copyright (C) 2007 Kenneth P. Esler, Jr. //
4// //
5// This program is free software; you can redistribute it and/or modify //
6// it under the terms of the GNU General Public License as published by //
7// the Free Software Foundation; either version 2 of the License, or //
8// (at your option) any later version. //
9// //
10// This program is distributed in the hope that it will be useful, //
11// but WITHOUT ANY WARRANTY; without even the implied warranty of //
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13// GNU General Public License for more details. //
14// //
15// You should have received a copy of the GNU General Public License //
16// along with this program; if not, write to the Free Software //
17// Foundation, Inc., 51 Franklin Street, Fifth Floor, //
18// Boston, MA 02110-1301 USA //
20
21#include "nubasis.h"
22#include <stdlib.h>
23
24
25
27create_NUBasis (NUgrid *grid, bool periodic)
28{
29 NUBasis* restrict basis = new NUBasis;
30 basis->grid = grid;
31 basis->periodic = periodic;
32 int N = grid->num_points;
33 basis->xVals = new double[N+5];
34 basis->dxInv = new double[3*(N+2)];
35 for (int i=0; i<N; i++)
36 basis->xVals[i+2] = grid->points[i];
37 double* restrict g = grid->points;
38 // Extend grid points on either end to provide enough points to
39 // construct a full basis set
40 if (!periodic) {
41 basis->xVals[0] = g[ 0 ] - 2.0*(g[1]-g[0]);
42 basis->xVals[1] = g[ 0 ] - 1.0*(g[1]-g[0]);
43 basis->xVals[N+2] = g[N-1] + 1.0*(g[N-1]-g[N-2]);
44 basis->xVals[N+3] = g[N-1] + 2.0*(g[N-1]-g[N-2]);
45 basis->xVals[N+4] = g[N-1] + 3.0*(g[N-1]-g[N-2]);
46 }
47 else {
48 basis->xVals[1] = g[ 0 ] - (g[N-1] - g[N-2]);
49 basis->xVals[0] = g[ 0 ] - (g[N-1] - g[N-3]);
50 basis->xVals[N+2] = g[N-1] + (g[ 1 ] - g[ 0 ]);
51 basis->xVals[N+3] = g[N-1] + (g[ 2 ] - g[ 0 ]);
52 basis->xVals[N+4] = g[N-1] + (g[ 3 ] - g[ 0 ]);
53 }
54 for (int i=0; i<N+2; i++)
55 for (int j=0; j<3; j++)
56 basis->dxInv[3*i+j] =
57 1.0/(basis->xVals[i+j+1]-basis->xVals[i]);
58 return basis;
59}
60
61void
63{
64 delete[] (basis->xVals);
65 delete[] (basis->dxInv);
66 delete (basis);
67}
68
69
70int
72 float bfuncs[4])
73{
74 double b1[2], b2[3];
75 int i = (*basis->grid->reverse_map)(basis->grid, x);
76 int i2 = i+2;
77 double* restrict dxInv = basis->dxInv;
78 double* restrict xVals = basis->xVals;
79
80 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
81 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
82 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
83 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
84 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
85 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
86 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
87 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
88 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
89 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
90 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
91 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
92 return i;
93}
94
95
96void
98 float bfuncs[4])
99{
100 int i2 = i+2;
101 double b1[2], b2[3];
102 double x = basis->grid->points[i];
103 double* restrict dxInv = basis->dxInv;
104 double* restrict xVals = basis->xVals;
105
106 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
107 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
108 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
109 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
110 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
111 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
112 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
113 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
114 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
115 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
116 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
117 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
118}
119
120int
122 float bfuncs[4], float dbfuncs[4])
123{
124 double b1[2], b2[3];
125 int i = (*basis->grid->reverse_map)(basis->grid, x);
126 int i2 = i+2;
127 double* restrict dxInv = basis->dxInv;
128 double* restrict xVals = basis->xVals;
129
130 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
131 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
132 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
133 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
134 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
135 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
136 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
137 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
138 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
139 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
140 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
141 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
142
143 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
144 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
145 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
146 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
147
148 return i;
149}
150
151
152void
154 float bfuncs[4], float dbfuncs[4])
155{
156 double b1[2], b2[3];
157 double x = basis->grid->points[i];
158 int i2 = i+2;
159 double* restrict dxInv = basis->dxInv;
160 double* restrict xVals = basis->xVals;
161
162 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
163 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
164 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
165 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
166 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
167 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
168 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
169 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
170 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
171 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
172 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
173 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
174
175 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
176 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
177 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
178 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
179}
180
181
182int
184 float bfuncs[4], float dbfuncs[4], float d2bfuncs[4])
185{
186 double b1[2], b2[3];
187 int i = (*basis->grid->reverse_map)(basis->grid, x);
188 int i2 = i+2;
189 double* restrict dxInv = basis->dxInv;
190 double* restrict xVals = basis->xVals;
191
192 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
193 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
194 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
195 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
196 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
197 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
198 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
199 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
200 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
201 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
202 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
203 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
204
205 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
206 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
207 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
208 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
209
210 d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
211 d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
212 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
213 d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
214 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
215 d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
216
217 return i;
218}
219
220
221void
223 float bfuncs[4], float dbfuncs[4], float d2bfuncs[4])
224{
225 double b1[2], b2[3];
226 double x = basis->grid->points[i];
227 int i2 = i+2;
228 double* restrict dxInv = basis->dxInv;
229 double* restrict xVals = basis->xVals;
230
231 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
232 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
233 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
234 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
235 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
236 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
237 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
238 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
239 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
240 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
241 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
242 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
243
244 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
245 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
246 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
247 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
248
249 d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
250 d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
251 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
252 d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
253 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
254 d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
255}
256
257
259// Double-precision version //
261int
263 double bfuncs[4])
264{
265 double b1[2], b2[3];
266 int i = (*basis->grid->reverse_map)(basis->grid, x);
267 int i2 = i+2;
268 double* restrict dxInv = basis->dxInv;
269 double* restrict xVals = basis->xVals;
270
271 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
272 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
273 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
274 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
275 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
276 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
277 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
278 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
279 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
280 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
281 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
282 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
283 return i;
284}
285
286
287void
289 double bfuncs[4])
290{
291 int i2 = i+2;
292 double b1[2], b2[3];
293 double x = basis->grid->points[i];
294 double* restrict dxInv = basis->dxInv;
295 double* restrict xVals = basis->xVals;
296
297 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
298 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
299 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
300 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
301 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
302 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
303 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
304 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
305 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
306 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
307 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
308 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
309}
310
311int
313 double bfuncs[4], double dbfuncs[4])
314{
315 double b1[2], b2[3];
316 int i = (*basis->grid->reverse_map)(basis->grid, x);
317 int i2 = i+2;
318 double* restrict dxInv = basis->dxInv;
319 double* restrict xVals = basis->xVals;
320
321 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
322 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
323 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
324 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
325 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
326 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
327 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
328 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
329 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
330 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
331 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
332 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
333
334 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
335 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
336 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
337 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
338
339 return i;
340}
341
342
343void
345 double bfuncs[4], double dbfuncs[4])
346{
347 double b1[2], b2[3];
348 double x = basis->grid->points[i];
349 int i2 = i+2;
350 double* restrict dxInv = basis->dxInv;
351 double* restrict xVals = basis->xVals;
352
353 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
354 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
355 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
356 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
357 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
358 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
359 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
360 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
361 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
362 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
363 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
364 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
365
366 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
367 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
368 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
369 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
370}
371
372
373int
375 double bfuncs[4], double dbfuncs[4], double d2bfuncs[4])
376{
377 double b1[2], b2[3];
378 int i = (*basis->grid->reverse_map)(basis->grid, x);
379 int i2 = i+2;
380 double* restrict dxInv = basis->dxInv;
381 double* restrict xVals = basis->xVals;
382
383 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
384 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
385 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
386 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
387 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
388 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
389 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
390 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
391 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
392 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
393 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
394 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
395
396 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
397 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
398 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
399 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
400
401 d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
402 d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
403 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
404 d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
405 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
406 d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
407
408 return i;
409}
410
411
412void
414 double bfuncs[4], double dbfuncs[4],
415 double d2bfuncs[4])
416{
417 double b1[2], b2[3];
418 double x = basis->grid->points[i];
419 int i2 = i+2;
420 double* restrict dxInv = basis->dxInv;
421 double* restrict xVals = basis->xVals;
422
423 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
424 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
425 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
426 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
427 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
428 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
429 bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
430 bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
431 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
432 bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
433 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
434 bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
435
436 dbfuncs[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
437 dbfuncs[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
438 dbfuncs[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
439 dbfuncs[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
440
441 d2bfuncs[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
442 d2bfuncs[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
443 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
444 d2bfuncs[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
445 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
446 d2bfuncs[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
447}
448
449
450#ifdef HAVE_SSE2
451typedef union
452{
453 float s[4];
454 __m128 v;
455} uvec4;
456
457typedef union
458{
459 double s[2];
460 __m128d v;
461} uvec2;
462
463int
464get_NUBasis_funcs_sse_s (NUBasis* restrict basis, double x,
465 __m128 *restrict funcs)
466{
467 double b1[2], b2[3];
468 int i = (*basis->grid->reverse_map)(basis->grid, x);
469 int i2 = i+2;
470 double* restrict dxInv = basis->dxInv;
471 double* restrict xVals = basis->xVals;
472
473 uvec4 bfuncs;
474
475
476 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
477 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
478 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
479 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
480 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
481 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
482 bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
483 bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
484 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
485 bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
486 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
487 bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
488 *funcs = bfuncs.v;
489 return i;
490}
491
492int
493get_NUBasis_dfuncs_sse_s (NUBasis* restrict basis, double x,
494 __m128 *restrict funcs, __m128 *restrict dfuncs)
495{
496 double b1[2], b2[3];
497 int i = (*basis->grid->reverse_map)(basis->grid, x);
498 int i2 = i+2;
499 double* restrict dxInv = basis->dxInv;
500 double* restrict xVals = basis->xVals;
501 uvec4 bfuncs, dbfuncs;
502
503
504 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
505 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
506 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
507 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
508 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
509 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
510 bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
511 bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
512 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
513 bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
514 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
515 bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
516
517 dbfuncs.s[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
518 dbfuncs.s[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
519 dbfuncs.s[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
520 dbfuncs.s[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
521
522 *funcs = bfuncs.v;
523 *dfuncs = dbfuncs.v;
524
525 return i;
526}
527
528int
529get_NUBasis_d2funcs_sse_s (NUBasis* restrict basis, double x,
530 __m128 *restrict funcs, __m128 *restrict dfuncs, __m128 *restrict d2funcs)
531{
532 double b1[2], b2[3];
533 int i = (*basis->grid->reverse_map)(basis->grid, x);
534 int i2 = i+2;
535 double* restrict dxInv = basis->dxInv;
536 double* restrict xVals = basis->xVals;
537 uvec4 bfuncs, dbfuncs, d2bfuncs;
538
539 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
540 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
541 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
542 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
543 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
544 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
545 bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
546 bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
547 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
548 bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
549 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
550 bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
551
552 dbfuncs.s[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
553 dbfuncs.s[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
554 dbfuncs.s[2] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
555 dbfuncs.s[3] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
556
557 d2bfuncs.s[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
558 d2bfuncs.s[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
559 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
560 d2bfuncs.s[2] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
561 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
562 d2bfuncs.s[3] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
563
564 *funcs = bfuncs.v;
565 *dfuncs = dbfuncs.v;
566 *d2funcs = d2bfuncs.v;
567
568 return i;
569}
570
571
573// Double-precision version //
575int
576get_NUBasis_funcs_sse_d (NUBasis* restrict basis, double x,
577 __m128d *restrict f01, __m128d *restrict f23)
578{
579 double b1[2], b2[3];
580 int i = (*basis->grid->reverse_map)(basis->grid, x);
581 int i2 = i+2;
582 double* restrict dxInv = basis->dxInv;
583 double* restrict xVals = basis->xVals;
584 uvec2 bf01, bf23, dbf01, dbf23;
585
586 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
587 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
588 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
589 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
590 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
591 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
592 bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
593 bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
594 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
595 bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
596 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
597 bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
598
599 *f01 = bf01.v; *f23 = bf23.v;
600 return i;
601}
602
603int
604get_NUBasis_dfuncs_sse_d (NUBasis* restrict basis, double x,
605 __m128d *restrict f01, __m128d *restrict f23,
606 __m128d *restrict df01, __m128d *restrict df23)
607
608{
609 double b1[2], b2[3];
610 int i = (*basis->grid->reverse_map)(basis->grid, x);
611 int i2 = i+2;
612 double* restrict dxInv = basis->dxInv;
613 double* restrict xVals = basis->xVals;
614 uvec2 bf01, bf23, dbf01, dbf23;
615
616 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
617 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
618 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
619 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
620 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
621 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
622 bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
623 bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
624 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
625 bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
626 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
627 bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
628
629 dbf01.s[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
630 dbf01.s[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
631 dbf23.s[0] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
632 dbf23.s[1] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
633
634 *f01 = bf01.v; *f23 = bf23.v;
635 *df01 = dbf01.v; *df23 = dbf23.v;
636
637 return i;
638}
639
640int
641get_NUBasis_d2funcs_sse_d (NUBasis* restrict basis, double x,
642 __m128d *restrict f01, __m128d *restrict f23,
643 __m128d *restrict df01, __m128d *restrict df23,
644 __m128d *restrict d2f01, __m128d *restrict d2f23)
645{
646 double b1[2], b2[3];
647 int i = (*basis->grid->reverse_map)(basis->grid, x);
648 int i2 = i+2;
649 double* restrict dxInv = basis->dxInv;
650 double* restrict xVals = basis->xVals;
651 uvec2 bf01, bf23, dbf01, dbf23, d2bf01, d2bf23;
652
653 b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0];
654 b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0];
655 b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0];
656 b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+
657 (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]);
658 b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1];
659 bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0];
660 bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] +
661 (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]);
662 bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] +
663 (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]);
664 bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2];
665
666 dbf01.s[0] = -3.0f * (dxInv[3*(i )+2] * b2[0]);
667 dbf01.s[1] = 3.0f * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]);
668 dbf23.s[0] = 3.0f * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]);
669 dbf23.s[1] = 3.0f * (dxInv[3*(i+2)+2] * b2[2]);
670
671 d2bf01.s[0] = 6.0f * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]);
672 d2bf01.s[1] = 6.0f * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] +
673 dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]);
674 d2bf23.s[0] = 6.0f * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] -
675 dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]);
676 d2bf23.s[1] = 6.0f * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]);
677
678 *f01 = bf01.v; *f23 = bf23.v;
679 *df01 = dbf01.v; *df23 = dbf23.v;
680 *d2f01 = d2bf01.v; *d2f23 = d2bf23.v;
681
682 return i;
683}
684
685#endif
qreal v
#define restrict
void get_NUBasis_funcs_si(NUBasis *restrict basis, int i, float bfuncs[4])
Definition nubasis.cpp:97
void get_NUBasis_dfuncs_si(NUBasis *restrict basis, int i, float bfuncs[4], float dbfuncs[4])
Definition nubasis.cpp:153
void destroy_NUBasis(NUBasis *basis)
Definition nubasis.cpp:62
NUBasis * create_NUBasis(NUgrid *grid, bool periodic)
Definition nubasis.cpp:27
void get_NUBasis_d2funcs_di(NUBasis *restrict basis, int i, double bfuncs[4], double dbfuncs[4], double d2bfuncs[4])
Definition nubasis.cpp:413
void get_NUBasis_dfuncs_di(NUBasis *restrict basis, int i, double bfuncs[4], double dbfuncs[4])
Definition nubasis.cpp:344
int get_NUBasis_d2funcs_d(NUBasis *restrict basis, double x, double bfuncs[4], double dbfuncs[4], double d2bfuncs[4])
Definition nubasis.cpp:374
int get_NUBasis_dfuncs_d(NUBasis *restrict basis, double x, double bfuncs[4], double dbfuncs[4])
Definition nubasis.cpp:312
int get_NUBasis_dfuncs_s(NUBasis *restrict basis, double x, float bfuncs[4], float dbfuncs[4])
Definition nubasis.cpp:121
int get_NUBasis_funcs_d(NUBasis *restrict basis, double x, double bfuncs[4])
Definition nubasis.cpp:262
int get_NUBasis_funcs_s(NUBasis *restrict basis, double x, float bfuncs[4])
Definition nubasis.cpp:71
void get_NUBasis_funcs_di(NUBasis *restrict basis, int i, double bfuncs[4])
Definition nubasis.cpp:288
void get_NUBasis_d2funcs_si(NUBasis *restrict basis, int i, float bfuncs[4], float dbfuncs[4], float d2bfuncs[4])
Definition nubasis.cpp:222
int get_NUBasis_d2funcs_s(NUBasis *restrict basis, double x, float bfuncs[4], float dbfuncs[4], float d2bfuncs[4])
Definition nubasis.cpp:183
double *restrict xVals
Definition nubasis.h:33
double *restrict dxInv
Definition nubasis.h:35
NUgrid *restrict grid
Definition nubasis.h:29
int num_points
Definition nugrid.h:35
double *restrict points
Definition nugrid.h:34