Krita Source Code Documentation
Loading...
Searching...
No Matches
multi_bspline_eval_std_d.h
Go to the documentation of this file.
1
2// einspline: a library for creating and evaluating B-splines //
3// Copyright (C) 2007 Kenneth P. Esler, Jr. //
4// //
5// This program is free software; you can redistribute it and/or modify //
6// it under the terms of the GNU General Public License as published by //
7// the Free Software Foundation; either version 2 of the License, or //
8// (at your option) any later version. //
9// //
10// This program is distributed in the hope that it will be useful, //
11// but WITHOUT ANY WARRANTY; without even the implied warranty of //
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13// GNU General Public License for more details. //
14// //
15// You should have received a copy of the GNU General Public License //
16// along with this program; if not, write to the Free Software //
17// Foundation, Inc., 51 Franklin Street, Fifth Floor, //
18// Boston, MA 02110-1301 USA //
20
21#ifndef MULTI_BSPLINE_EVAL_STD_D_H
22#define MULTI_BSPLINE_EVAL_STD_D_H
23
24#include <math.h>
25#include <stdio.h>
27
28extern const double* restrict Ad;
29extern const double* restrict dAd;
30extern const double* restrict d2Ad;
31extern const double* restrict d3Ad;
32
33/************************************************************/
34/* 1D double-precision, real evaulation functions */
35/************************************************************/
36inline void
38 double x,
39 double* restrict vals)
40{
41 x -= spline->x_grid.start;
42 double ux = x*spline->x_grid.delta_inv;
43 double ipartx, tx;
44 tx = modf (ux, &ipartx); int ix = (int) ipartx;
45
46 double tpx[4], a[4];
47 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
48 double* restrict coefs = spline->coefs;
49
50 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
51 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
52 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
53 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
54
55 int xs = spline->x_stride;
56
57 for (int n=0; n<spline->num_splines; n++)
58 vals[n] = 0.0;
59
60 for (int i=0; i<4; i++) {
61 double* restrict coefs = spline->coefs + ((ix+i)*xs);
62 for (int n=0; n<spline->num_splines; n++)
63 vals[n] += a[i] * coefs[n];
64 }
65}
66
67
68
69inline void
71 double x,
72 double* restrict vals,
73 double* restrict grads)
74{
75 x -= spline->x_grid.start;
76 double ux = x*spline->x_grid.delta_inv;
77 double ipartx, tx;
78 tx = modf (ux, &ipartx); int ix = (int) ipartx;
79
80 double tpx[4], a[4], da[4];
81 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
82 double* restrict coefs = spline->coefs;
83
84 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
85 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
86 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
87 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
88 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
89 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
90 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
91 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
92
93 int xs = spline->x_stride;
94
95 for (int n=0; n<spline->num_splines; n++) {
96 vals[n] = 0.0;
97 grads[n] = 0.0;
98 }
99
100 for (int i=0; i<4; i++) {
101 double* restrict coefs = spline->coefs + ((ix+i)*xs);
102 for (int n=0; n<spline->num_splines; n++) {
103 vals[n] += a[i] * coefs[n];
104 grads[n] += da[i] * coefs[n];
105 }
106 }
107
108 double dxInv = spline->x_grid.delta_inv;
109 for (int n=0; n<spline->num_splines; n++)
110 grads[n] *= dxInv;
111}
112
113
114inline void
116 double x,
117 double* restrict vals,
118 double* restrict grads,
119 double* restrict lapl)
120{
121 x -= spline->x_grid.start;
122 double ux = x*spline->x_grid.delta_inv;
123 double ipartx, tx;
124 tx = modf (ux, &ipartx); int ix = (int) ipartx;
125
126 double tpx[4], a[4], da[4], d2a[4];
127 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
128 double* restrict coefs = spline->coefs;
129
130 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
131 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
132 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
133 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
134 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
135 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
136 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
137 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
138 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
139 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
140 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
141 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
142
143 int xs = spline->x_stride;
144
145 for (int n=0; n<spline->num_splines; n++) {
146 vals[n] = 0.0;
147 grads[n] = 0.0;
148 lapl[n] = 0.0;
149 }
150
151 for (int i=0; i<4; i++) {
152 double* restrict coefs = spline->coefs + ((ix+i)*xs);
153 for (int n=0; n<spline->num_splines; n++) {
154 vals[n] += a[i] * coefs[n];
155 grads[n] += da[i] * coefs[n];
156 lapl[n] += d2a[i] * coefs[n];
157 }
158 }
159
160 double dxInv = spline->x_grid.delta_inv;
161 for (int n=0; n<spline->num_splines; n++) {
162 grads[n] *= dxInv;
163 lapl [n] *= dxInv*dxInv;
164 }
165}
166
167
168inline void
170 double x,
171 double* restrict vals,
172 double* restrict grads,
173 double* restrict hess)
174{
175 eval_multi_UBspline_1d_d_vgl (spline, x, vals, grads, hess);
176}
177
178
179
180/************************************************************/
181/* 2D double-precision, real evaulation functions */
182/************************************************************/
183inline void
185 double x, double y,
186 double* restrict vals)
187{
188 x -= spline->x_grid.start;
189 y -= spline->y_grid.start;
190 double ux = x*spline->x_grid.delta_inv;
191 double uy = y*spline->y_grid.delta_inv;
192 double ipartx, iparty, tx, ty;
193 tx = modf (ux, &ipartx); int ix = (int) ipartx;
194 ty = modf (uy, &iparty); int iy = (int) iparty;
195
196 double tpx[4], tpy[4], a[4], b[4];
197 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
198 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
199 double* restrict coefs = spline->coefs;
200
201 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
202 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
203 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
204 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
205
206 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
207 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
208 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
209 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
210
211 int xs = spline->x_stride;
212 int ys = spline->y_stride;
213
214 for (int n=0; n<spline->num_splines; n++)
215 vals[n] = 0.0;
216
217 for (int i=0; i<4; i++)
218 for (int j=0; j<4; j++) {
219 double prefactor = a[i]*b[j];
220 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
221 for (int n=0; n<spline->num_splines; n++)
222 vals[n] += prefactor*coefs[n];
223 }
224}
225
226
227inline void
229 double x, double y,
230 double* restrict vals,
231 double* restrict grads)
232{
233 x -= spline->x_grid.start;
234 y -= spline->y_grid.start;
235 double ux = x*spline->x_grid.delta_inv;
236 double uy = y*spline->y_grid.delta_inv;
237 double ipartx, iparty, tx, ty;
238 tx = modf (ux, &ipartx); int ix = (int) ipartx;
239 ty = modf (uy, &iparty); int iy = (int) iparty;
240
241 double tpx[4], tpy[4], a[4], b[4], da[4], db[4];
242 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
243 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
244 double* restrict coefs = spline->coefs;
245
246 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
247 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
248 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
249 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
250 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
251 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
252 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
253 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
254
255 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
256 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
257 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
258 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
259 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
260 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
261 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
262 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
263
264 int xs = spline->x_stride;
265 int ys = spline->y_stride;
266
267 for (int n=0; n<spline->num_splines; n++) {
268 vals[n] = 0.0;
269 grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0;
270 }
271
272 for (int i=0; i<4; i++)
273 for (int j=0; j<4; j++) {
274 double ab = a[i]*b[j];
275 double dab[2];
276 dab[0] = da[i]* b[j];
277 dab[1] = a[i]*db[j];
278
279 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
280 for (int n=0; n<spline->num_splines; n++) {
281 vals [n] += ab *coefs[n];
282 grads[2*n+0] += dab[0]*coefs[n];
283 grads[2*n+1] += dab[1]*coefs[n];
284 }
285 }
286
287 double dxInv = spline->x_grid.delta_inv;
288 double dyInv = spline->y_grid.delta_inv;
289 for (int n=0; n<spline->num_splines; n++) {
290 grads[2*n+0] *= dxInv;
291 grads[2*n+1] *= dyInv;
292 }
293}
294
295inline void
297 double x, double y,
298 double* restrict vals,
299 double* restrict grads,
300 double* restrict lapl)
301{
302 x -= spline->x_grid.start;
303 y -= spline->y_grid.start;
304 double ux = x*spline->x_grid.delta_inv;
305 double uy = y*spline->y_grid.delta_inv;
306 double ipartx, iparty, tx, ty;
307 tx = modf (ux, &ipartx); int ix = (int) ipartx;
308 ty = modf (uy, &iparty); int iy = (int) iparty;
309
310 double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4];
311 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
312 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
313 double* restrict coefs = spline->coefs;
314
315 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
316 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
317 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
318 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
319 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
320 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
321 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
322 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
323 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
324 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
325 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
326 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
327
328 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
329 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
330 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
331 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
332 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
333 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
334 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
335 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
336 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
337 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
338 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
339 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
340
341 int xs = spline->x_stride;
342 int ys = spline->y_stride;
343
344 double lapl2[2*spline->num_splines];
345 for (int n=0; n<spline->num_splines; n++) {
346 vals[n] = 0.0;
347 grads[2*n+0] = grads[2*n+1] = 0.0;
348 lapl2[2*n+0] = lapl2[2*n+1] = 0.0;
349 }
350
351 for (int i=0; i<4; i++)
352 for (int j=0; j<4; j++) {
353 double ab = a[i]*b[j];
354 double dab[2], d2ab[2];
355 dab[0] = da[i]* b[j];
356 dab[1] = a[i]*db[j];
357 d2ab[0] = d2a[i]* b[j];
358 d2ab[1] = a[i]*d2b[j];
359
360 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
361 for (int n=0; n<spline->num_splines; n++) {
362 vals[n] += ab *coefs[n];
363 grads[2*n+0] += dab[0]*coefs[n];
364 grads[2*n+1] += dab[1]*coefs[n];
365 lapl2[2*n+0] += d2ab[0]*coefs[n];
366 lapl2[2*n+1] += d2ab[1]*coefs[n];
367 }
368 }
369
370 double dxInv = spline->x_grid.delta_inv;
371 double dyInv = spline->y_grid.delta_inv;
372 for (int n=0; n<spline->num_splines; n++) {
373 grads[2*n+0] *= dxInv;
374 grads[2*n+1] *= dyInv;
375 lapl2[2*n+0] *= dxInv*dxInv;
376 lapl2[2*n+1] *= dyInv*dyInv;
377 lapl[n] = lapl2[2*n+0] + lapl2[2*n+1];
378 }
379}
380
381
382
383
384inline void
386 double x, double y,
387 double* restrict vals,
388 double* restrict grads,
389 double* restrict hess)
390{
391 x -= spline->x_grid.start;
392 y -= spline->y_grid.start;
393 double ux = x*spline->x_grid.delta_inv;
394 double uy = y*spline->y_grid.delta_inv;
395 double ipartx, iparty, tx, ty;
396 tx = modf (ux, &ipartx); int ix = (int) ipartx;
397 ty = modf (uy, &iparty); int iy = (int) iparty;
398
399 double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4];
400 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
401 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
402 double* restrict coefs = spline->coefs;
403
404 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
405 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
406 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
407 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
408 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
409 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
410 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
411 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
412 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
413 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
414 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
415 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
416
417 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
418 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
419 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
420 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
421 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
422 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
423 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
424 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
425 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
426 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
427 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
428 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
429
430 int xs = spline->x_stride;
431 int ys = spline->y_stride;
432
433 for (int n=0; n<spline->num_splines; n++) {
434 vals[n] = 0.0;
435 grads[2*n+0] = grads[2*n+1] = 0.0;
436 for (int i=0; i<4; i++)
437 hess[4*n+i] = 0.0;
438 }
439
440 for (int i=0; i<4; i++)
441 for (int j=0; j<4; j++){
442 double ab = a[i]*b[j];
443 double dab[2], d2ab[3];
444 dab[0] = da[i]* b[j];
445 dab[1] = a[i]*db[j];
446 d2ab[0] = d2a[i] * b[j];
447 d2ab[1] = da[i] * db[j];
448 d2ab[2] = a[i] * d2b[j];
449
450 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
451 for (int n=0; n<spline->num_splines; n++) {
452 vals[n] += ab *coefs[n];
453 grads[2*n+0] += dab[0]*coefs[n];
454 grads[2*n+1] += dab[1]*coefs[n];
455 hess [4*n+0] += d2ab[0]*coefs[n];
456 hess [4*n+1] += d2ab[1]*coefs[n];
457 hess [4*n+3] += d2ab[2]*coefs[n];
458 }
459 }
460
461 double dxInv = spline->x_grid.delta_inv;
462 double dyInv = spline->y_grid.delta_inv;
463 for (int n=0; n<spline->num_splines; n++) {
464 grads[2*n+0] *= dxInv;
465 grads[2*n+1] *= dyInv;
466 hess[4*n+0] *= dxInv*dxInv;
467 hess[4*n+1] *= dxInv*dyInv;
468 hess[4*n+3] *= dyInv*dyInv;
469 // Copy hessian elements into lower half of 3x3 matrix
470 hess[4*n+2] = hess[4*n+1];
471 }
472}
473
474
475
476
477/************************************************************/
478/* 3D double-precision, real evaulation functions */
479/************************************************************/
480inline void
482 double x, double y, double z,
483 double* restrict vals)
484{
485 x -= spline->x_grid.start;
486 y -= spline->y_grid.start;
487 z -= spline->z_grid.start;
488 double ux = x*spline->x_grid.delta_inv;
489 double uy = y*spline->y_grid.delta_inv;
490 double uz = z*spline->z_grid.delta_inv;
491 double ipartx, iparty, ipartz, tx, ty, tz;
492 tx = modf (ux, &ipartx); int ix = (int) ipartx;
493 ty = modf (uy, &iparty); int iy = (int) iparty;
494 tz = modf (uz, &ipartz); int iz = (int) ipartz;
495
496 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4];
497 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
498 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
499 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
500 double* restrict coefs = spline->coefs;
501
502 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
503 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
504 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
505 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
506
507 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
508 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
509 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
510 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
511
512 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
513 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
514 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
515 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
516
517 int xs = spline->x_stride;
518 int ys = spline->y_stride;
519 int zs = spline->z_stride;
520
521 for (int n=0; n<spline->num_splines; n++)
522 vals[n] = 0.0;
523
524 for (int i=0; i<4; i++)
525 for (int j=0; j<4; j++)
526 for (int k=0; k<4; k++) {
527 double prefactor = a[i]*b[j]*c[k];
528 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
529 for (int n=0; n<spline->num_splines; n++)
530 vals[n] += prefactor*coefs[n];
531 }
532}
533
534
535inline void
537 double x, double y, double z,
538 double* restrict vals,
539 double* restrict grads)
540{
541 x -= spline->x_grid.start;
542 y -= spline->y_grid.start;
543 z -= spline->z_grid.start;
544 double ux = x*spline->x_grid.delta_inv;
545 double uy = y*spline->y_grid.delta_inv;
546 double uz = z*spline->z_grid.delta_inv;
547 double ipartx, iparty, ipartz, tx, ty, tz;
548 tx = modf (ux, &ipartx); int ix = (int) ipartx;
549 ty = modf (uy, &iparty); int iy = (int) iparty;
550 tz = modf (uz, &ipartz); int iz = (int) ipartz;
551
552 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
553 da[4], db[4], dc[4];
554 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
555 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
556 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
557 double* restrict coefs = spline->coefs;
558
559 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
560 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
561 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
562 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
563 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
564 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
565 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
566 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
567
568 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
569 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
570 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
571 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
572 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
573 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
574 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
575 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
576
577 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
578 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
579 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
580 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
581 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]);
582 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]);
583 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]);
584 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]);
585
586 int xs = spline->x_stride;
587 int ys = spline->y_stride;
588 int zs = spline->z_stride;
589
590 for (int n=0; n<spline->num_splines; n++) {
591 vals[n] = 0.0;
592 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
593 }
594
595 for (int i=0; i<4; i++)
596 for (int j=0; j<4; j++)
597 for (int k=0; k<4; k++) {
598 double abc = a[i]*b[j]*c[k];
599 double dabc[3];
600 dabc[0] = da[i]* b[j]* c[k];
601 dabc[1] = a[i]*db[j]* c[k];
602 dabc[2] = a[i]* b[j]*dc[k];
603
604 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
605 for (int n=0; n<spline->num_splines; n++) {
606 vals[n] += abc *coefs[n];
607 grads[3*n+0] += dabc[0]*coefs[n];
608 grads[3*n+1] += dabc[1]*coefs[n];
609 grads[3*n+2] += dabc[2]*coefs[n];
610 }
611 }
612
613 double dxInv = spline->x_grid.delta_inv;
614 double dyInv = spline->y_grid.delta_inv;
615 double dzInv = spline->z_grid.delta_inv;
616 for (int n=0; n<spline->num_splines; n++) {
617 grads[3*n+0] *= dxInv;
618 grads[3*n+1] *= dyInv;
619 grads[3*n+2] *= dzInv;
620 }
621}
622
623
624
625inline void
627 double x, double y, double z,
628 double* restrict vals,
629 double* restrict grads,
630 double* restrict lapl)
631{
632 x -= spline->x_grid.start;
633 y -= spline->y_grid.start;
634 z -= spline->z_grid.start;
635 double ux = x*spline->x_grid.delta_inv;
636 double uy = y*spline->y_grid.delta_inv;
637 double uz = z*spline->z_grid.delta_inv;
638 double ipartx, iparty, ipartz, tx, ty, tz;
639 tx = modf (ux, &ipartx); int ix = (int) ipartx;
640 ty = modf (uy, &iparty); int iy = (int) iparty;
641 tz = modf (uz, &ipartz); int iz = (int) ipartz;
642
643 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
644 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
645 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
646 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
647 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
648 double* restrict coefs = spline->coefs;
649
650 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
651 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
652 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
653 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
654 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
655 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
656 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
657 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
658 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
659 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
660 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
661 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
662
663 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
664 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
665 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
666 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
667 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
668 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
669 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
670 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
671 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
672 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
673 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
674 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
675
676 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
677 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
678 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
679 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
680 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]);
681 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]);
682 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]);
683 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]);
684 d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]);
685 d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]);
686 d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]);
687 d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]);
688
689 int xs = spline->x_stride;
690 int ys = spline->y_stride;
691 int zs = spline->z_stride;
692
693 double lapl3[3*spline->num_splines];
694 for (int n=0; n<spline->num_splines; n++) {
695 vals[n] = 0.0;
696 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
697 lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0;
698 }
699
700 for (int i=0; i<4; i++)
701 for (int j=0; j<4; j++)
702 for (int k=0; k<4; k++) {
703 double abc = a[i]*b[j]*c[k];
704 double dabc[3], d2abc[3];
705 dabc[0] = da[i]* b[j]* c[k];
706 dabc[1] = a[i]*db[j]* c[k];
707 dabc[2] = a[i]* b[j]*dc[k];
708 d2abc[0] = d2a[i]* b[j]* c[k];
709 d2abc[1] = a[i]*d2b[j]* c[k];
710 d2abc[2] = a[i]* b[j]*d2c[k];
711
712 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
713 for (int n=0; n<spline->num_splines; n++) {
714 vals[n] += abc *coefs[n];
715 grads[3*n+0] += dabc[0]*coefs[n];
716 grads[3*n+1] += dabc[1]*coefs[n];
717 grads[3*n+2] += dabc[2]*coefs[n];
718 lapl3[3*n+0] += d2abc[0]*coefs[n];
719 lapl3[3*n+1] += d2abc[1]*coefs[n];
720 lapl3[3*n+2] += d2abc[2]*coefs[n];
721 }
722 }
723
724 double dxInv = spline->x_grid.delta_inv;
725 double dyInv = spline->y_grid.delta_inv;
726 double dzInv = spline->z_grid.delta_inv;
727 for (int n=0; n<spline->num_splines; n++) {
728 grads[3*n+0] *= dxInv;
729 grads[3*n+1] *= dyInv;
730 grads[3*n+2] *= dzInv;
731 lapl3[3*n+0] *= dxInv*dxInv;
732 lapl3[3*n+1] *= dyInv*dyInv;
733 lapl3[3*n+2] *= dzInv*dzInv;
734 lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2];
735 }
736}
737
738
739
740inline void
742 double x, double y, double z,
743 double* restrict vals,
744 double* restrict grads,
745 double* restrict hess)
746{
747 x -= spline->x_grid.start;
748 y -= spline->y_grid.start;
749 z -= spline->z_grid.start;
750 double ux = x*spline->x_grid.delta_inv;
751 double uy = y*spline->y_grid.delta_inv;
752 double uz = z*spline->z_grid.delta_inv;
753 double ipartx, iparty, ipartz, tx, ty, tz;
754 tx = modf (ux, &ipartx); int ix = (int) ipartx;
755 ty = modf (uy, &iparty); int iy = (int) iparty;
756 tz = modf (uz, &ipartz); int iz = (int) ipartz;
757
758 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
759 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
760 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
761 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
762 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
763 double* restrict coefs = spline->coefs;
764
765 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
766 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
767 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
768 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
769 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
770 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
771 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
772 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
773 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
774 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
775 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
776 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
777
778 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
779 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
780 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
781 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
782 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
783 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
784 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
785 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
786 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
787 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
788 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
789 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
790
791 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
792 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
793 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
794 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
795 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]);
796 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]);
797 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]);
798 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]);
799 d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]);
800 d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]);
801 d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]);
802 d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]);
803
804 int xs = spline->x_stride;
805 int ys = spline->y_stride;
806 int zs = spline->z_stride;
807
808 for (int n=0; n<spline->num_splines; n++) {
809 vals[n] = 0.0;
810 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
811 for (int i=0; i<9; i++)
812 hess[9*n+i] = 0.0;
813 }
814
815 for (int i=0; i<4; i++)
816 for (int j=0; j<4; j++)
817 for (int k=0; k<4; k++) {
818 double abc = a[i]*b[j]*c[k];
819 double dabc[3], d2abc[6];
820 dabc[0] = da[i]* b[j]* c[k];
821 dabc[1] = a[i]*db[j]* c[k];
822 dabc[2] = a[i]* b[j]*dc[k];
823 d2abc[0] = d2a[i]* b[j]* c[k];
824 d2abc[1] = da[i]* db[j]* c[k];
825 d2abc[2] = da[i]* b[j]* dc[k];
826 d2abc[3] = a[i]*d2b[j]* c[k];
827 d2abc[4] = a[i]* db[j]* dc[k];
828 d2abc[5] = a[i]* b[j]*d2c[k];
829
830 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
831 for (int n=0; n<spline->num_splines; n++) {
832 vals[n] += abc *coefs[n];
833 grads[3*n+0] += dabc[0]*coefs[n];
834 grads[3*n+1] += dabc[1]*coefs[n];
835 grads[3*n+2] += dabc[2]*coefs[n];
836 hess [9*n+0] += d2abc[0]*coefs[n];
837 hess [9*n+1] += d2abc[1]*coefs[n];
838 hess [9*n+2] += d2abc[2]*coefs[n];
839 hess [9*n+4] += d2abc[3]*coefs[n];
840 hess [9*n+5] += d2abc[4]*coefs[n];
841 hess [9*n+8] += d2abc[5]*coefs[n];
842 }
843 }
844
845 double dxInv = spline->x_grid.delta_inv;
846 double dyInv = spline->y_grid.delta_inv;
847 double dzInv = spline->z_grid.delta_inv;
848 for (int n=0; n<spline->num_splines; n++) {
849 grads[3*n+0] *= dxInv;
850 grads[3*n+1] *= dyInv;
851 grads[3*n+2] *= dzInv;
852 hess [9*n+0] *= dxInv*dxInv;
853 hess [9*n+4] *= dyInv*dyInv;
854 hess [9*n+8] *= dzInv*dzInv;
855 hess [9*n+1] *= dxInv*dyInv;
856 hess [9*n+2] *= dxInv*dzInv;
857 hess [9*n+5] *= dyInv*dzInv;
858 // Copy hessian elements into lower half of 3x3 matrix
859 hess [9*n+3] = hess[9*n+1];
860 hess [9*n+6] = hess[9*n+2];
861 hess [9*n+7] = hess[9*n+5];
862 }
863}
864
865inline void
867 double x, double y, double z,
868 double* restrict vals,
869 double* restrict grads,
870 double* restrict hess,
871 double* restrict gradhess)
872{
873 x -= spline->x_grid.start;
874 y -= spline->y_grid.start;
875 z -= spline->z_grid.start;
876 double ux = x*spline->x_grid.delta_inv;
877 double uy = y*spline->y_grid.delta_inv;
878 double uz = z*spline->z_grid.delta_inv;
879 double ipartx, iparty, ipartz, tx, ty, tz;
880 tx = modf (ux, &ipartx); int ix = (int) ipartx;
881 ty = modf (uy, &iparty); int iy = (int) iparty;
882 tz = modf (uz, &ipartz); int iz = (int) ipartz;
883
884 double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
885 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4],
886 d3a[4], d3b[4], d3c[4];
887 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
888 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
889 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
890 double* restrict coefs = spline->coefs;
891
892 a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]);
893 a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]);
894 a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]);
895 a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]);
896 da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]);
897 da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]);
898 da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]);
899 da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]);
900 d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]);
901 d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]);
902 d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]);
903 d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]);
904 d3a[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpx[3]);
905 d3a[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpx[3]);
906 d3a[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpx[3]);
907 d3a[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpx[3]);
908
909 b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]);
910 b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]);
911 b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]);
912 b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]);
913 db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]);
914 db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]);
915 db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]);
916 db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]);
917 d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]);
918 d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]);
919 d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]);
920 d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]);
921 d3b[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpy[3]);
922 d3b[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpy[3]);
923 d3b[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpy[3]);
924 d3b[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpy[3]);
925
926 c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]);
927 c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]);
928 c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]);
929 c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]);
930 dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]);
931 dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]);
932 dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]);
933 dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]);
934 d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]);
935 d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]);
936 d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]);
937 d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]);
938 d3c[0] = (/*d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] +*/ d3Ad[ 3]*tpz[3]);
939 d3c[1] = (/*d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] +*/ d3Ad[ 7]*tpz[3]);
940 d3c[2] = (/*d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] +*/ d3Ad[11]*tpz[3]);
941 d3c[3] = (/*d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] +*/ d3Ad[15]*tpz[3]);
942
943 int xs = spline->x_stride;
944 int ys = spline->y_stride;
945 int zs = spline->z_stride;
946
947 for (int n=0; n<spline->num_splines; n++) {
948 vals[n] = 0.0;
949 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
950 for (int i=0; i<9; i++)
951 hess[9*n+i] = 0.0;
952 for (int i=0; i<27; i++)
953 gradhess[27*n+i] = 0.0;
954 }
955
956 for (int i=0; i<4; i++)
957 for (int j=0; j<4; j++)
958 for (int k=0; k<4; k++) {
959 double abc = a[i]*b[j]*c[k];
960 double dabc[3], d2abc[6], d3abc[10];
961 dabc[0] = da[i]* b[j]* c[k];
962 dabc[1] = a[i]*db[j]* c[k];
963 dabc[2] = a[i]* b[j]*dc[k];
964 d2abc[0] = d2a[i]* b[j]* c[k];
965 d2abc[1] = da[i]* db[j]* c[k];
966 d2abc[2] = da[i]* b[j]* dc[k];
967 d2abc[3] = a[i]*d2b[j]* c[k];
968 d2abc[4] = a[i]* db[j]* dc[k];
969 d2abc[5] = a[i]* b[j]*d2c[k];
970
971 d3abc[0] = d3a[i]* b[j]* c[k];
972 d3abc[1] = d2a[i]* db[j]* c[k];
973 d3abc[2] = d2a[i]* b[j]* dc[k];
974 d3abc[3] = da[i]*d2b[j]* c[k];
975 d3abc[4] = da[i]* db[j]* dc[k];
976 d3abc[5] = da[i]* b[j]*d2c[k];
977 d3abc[6] = a[i]*d3b[j]* c[k];
978 d3abc[7] = a[i]*d2b[j]* dc[k];
979 d3abc[8] = a[i]* db[j]*d2c[k];
980 d3abc[9] = a[i]* b[j]*d3c[k];
981
982 double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
983 for (int n=0; n<spline->num_splines; n++) {
984 vals[n] += abc *coefs[n];
985 grads[3*n+0] += dabc[0]*coefs[n];
986 grads[3*n+1] += dabc[1]*coefs[n];
987 grads[3*n+2] += dabc[2]*coefs[n];
988 hess [9*n+0] += d2abc[0]*coefs[n];
989 hess [9*n+1] += d2abc[1]*coefs[n];
990 hess [9*n+2] += d2abc[2]*coefs[n];
991 hess [9*n+4] += d2abc[3]*coefs[n];
992 hess [9*n+5] += d2abc[4]*coefs[n];
993 hess [9*n+8] += d2abc[5]*coefs[n];
994
995 gradhess [27*n+0 ] += d3abc[0]*coefs[n];
996 gradhess [27*n+1 ] += d3abc[1]*coefs[n];
997 gradhess [27*n+2 ] += d3abc[2]*coefs[n];
998 gradhess [27*n+4 ] += d3abc[3]*coefs[n];
999 gradhess [27*n+5 ] += d3abc[4]*coefs[n];
1000 gradhess [27*n+8 ] += d3abc[5]*coefs[n];
1001 gradhess [27*n+13] += d3abc[6]*coefs[n];
1002 gradhess [27*n+14] += d3abc[7]*coefs[n];
1003 gradhess [27*n+17] += d3abc[8]*coefs[n];
1004 gradhess [27*n+26] += d3abc[9]*coefs[n];
1005 }
1006 }
1007
1008 double dxInv = spline->x_grid.delta_inv;
1009 double dyInv = spline->y_grid.delta_inv;
1010 double dzInv = spline->z_grid.delta_inv;
1011 for (int n=0; n<spline->num_splines; n++) {
1012 grads[3*n+0] *= dxInv;
1013 grads[3*n+1] *= dyInv;
1014 grads[3*n+2] *= dzInv;
1015 hess [9*n+0] *= dxInv*dxInv;
1016 hess [9*n+4] *= dyInv*dyInv;
1017 hess [9*n+8] *= dzInv*dzInv;
1018 hess [9*n+1] *= dxInv*dyInv;
1019 hess [9*n+2] *= dxInv*dzInv;
1020 hess [9*n+5] *= dyInv*dzInv;
1021 // Copy hessian elements into lower half of 3x3 matrix
1022 hess [9*n+3] = hess[9*n+1];
1023 hess [9*n+6] = hess[9*n+2];
1024 hess [9*n+7] = hess[9*n+5];
1025
1026 gradhess [27*n+0 ] *= dxInv*dxInv*dxInv;
1027 gradhess [27*n+1 ] *= dxInv*dxInv*dyInv;
1028 gradhess [27*n+2 ] *= dxInv*dxInv*dzInv;
1029 gradhess [27*n+4 ] *= dxInv*dyInv*dyInv;
1030 gradhess [27*n+5 ] *= dxInv*dyInv*dzInv;
1031 gradhess [27*n+8 ] *= dxInv*dzInv*dzInv;
1032 gradhess [27*n+13] *= dyInv*dyInv*dyInv;
1033 gradhess [27*n+14] *= dyInv*dyInv*dzInv;
1034 gradhess [27*n+17] *= dyInv*dzInv*dzInv;
1035 gradhess [27*n+26] *= dzInv*dzInv*dzInv;
1036
1037 // Copy gradhess elements into rest of tensor
1038 gradhess [27*n+9 ] = gradhess [27*n+3 ] = gradhess [27*n+1 ];
1039 gradhess [27*n+18 ] = gradhess [27*n+6 ] = gradhess [27*n+2 ];
1040 gradhess [27*n+22 ] = gradhess [27*n+16 ] = gradhess [27*n+14];
1041 gradhess [27*n+12 ] = gradhess [27*n+10 ] = gradhess [27*n+4 ];
1042 gradhess [27*n+24 ] = gradhess [27*n+20 ] = gradhess [27*n+8 ];
1043 gradhess [27*n+25 ] = gradhess [27*n+23 ] = gradhess [27*n+17];
1044 gradhess [27*n+21 ] = gradhess [27*n+19 ] = gradhess [27*n+15] = gradhess [27*n+11 ] = gradhess [27*n+7 ] = gradhess [27*n+5];
1045
1046 }
1047}
1048
1049#endif
#define restrict
const double *restrict Ad
const double *restrict d2Ad
void eval_multi_UBspline_2d_d_vgl(multi_UBspline_2d_d *spline, double x, double y, double *restrict vals, double *restrict grads, double *restrict lapl)
void eval_multi_UBspline_3d_d_vgh(multi_UBspline_3d_d *spline, double x, double y, double z, double *restrict vals, double *restrict grads, double *restrict hess)
void eval_multi_UBspline_3d_d(multi_UBspline_3d_d *spline, double x, double y, double z, double *restrict vals)
void eval_multi_UBspline_1d_d_vgh(multi_UBspline_1d_d *spline, double x, double *restrict vals, double *restrict grads, double *restrict hess)
void eval_multi_UBspline_1d_d_vgl(multi_UBspline_1d_d *spline, double x, double *restrict vals, double *restrict grads, double *restrict lapl)
void eval_multi_UBspline_2d_d_vg(multi_UBspline_2d_d *spline, double x, double y, double *restrict vals, double *restrict grads)
void eval_multi_UBspline_2d_d(multi_UBspline_2d_d *spline, double x, double y, double *restrict vals)
const double *restrict dAd
void eval_multi_UBspline_3d_d_vgl(multi_UBspline_3d_d *spline, double x, double y, double z, double *restrict vals, double *restrict grads, double *restrict lapl)
void eval_multi_UBspline_1d_d(multi_UBspline_1d_d *spline, double x, double *restrict vals)
const double *restrict d3Ad
void eval_multi_UBspline_2d_d_vgh(multi_UBspline_2d_d *spline, double x, double y, double *restrict vals, double *restrict grads, double *restrict hess)
void eval_multi_UBspline_3d_d_vg(multi_UBspline_3d_d *spline, double x, double y, double z, double *restrict vals, double *restrict grads)
void eval_multi_UBspline_3d_d_vghgh(multi_UBspline_3d_d *spline, double x, double y, double z, double *restrict vals, double *restrict grads, double *restrict hess, double *restrict gradhess)
void eval_multi_UBspline_1d_d_vg(multi_UBspline_1d_d *spline, double x, double *restrict vals, double *restrict grads)
double start
double delta_inv