Krita Source Code Documentation
Loading...
Searching...
No Matches
multi_bspline_eval_std_s.h
Go to the documentation of this file.
1
2// einspline: a library for creating and evaluating B-splines //
3// Copyright (C) 2007 Kenneth P. Esler, Jr. //
4// //
5// This program is free software; you can redistribute it and/or modify //
6// it under the terms of the GNU General Public License as published by //
7// the Free Software Foundation; either version 2 of the License, or //
8// (at your option) any later version. //
9// //
10// This program is distributed in the hope that it will be useful, //
11// but WITHOUT ANY WARRANTY; without even the implied warranty of //
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13// GNU General Public License for more details. //
14// //
15// You should have received a copy of the GNU General Public License //
16// along with this program; if not, write to the Free Software //
17// Foundation, Inc., 51 Franklin Street, Fifth Floor, //
18// Boston, MA 02110-1301 USA //
20
21#ifndef MULTI_BSPLINE_EVAL_STD_S_H
22#define MULTI_BSPLINE_EVAL_STD_S_H
23
24#include <math.h>
25#include <stdio.h>
27
28extern const float* restrict Af;
29extern const float* restrict dAf;
30extern const float* restrict d2Af;
31extern const float* restrict d3Af;
32
33/************************************************************/
34/* 1D double-precision, real evaulation functions */
35/************************************************************/
36inline void
38 double x,
39 float* restrict vals)
40{
41 x -= spline->x_grid.start;
42 float ux = x*spline->x_grid.delta_inv;
43 float ipartx, tx;
44 tx = modff (ux, &ipartx); int ix = (int) ipartx;
45
46 float tpx[4], a[4];
47 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
48 float* restrict coefs = spline->coefs;
49
50 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
51 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
52 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
53 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
54
55 int xs = spline->x_stride;
56
57 for (int n=0; n<spline->num_splines; n++)
58 vals[n] = 0.0;
59
60 for (int i=0; i<4; i++) {
61 float* restrict coefs = spline->coefs + ((ix+i)*xs);
62 for (int n=0; n<spline->num_splines; n++)
63 vals[n] += a[i] * coefs[n];
64 }
65}
66
67
68
69inline void
71 double x,
72 float* restrict vals,
73 float* restrict grads)
74{
75 x -= spline->x_grid.start;
76 float ux = x*spline->x_grid.delta_inv;
77 float ipartx, tx;
78 tx = modff (ux, &ipartx); int ix = (int) ipartx;
79
80 float tpx[4], a[4], da[4];
81 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
82 float* restrict coefs = spline->coefs;
83
84 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
85 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
86 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
87 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
88 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
89 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
90 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
91 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
92
93 int xs = spline->x_stride;
94
95 for (int n=0; n<spline->num_splines; n++) {
96 vals[n] = 0.0;
97 grads[n] = 0.0;
98 }
99
100 for (int i=0; i<4; i++) {
101 float* restrict coefs = spline->coefs + ((ix+i)*xs);
102 for (int n=0; n<spline->num_splines; n++) {
103 vals[n] += a[i] * coefs[n];
104 grads[n] += da[i] * coefs[n];
105 }
106 }
107
108 float dxInv = spline->x_grid.delta_inv;
109 for (int n=0; n<spline->num_splines; n++)
110 grads[n] *= dxInv;
111}
112
113
114inline void
116 double x,
117 float* restrict vals,
118 float* restrict grads,
119 float* restrict lapl)
120{
121 x -= spline->x_grid.start;
122 float ux = x*spline->x_grid.delta_inv;
123 float ipartx, tx;
124 tx = modff (ux, &ipartx); int ix = (int) ipartx;
125
126 float tpx[4], a[4], da[4], d2a[4];
127 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
128 float* restrict coefs = spline->coefs;
129
130 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
131 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
132 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
133 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
134 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
135 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
136 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
137 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
138 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
139 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
140 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
141 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
142
143 int xs = spline->x_stride;
144
145 for (int n=0; n<spline->num_splines; n++) {
146 vals[n] = 0.0;
147 grads[n] = 0.0;
148 lapl[n] = 0.0;
149 }
150
151 for (int i=0; i<4; i++) {
152 float* restrict coefs = spline->coefs + ((ix+i)*xs);
153 for (int n=0; n<spline->num_splines; n++) {
154 vals[n] += a[i] * coefs[n];
155 grads[n] += da[i] * coefs[n];
156 lapl[n] += d2a[i] * coefs[n];
157 }
158 }
159
160 float dxInv = spline->x_grid.delta_inv;
161 for (int n=0; n<spline->num_splines; n++) {
162 grads[n] *= dxInv;
163 lapl [n] *= dxInv*dxInv;
164 }
165}
166
167inline void
169 double x,
170 float* restrict vals,
171 float* restrict grads,
172 float* restrict hess)
173{
174 eval_multi_UBspline_1d_s_vgl (spline, x, vals, grads, hess);
175}
176
177
178/************************************************************/
179/* 2D double-precision, real evaulation functions */
180/************************************************************/
181inline void
183 double x, double y,
184 float* restrict vals)
185{
186 x -= spline->x_grid.start;
187 y -= spline->y_grid.start;
188 float ux = x*spline->x_grid.delta_inv;
189 float uy = y*spline->y_grid.delta_inv;
190 float ipartx, iparty, tx, ty;
191 tx = modff (ux, &ipartx); int ix = (int) ipartx;
192 ty = modff (uy, &iparty); int iy = (int) iparty;
193
194 float tpx[4], tpy[4], a[4], b[4];
195 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
196 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
197 float* restrict coefs = spline->coefs;
198
199 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
200 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
201 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
202 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
203
204 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
205 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
206 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
207 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
208
209 int xs = spline->x_stride;
210 int ys = spline->y_stride;
211
212 for (int n=0; n<spline->num_splines; n++)
213 vals[n] = 0.0;
214
215 for (int i=0; i<4; i++)
216 for (int j=0; j<4; j++) {
217 float prefactor = a[i]*b[j];
218 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
219 for (int n=0; n<spline->num_splines; n++)
220 vals[n] += prefactor*coefs[n];
221 }
222}
223
224
225inline void
227 double x, double y,
228 float* restrict vals,
229 float* restrict grads)
230{
231 x -= spline->x_grid.start;
232 y -= spline->y_grid.start;
233 float ux = x*spline->x_grid.delta_inv;
234 float uy = y*spline->y_grid.delta_inv;
235 float ipartx, iparty, tx, ty;
236 tx = modff (ux, &ipartx); int ix = (int) ipartx;
237 ty = modff (uy, &iparty); int iy = (int) iparty;
238
239 float tpx[4], tpy[4], a[4], b[4], da[4], db[4];
240 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
241 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
242 float* restrict coefs = spline->coefs;
243
244 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
245 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
246 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
247 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
248 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
249 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
250 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
251 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
252
253 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
254 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
255 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
256 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
257 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
258 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
259 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
260 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
261
262 int xs = spline->x_stride;
263 int ys = spline->y_stride;
264
265 for (int n=0; n<spline->num_splines; n++) {
266 vals[n] = 0.0;
267 grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0;
268 }
269
270 for (int i=0; i<4; i++)
271 for (int j=0; j<4; j++) {
272 float ab = a[i]*b[j];
273 float dab[2];
274 dab[0] = da[i]* b[j];
275 dab[1] = a[i]*db[j];
276
277 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
278 for (int n=0; n<spline->num_splines; n++) {
279 vals [n] += ab *coefs[n];
280 grads[2*n+0] += dab[0]*coefs[n];
281 grads[2*n+1] += dab[1]*coefs[n];
282 }
283 }
284
285 float dxInv = spline->x_grid.delta_inv;
286 float dyInv = spline->y_grid.delta_inv;
287 for (int n=0; n<spline->num_splines; n++) {
288 grads[2*n+0] *= dxInv;
289 grads[2*n+1] *= dyInv;
290 }
291}
292
293inline void
295 double x, double y,
296 float* restrict vals,
297 float* restrict grads,
298 float* restrict lapl)
299{
300 x -= spline->x_grid.start;
301 y -= spline->y_grid.start;
302 float ux = x*spline->x_grid.delta_inv;
303 float uy = y*spline->y_grid.delta_inv;
304 float ipartx, iparty, tx, ty;
305 tx = modff (ux, &ipartx); int ix = (int) ipartx;
306 ty = modff (uy, &iparty); int iy = (int) iparty;
307
308 float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4];
309 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
310 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
311 float* restrict coefs = spline->coefs;
312
313 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
314 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
315 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
316 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
317 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
318 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
319 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
320 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
321 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
322 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
323 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
324 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
325
326 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
327 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
328 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
329 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
330 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
331 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
332 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
333 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
334 d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]);
335 d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]);
336 d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]);
337 d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]);
338
339 int xs = spline->x_stride;
340 int ys = spline->y_stride;
341
342 float lapl2[2*spline->num_splines];
343 for (int n=0; n<spline->num_splines; n++) {
344 vals[n] = 0.0;
345 grads[2*n+0] = grads[2*n+1] = 0.0;
346 lapl2[2*n+0] = lapl2[2*n+1] = 0.0;
347 }
348
349 for (int i=0; i<4; i++)
350 for (int j=0; j<4; j++) {
351 float ab = a[i]*b[j];
352 float dab[2], d2ab[2];
353 dab[0] = da[i]* b[j];
354 dab[1] = a[i]*db[j];
355 d2ab[0] = d2a[i]* b[j];
356 d2ab[1] = a[i]*d2b[j];
357
358 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
359 for (int n=0; n<spline->num_splines; n++) {
360 vals[n] += ab *coefs[n];
361 grads[2*n+0] += dab[0]*coefs[n];
362 grads[2*n+1] += dab[1]*coefs[n];
363 lapl2[2*n+0] += d2ab[0]*coefs[n];
364 lapl2[2*n+1] += d2ab[1]*coefs[n];
365 }
366 }
367
368 float dxInv = spline->x_grid.delta_inv;
369 float dyInv = spline->y_grid.delta_inv;
370 for (int n=0; n<spline->num_splines; n++) {
371 grads[2*n+0] *= dxInv;
372 grads[2*n+1] *= dyInv;
373 lapl2[2*n+0] *= dxInv*dxInv;
374 lapl2[2*n+1] *= dyInv*dyInv;
375 lapl[n] = lapl2[2*n+0] + lapl2[2*n+1];
376 }
377}
378
379
380
381
382inline void
384 double x, double y,
385 float* restrict vals,
386 float* restrict grads,
387 float* restrict hess)
388{
389 x -= spline->x_grid.start;
390 y -= spline->y_grid.start;
391 float ux = x*spline->x_grid.delta_inv;
392 float uy = y*spline->y_grid.delta_inv;
393 float ipartx, iparty, tx, ty;
394 tx = modff (ux, &ipartx); int ix = (int) ipartx;
395 ty = modff (uy, &iparty); int iy = (int) iparty;
396
397 float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4];
398 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
399 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
400 float* restrict coefs = spline->coefs;
401
402 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
403 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
404 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
405 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
406 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
407 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
408 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
409 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
410 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
411 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
412 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
413 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
414
415 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
416 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
417 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
418 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
419 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
420 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
421 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
422 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
423 d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]);
424 d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]);
425 d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]);
426 d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]);
427
428 int xs = spline->x_stride;
429 int ys = spline->y_stride;
430
431 for (int n=0; n<spline->num_splines; n++) {
432 vals[n] = 0.0;
433 grads[2*n+0] = grads[2*n+1] = 0.0;
434 for (int i=0; i<4; i++)
435 hess[4*n+i] = 0.0;
436 }
437
438 for (int i=0; i<4; i++)
439 for (int j=0; j<4; j++){
440 float ab = a[i]*b[j];
441 float dab[2], d2ab[3];
442 dab[0] = da[i]* b[j];
443 dab[1] = a[i]*db[j];
444 d2ab[0] = d2a[i] * b[j];
445 d2ab[1] = da[i] * db[j];
446 d2ab[2] = a[i] * d2b[j];
447
448 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys);
449 for (int n=0; n<spline->num_splines; n++) {
450 vals[n] += ab *coefs[n];
451 grads[2*n+0] += dab[0]*coefs[n];
452 grads[2*n+1] += dab[1]*coefs[n];
453 hess [4*n+0] += d2ab[0]*coefs[n];
454 hess [4*n+1] += d2ab[1]*coefs[n];
455 hess [4*n+3] += d2ab[2]*coefs[n];
456 }
457 }
458
459 float dxInv = spline->x_grid.delta_inv;
460 float dyInv = spline->y_grid.delta_inv;
461 for (int n=0; n<spline->num_splines; n++) {
462 grads[2*n+0] *= dxInv;
463 grads[2*n+1] *= dyInv;
464 hess[4*n+0] *= dxInv*dxInv;
465 hess[4*n+1] *= dxInv*dyInv;
466 hess[4*n+3] *= dyInv*dyInv;
467 // Copy hessian elements into lower half of 3x3 matrix
468 hess[4*n+2] = hess[4*n+1];
469 }
470}
471
472
473
474
475/************************************************************/
476/* 3D double-precision, real evaulation functions */
477/************************************************************/
478inline void
480 double x, double y, double z,
481 float* restrict vals)
482{
483 x -= spline->x_grid.start;
484 y -= spline->y_grid.start;
485 z -= spline->z_grid.start;
486 float ux = x*spline->x_grid.delta_inv;
487 float uy = y*spline->y_grid.delta_inv;
488 float uz = z*spline->z_grid.delta_inv;
489 float ipartx, iparty, ipartz, tx, ty, tz;
490 tx = modff (ux, &ipartx); int ix = (int) ipartx;
491 ty = modff (uy, &iparty); int iy = (int) iparty;
492 tz = modff (uz, &ipartz); int iz = (int) ipartz;
493
494 float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4];
495 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
496 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
497 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
498 float* restrict coefs = spline->coefs;
499
500 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
501 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
502 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
503 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
504
505 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
506 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
507 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
508 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
509
510 c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]);
511 c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]);
512 c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]);
513 c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]);
514
515 int xs = spline->x_stride;
516 int ys = spline->y_stride;
517 int zs = spline->z_stride;
518
519 for (int n=0; n<spline->num_splines; n++)
520 vals[n] = 0.0;
521
522 for (int i=0; i<4; i++)
523 for (int j=0; j<4; j++)
524 for (int k=0; k<4; k++) {
525 float abc = a[i]*b[j]*c[k];
526 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
527 for (int n=0; n<spline->num_splines; n++)
528 vals[n] += abc*coefs[n];
529 }
530}
531
532
533inline void
535 double x, double y, double z,
536 float* restrict vals,
537 float* restrict grads)
538{
539 x -= spline->x_grid.start;
540 y -= spline->y_grid.start;
541 z -= spline->z_grid.start;
542 float ux = x*spline->x_grid.delta_inv;
543 float uy = y*spline->y_grid.delta_inv;
544 float uz = z*spline->z_grid.delta_inv;
545 float ipartx, iparty, ipartz, tx, ty, tz;
546 tx = modff (ux, &ipartx); int ix = (int) ipartx;
547 ty = modff (uy, &iparty); int iy = (int) iparty;
548 tz = modff (uz, &ipartz); int iz = (int) ipartz;
549
550 float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
551 da[4], db[4], dc[4];
552 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
553 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
554 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
555 float* restrict coefs = spline->coefs;
556
557 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
558 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
559 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
560 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
561 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
562 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
563 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
564 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
565
566 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
567 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
568 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
569 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
570 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
571 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
572 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
573 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
574
575 c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]);
576 c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]);
577 c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]);
578 c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]);
579 dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]);
580 dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]);
581 dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]);
582 dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]);
583
584 int xs = spline->x_stride;
585 int ys = spline->y_stride;
586 int zs = spline->z_stride;
587
588 for (int n=0; n<spline->num_splines; n++) {
589 vals[n] = 0.0;
590 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
591 }
592
593 for (int i=0; i<4; i++)
594 for (int j=0; j<4; j++)
595 for (int k=0; k<4; k++) {
596 float abc = a[i]*b[j]*c[k];
597 float dabc[3];
598 dabc[0] = da[i]* b[j]* c[k];
599 dabc[1] = a[i]*db[j]* c[k];
600 dabc[2] = a[i]* b[j]*dc[k];
601
602 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
603 for (int n=0; n<spline->num_splines; n++) {
604 vals[n] += abc *coefs[n];
605 grads[3*n+0] += dabc[0]*coefs[n];
606 grads[3*n+1] += dabc[1]*coefs[n];
607 grads[3*n+2] += dabc[2]*coefs[n];
608 }
609 }
610
611 float dxInv = spline->x_grid.delta_inv;
612 float dyInv = spline->y_grid.delta_inv;
613 float dzInv = spline->z_grid.delta_inv;
614 for (int n=0; n<spline->num_splines; n++) {
615 grads[3*n+0] *= dxInv;
616 grads[3*n+1] *= dyInv;
617 grads[3*n+2] *= dzInv;
618 }
619}
620
621
622
623inline void
625 double x, double y, double z,
626 float* restrict vals,
627 float* restrict grads,
628 float* restrict lapl)
629{
630 x -= spline->x_grid.start;
631 y -= spline->y_grid.start;
632 z -= spline->z_grid.start;
633 float ux = x*spline->x_grid.delta_inv;
634 float uy = y*spline->y_grid.delta_inv;
635 float uz = z*spline->z_grid.delta_inv;
636 float ipartx, iparty, ipartz, tx, ty, tz;
637 tx = modff (ux, &ipartx); int ix = (int) ipartx;
638 ty = modff (uy, &iparty); int iy = (int) iparty;
639 tz = modff (uz, &ipartz); int iz = (int) ipartz;
640
641 float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
642 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
643 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
644 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
645 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
646 float* restrict coefs = spline->coefs;
647
648 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
649 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
650 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
651 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
652 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
653 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
654 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
655 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
656 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
657 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
658 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
659 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
660
661 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
662 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
663 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
664 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
665 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
666 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
667 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
668 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
669 d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]);
670 d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]);
671 d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]);
672 d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]);
673
674 c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]);
675 c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]);
676 c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]);
677 c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]);
678 dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]);
679 dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]);
680 dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]);
681 dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]);
682 d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]);
683 d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]);
684 d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]);
685 d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]);
686
687 int xs = spline->x_stride;
688 int ys = spline->y_stride;
689 int zs = spline->z_stride;
690
691 float lapl3[3*spline->num_splines];
692 for (int n=0; n<spline->num_splines; n++) {
693 vals[n] = 0.0;
694 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
695 lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0;
696 }
697
698 for (int i=0; i<4; i++)
699 for (int j=0; j<4; j++)
700 for (int k=0; k<4; k++) {
701 float abc = a[i]*b[j]*c[k];
702 float dabc[3], d2abc[3];
703 dabc[0] = da[i]* b[j]* c[k];
704 dabc[1] = a[i]*db[j]* c[k];
705 dabc[2] = a[i]* b[j]*dc[k];
706 d2abc[0] = d2a[i]* b[j]* c[k];
707 d2abc[1] = a[i]*d2b[j]* c[k];
708 d2abc[2] = a[i]* b[j]*d2c[k];
709
710 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
711 for (int n=0; n<spline->num_splines; n++) {
712 vals[n] += abc *coefs[n];
713 grads[3*n+0] += dabc[0]*coefs[n];
714 grads[3*n+1] += dabc[1]*coefs[n];
715 grads[3*n+2] += dabc[2]*coefs[n];
716 lapl3[3*n+0] += d2abc[0]*coefs[n];
717 lapl3[3*n+1] += d2abc[1]*coefs[n];
718 lapl3[3*n+2] += d2abc[2]*coefs[n];
719 }
720 }
721
722 float dxInv = spline->x_grid.delta_inv;
723 float dyInv = spline->y_grid.delta_inv;
724 float dzInv = spline->z_grid.delta_inv;
725 for (int n=0; n<spline->num_splines; n++) {
726 grads[3*n+0] *= dxInv;
727 grads[3*n+1] *= dyInv;
728 grads[3*n+2] *= dzInv;
729 lapl3[3*n+0] *= dxInv*dxInv;
730 lapl3[3*n+1] *= dyInv*dyInv;
731 lapl3[3*n+2] *= dzInv*dzInv;
732 lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2];
733 }
734}
735
736
737
738inline void
740 double x, double y, double z,
741 float* restrict vals,
742 float* restrict grads,
743 float* restrict hess)
744{
745 x -= spline->x_grid.start;
746 y -= spline->y_grid.start;
747 z -= spline->z_grid.start;
748 float ux = x*spline->x_grid.delta_inv;
749 float uy = y*spline->y_grid.delta_inv;
750 float uz = z*spline->z_grid.delta_inv;
751 float ipartx, iparty, ipartz, tx, ty, tz;
752 tx = modff (ux, &ipartx); int ix = (int) ipartx;
753 ty = modff (uy, &iparty); int iy = (int) iparty;
754 tz = modff (uz, &ipartz); int iz = (int) ipartz;
755
756 float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
757 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
758 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
759 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
760 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
761 float* restrict coefs = spline->coefs;
762
763 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
764 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
765 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
766 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
767 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
768 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
769 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
770 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
771 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
772 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
773 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
774 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
775
776 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
777 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
778 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
779 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
780 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
781 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
782 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
783 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
784 d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]);
785 d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]);
786 d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]);
787 d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]);
788
789 c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]);
790 c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]);
791 c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]);
792 c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]);
793 dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]);
794 dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]);
795 dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]);
796 dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]);
797 d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]);
798 d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]);
799 d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]);
800 d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]);
801
802 int xs = spline->x_stride;
803 int ys = spline->y_stride;
804 int zs = spline->z_stride;
805
806 for (int n=0; n<spline->num_splines; n++) {
807 vals[n] = 0.0;
808 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
809 for (int i=0; i<9; i++)
810 hess[9*n+i] = 0.0;
811 }
812
813 for (int i=0; i<4; i++)
814 for (int j=0; j<4; j++)
815 for (int k=0; k<4; k++) {
816 float abc = a[i]*b[j]*c[k];
817 float dabc[3], d2abc[6];
818 dabc[0] = da[i]* b[j]* c[k];
819 dabc[1] = a[i]*db[j]* c[k];
820 dabc[2] = a[i]* b[j]*dc[k];
821 d2abc[0] = d2a[i]* b[j]* c[k];
822 d2abc[1] = da[i]* db[j]* c[k];
823 d2abc[2] = da[i]* b[j]* dc[k];
824 d2abc[3] = a[i]*d2b[j]* c[k];
825 d2abc[4] = a[i]* db[j]* dc[k];
826 d2abc[5] = a[i]* b[j]*d2c[k];
827
828 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
829 for (int n=0; n<spline->num_splines; n++) {
830 vals[n] += abc *coefs[n];
831 grads[3*n+0] += dabc[0]*coefs[n];
832 grads[3*n+1] += dabc[1]*coefs[n];
833 grads[3*n+2] += dabc[2]*coefs[n];
834 hess [9*n+0] += d2abc[0]*coefs[n];
835 hess [9*n+1] += d2abc[1]*coefs[n];
836 hess [9*n+2] += d2abc[2]*coefs[n];
837 hess [9*n+4] += d2abc[3]*coefs[n];
838 hess [9*n+5] += d2abc[4]*coefs[n];
839 hess [9*n+8] += d2abc[5]*coefs[n];
840 }
841 }
842
843 float dxInv = spline->x_grid.delta_inv;
844 float dyInv = spline->y_grid.delta_inv;
845 float dzInv = spline->z_grid.delta_inv;
846 for (int n=0; n<spline->num_splines; n++) {
847 grads[3*n+0] *= dxInv;
848 grads[3*n+1] *= dyInv;
849 grads[3*n+2] *= dzInv;
850 hess [9*n+0] *= dxInv*dxInv;
851 hess [9*n+4] *= dyInv*dyInv;
852 hess [9*n+8] *= dzInv*dzInv;
853 hess [9*n+1] *= dxInv*dyInv;
854 hess [9*n+2] *= dxInv*dzInv;
855 hess [9*n+5] *= dyInv*dzInv;
856 // Copy hessian elements into lower half of 3x3 matrix
857 hess [9*n+3] = hess[9*n+1];
858 hess [9*n+6] = hess[9*n+2];
859 hess [9*n+7] = hess[9*n+5];
860 }
861}
862
863inline void
865 double x, double y, double z,
866 float* restrict vals,
867 float* restrict grads,
868 float* restrict hess,
869 float* restrict gradhess)
870{
871 x -= spline->x_grid.start;
872 y -= spline->y_grid.start;
873 z -= spline->z_grid.start;
874 float ux = x*spline->x_grid.delta_inv;
875 float uy = y*spline->y_grid.delta_inv;
876 float uz = z*spline->z_grid.delta_inv;
877 float ipartx, iparty, ipartz, tx, ty, tz;
878 tx = modff (ux, &ipartx); int ix = (int) ipartx;
879 ty = modff (uy, &iparty); int iy = (int) iparty;
880 tz = modff (uz, &ipartz); int iz = (int) ipartz;
881
882 float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4],
883 da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4],
884 d3a[4], d3b[4], d3c[4];
885 tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0;
886 tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0;
887 tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0;
888 float* restrict coefs = spline->coefs;
889
890 a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]);
891 a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]);
892 a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]);
893 a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]);
894 da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]);
895 da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]);
896 da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]);
897 da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]);
898 d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]);
899 d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]);
900 d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]);
901 d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]);
902 d3a[0] = (/*d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] +*/ d3Af[ 3]*tpx[3]);
903 d3a[1] = (/*d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] +*/ d3Af[ 7]*tpx[3]);
904 d3a[2] = (/*d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] +*/ d3Af[11]*tpx[3]);
905 d3a[3] = (/*d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] +*/ d3Af[15]*tpx[3]);
906
907 b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]);
908 b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]);
909 b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]);
910 b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]);
911 db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]);
912 db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]);
913 db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]);
914 db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]);
915 d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]);
916 d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]);
917 d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]);
918 d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]);
919 d3b[0] = (/*d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] +*/ d3Af[ 3]*tpy[3]);
920 d3b[1] = (/*d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] +*/ d3Af[ 7]*tpy[3]);
921 d3b[2] = (/*d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] +*/ d3Af[11]*tpy[3]);
922 d3b[3] = (/*d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] +*/ d3Af[15]*tpy[3]);
923
924 c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]);
925 c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]);
926 c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]);
927 c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]);
928 dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]);
929 dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]);
930 dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]);
931 dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]);
932 d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]);
933 d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]);
934 d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]);
935 d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]);
936 d3c[0] = (/*d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] +*/ d3Af[ 3]*tpz[3]);
937 d3c[1] = (/*d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] +*/ d3Af[ 7]*tpz[3]);
938 d3c[2] = (/*d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] +*/ d3Af[11]*tpz[3]);
939 d3c[3] = (/*d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] +*/ d3Af[15]*tpz[3]);
940
941 int xs = spline->x_stride;
942 int ys = spline->y_stride;
943 int zs = spline->z_stride;
944
945 for (int n=0; n<spline->num_splines; n++) {
946 vals[n] = 0.0;
947 grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0;
948 for (int i=0; i<9; i++)
949 hess[9*n+i] = 0.0;
950 for (int i=0; i<27; i++)
951 gradhess[27*n+i] = 0.0;
952 }
953
954 for (int i=0; i<4; i++)
955 for (int j=0; j<4; j++)
956 for (int k=0; k<4; k++) {
957 float abc = a[i]*b[j]*c[k];
958 float dabc[3], d2abc[6], d3abc[10];
959 dabc[0] = da[i]* b[j]* c[k];
960 dabc[1] = a[i]*db[j]* c[k];
961 dabc[2] = a[i]* b[j]*dc[k];
962 d2abc[0] = d2a[i]* b[j]* c[k];
963 d2abc[1] = da[i]* db[j]* c[k];
964 d2abc[2] = da[i]* b[j]* dc[k];
965 d2abc[3] = a[i]*d2b[j]* c[k];
966 d2abc[4] = a[i]* db[j]* dc[k];
967 d2abc[5] = a[i]* b[j]*d2c[k];
968
969 d3abc[0] = d3a[i]* b[j]* c[k];
970 d3abc[1] = d2a[i]* db[j]* c[k];
971 d3abc[2] = d2a[i]* b[j]* dc[k];
972 d3abc[3] = da[i]*d2b[j]* c[k];
973 d3abc[4] = da[i]* db[j]* dc[k];
974 d3abc[5] = da[i]* b[j]*d2c[k];
975 d3abc[6] = a[i]*d3b[j]* c[k];
976 d3abc[7] = a[i]*d2b[j]* dc[k];
977 d3abc[8] = a[i]* db[j]*d2c[k];
978 d3abc[9] = a[i]* b[j]*d3c[k];
979
980 float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs);
981 for (int n=0; n<spline->num_splines; n++) {
982 vals[n] += abc *coefs[n];
983 grads[3*n+0] += dabc[0]*coefs[n];
984 grads[3*n+1] += dabc[1]*coefs[n];
985 grads[3*n+2] += dabc[2]*coefs[n];
986 hess [9*n+0] += d2abc[0]*coefs[n];
987 hess [9*n+1] += d2abc[1]*coefs[n];
988 hess [9*n+2] += d2abc[2]*coefs[n];
989 hess [9*n+4] += d2abc[3]*coefs[n];
990 hess [9*n+5] += d2abc[4]*coefs[n];
991 hess [9*n+8] += d2abc[5]*coefs[n];
992
993 gradhess [27*n+0 ] += d3abc[0]*coefs[n];
994 gradhess [27*n+1 ] += d3abc[1]*coefs[n];
995 gradhess [27*n+2 ] += d3abc[2]*coefs[n];
996 gradhess [27*n+4 ] += d3abc[3]*coefs[n];
997 gradhess [27*n+5 ] += d3abc[4]*coefs[n];
998 gradhess [27*n+8 ] += d3abc[5]*coefs[n];
999 gradhess [27*n+13] += d3abc[6]*coefs[n];
1000 gradhess [27*n+14] += d3abc[7]*coefs[n];
1001 gradhess [27*n+17] += d3abc[8]*coefs[n];
1002 gradhess [27*n+26] += d3abc[9]*coefs[n];
1003 }
1004 }
1005
1006 float dxInv = spline->x_grid.delta_inv;
1007 float dyInv = spline->y_grid.delta_inv;
1008 float dzInv = spline->z_grid.delta_inv;
1009 for (int n=0; n<spline->num_splines; n++) {
1010 grads[3*n+0] *= dxInv;
1011 grads[3*n+1] *= dyInv;
1012 grads[3*n+2] *= dzInv;
1013 hess [9*n+0] *= dxInv*dxInv;
1014 hess [9*n+4] *= dyInv*dyInv;
1015 hess [9*n+8] *= dzInv*dzInv;
1016 hess [9*n+1] *= dxInv*dyInv;
1017 hess [9*n+2] *= dxInv*dzInv;
1018 hess [9*n+5] *= dyInv*dzInv;
1019 // Copy hessian elements into lower half of 3x3 matrix
1020 hess [9*n+3] = hess[9*n+1];
1021 hess [9*n+6] = hess[9*n+2];
1022 hess [9*n+7] = hess[9*n+5];
1023
1024 gradhess [27*n+0 ] *= dxInv*dxInv*dxInv;
1025 gradhess [27*n+1 ] *= dxInv*dxInv*dyInv;
1026 gradhess [27*n+2 ] *= dxInv*dxInv*dzInv;
1027 gradhess [27*n+4 ] *= dxInv*dyInv*dyInv;
1028 gradhess [27*n+5 ] *= dxInv*dyInv*dzInv;
1029 gradhess [27*n+8 ] *= dxInv*dzInv*dzInv;
1030 gradhess [27*n+13] *= dyInv*dyInv*dyInv;
1031 gradhess [27*n+14] *= dyInv*dyInv*dzInv;
1032 gradhess [27*n+17] *= dyInv*dzInv*dzInv;
1033 gradhess [27*n+26] *= dzInv*dzInv*dzInv;
1034
1035 // Copy gradhess elements into rest of tensor
1036 gradhess [27*n+9 ] = gradhess [27*n+3 ] = gradhess [27*n+1 ];
1037 gradhess [27*n+18 ] = gradhess [27*n+6 ] = gradhess [27*n+2 ];
1038 gradhess [27*n+22 ] = gradhess [27*n+16 ] = gradhess [27*n+14];
1039 gradhess [27*n+12 ] = gradhess [27*n+10 ] = gradhess [27*n+4 ];
1040 gradhess [27*n+24 ] = gradhess [27*n+20 ] = gradhess [27*n+8 ];
1041 gradhess [27*n+25 ] = gradhess [27*n+23 ] = gradhess [27*n+17];
1042 gradhess [27*n+21 ] = gradhess [27*n+19 ] = gradhess [27*n+15] = gradhess [27*n+11 ] = gradhess [27*n+7 ] = gradhess [27*n+5];
1043
1044 }
1045}
1046
1047#endif
#define restrict
void eval_multi_UBspline_3d_s(multi_UBspline_3d_s *spline, double x, double y, double z, float *restrict vals)
void eval_multi_UBspline_3d_s_vgh(multi_UBspline_3d_s *spline, double x, double y, double z, float *restrict vals, float *restrict grads, float *restrict hess)
void eval_multi_UBspline_1d_s_vgh(multi_UBspline_1d_s *spline, double x, float *restrict vals, float *restrict grads, float *restrict hess)
void eval_multi_UBspline_1d_s_vgl(multi_UBspline_1d_s *spline, double x, float *restrict vals, float *restrict grads, float *restrict lapl)
const float *restrict d3Af
void eval_multi_UBspline_3d_s_vgl(multi_UBspline_3d_s *spline, double x, double y, double z, float *restrict vals, float *restrict grads, float *restrict lapl)
void eval_multi_UBspline_2d_s(multi_UBspline_2d_s *spline, double x, double y, float *restrict vals)
void eval_multi_UBspline_2d_s_vgh(multi_UBspline_2d_s *spline, double x, double y, float *restrict vals, float *restrict grads, float *restrict hess)
void eval_multi_UBspline_3d_s_vghgh(multi_UBspline_3d_s *spline, double x, double y, double z, float *restrict vals, float *restrict grads, float *restrict hess, float *restrict gradhess)
void eval_multi_UBspline_1d_s(multi_UBspline_1d_s *spline, double x, float *restrict vals)
const float *restrict dAf
const float *restrict d2Af
void eval_multi_UBspline_3d_s_vg(multi_UBspline_3d_s *spline, double x, double y, double z, float *restrict vals, float *restrict grads)
void eval_multi_UBspline_2d_s_vg(multi_UBspline_2d_s *spline, double x, double y, float *restrict vals, float *restrict grads)
void eval_multi_UBspline_2d_s_vgl(multi_UBspline_2d_s *spline, double x, double y, float *restrict vals, float *restrict grads, float *restrict lapl)
void eval_multi_UBspline_1d_s_vg(multi_UBspline_1d_s *spline, double x, float *restrict vals, float *restrict grads)
const float *restrict Af
double start
double delta_inv