25 #define _XOPEN_SOURCE 600
53 float*
restrict data,
int datastride,
55 float abcdInitial[4],
float abcdFinal[4])
57 int M = basis->grid->num_points;
62#ifdef HAVE_C_VARARRAYS
65 float *bands =
new float[4*N];
70 for (
int i=0; i<4; i++) {
71 bands[i] = abcdInitial[i];
72 bands[4*(N-1)+i] = abcdFinal[i];
74 for (
int i=0; i<M; i++) {
76 bands[4*(i+1)+3] = data[datastride*i];
81 bands[4*0+1] /= bands[4*0+0];
82 bands[4*0+2] /= bands[4*0+0];
83 bands[4*0+3] /= bands[4*0+0];
85 bands[4*1+1] -= bands[4*1+0]*bands[4*0+1];
86 bands[4*1+2] -= bands[4*1+0]*bands[4*0+2];
87 bands[4*1+3] -= bands[4*1+0]*bands[4*0+3];
89 bands[4*1+2] /= bands[4*1+1];
90 bands[4*1+3] /= bands[4*1+1];
94 for (
int row=2; row < N-1; row++) {
95 bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
96 bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
97 bands[4*(row)+2] /= bands[4*(row)+1];
98 bands[4*(row)+3] /= bands[4*(row)+1];
99 bands[4*(row)+0] = 0.0;
100 bands[4*(row)+1] = 1.0;
104 bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
105 bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
106 bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
107 bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
108 bands[4*(M+1)+3] /= bands[4*(M+1)+2];
109 bands[4*(M+1)+2] = 1.0;
111 p[pstride*(M+1)] = bands[4*(M+1)+3];
113 for (
int row=M; row>0; row--)
114 p[pstride*(row)] = bands[4*(row)+3] - bands[4*(row)+2]*
p[pstride*(row+1)];
117 p[0] = bands[4*(0)+3] - bands[4*(0)+1]*
p[pstride*1] - bands[4*(0)+2]*
p[pstride*2];
119#ifndef HAVE_C_VARARRAYS
130 float*
restrict data,
int datastride,
133 int M = basis->grid->num_points-1;
138#ifdef HAVE_C_VARARRAYS
139 float bands[4*M], lastCol[M];
141 float *bands =
new float[4*M];
142 float *lastCol =
new float[ M];
146 for (
int i=0; i<M; i++) {
148 bands[4*(i)+3] = data[i*datastride];
153 bands[4*(0)+2] /= bands[4*(0)+1];
154 bands[4*(0)+0] /= bands[4*(0)+1];
155 bands[4*(0)+3] /= bands[4*(0)+1];
156 bands[4*(0)+1] = 1.0;
157 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
158 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
159 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2];
160 lastCol[0] = bands[4*(0)+0];
162 for (
int row=1; row < (M-1); row++) {
163 bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
164 bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
165 lastCol[row] = -bands[4*(row)+0] * lastCol[row-1];
166 bands[4*(row)+0] = 0.0;
167 bands[4*(row)+2] /= bands[4*(row)+1];
168 bands[4*(row)+3] /= bands[4*(row)+1];
169 lastCol[row] /= bands[4*(row)+1];
170 bands[4*(row)+1] = 1.0;
172 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
173 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
174 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
180 bands[4*(M-1)+0] += bands[4*(M-1)+2];
181 bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
182 bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3];
183 bands[4*(M-1)+3] /= bands[4*(M-1)+1];
184 p[pstride*M] = bands[4*(M-1)+3];
185 for (
int row=M-2; row>=0; row--)
186 p[pstride*(row+1)] = bands[4*(row)+3] -
187 bands[4*(row)+2]*
p[pstride*(row+2)] - lastCol[row]*
p[pstride*M];
189 p[pstride* 0 ] =
p[pstride*M];
190 p[pstride*(M+1)] =
p[pstride*1];
191 p[pstride*(M+2)] =
p[pstride*2];
192#ifndef HAVE_C_VARARRAYS
202 float *data,
int dstride,
203 float *coefs,
int cstride)
208 int M = basis->grid->num_points;
210 float bfuncs[4] = {};
211 float dbfuncs[4] = {};
212 float abcd_left[4] = {};
213 float abcd_right[4] = {};
219 abcd_left[3] = bc.
lVal;
223 abcd_left[3] = bc.
lVal;
231 abcd_right[3] = bc.
rVal;
235 abcd_right[3] = bc.
rVal;
239 abcd_left, abcd_right);
265 spline->
coefs = (
float*)malloc (
sizeof(
float)*N);
298 spline->
coefs = (
float*)malloc (
sizeof(
float)*Nx*Ny);
304 for (
int iy=0; iy<My; iy++) {
308 spline->
coefs+coffset, Ny);
312 for (
int ix=0; ix<Nx; ix++) {
316 spline->
coefs+coffset, 1);
340 int My, Mz, Nx, Ny, Nz;
356 spline->
coefs = (
float*)malloc (
sizeof(
float)*Nx*Ny*Nz);
362 for (
int iy=0; iy<My; iy++)
363 for (
int iz=0; iz<Mz; iz++) {
364 int doffset = iy*Mz+iz;
365 int coffset = iy*Nz+iz;
367 spline->
coefs+coffset, Ny*Nz);
371 for (
int ix=0; ix<Nx; ix++)
372 for (
int iz=0; iz<Nz; iz++) {
373 int doffset = ix*Ny*Nz + iz;
374 int coffset = ix*Ny*Nz + iz;
376 spline->
coefs+coffset, Nz);
380 for (
int ix=0; ix<Nx; ix++)
381 for (
int iy=0; iy<Ny; iy++) {
382 int doffset = (ix*Ny+iy)*Nz;
383 int coffset = (ix*Ny+iy)*Nz;
385 spline->
coefs+coffset, 1);
397 double*
restrict data,
int datastride,
399 double abcdInitial[4],
double abcdFinal[4])
401 int M = basis->grid->num_points;
406#ifdef HAVE_C_VARARRAYS
409 double *bands =
new double[4*N];
413 for (
int i=0; i<4; i++) {
414 bands[i] = abcdInitial[i];
415 bands[4*(N-1)+i] = abcdFinal[i];
417 for (
int i=0; i<M; i++) {
419 bands[4*(i+1)+3] = data[datastride*i];
427 bands[4*0+1] /= bands[4*0+0];
428 bands[4*0+2] /= bands[4*0+0];
429 bands[4*0+3] /= bands[4*0+0];
431 bands[4*1+1] -= bands[4*1+0]*bands[4*0+1];
432 bands[4*1+2] -= bands[4*1+0]*bands[4*0+2];
433 bands[4*1+3] -= bands[4*1+0]*bands[4*0+3];
435 bands[4*1+2] /= bands[4*1+1];
436 bands[4*1+3] /= bands[4*1+1];
440 for (
int row=2; row < N-1; row++) {
441 bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2];
442 bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3];
443 bands[4*(row)+2] /= bands[4*(row)+1];
444 bands[4*(row)+3] /= bands[4*(row)+1];
445 bands[4*(row)+0] = 0.0;
446 bands[4*(row)+1] = 1.0;
450 bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2];
451 bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3];
452 bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2];
453 bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3];
454 bands[4*(M+1)+3] /= bands[4*(M+1)+2];
455 bands[4*(M+1)+2] = 1.0;
457 p[pstride*(M+1)] = bands[4*(M+1)+3];
459 for (
int row=M; row>0; row--)
460 p[pstride*(row)] = bands[4*(row)+3] - bands[4*(row)+2]*
p[pstride*(row+1)];
463 p[0] = bands[4*(0)+3] - bands[4*(0)+1]*
p[pstride*1] - bands[4*(0)+2]*
p[pstride*2];
464#ifndef HAVE_C_VARARRAYS
472 double*
restrict data,
int datastride,
475 int M = basis->grid->num_points-1;
480#ifdef HAVE_C_VARARRAYS
481 double bands[4*M], lastCol[M];
483 double *bands =
new double[4*M];
484 double *lastCol =
new double[ M];
488 for (
int i=0; i<M; i++) {
490 bands[4*(i)+3] = data[i*datastride];
495 bands[4*(0)+2] /= bands[4*(0)+1];
496 bands[4*(0)+0] /= bands[4*(0)+1];
497 bands[4*(0)+3] /= bands[4*(0)+1];
498 bands[4*(0)+1] = 1.0;
499 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0];
500 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3];
501 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2];
502 lastCol[0] = bands[4*(0)+0];
504 for (
int row=1; row < (M-1); row++) {
505 bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2];
506 bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3];
507 lastCol[row] = -bands[4*(row)+0] * lastCol[row-1];
508 bands[4*(row)+0] = 0.0;
509 bands[4*(row)+2] /= bands[4*(row)+1];
510 bands[4*(row)+3] /= bands[4*(row)+1];
511 lastCol[row] /= bands[4*(row)+1];
512 bands[4*(row)+1] = 1.0;
514 bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3];
515 bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row];
516 bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2];
522 bands[4*(M-1)+0] += bands[4*(M-1)+2];
523 bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]);
524 bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3];
525 bands[4*(M-1)+3] /= bands[4*(M-1)+1];
526 p[pstride*M] = bands[4*(M-1)+3];
527 for (
int row=M-2; row>=0; row--)
528 p[pstride*(row+1)] = bands[4*(row)+3] -
529 bands[4*(row)+2]*
p[pstride*(row+2)] - lastCol[row]*
p[pstride*M];
531 p[pstride* 0 ] =
p[pstride*M];
532 p[pstride*(M+1)] =
p[pstride*1];
533 p[pstride*(M+2)] =
p[pstride*2];
534#ifndef HAVE_C_VARARRAYS
544 double *data,
int dstride,
545 double *coefs,
int cstride)
550 int M = basis->grid->num_points;
553 double dbfuncs[4] {};
554 double abcd_left[4] {};
555 double abcd_right[4] {};
562 abcd_left[3] = bc.
lVal;
566 abcd_left[3] = bc.
lVal;
574 abcd_right[3] = bc.
rVal;
578 abcd_right[3] = bc.
rVal;
583 abcd_left, abcd_right);
609 spline->
coefs = (
double*)malloc (
sizeof(
double)*N);
642 spline->
coefs = (
double*)malloc (
sizeof(
double)*Nx*Ny);
648 for (
int iy=0; iy<My; iy++) {
652 spline->
coefs+coffset, Ny);
656 for (
int ix=0; ix<Nx; ix++) {
660 spline->
coefs+coffset, 1);
685 int My, Mz, Nx, Ny, Nz;
700 spline->
coefs = (
double*)malloc (
sizeof(
double)*Nx*Ny*Nz);
706 for (
int iy=0; iy<My; iy++)
707 for (
int iz=0; iz<Mz; iz++) {
708 int doffset = iy*Mz+iz;
709 int coffset = iy*Nz+iz;
711 spline->
coefs+coffset, Ny*Nz);
715 for (
int ix=0; ix<Nx; ix++)
716 for (
int iz=0; iz<Nz; iz++) {
717 int doffset = ix*Ny*Nz + iz;
718 int coffset = ix*Ny*Nz + iz;
720 spline->
coefs+coffset, Nz);
724 for (
int ix=0; ix<Nx; ix++)
725 for (
int iy=0; iy<Ny; iy++) {
726 int doffset = (ix*Ny+iy)*Nz;
727 int coffset = (ix*Ny+iy)*Nz;
729 spline->
coefs+coffset, 1);
752 float *data_r = ((
float*)data );
753 float *data_i = ((
float*)data )+1;
754 float *coefs_r = ((
float*)coefs);
755 float *coefs_i = ((
float*)coefs)+1;
819 for (
int iy=0; iy<My; iy++) {
823 spline->
coefs+coffset, Ny);
827 for (
int ix=0; ix<Nx; ix++) {
831 spline->
coefs+coffset, 1);
854 int My, Mz, Nx, Ny, Nz;
876 for (
int iy=0; iy<My; iy++)
877 for (
int iz=0; iz<Mz; iz++) {
878 int doffset = iy*Mz+iz;
879 int coffset = iy*Nz+iz;
881 spline->
coefs+coffset, Ny*Nz);
885 for (
int ix=0; ix<Nx; ix++)
886 for (
int iz=0; iz<Nz; iz++) {
887 int doffset = ix*Ny*Nz + iz;
888 int coffset = ix*Ny*Nz + iz;
890 spline->
coefs+coffset, Nz);
894 for (
int ix=0; ix<Nx; ix++)
895 for (
int iy=0; iy<Ny; iy++) {
896 int doffset = (ix*Ny+iy)*Nz;
897 int coffset = (ix*Ny+iy)*Nz;
899 spline->
coefs+coffset, 1);
921 double *data_r = ((
double*)data );
922 double *data_i = ((
double*)data )+1;
923 double *coefs_r = ((
double*)coefs);
924 double *coefs_i = ((
double*)coefs)+1;
988 for (
int iy=0; iy<My; iy++) {
992 spline->
coefs+coffset, Ny);
996 for (
int ix=0; ix<Nx; ix++) {
1000 spline->
coefs+coffset, 1);
1026 int My, Mz, Nx, Ny, Nz;
1048 for (
int iy=0; iy<My; iy++)
1049 for (
int iz=0; iz<Mz; iz++) {
1050 int doffset = iy*Mz+iz;
1051 int coffset = iy*Nz+iz;
1053 spline->
coefs+coffset, Ny*Nz);
1066 for (
int ix=0; ix<Nx; ix++)
1067 for (
int iz=0; iz<Nz; iz++) {
1068 int doffset = ix*Ny*Nz + iz;
1069 int coffset = ix*Ny*Nz + iz;
1071 spline->
coefs+coffset, Nz);
1075 for (
int ix=0; ix<Nx; ix++)
1076 for (
int iy=0; iy<Ny; iy++) {
1077 int doffset = (ix*Ny+iy)*Nz;
1078 int coffset = (ix*Ny+iy)*Nz;
1080 spline->
coefs+coffset, 1);
1089 free(spline->
coefs);
complex float complex_float
complex double complex_double
int posix_memalign(void **memptr, size_t alignment, size_t size)
void get_NUBasis_funcs_si(NUBasis *restrict basis, int i, float bfuncs[4])
void get_NUBasis_dfuncs_si(NUBasis *restrict basis, int i, float bfuncs[4], float dbfuncs[4])
void destroy_NUBasis(NUBasis *basis)
NUBasis * create_NUBasis(NUgrid *grid, bool periodic)
void get_NUBasis_d2funcs_di(NUBasis *restrict basis, int i, double bfuncs[4], double dbfuncs[4], double d2bfuncs[4])
void get_NUBasis_dfuncs_di(NUBasis *restrict basis, int i, double bfuncs[4], double dbfuncs[4])
void get_NUBasis_funcs_di(NUBasis *restrict basis, int i, double bfuncs[4])
void get_NUBasis_d2funcs_si(NUBasis *restrict basis, int i, float bfuncs[4], float dbfuncs[4], float d2bfuncs[4])
NUBspline_1d_c * create_NUBspline_1d_c(NUgrid *x_grid, BCtype_c xBC, complex_float *data)
NUBspline_3d_s * create_NUBspline_3d_s(NUgrid *x_grid, NUgrid *y_grid, NUgrid *z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, float *data)
void find_NUBcoefs_1d_c(NUBasis *restrict basis, BCtype_c bc, complex_float *data, int dstride, complex_float *coefs, int cstride)
NUBspline_2d_z * create_NUBspline_2d_z(NUgrid *x_grid, NUgrid *y_grid, BCtype_z xBC, BCtype_z yBC, complex_double *data)
void find_NUBcoefs_1d_s(NUBasis *restrict basis, BCtype_s bc, float *data, int dstride, float *coefs, int cstride)
void find_NUBcoefs_1d_z(NUBasis *restrict basis, BCtype_z bc, complex_double *data, int dstride, complex_double *coefs, int cstride)
void destroy_NUBspline(Bspline *spline)
void solve_NUB_periodic_interp_1d_d(NUBasis *restrict basis, double *restrict data, int datastride, double *restrict p, int pstride)
NUBspline_1d_d * create_NUBspline_1d_d(NUgrid *x_grid, BCtype_d xBC, double *data)
NUBspline_1d_s * create_NUBspline_1d_s(NUgrid *x_grid, BCtype_s xBC, float *data)
void solve_NUB_deriv_interp_1d_s(NUBasis *restrict basis, float *restrict data, int datastride, float *restrict p, int pstride, float abcdInitial[4], float abcdFinal[4])
NUBspline_2d_c * create_NUBspline_2d_c(NUgrid *x_grid, NUgrid *y_grid, BCtype_c xBC, BCtype_c yBC, complex_float *data)
void solve_NUB_periodic_interp_1d_s(NUBasis *restrict basis, float *restrict data, int datastride, float *restrict p, int pstride)
NUBspline_3d_z * create_NUBspline_3d_z(NUgrid *x_grid, NUgrid *y_grid, NUgrid *z_grid, BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, complex_double *data)
NUBspline_2d_s * create_NUBspline_2d_s(NUgrid *x_grid, NUgrid *y_grid, BCtype_s xBC, BCtype_s yBC, float *data)
void solve_NUB_deriv_interp_1d_d(NUBasis *restrict basis, double *restrict data, int datastride, double *restrict p, int pstride, double abcdInitial[4], double abcdFinal[4])
NUBspline_3d_c * create_NUBspline_3d_c(NUgrid *x_grid, NUgrid *y_grid, NUgrid *z_grid, BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, complex_float *data)
void find_NUBcoefs_1d_d(NUBasis *restrict basis, BCtype_d bc, double *data, int dstride, double *coefs, int cstride)
NUBspline_1d_z * create_NUBspline_1d_z(NUgrid *x_grid, BCtype_z xBC, complex_double *data)
NUBspline_2d_d * create_NUBspline_2d_d(NUgrid *x_grid, NUgrid *y_grid, BCtype_d xBC, BCtype_d yBC, double *data)
NUBspline_3d_d * create_NUBspline_3d_d(NUgrid *x_grid, NUgrid *y_grid, NUgrid *z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, double *data)
NUBasis *restrict x_basis
complex_float *restrict coefs
NUBasis *restrict x_basis
NUBasis *restrict x_basis
complex_double *restrict coefs
NUBasis *restrict x_basis
NUBasis *restrict *restrict y_basis
NUBasis *restrict x_basis
complex_float *restrict coefs
NUBasis *restrict *restrict y_basis
NUBasis *restrict x_basis
NUBasis *restrict *restrict y_basis
NUBasis *restrict x_basis
complex_double *restrict coefs
NUBasis *restrict x_basis
NUBasis *restrict *restrict y_basis
NUBasis *restrict *restrict y_basis
NUBasis *restrict x_basis
complex_float *restrict coefs
NUBasis *restrict *restrict *restrict z_basis
NUBasis *restrict *restrict *restrict z_basis
NUBasis *restrict x_basis
NUBasis *restrict *restrict y_basis
NUBasis *restrict x_basis
NUBasis *restrict *restrict y_basis
NUBasis *restrict *restrict *restrict z_basis
complex_double *restrict coefs
NUgrid *restrict *restrict y_grid
NUBasis *restrict *restrict *restrict z_basis
NUgrid *restrict *restrict *restrict z_grid
NUBasis *restrict x_basis
NUBasis *restrict *restrict y_basis