Krita Source Code Documentation
Loading...
Searching...
No Matches
bspline_data.cpp
Go to the documentation of this file.
1
2// einspline: a library for creating and evaluating B-splines //
3// Copyright (C) 2007 Kenneth P. Esler, Jr. //
4// //
5// This program is free software; you can redistribute it and/or modify //
6// it under the terms of the GNU General Public License as published by //
7// the Free Software Foundation; either version 2 of the License, or //
8// (at your option) any later version. //
9// //
10// This program is distributed in the hope that it will be useful, //
11// but WITHOUT ANY WARRANTY; without even the implied warranty of //
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13// GNU General Public License for more details. //
14// //
15// You should have received a copy of the GNU General Public License //
16// along with this program; if not, write to the Free Software //
17// Foundation, Inc., 51 Franklin Street, Fifth Floor, //
18// Boston, MA 02110-1301 USA //
20
21//#include "config.h"
22
23#include "local_definitions.h"
24
25/*****************/
26/* SSE Data */
27/*****************/
28
29#ifdef _XOPEN_SOURCE
30#undef _XOPEN_SOURCE
31#endif
32
33#define _XOPEN_SOURCE 600
34
35#ifndef __USE_XOPEN2K
36 #define __USE_XOPEN2K
37#endif
38#include <stdlib.h>
39
40#ifdef HAVE_SSE
41#include <xmmintrin.h>
42
43// Single-precision version of matrices
44__m128 *restrict A_s = (__m128 *)0;
45// There is a problem with alignment of global variables in shared
46// libraries on 32-bit machines.
47// __m128 A0, A1, A2, A3, dA0, dA1, dA2, dA3, d2A0, d2A1, d2A2, d2A3;
48#endif
49
50#ifdef HAVE_SSE2
51// Double-precision version of matrices
52#include <emmintrin.h>
53__m128d *restrict A_d = (__m128d *)0;
54
55// There is a problem with alignment of global variables in shared
56// libraries on 32-bit machines.
57//__m128d A0_01, A0_23, A1_01, A1_23, A2_01, A2_23, A3_01, A3_23,
58// dA0_01, dA0_23, dA1_01, dA1_23, dA2_01, dA2_23, dA3_01, dA3_23,
59// d2A0_01, d2A0_23, d2A1_01, d2A1_23, d2A2_01, d2A2_23, d2A3_01, d2A3_23;
60#endif
61
63{
64#ifdef HAVE_SSE
65 if (A_s == 0) {
66 posix_memalign ((void**)&A_s, 16, (sizeof(__m128)*12));
67 A_s[0] = _mm_setr_ps ( 1.0/6.0, -3.0/6.0, 3.0/6.0, -1.0/6.0 );
68 A_s[0] = _mm_setr_ps ( 1.0/6.0, -3.0/6.0, 3.0/6.0, -1.0/6.0 );
69 A_s[1] = _mm_setr_ps ( 4.0/6.0, 0.0/6.0, -6.0/6.0, 3.0/6.0 );
70 A_s[2] = _mm_setr_ps ( 1.0/6.0, 3.0/6.0, 3.0/6.0, -3.0/6.0 );
71 A_s[3] = _mm_setr_ps ( 0.0/6.0, 0.0/6.0, 0.0/6.0, 1.0/6.0 );
72 A_s[4] = _mm_setr_ps ( -0.5, 1.0, -0.5, 0.0 );
73 A_s[5] = _mm_setr_ps ( 0.0, -2.0, 1.5, 0.0 );
74 A_s[6] = _mm_setr_ps ( 0.5, 1.0, -1.5, 0.0 );
75 A_s[7] = _mm_setr_ps ( 0.0, 0.0, 0.5, 0.0 );
76 A_s[8] = _mm_setr_ps ( 1.0, -1.0, 0.0, 0.0 );
77 A_s[9] = _mm_setr_ps ( -2.0, 3.0, 0.0, 0.0 );
78 A_s[10] = _mm_setr_ps ( 1.0, -3.0, 0.0, 0.0 );
79 A_s[11] = _mm_setr_ps ( 0.0, 1.0, 0.0, 0.0 );
80 }
81
82#endif
83#ifdef HAVE_SSE2
84 if (A_d == 0) {
85 posix_memalign ((void**)&A_d, 16, (sizeof(__m128d)*24));
86 A_d[ 0] = _mm_setr_pd ( 3.0/6.0, -1.0/6.0 );
87 A_d[ 1] = _mm_setr_pd ( 1.0/6.0, -3.0/6.0 );
88 A_d[ 2] = _mm_setr_pd ( -6.0/6.0, 3.0/6.0 );
89 A_d[ 3] = _mm_setr_pd ( 4.0/6.0, 0.0/6.0 );
90 A_d[ 4] = _mm_setr_pd ( 3.0/6.0, -3.0/6.0 );
91 A_d[ 5] = _mm_setr_pd ( 1.0/6.0, 3.0/6.0 );
92 A_d[ 6] = _mm_setr_pd ( 0.0/6.0, 1.0/6.0 );
93 A_d[ 7] = _mm_setr_pd ( 0.0/6.0, 0.0/6.0 );
94 A_d[ 8] = _mm_setr_pd ( -0.5, 0.0 );
95 A_d[ 9] = _mm_setr_pd ( -0.5, 1.0 );
96 A_d[10] = _mm_setr_pd ( 1.5, 0.0 );
97 A_d[11] = _mm_setr_pd ( 0.0, -2.0 );
98 A_d[12] = _mm_setr_pd ( -1.5, 0.0 );
99 A_d[13] = _mm_setr_pd ( 0.5, 1.0 );
100 A_d[14] = _mm_setr_pd ( 0.5, 0.0 );
101 A_d[15] = _mm_setr_pd ( 0.0, 0.0 );
102 A_d[16] = _mm_setr_pd ( 0.0, 0.0 );
103 A_d[17] = _mm_setr_pd ( 1.0, -1.0 );
104 A_d[18] = _mm_setr_pd ( 0.0, 0.0 );
105 A_d[19] = _mm_setr_pd ( -2.0, 3.0 );
106 A_d[20] = _mm_setr_pd ( 0.0, 0.0 );
107 A_d[21] = _mm_setr_pd ( 1.0, -3.0 );
108 A_d[22] = _mm_setr_pd ( 0.0, 0.0 );
109 A_d[23] = _mm_setr_pd ( 0.0, 1.0 );
110 }
111#endif
112}
113
114
115#ifdef USE_ALTIVEC
116vector float A0 = (vector float) ( -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0);
117vector float A1 = (vector float) ( 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0);
118vector float A2 = (vector float) ( -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0);
119vector float A3 = (vector float) ( 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0);
120/* vector float A0 = (vector float) ( -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0); */
121/* vector float A1 = (vector float) ( 3.0/6.0, -6.0/6.0, 3.0/6.0, 0.0/6.0); */
122/* vector float A2 = (vector float) ( -3.0/6.0, 0.0/6.0, 3.0/6.0, 0.0/6.0); */
123/* vector float A3 = (vector float) ( 1.0/6.0, 4.0/6.0, 1.0/6.0, 0.0/6.0); */
124/* vector float A0 = (vector float) ( 1.0/6.0, -3.0/6.0, 3.0/6.0, -1.0/6.0); */
125/* vector float A1 = (vector float) ( 4.0/6.0, 0.0/6.0, -6.0/6.0, 3.0/6.0); */
126/* vector float A2 = (vector float) ( 1.0/6.0, 3.0/6.0, 3.0/6.0, -3.0/6.0); */
127/* vector float A3 = (vector float) ( 0.0/6.0, 0.0/6.0, 0.0/6.0, 1.0/6.0); */
128vector float dA0 = (vector float) ( 0.0, -0.5, 1.0, -0.5 );
129vector float dA1 = (vector float) ( 0.0, 1.5, -2.0, 0.0 );
130vector float dA2 = (vector float) ( 0.0, -1.5, 1.0, 0.5 );
131vector float dA3 = (vector float) ( 0.0, 0.5, 0.0, 0.0 );
132vector float d2A0 = (vector float) ( 0.0, 0.0, -1.0, 1.0 );
133vector float d2A1 = (vector float) ( 0.0, 0.0, 3.0, -2.0 );
134vector float d2A2 = (vector float) ( 0.0, 0.0, -3.0, 1.0 );
135vector float d2A3 = (vector float) ( 0.0, 0.0, 1.0, 0.0 );
136#endif
137
138/*****************/
139/* Standard Data */
140/*****************/
141
143// Single precision //
145const float A44f[16] =
146 { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0,
147 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0,
148 -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0,
149 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0 };
150const float* restrict Af = A44f;
151
152const float dA44f[16] =
153 { 0.0, -0.5, 1.0, -0.5,
154 0.0, 1.5, -2.0, 0.0,
155 0.0, -1.5, 1.0, 0.5,
156 0.0, 0.5, 0.0, 0.0 };
157const float* restrict dAf = dA44f;
158
159const float d2A44f[16] =
160 { 0.0, 0.0, -1.0, 1.0,
161 0.0, 0.0, 3.0, -2.0,
162 0.0, 0.0, -3.0, 1.0,
163 0.0, 0.0, 1.0, 0.0 };
164const float* restrict d2Af = d2A44f;
165
166const float d3A44f[16] =
167 { 0.0, 0.0, 0.0, -1.0,
168 0.0, 0.0, 0.0, 3.0,
169 0.0, 0.0, 0.0, -3.0,
170 0.0, 0.0, 0.0, 1.0};
171const float* restrict d3Af = d3A44f;
172
174// Double precision //
176const double A44d[16] =
177 { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0,
178 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0,
179 -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0,
180 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0 };
181const double* restrict Ad = A44d;
182
183const double dA44d[16] =
184 { 0.0, -0.5, 1.0, -0.5,
185 0.0, 1.5, -2.0, 0.0,
186 0.0, -1.5, 1.0, 0.5,
187 0.0, 0.5, 0.0, 0.0 };
188const double* restrict dAd = dA44d;
189
190const double d2A44d[16] =
191 { 0.0, 0.0, -1.0, 1.0,
192 0.0, 0.0, 3.0, -2.0,
193 0.0, 0.0, -3.0, 1.0,
194 0.0, 0.0, 1.0, 0.0 };
195const double* restrict d2Ad = d2A44d;
196
197const double d3A44d[16] =
198 { 0.0, 0.0, 0.0, -1.0,
199 0.0, 0.0, 0.0, 3.0,
200 0.0, 0.0, 0.0, -3.0,
201 0.0, 0.0, 0.0, 1.0};
202const double* restrict d3Ad = d3A44d;
int posix_memalign(void **memptr, size_t alignment, size_t size)
const float dA44f[16]
const float A44f[16]
const double *restrict Ad
void init_sse_data()
const double *restrict d2Ad
const float d3A44f[16]
const double A44d[16]
const float *restrict d3Af
const double *restrict dAd
const double *restrict d3Ad
const double d3A44d[16]
const double d2A44d[16]
const float *restrict dAf
const double dA44d[16]
const float *restrict d2Af
const float d2A44f[16]
const float *restrict Af
#define restrict