Krita Source Code Documentation
Loading...
Searching...
No Matches
kis_convolution_worker_fft.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: 2010 Edward Apap <schumifer@hotmail.com>
3 * SPDX-FileCopyrightText: 2011 José Luis Vergara Toloza <pentalis@gmail.com>
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 */
7
8#ifndef KIS_CONVOLUTION_WORKER_FFT_H
9#define KIS_CONVOLUTION_WORKER_FFT_H
10
11#include <iostream>
12
13#include <KoChannelInfo.h>
14
16#include "kis_math_toolbox.h"
17
18#include <QMutex>
19#include <QVector>
20#include <QTextStream>
21#include <QFile>
22#include <QDir>
23
24#include <KisPortingUtils.h>
25
26#include <fftw3.h>
27
28template<class _IteratorFactory_> class KisConvolutionWorkerFFT;
30{
31private:
32 static QMutex fftwMutex;
33 template<class _IteratorFactory_> friend class KisConvolutionWorkerFFT;
34};
35
37
38
39template<class _IteratorFactory_>
40class KisConvolutionWorkerFFT : public KisConvolutionWorker<_IteratorFactory_>
41{
42public:
44 : KisConvolutionWorker<_IteratorFactory_>(painter, progress)
45 {
46 }
47
51
53 const KisPaintDeviceSP src,
54 QPoint srcPos,
55 QPoint dstPos,
56 QSize areaSize,
57 const QRect &dataRect) override
58 {
59 // Make the area we cover as small as possible
60 if (this->m_painter->selection())
61 {
62 QRect r = this->m_painter->selection()->selectedRect().intersected(QRect(srcPos, areaSize));
63 dstPos += r.topLeft() - srcPos;
64 srcPos = r.topLeft();
65 areaSize = r.size();
66 }
67
68 if (areaSize.width() == 0 || areaSize.height() == 0)
69 return;
70
72 if (isInterrupted()) return;
73
74 const quint32 halfKernelWidth = (kernel->width() - 1) / 2;
75 const quint32 halfKernelHeight = (kernel->height() - 1) / 2;
76
77 m_fftWidth = areaSize.width() + 4 * halfKernelWidth;
78 m_fftHeight = areaSize.height() + 2 * halfKernelHeight;
79
85 //optimumDimensions(m_fftWidth, m_fftHeight);
86
88 m_extraMem = (m_fftWidth % 2) ? 1 : 2;
89
90 // create and fill kernel
91 m_kernelFFT = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_fftLength);
92 memset(m_kernelFFT, 0, sizeof(fftw_complex) * m_fftLength);
94
95 // find out which channels need convolving
96 QList<KoChannelInfo*> convChannelList = this->convolvableChannelList(src);
97
98 m_channelFFT.resize(convChannelList.count());
99 for (auto i = m_channelFFT.begin(); i != m_channelFFT.end(); ++i) {
100 *i = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_fftLength);
101 }
102
103 const double kernelFactor = kernel->factor() ? kernel->factor() : 1;
104 const double fftScale = 1.0 / (m_fftHeight * m_fftWidth) / kernelFactor;
105
106 FFTInfo info (fftScale, convChannelList, kernel, this->m_painter->device()->colorSpace());
107 int cacheRowStride = m_fftWidth + m_extraMem;
108
110 QRect(srcPos.x() - halfKernelWidth,
111 srcPos.y() - halfKernelHeight,
114 cacheRowStride,
115 info, dataRect);
116
117 addToProgress(10);
118 if (isInterrupted()) return;
119
120 // calculate number off fft operations required for progress reporting
121 const float progressPerFFT = (100 - 30) / (double)(convChannelList.count() * 2 + 1);
122
123 // perform FFT
124 fftw_plan fftwPlanForward, fftwPlanBackward;
125
127 fftwPlanForward = fftw_plan_dft_r2c_2d(m_fftHeight, m_fftWidth, (double*)m_kernelFFT, m_kernelFFT, FFTW_ESTIMATE);
128 fftwPlanBackward = fftw_plan_dft_c2r_2d(m_fftHeight, m_fftWidth, m_kernelFFT, (double*)m_kernelFFT, FFTW_ESTIMATE);
130
131 fftw_execute(fftwPlanForward);
132 addToProgress(progressPerFFT);
133 if (isInterrupted()) return;
134
135 for (auto k = m_channelFFT.begin(); k != m_channelFFT.end(); ++k)
136 {
137 fftw_execute_dft_r2c(fftwPlanForward, (double*)(*k), *k);
138 addToProgress(progressPerFFT);
139 if (isInterrupted()) return;
140
142
143 fftw_execute_dft_c2r(fftwPlanBackward, *k, (double*)*k);
144 addToProgress(progressPerFFT);
145 if (isInterrupted()) return;
146 }
147
149 fftw_destroy_plan(fftwPlanForward);
150 fftw_destroy_plan(fftwPlanBackward);
152
153
154 writeResultToDevice(QRect(dstPos.x(), dstPos.y(), areaSize.width(), areaSize.height()),
155 cacheRowStride, halfKernelWidth, halfKernelHeight,
156 info, dataRect);
157
158 addToProgress(20);
159 cleanUp();
160 }
161
162 struct FFTInfo {
163 FFTInfo(qreal _fftScale,
164 const QList<KoChannelInfo*> &_convChannelList,
165 const KisConvolutionKernelSP kernel,
166 const KoColorSpace */*colorSpace*/)
167 : fftScale(_fftScale),
168 convChannelList(_convChannelList)
169 {
170 KisMathToolbox mathToolbox;
171
172 for (int i = 0; i < convChannelList.count(); ++i) {
173 minClamp.append(mathToolbox.minChannelValue(convChannelList[i]));
174 maxClamp.append(mathToolbox.maxChannelValue(convChannelList[i]));
175 absoluteOffset.append((maxClamp[i] - minClamp[i]) * kernel->offset());
176
177 if (convChannelList[i]->channelType() == KoChannelInfo::ALPHA) {
178 alphaCachePos = i;
179 alphaRealPos = convChannelList[i]->pos();
180 }
181 }
182
183 toDoubleFuncPtr.resize(convChannelList.count());
184 fromDoubleFuncPtr.resize(convChannelList.count());
186
187 bool result = mathToolbox.getToDoubleChannelPtr(convChannelList, toDoubleFuncPtr);
190
191 KIS_ASSERT(result);
192 }
193
194 inline int numChannels() const {
195 return convChannelList.size();
196 }
197
198
202
203 qreal fftScale {0.0};
205
209
211 int alphaRealPos {-1};
212 };
213
215 const QRect &rect,
216 const int cacheRowStride,
217 const FFTInfo &info,
218 const QRect &dataRect) {
219
220 typename _IteratorFactory_::HLineConstIterator hitSrc =
221 _IteratorFactory_::createHLineConstIterator(src,
222 rect.x(), rect.y(), rect.width(),
223 dataRect);
224
225 const int channelCount = info.numChannels();
226 QVector<double*> channelPtr(channelCount);
227 const auto channelPtrBegin = channelPtr.begin();
228 const auto channelPtrEnd = channelPtr.end();
229
230 auto iFFt = m_channelFFT.constBegin();
231 for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++iFFt) {
232 *i = (double*)*iFFt;
233 }
234
235 // prepare cache, reused in all loops
236 QVector<double*> cacheRowStart(channelCount);
237 const auto cacheRowStartBegin = cacheRowStart.begin();
238
239 for (int y = 0; y < rect.height(); ++y) {
240 // cache current channelPtr in cacheRowStart
241 memcpy(cacheRowStart.data(), channelPtr.data(), channelCount * sizeof(double*));
242
243 for (int x = 0; x < rect.width(); ++x) {
244 const quint8 *data = hitSrc->oldRawData();
245
246 // no alpha is a rare case, so just multiply by 1.0 in that case
247 double alphaValue = info.alphaRealPos >= 0 ?
248 info.toDoubleFuncPtr[info.alphaCachePos](data, info.alphaRealPos) : 1.0;
249 int k = 0;
250 for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++k) {
251 if (k != info.alphaCachePos) {
252 const quint32 channelPos = info.convChannelList[k]->pos();
253 **i = info.toDoubleFuncPtr[k](data, channelPos) * alphaValue;
254 } else {
255 **i = alphaValue;
256 }
257
258 ++(*i);
259 }
260
261 hitSrc->nextPixel();
262 }
263
264 auto iRowStart = cacheRowStartBegin;
265 for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++iRowStart) {
266 *i = *iRowStart + cacheRowStride;
267 }
268
269 hitSrc->nextRow();
270 }
271
272 }
273
274 inline void limitValue(qreal *value, qreal lowBound, qreal highBound) {
275 if (*value > highBound) {
276 *value = highBound;
277 } else if (!(*value >= lowBound)) { // value < lowBound or value == NaN
278 // IEEE compliant comparisons with NaN are always false
279 *value = lowBound;
280 }
281 }
282
283 inline qreal writeAlphaFromCache(quint8* dstPtr,
284 const quint32 channel,
285 const FFTInfo &info,
286 double* channelValuePtr,
287 bool *dstValueIsNull) {
288 qreal channelPixelValue;
289
290 channelPixelValue = *channelValuePtr * info.fftScale + info.absoluteOffset[channel];
291 limitValue(&channelPixelValue, info.minClamp[channel], info.maxClamp[channel]);
292 info.fromDoubleCheckNullFuncPtr[channel](dstPtr, info.convChannelList[channel]->pos(), channelPixelValue, dstValueIsNull);
293
294 return channelPixelValue;
295 }
296
297 template <bool additionalMultiplierActive>
298 inline qreal writeOneChannelFromCache(quint8* dstPtr,
299 const quint32 channel,
300 const FFTInfo &info,
301 double* channelValuePtr,
302 const qreal additionalMultiplier = 0.0) {
303 qreal channelPixelValue;
304
305 if (additionalMultiplierActive) {
306 channelPixelValue = *channelValuePtr * info.fftScale * additionalMultiplier + info.absoluteOffset[channel];
307 } else {
308 channelPixelValue = *channelValuePtr * info.fftScale + info.absoluteOffset[channel];
309 }
310
311 limitValue(&channelPixelValue, info.minClamp[channel], info.maxClamp[channel]);
312
313 info.fromDoubleFuncPtr[channel](dstPtr, info.convChannelList[channel]->pos(), channelPixelValue);
314
315 return channelPixelValue;
316 }
317
318 void writeResultToDevice(const QRect &rect,
319 const int cacheRowStride,
320 const int halfKernelWidth,
321 const int halfKernelHeight,
322 const FFTInfo &info,
323 const QRect &dataRect) {
324
325 typename _IteratorFactory_::HLineIterator hitDst =
326 _IteratorFactory_::createHLineIterator(this->m_painter->device(),
327 rect.x(), rect.y(), rect.width(),
328 dataRect);
329
330 int initialOffset = cacheRowStride * halfKernelHeight + halfKernelWidth;
331
332 const int channelCount = info.numChannels();
333 QVector<double*> channelPtr(channelCount);
334 const auto channelPtrBegin = channelPtr.begin();
335 const auto channelPtrEnd = channelPtr.end();
336
337 auto iFFt = m_channelFFT.constBegin();
338 for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++iFFt) {
339 *i = (double*)*iFFt + initialOffset;
340 }
341
342 // prepare cache, reused in all loops
343 QVector<double*> cacheRowStart(channelCount);
344 const auto cacheRowStartBegin = cacheRowStart.begin();
345
346 for (int y = 0; y < rect.height(); ++y) {
347 // cache current channelPtr in cacheRowStart
348 memcpy(cacheRowStart.data(), channelPtr.data(), channelCount * sizeof(double*));
349
350 for (int x = 0; x < rect.width(); ++x) {
351 quint8 *dstPtr = hitDst->rawData();
352
353 if (info.alphaCachePos >= 0) {
354 bool alphaIsNullInDstSpace = false;
355
356 qreal alphaValue =
357 writeAlphaFromCache(dstPtr,
358 info.alphaCachePos,
359 info,
360 channelPtr.at(info.alphaCachePos),
361 &alphaIsNullInDstSpace);
362
363 if (!alphaIsNullInDstSpace &&
364 alphaValue > std::numeric_limits<qreal>::epsilon()) {
365
366 qreal alphaValueInv = 1.0 / alphaValue;
367
368 int k = 0;
369 for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++k) {
370 if (k != info.alphaCachePos) {
371 writeOneChannelFromCache<true>(dstPtr,
372 k,
373 info,
374 *i,
375 alphaValueInv);
376 }
377 ++(*i);
378 }
379 } else {
380 int k = 0;
381 for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++k) {
382 if (k != info.alphaCachePos) {
383 info.fromDoubleFuncPtr[k](dstPtr,
384 info.convChannelList[k]->pos(),
385 0.0);
386 }
387 ++(*i);
388 }
389 }
390 } else {
391 int k = 0;
392 for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++k) {
393 writeOneChannelFromCache<false>(dstPtr,
394 k,
395 info,
396 *i);
397 ++(*i);
398 }
399 }
400
401 hitDst->nextPixel();
402 }
403
404 auto iRowStart = cacheRowStartBegin;
405 for (auto i = channelPtrBegin; i != channelPtrEnd; ++i, ++iRowStart) {
406 *i = *iRowStart + cacheRowStride;
407 }
408
409 hitDst->nextRow();
410 }
411
412 }
413
414private:
416 {
417 // find central item
418 QPoint offset((kernel->width() - 1) / 2, (kernel->height() - 1) / 2);
419
420 qint32 xShift = m_fftWidth - offset.x();
421 qint32 yShift = m_fftHeight - offset.y();
422
423 quint32 absXpos, absYpos;
424
425 for (quint32 y = 0; y < kernel->height(); y++)
426 {
427 absYpos = y + yShift;
428 if (absYpos >= m_fftHeight)
429 absYpos -= m_fftHeight;
430
431 for (quint32 x = 0; x < kernel->width(); x++)
432 {
433 absXpos = x + xShift;
434 if (absXpos >= m_fftWidth)
435 absXpos -= m_fftWidth;
436
437 ((double*)m_kernelFFT)[(m_fftWidth + m_extraMem) * absYpos + absXpos] = kernel->data()->coeff(y, x);
438 }
439 }
440 }
441
442 void fftMultiply(fftw_complex* channel, fftw_complex* kernel)
443 {
444 // perform complex multiplication
445 fftw_complex *channelPtr = channel;
446 fftw_complex *kernelPtr = kernel;
447
448 fftw_complex tmp;
449
450 for (quint32 pixelPos = 0; pixelPos < m_fftLength; ++pixelPos)
451 {
452 tmp[0] = ((*channelPtr)[0] * (*kernelPtr)[0]) - ((*channelPtr)[1] * (*kernelPtr)[1]);
453 tmp[1] = ((*channelPtr)[0] * (*kernelPtr)[1]) + ((*channelPtr)[1] * (*kernelPtr)[0]);
454
455 (*channelPtr)[0] = tmp[0];
456 (*channelPtr)[1] = tmp[1];
457
458 ++channelPtr;
459 ++kernelPtr;
460 }
461 }
462
463 void optimumDimensions(quint32& w, quint32& h)
464 {
465 // FFTW is most efficient when array size is a factor of 2, 3, 5 or 7
466 quint32 optW = w, optH = h;
467
468 while ((optW % 2 != 0) || (optW % 3 != 0) || (optW % 5 != 0) || (optW % 7 != 0))
469 ++optW;
470
471 while ((optH % 2 != 0) || (optH % 3 != 0) || (optH % 5 != 0) || (optH % 7 != 0))
472 ++optH;
473
474 quint32 optAreaW = optW * h;
475 quint32 optAreaH = optH * w;
476
477 if (optAreaW < optAreaH) {
478 w = optW;
479 }
480 else {
481 h = optH;
482 }
483 }
484
485 void fftLogMatrix(double* channel, const QString &f)
486 {
488 QString filename(QDir::homePath() + "/log_" + f + ".txt");
489 dbgKrita << "Log File Name: " << filename;
490 QFile file (filename);
491 if (!file.open(QIODevice::WriteOnly | QIODevice::Truncate | QIODevice::Text))
492 {
493 dbgKrita << "Failed";
495 return;
496 }
497
498 QTextStream in(&file);
500 for (quint32 y = 0; y < m_fftHeight; y++)
501 {
502 for (quint32 x = 0; x < m_fftWidth; x++)
503 {
504 QString num = QString::number(channel[y * m_fftWidth + x]);
505 while (num.length() < 15)
506 num += " ";
507
508 in << num << " ";
509 }
510 in << "\n";
511 }
513 }
514
515 void addToProgress(float amount)
516 {
517 m_currentProgress += amount;
518
519 if (this->m_progress) {
521 }
522 }
523
525 {
526 if (this->m_progress && this->m_progress->interrupted()) {
527 cleanUp();
528 return true;
529 }
530
531 return false;
532 }
533
534 void cleanUp()
535 {
536 // free kernel fft data
537 if (m_kernelFFT) {
538 fftw_free(m_kernelFFT);
539 }
540
541 Q_FOREACH (fftw_complex *channel, m_channelFFT) {
542 fftw_free(channel);
543 }
544 m_channelFFT.clear();
545 }
546private:
547 quint32 m_fftWidth {0};
548 quint32 m_fftHeight {0};
549 quint32 m_fftLength {0};
550 quint32 m_extraMem {0};
551 float m_currentProgress {0.0};
552
553 fftw_complex* m_kernelFFT {0};
555};
556
557#endif
float value(const T *src, size_t ch)
void fillCacheFromDevice(KisPaintDeviceSP src, const QRect &rect, const int cacheRowStride, const FFTInfo &info, const QRect &dataRect)
void optimumDimensions(quint32 &w, quint32 &h)
KisConvolutionWorkerFFT(KisPainter *painter, KoUpdater *progress)
void fftFillKernelMatrix(const KisConvolutionKernelSP kernel, fftw_complex *m_kernelFFT)
void writeResultToDevice(const QRect &rect, const int cacheRowStride, const int halfKernelWidth, const int halfKernelHeight, const FFTInfo &info, const QRect &dataRect)
void fftLogMatrix(double *channel, const QString &f)
void execute(const KisConvolutionKernelSP kernel, const KisPaintDeviceSP src, QPoint srcPos, QPoint dstPos, QSize areaSize, const QRect &dataRect) override
void limitValue(qreal *value, qreal lowBound, qreal highBound)
QVector< fftw_complex * > m_channelFFT
qreal writeAlphaFromCache(quint8 *dstPtr, const quint32 channel, const FFTInfo &info, double *channelValuePtr, bool *dstValueIsNull)
void fftMultiply(fftw_complex *channel, fftw_complex *kernel)
qreal writeOneChannelFromCache(quint8 *dstPtr, const quint32 channel, const FFTInfo &info, double *channelValuePtr, const qreal additionalMultiplier=0.0)
QList< KoChannelInfo * > convolvableChannelList(const KisPaintDeviceSP src)
bool getToDoubleChannelPtr(QList< KoChannelInfo * > cis, QVector< PtrToDouble > &f)
bool getFromDoubleChannelPtr(QList< KoChannelInfo * > cis, QVector< PtrFromDouble > &f)
double minChannelValue(KoChannelInfo *)
double maxChannelValue(KoChannelInfo *)
bool getFromDoubleCheckNullChannelPtr(QList< KoChannelInfo * > cis, QVector< PtrFromDoubleCheckNull > &f)
const KoColorSpace * colorSpace() const
KisSelectionSP selection
KisPaintDeviceSP device
@ ALPHA
The channel represents the opacity of a pixel.
bool interrupted() const
Definition KoUpdater.cpp:54
void setProgress(int percent)
Definition KoUpdater.cpp:38
#define KIS_ASSERT(cond)
Definition kis_assert.h:33
#define dbgKrita
Definition kis_debug.h:45
void setUtf8OnStream(QTextStream &stream)
Eigen::Matrix< qreal, Eigen::Dynamic, Eigen::Dynamic > data
FFTInfo(qreal _fftScale, const QList< KoChannelInfo * > &_convChannelList, const KisConvolutionKernelSP kernel, const KoColorSpace *)
QVector< PtrFromDoubleCheckNull > fromDoubleCheckNullFuncPtr
QRect selectedRect() const