Composes src pixels into dst pixels, optimized for 32-bit-per-pixel colorspaces. The Compositor strategy parameter performs the actual math of the composition.
326 {
327 const int vectorSize = static_cast<int>(float_v::size);
328 const qint32 vectorInc = pixelSize * vectorSize;
329 const qint32 linearInc = pixelSize;
330 qint32 srcVectorInc = vectorInc;
331 qint32 srcLinearInc = pixelSize;
332
336 typename Compositor::ParamsWrapper paramsWrapper(params);
337
339 if (pixelSize == 4) {
340 auto *buf =
reinterpret_cast<uint_v *
>(xsimd::vector_aligned_malloc<typename uint_v::value_type>(vectorSize));
341 *buf =
uint_v(*(
reinterpret_cast<const quint32 *
>(srcRowStart)));
342 srcRowStart = reinterpret_cast<quint8 *>(buf);
343 srcLinearInc = 0;
344 srcVectorInc = 0;
345 } else {
346 auto *buf = xsimd::vector_aligned_malloc<quint8>(vectorInc);
347 quint8 *ptr = buf;
348
349 for (size_t i = 0; i < vectorSize; i++) {
351 ptr += pixelSize;
352 }
353
354 srcRowStart = buf;
355 srcLinearInc = 0;
356 srcVectorInc = 0;
357 }
358 }
359#if BLOCKDEBUG
360 int totalBlockAlign = 0;
361 int totalBlockAlignedVector = 0;
362 int totalBlockUnalignedVector = 0;
363 int totalBlockRest = 0;
364#endif
365
366 for (qint32 r = params.
rows;
r > 0; --
r) {
367
368 const quint8 *mask = maskRowStart;
369
370 const quint8 *
src = srcRowStart;
371 quint8 *dst = dstRowStart;
372
373 const int pixelsAlignmentMask = vectorSize * sizeof(float) - 1;
374 auto srcPtrValue =
reinterpret_cast<uintptr_t
>(
src);
375 auto dstPtrValue = reinterpret_cast<uintptr_t>(dst);
376 uintptr_t srcAlignment = srcPtrValue & pixelsAlignmentMask;
377 uintptr_t dstAlignment = dstPtrValue & pixelsAlignmentMask;
378
379
380
381
382
383 int blockAlign = params.
cols;
384 int blockAlignedVector = 0;
385 int blockUnalignedVector = 0;
386 int blockRest = 0;
387
388 int *vectorBlock =
389 srcAlignment == dstAlignment || !srcVectorInc ? &blockAlignedVector : &blockUnalignedVector;
390
391 if (!dstAlignment) {
392 blockAlign = 0;
393 *vectorBlock = params.
cols / vectorSize;
394 blockRest = params.
cols % vectorSize;
395 }
else if (params.
cols > 2 * vectorSize) {
396 blockAlign = (vectorInc - dstAlignment) / pixelSize;
397 const int restCols = params.
cols - blockAlign;
398 if (restCols > 0) {
399 *vectorBlock = restCols / vectorSize;
400 blockRest = restCols % vectorSize;
401 } else {
402 blockAlign = params.
cols;
403 *vectorBlock = 0;
404 blockRest = 0;
405 }
406 }
407#if BLOCKDEBUG
408 totalBlockAlign += blockAlign;
409 totalBlockAlignedVector += blockAlignedVector;
410 totalBlockUnalignedVector += blockUnalignedVector;
411 totalBlockRest += blockRest;
412#endif
413
414 for (int i = 0; i < blockAlign; i++) {
415 Compositor::template compositeOnePixelScalar<useMask, _impl>(src,
416 dst,
417 mask,
419 paramsWrapper);
421 dst += linearInc;
422
423 if (useMask) {
424 mask++;
425 }
426 }
427
428 for (int i = 0; i < blockAlignedVector; i++) {
429 Compositor::template compositeVector<useMask, true, _impl>(src,
430 dst,
431 mask,
433 paramsWrapper);
435 dst += vectorInc;
436
437 if (useMask) {
438 mask += vectorSize;
439 }
440 }
441
442 for (int i = 0; i < blockUnalignedVector; i++) {
443 Compositor::template compositeVector<useMask, false, _impl>(src,
444 dst,
445 mask,
447 paramsWrapper);
449 dst += vectorInc;
450
451 if (useMask) {
452 mask += vectorSize;
453 }
454 }
455
456 for (int i = 0; i < blockRest; i++) {
457 Compositor::template compositeOnePixelScalar<useMask, _impl>(src,
458 dst,
459 mask,
461 paramsWrapper);
463 dst += linearInc;
464
465 if (useMask) {
466 mask++;
467 }
468 }
469
472
473 if (useMask) {
475 }
476 }
477
478#if BLOCKDEBUG
480 <<
"rows:" << params.
rows <<
"\tpad(S):" << totalBlockAlign <<
"\tbav(V):" << totalBlockAlignedVector
481 << "\tbuv(V):" << totalBlockUnalignedVector << "\tres(S)"
482 << totalBlockRest;
483#endif
484
487 }
488 }
void vector_aligned_free(const T *ptr) noexcept
const quint8 * srcRowStart
const quint8 * maskRowStart