Composes src pixels into dst pixels. Optimized for 32-bit-per-pixel colorspaces. Uses the Compositor strategy parameter to perform the actual composition math.
328 {
329 const int vectorSize = static_cast<int>(float_v::size);
330 const qint32 vectorInc = pixelSize * vectorSize;
331 const qint32 linearInc = pixelSize;
332 qint32 srcVectorInc = vectorInc;
333 qint32 srcLinearInc = pixelSize;
334
338 typename Compositor::ParamsWrapper paramsWrapper(params);
339
341 if (pixelSize == 4) {
342 auto *buf =
reinterpret_cast<uint_v *
>(xsimd::vector_aligned_malloc<typename uint_v::value_type>(vectorSize));
343 *buf =
uint_v(*(
reinterpret_cast<const quint32 *
>(srcRowStart)));
344 srcRowStart = reinterpret_cast<quint8 *>(buf);
345 srcLinearInc = 0;
346 srcVectorInc = 0;
347 } else {
348 auto *buf = xsimd::vector_aligned_malloc<quint8>(vectorInc);
349 quint8 *ptr = buf;
350
351 for (size_t i = 0; i < vectorSize; i++) {
353 ptr += pixelSize;
354 }
355
356 srcRowStart = buf;
357 srcLinearInc = 0;
358 srcVectorInc = 0;
359 }
360 }
361#if BLOCKDEBUG
362 int totalBlockAlign = 0;
363 int totalBlockAlignedVector = 0;
364 int totalBlockUnalignedVector = 0;
365 int totalBlockRest = 0;
366#endif
367
368 for (qint32 r = params.
rows;
r > 0; --
r) {
369
370 const quint8 *mask = maskRowStart;
371
372 const quint8 *
src = srcRowStart;
373 quint8 *dst = dstRowStart;
374
375 const int pixelsAlignmentMask = vectorSize * sizeof(float) - 1;
376 auto srcPtrValue =
reinterpret_cast<uintptr_t
>(
src);
377 auto dstPtrValue = reinterpret_cast<uintptr_t>(dst);
378 uintptr_t srcAlignment = srcPtrValue & pixelsAlignmentMask;
379 uintptr_t dstAlignment = dstPtrValue & pixelsAlignmentMask;
380
381
382
383
384
385 int blockAlign = params.
cols;
386 int blockAlignedVector = 0;
387 int blockUnalignedVector = 0;
388 int blockRest = 0;
389
390 int *vectorBlock =
391 srcAlignment == dstAlignment || !srcVectorInc ? &blockAlignedVector : &blockUnalignedVector;
392
393 if (!dstAlignment) {
394 blockAlign = 0;
395 *vectorBlock = params.
cols / vectorSize;
396 blockRest = params.
cols % vectorSize;
397 }
else if (params.
cols > 2 * vectorSize) {
398 blockAlign = (vectorInc - dstAlignment) / pixelSize;
399 const int restCols = params.
cols - blockAlign;
400 if (restCols > 0) {
401 *vectorBlock = restCols / vectorSize;
402 blockRest = restCols % vectorSize;
403 } else {
404 blockAlign = params.
cols;
405 *vectorBlock = 0;
406 blockRest = 0;
407 }
408 }
409#if BLOCKDEBUG
410 totalBlockAlign += blockAlign;
411 totalBlockAlignedVector += blockAlignedVector;
412 totalBlockUnalignedVector += blockUnalignedVector;
413 totalBlockRest += blockRest;
414#endif
415
416 for (int i = 0; i < blockAlign; i++) {
417 Compositor::template compositeOnePixelScalar<useMask, _impl>(src,
418 dst,
419 mask,
421 paramsWrapper);
423 dst += linearInc;
424
425 if (useMask) {
426 mask++;
427 }
428 }
429
430 for (int i = 0; i < blockAlignedVector; i++) {
431 Compositor::template compositeVector<useMask, true, _impl>(src,
432 dst,
433 mask,
435 paramsWrapper);
437 dst += vectorInc;
438
439 if (useMask) {
440 mask += vectorSize;
441 }
442 }
443
444 for (int i = 0; i < blockUnalignedVector; i++) {
445 Compositor::template compositeVector<useMask, false, _impl>(src,
446 dst,
447 mask,
449 paramsWrapper);
451 dst += vectorInc;
452
453 if (useMask) {
454 mask += vectorSize;
455 }
456 }
457
458 for (int i = 0; i < blockRest; i++) {
459 Compositor::template compositeOnePixelScalar<useMask, _impl>(src,
460 dst,
461 mask,
463 paramsWrapper);
465 dst += linearInc;
466
467 if (useMask) {
468 mask++;
469 }
470 }
471
474
475 if (useMask) {
477 }
478 }
479
480#if BLOCKDEBUG
482 <<
"rows:" << params.
rows <<
"\tpad(S):" << totalBlockAlign <<
"\tbav(V):" << totalBlockAlignedVector
483 << "\tbuv(V):" << totalBlockUnalignedVector << "\tres(S)"
484 << totalBlockRest;
485#endif
486
489 }
490 }
void vector_aligned_free(const T *ptr) noexcept
const quint8 * srcRowStart
const quint8 * maskRowStart