Krita Source Code Documentation
Loading...
Searching...
No Matches
xsimd_generic_details.hpp
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: 2022 L. E. Segovia <amy@amyspark.me>
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7#ifndef KIS_XSIMD_GENERIC_DETAILS_DECL_HPP
8#define KIS_XSIMD_GENERIC_DETAILS_DECL_HPP
9
#include <array>
#include <cstddef>
#include <cstdint>
#include <tuple>
#include <utility>
12
13namespace xsimd
14{
15/***********************
16 * Truncate-initialize *
17 ***********************/
18
19template<typename V, typename T, typename A>
20batch<T, A> truncate_to_type(batch<T, A> const &self) noexcept;
21
22/**************************
23 * Masked initializations *
24 **************************/
25
26// Mask to 0 elements of a vector.
27template<typename T, typename A>
28inline auto set_zero(const batch<T, A> &src, const batch_bool<T, A> &mask) noexcept;
29
30// Mask to 1 elements of a vector.
31template<typename T, typename A>
32inline auto set_one(const batch<T, A> &src, const batch_bool<T, A> &mask) noexcept;
33
34/**********************************
35 * Sign-extending unaligned loads *
36 **********************************/
37
38// Load `T::size` values from the array of `T2` elements.
39template<typename T, typename T2>
40inline T load_and_extend(const T2 *src) noexcept;
41
42/*************************************************
43 * Type-inferred, auto-aligned memory allocation *
44 *************************************************/
45
46// Allocate size bytes of memory aligned to `batch<T, A>::alignment()`.
47template<typename T, typename A = xsimd::current_arch>
48inline T *aligned_malloc(size_t size) noexcept;
49
50// Allocate memory for `sz` T items, aligned to the selected architecture's
51// alignment.
52template<typename T, typename A = xsimd::current_arch>
53inline T *vector_aligned_malloc(size_t sz) noexcept;
54
55// Free allocated memory, hiding the `const_cast` if necessary.
56template<typename T>
57inline void vector_aligned_free(const T *ptr) noexcept;
58
59/****************
60 * Interleaving *
61 ****************/
62
63// Return the tuple of interleaved batches `a` and `b`.
64// First element is the low half, second is the upper half.
65template<typename V>
66inline std::pair<V, V> interleave(const V &a, const V &b) noexcept;
67
68template<typename T, typename A>
69inline xsimd::batch<T, A> pow2(xsimd::batch<T, A> const &self) noexcept;
70
71namespace kernel
72{
73namespace detail
74{
75/*****************************
76 * Helpers: unary applicator *
77 *****************************/
78
79template<class F, class A, class T>
80inline batch<T, A> apply_with_value(F &&func, batch<T, A> const &self) noexcept
81{
82 alignas(A::alignment()) std::array<T, batch<T, A>::size> self_buffer;
83 self.store_aligned(self_buffer.data());
84 for (std::size_t i = 0; i < batch<T, A>::size; ++i) {
85 self_buffer[i] = func(self_buffer[i]);
86 }
87 return batch<T, A>::load_aligned(self_buffer.data());
88}
89
90template<class F, class A, class T>
91inline batch<T, A> apply_with_index_and_value(F &&func, batch<T, A> const &self) noexcept
92{
93 alignas(A::alignment()) std::array<T, batch<T, A>::size> self_buffer;
94 self.store_aligned(self_buffer.data());
95 for (std::size_t i = 0; i < batch<T, A>::size; ++i) {
96 self_buffer[i] = func(i, self_buffer[i]);
97 }
98 return batch<T, A>::load_aligned(self_buffer.data());
99}
100} // namespace kernel
101} // namespace detail
102
103}; // namespace xsimd
104
105#endif // KIS_XSIMD_GENERIC_DETAILS_DECL_HPP
batch< T, A > apply_with_index_and_value(F &&func, batch< T, A > const &self) noexcept
batch< T, A > apply_with_value(F &&func, batch< T, A > const &self) noexcept
std::pair< V, V > interleave(const V &a, const V &b) noexcept
T * vector_aligned_malloc(size_t sz) noexcept
auto set_one(const batch< T, A > &src, const batch_bool< T, A > &mask) noexcept
batch< T, A > pow2(batch< T, A > const &self) noexcept
auto set_zero(const batch< T, A > &src, const batch_bool< T, A > &mask) noexcept
void vector_aligned_free(const T *ptr) noexcept
T * aligned_malloc(size_t size) noexcept
batch< T, A > truncate_to_type(xsimd::batch< T, A > const &self) noexcept
T load_and_extend(const T2 *src) noexcept