35 {
36#ifdef __SSE__
37
38
39 return _mm_cvtss_si32(_mm_set_ss(
value));
40#elif XSIMD_WITH_NEON64
41 return vgetq_lane_s32(vcvtnq_s32_f32(vrndiq_f32(vdupq_n_f32(
value))),
42 0);
43#elif XSIMD_WITH_NEON
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// Local helper for the 32-bit NEON path: converts v to a 32-bit integer,
// rounding to the nearest integer and sending exact .5 ties to the even
// neighbour. The computation is done on a vector holding v in every lane;
// the caller extracts lane 0 from the returned vector.
90 const auto nearbyint_as_int = [](
const float v) {
91 const auto a = vdupq_n_f32(
v);
// Mask with only the top (sign) bit of each 32-bit lane set.
92 const auto signmask = vdupq_n_u32(0x80000000);
// 0.5 carrying the sign of a: sign bit taken from a, all other bits from 0.5f.
93 const auto half =
94 vbslq_f32(signmask, a, vdupq_n_f32(0.5f));
// Default result: add the signed half, then convert to integer
// (vcvtq_s32_f32 rounds toward zero per the Arm intrinsics reference).
95 const auto r_normal = vcvtq_s32_f32(
96 vaddq_f32(a, half));
// a converted to integer on its own, i.e. with the fractional part dropped.
97 const auto r_trunc =
98 vcvtq_s32_f32(a);
// Sign bit of -r_trunc shifted down to bit 0: yields 1 when r_trunc > 0,
// and 0 when r_trunc is zero.
99 const auto plusone = vreinterpretq_s32_u32(
100 vshrq_n_u32(vreinterpretq_u32_s32(vnegq_s32(r_trunc)),
101 31));
// Tie candidate: (r_trunc + plusone) with bit 0 cleared, which is always even.
102 const auto r_even =
103 vbicq_s32(vaddq_s32(r_trunc, plusone),
104 vdupq_n_s32(1));
// Fractional part of a (keeps the sign of a).
105 const auto delta = vsubq_f32(
106 a,
107 vcvtq_f32_s32(r_trunc));
// Lanes where the fractional part is exactly +/-0.5, i.e. a tie.
108 const auto is_delta_half =
109 vceqq_f32(delta, half);
// Pick the even candidate for ties, the default rounding otherwise.
110 return vbslq_s32(is_delta_half, r_even, r_normal);
111 };
112 return vgetq_lane_s32(nearbyint_as_int(
value), 0);
113#else
114 return std::lroundf(
value);
115#endif
116 }
float value(const T *src, size_t ch)