31 {
32#ifdef __SSE__
33
34
35 return _mm_cvtss_si32(_mm_set_ss(
value));
36#elif XSIMD_WITH_NEON64
37 return vgetq_lane_s32(vcvtnq_s32_f32(vrndiq_f32(vdupq_n_f32(
value))),
38 0);
39#elif XSIMD_WITH_NEON
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86 const auto nearbyint_as_int = [](
const float v) {
87 const auto a = vdupq_n_f32(
v);
88 const auto signmask = vdupq_n_u32(0x80000000);
89 const auto half =
90 vbslq_f32(signmask, a, vdupq_n_f32(0.5f));
91 const auto r_normal = vcvtq_s32_f32(
92 vaddq_f32(a, half));
93 const auto r_trunc =
94 vcvtq_s32_f32(a);
95 const auto plusone = vreinterpretq_s32_u32(
96 vshrq_n_u32(vreinterpretq_u32_s32(vnegq_s32(r_trunc)),
97 31));
98 const auto r_even =
99 vbicq_s32(vaddq_s32(r_trunc, plusone),
100 vdupq_n_s32(1));
101 const auto delta = vsubq_f32(
102 a,
103 vcvtq_f32_s32(r_trunc));
104 const auto is_delta_half =
105 vceqq_f32(delta, half);
106 return vbslq_s32(is_delta_half, r_even, r_normal);
107 };
108 return vgetq_lane_s32(nearbyint_as_int(
value), 0);
109#else
110 return std::lroundf(
value);
111#endif
112 }
float value(const T *src, size_t ch)