12 #include <mrpt/config.h> 34 template <
bool MemIsAligned>
42 const __m128i m0 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x0E, 0x0D, 0x0C, 0x08, 0x07, 0x06, 0x02, 0x01, 0x00);
43 const __m128i m1 = _mm_set_epi8(0x0E, 0x0A, 0x09, 0x08, 0x04, 0x03, 0x02, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
44 const __m128i m2 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x0C, 0x0B, 0x0A, 0x06, 0x05, 0x04, 0x00, 0x80);
45 const __m128i m3 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x0F);
48 SSE_RESTORE_SIGN_WARNINGS
50 const int sw = w / 16;  // complete 16-pixel input blocks per row (each block is 3 x 16-byte loads)
52 const int rest_w = w - (16 * sw);  // input pixels left after the full blocks; must use sw: `w - 16 * w` is never positive, so the `p < rest_w / 2` tail loop could never run
54 for (
int i = 0; i < sh; i++)
56 const __m128i* inp =
reinterpret_cast<const __m128i*
>(
in);
59 for (
int j = 0; j < sw; j++)
62 __m128i d0 = mm_load_si128<MemIsAligned>(inp++);
63 __m128i d1 = mm_load_si128<MemIsAligned>(inp++);
66 __m128i shuf0 = _mm_shuffle_epi8(d0, m0);
67 __m128i shuf1 = _mm_shuffle_epi8(d1, m1);
69 __m128i res0 = _mm_or_si128(shuf0, shuf1);
72 reinterpret_cast<__m128i*>(outp), res0);
76 __m128i d2 = mm_load_si128<MemIsAligned>(inp++);
80 reinterpret_cast<__m128i*>(outp),
82 _mm_shuffle_epi8(d2, m2), _mm_shuffle_epi8(d1, m3)));
89 const uint8_t* in_rest =
in + 3 * 16 * sw;
90 for (
int p = 0;
p < rest_w / 2;
p++)
119 if (mrpt::system::is_aligned<16>(
in) && mrpt::system::is_aligned<16>(out) &&
120 is_multiple<16>(step_in) && is_multiple<16>(step_out))
122 impl_image_SSSE3_scale_half_3c8u<true>(
123 in, out,
w, h, step_in, step_out);
127 impl_image_SSSE3_scale_half_3c8u<false>(
128 in, out,
w, h, step_in, step_out);
134 template <
bool IS_RGB,
bool MemIsAligned>
144 const __m128i mask0 = _mm_setr_epi8(0x80, 0x00, 0x80, 0x03, 0x80, 0x06, 0x80, 0x09, 0x80, 0x0C, 0x80, 0x0F, 0x80, 0x80, 0x80, 0x80);
146 const __m128i mask1 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02, 0x80, 0x05);
148 const __m128i mask2 = _mm_setr_epi8(0x80, 0x01, 0x80, 0x04, 0x80, 0x07, 0x80, 0x0A, 0x80, 0x0D, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
150 const __m128i mask3 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x80, 0x03, 0x80, 0x06);
152 const __m128i mask4 = _mm_setr_epi8(0x80, 0x02, 0x80, 0x05, 0x80, 0x08, 0x80, 0x0B, 0x80, 0x0E, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
154 const __m128i mask5 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x80, 0x04, 0x80, 0x07);
156 const __m128i mask6 = _mm_setr_epi8(0x80, 0x08, 0x80, 0x0B, 0x80, 0x0E, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
158 const __m128i mask7 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x80, 0x04, 0x80, 0x07, 0x80, 0x0A, 0x80, 0x0D);
160 const __m128i mask8 = _mm_setr_epi8(0x80, 0x09, 0x80, 0x0C, 0x80, 0x0F, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
162 const __m128i mask9 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02, 0x80, 0x05, 0x80, 0x08, 0x80, 0x0B, 0x80, 0x0E);
164 const __m128i mask10 = _mm_setr_epi8(0x80, 0x0A, 0x80, 0x0D, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
166 const __m128i mask11 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x80, 0x03, 0x80, 0x06, 0x80, 0x09, 0x80, 0x0C, 0x80, 0x0F);
168 const __m128i VAL_R = _mm_setr_epi8(0x00, 0x1D, 0x00, 0x1D, 0x00, 0x1D, 0x00, 0x1D, 0x00, 0x1D, 0x00, 0x1D, 0x00, 0x1D, 0x00, 0x1D);
169 const __m128i VAL_G = _mm_setr_epi8(0x00, 0x96, 0x00, 0x96, 0x00, 0x96, 0x00, 0x96, 0x00, 0x96, 0x00, 0x96, 0x00, 0x96, 0x00, 0x96);
170 const __m128i VAL_B = _mm_setr_epi8(0x00, 0x4D, 0x00, 0x4D, 0x00, 0x4D, 0x00, 0x4D, 0x00, 0x4D, 0x00, 0x4D, 0x00, 0x4D, 0x00, 0x4D);
172 const __m128i mask_low = _mm_setr_epi8(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
175 SSE_RESTORE_SIGN_WARNINGS
177 const __m128i m0 = IS_RGB ? mask4 : mask0;
178 const __m128i m1 = IS_RGB ? mask5 : mask1;
179 const __m128i m2 = mask2;
180 const __m128i m3 = mask3;
181 const __m128i m4 = IS_RGB ? mask0 : mask4;
182 const __m128i m5 = IS_RGB ? mask1 : mask5;
183 const __m128i m6 = IS_RGB ? mask10 : mask6;
184 const __m128i m7 = IS_RGB ? mask11 : mask7;
185 const __m128i m8 = mask8;
186 const __m128i m9 = mask9;
187 const __m128i m10 = IS_RGB ? mask6 : mask10;
188 const __m128i m11 = IS_RGB ? mask7 : mask11;
190 const int sw =
w >> 4;
193 for (
int i = 0; i < sh; i++)
195 const __m128i* inp =
reinterpret_cast<const __m128i*
>(
in);
198 for (
int j = 0; j < sw; j++)
201 const __m128i d0 = mm_load_si128<MemIsAligned>(inp++);
202 const __m128i d1 = mm_load_si128<MemIsAligned>(inp++);
203 const __m128i d2 = mm_load_si128<MemIsAligned>(inp++);
207 const __m128i BLUES_0_7 = _mm_or_si128(
208 _mm_shuffle_epi8(d0, m0), _mm_shuffle_epi8(d1, m1));
209 const __m128i GREENS_0_7 = _mm_or_si128(
210 _mm_shuffle_epi8(d0, m2), _mm_shuffle_epi8(d1, m3));
211 const __m128i REDS_0_7 = _mm_or_si128(
212 _mm_shuffle_epi8(d0, m4), _mm_shuffle_epi8(d1, m5));
221 const __m128i GRAYS_0_7 = _mm_adds_epu16(
222 _mm_mulhi_epu16(REDS_0_7, VAL_R),
224 _mm_mulhi_epu16(GREENS_0_7, VAL_G),
225 _mm_mulhi_epu16(BLUES_0_7, VAL_B)));
228 reinterpret_cast<__m128i*>(outp),
229 _mm_shuffle_epi8(GRAYS_0_7, mask_low));
235 const __m128i BLUES_8_15 = _mm_or_si128(
236 _mm_shuffle_epi8(d1, m6), _mm_shuffle_epi8(d2, m7));
237 const __m128i GREENS_8_15 = _mm_or_si128(
238 _mm_shuffle_epi8(d1, m8), _mm_shuffle_epi8(d2, m9));
239 const __m128i REDS_8_15 = _mm_or_si128(
240 _mm_shuffle_epi8(d1, m10), _mm_shuffle_epi8(d2, m11));
242 const __m128i GRAYS_8_15 = _mm_adds_epu16(
243 _mm_mulhi_epu16(REDS_8_15, VAL_R),
245 _mm_mulhi_epu16(GREENS_8_15, VAL_G),
246 _mm_mulhi_epu16(BLUES_8_15, VAL_B)));
249 reinterpret_cast<__m128i*>(outp),
250 _mm_shuffle_epi8(GRAYS_8_15, mask_low));
275 ASSERTMSG_((step_in & 0x0f) == 0,
"step of input image must be 16*k");
276 ASSERTMSG_((step_out & 0x0f) == 0,
"step of output image must be 16*k");
278 if (mrpt::system::is_aligned<16>(
in) && mrpt::system::is_aligned<16>(out))
280 impl_image_SSSE3_rgb_or_bgr_to_gray_8u<false, true>(
281 in, out,
w, h, step_in, step_out);
285 impl_image_SSSE3_rgb_or_bgr_to_gray_8u<false, false>(
286 in, out,
w, h, step_in, step_out);
305 ASSERTMSG_((step_in & 0x0f) == 0,
"step of input image must be 16*k");
306 ASSERTMSG_((step_out & 0x0f) == 0,
"step of output image must be 16*k");
308 if (mrpt::system::is_aligned<16>(
in) && mrpt::system::is_aligned<16>(out))
310 impl_image_SSSE3_rgb_or_bgr_to_gray_8u<true, true>(
311 in, out,
w, h, step_in, step_out);
315 impl_image_SSSE3_rgb_or_bgr_to_gray_8u<true, false>(
316 in, out,
w, h, step_in, step_out);
322 #endif // end of MRPT_HAS_SSE3
void impl_image_SSSE3_rgb_or_bgr_to_gray_8u(const uint8_t *in, uint8_t *out, int w, int h, size_t step_in, size_t step_out)
void image_SSSE3_scale_half_3c8u(const uint8_t *in, uint8_t *out, int w, int h, size_t step_in, size_t step_out)
Subsample each 2x2 pixel block into 1x1 pixel, taking the first pixel & ignoring the other 3...
GLubyte GLubyte GLubyte GLubyte w
void impl_image_SSSE3_scale_half_3c8u(const uint8_t *in, uint8_t *out, int w, int h, size_t step_in, size_t step_out)
#define ASSERTMSG_(f, __ERROR_MSG)
Defines an assertion mechanism.
void image_SSSE3_bgr_to_gray_8u(const uint8_t *in, uint8_t *out, int w, int h, size_t step_in, size_t step_out)
Convert a BGR image (3c8u) into a GRAYSCALE (1c8u) image, using Y=77*R+150*G+29*B.
void image_SSSE3_rgb_to_gray_8u(const uint8_t *in, uint8_t *out, int w, int h, size_t step_in, size_t step_out)
Convert an RGB image (3c8u) into a GRAYSCALE (1c8u) image, using Y=77*R+150*G+29*B.