43 0x0D0C080706020100ull, 0x808080808080800Eull};
47 0x0E0A090804030280ull};
49 0x8080808080808080ull};
51 0x8080808080808080ull};
53 const __m128i m0 = _mm_load_si128((
const __m128i*)mask0);
54 const __m128i m1 = _mm_load_si128((
const __m128i*)mask1);
55 const __m128i m2 = _mm_load_si128((
const __m128i*)mask2);
56 const __m128i m3 = _mm_load_si128((
const __m128i*)mask3);
58 const int sw =
w >> 4;
59 const int sh = h >> 1;
63 for (
int i = 0; i < sh; i++)
65 for (
int j = 0; j < sw; j++)
68 __m128i d0 = _mm_load_si128((
const __m128i*)
in);
70 __m128i d1 = _mm_load_si128((
const __m128i*)
in);
74 __m128i shuf0 = _mm_shuffle_epi8(d0, m0);
75 __m128i shuf1 = _mm_shuffle_epi8(d1, m1);
77 __m128i res0 = _mm_or_si128(shuf0, shuf1);
79 if ((odd_row & 0x1) != 0)
80 _mm_storeu_si128((__m128i*)out, res0);
82 _mm_store_si128((__m128i*)out, res0);
86 __m128i d2 = _mm_load_si128((
const __m128i*)
in);
92 _mm_shuffle_epi8(d2, m2), _mm_shuffle_epi8(d1, m3)));
102 template <
bool IS_RGB>
109 mask0, 80, 00, 80, 03, 80, 06, 80, 09, 80, 0C, 80, 0F, 80, 80, 80,
112 mask1, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 02, 80,
116 mask2, 80, 01, 80, 04, 80, 07, 80, 0A, 80, 0D, 80, 80, 80, 80, 80,
119 mask3, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 00, 80, 03, 80,
123 mask4, 80, 02, 80, 05, 80, 08, 80, 0B, 80, 0E, 80, 80, 80, 80, 80,
126 mask5, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 01, 80, 04, 80,
130 mask6, 80, 08, 80, 0B, 80, 0E, 80, 80, 80, 80, 80, 80, 80, 80, 80,
133 mask7, 80, 80, 80, 80, 80, 80, 80, 01, 80, 04, 80, 07, 80, 0A, 80,
137 mask8, 80, 09, 80, 0C, 80, 0F, 80, 80, 80, 80, 80, 80, 80, 80, 80,
140 mask9, 80, 80, 80, 80, 80, 80, 80, 02, 80, 05, 80, 08, 80, 0B, 80,
144 mask10, 80, 0A, 80, 0D, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
147 mask11, 80, 80, 80, 80, 80, 00, 80, 03, 80, 06, 80, 09, 80, 0C, 80,
151 mask_to_low, 01, 03, 05, 07, 09, 0B, 0D, 0F, 80, 80, 80, 80, 80, 80, 80,
156 val_red, 00, 1D, 00, 1D, 00, 1D, 00, 1D, 00, 1D, 00, 1D, 00, 1D, 00, 1D)
158 val_green, 00, 96, 00, 96, 00, 96, 00, 96, 00, 96, 00, 96, 00, 96, 00,
161 val_blue, 00, 4D, 00, 4D, 00, 4D, 00, 4D, 00, 4D, 00, 4D, 00, 4D, 00,
165 _mm_load_si128(IS_RGB ? (
const __m128i*)mask4 : (
const __m128i*)mask0);
167 _mm_load_si128(IS_RGB ? (
const __m128i*)mask5 : (
const __m128i*)mask1);
168 const __m128i m2 = _mm_load_si128((
const __m128i*)mask2);
169 const __m128i m3 = _mm_load_si128((
const __m128i*)mask3);
171 _mm_load_si128(IS_RGB ? (
const __m128i*)mask0 : (
const __m128i*)mask4);
173 _mm_load_si128(IS_RGB ? (
const __m128i*)mask1 : (
const __m128i*)mask5);
176 _mm_load_si128(IS_RGB ? (
const __m128i*)mask10 : (
const __m128i*)mask6);
178 _mm_load_si128(IS_RGB ? (
const __m128i*)mask11 : (
const __m128i*)mask7);
179 const __m128i m8 = _mm_load_si128((
const __m128i*)mask8);
180 const __m128i m9 = _mm_load_si128((
const __m128i*)mask9);
182 _mm_load_si128(IS_RGB ? (
const __m128i*)mask6 : (
const __m128i*)mask10);
184 _mm_load_si128(IS_RGB ? (
const __m128i*)mask7 : (
const __m128i*)mask11);
186 const __m128i mask_low = _mm_load_si128((
const __m128i*)mask_to_low);
188 const __m128i VAL_R = _mm_load_si128((
const __m128i*)val_red);
189 const __m128i VAL_G = _mm_load_si128((
const __m128i*)val_green);
190 const __m128i VAL_B = _mm_load_si128((
const __m128i*)val_blue);
192 const int sw =
w >> 4;
195 for (
int i = 0; i < sh; i++)
197 for (
int j = 0; j < sw; j++)
200 const __m128i d0 = _mm_load_si128((
const __m128i*)
in);
202 const __m128i d1 = _mm_load_si128((
const __m128i*)
in);
204 const __m128i d2 = _mm_load_si128((
const __m128i*)
in);
209 const __m128i BLUES_0_7 = _mm_or_si128(
210 _mm_shuffle_epi8(d0, m0), _mm_shuffle_epi8(d1, m1));
211 const __m128i GREENS_0_7 = _mm_or_si128(
212 _mm_shuffle_epi8(d0, m2), _mm_shuffle_epi8(d1, m3));
213 const __m128i REDS_0_7 = _mm_or_si128(
214 _mm_shuffle_epi8(d0, m4), _mm_shuffle_epi8(d1, m5));
223 const __m128i GRAYS_0_7 = _mm_adds_epu16(
224 _mm_mulhi_epu16(REDS_0_7, VAL_R),
226 _mm_mulhi_epu16(GREENS_0_7, VAL_G),
227 _mm_mulhi_epu16(BLUES_0_7, VAL_B)));
230 (__m128i*)out, _mm_shuffle_epi8(GRAYS_0_7, mask_low));
236 const __m128i BLUES_8_15 = _mm_or_si128(
237 _mm_shuffle_epi8(d1, m6), _mm_shuffle_epi8(d2, m7));
238 const __m128i GREENS_8_15 = _mm_or_si128(
239 _mm_shuffle_epi8(d1, m8), _mm_shuffle_epi8(d2, m9));
240 const __m128i REDS_8_15 = _mm_or_si128(
241 _mm_shuffle_epi8(d1, m10), _mm_shuffle_epi8(d2, m11));
243 const __m128i GRAYS_8_15 = _mm_adds_epu16(
244 _mm_mulhi_epu16(REDS_8_15, VAL_R),
246 _mm_mulhi_epu16(GREENS_8_15, VAL_G),
247 _mm_mulhi_epu16(BLUES_8_15, VAL_B)));
250 (__m128i*)out, _mm_shuffle_epi8(GRAYS_8_15, mask_low));
271 private_image_SSSE3_rgb_or_bgr_to_gray_8u<false>(
in, out,
w, h);
287 private_image_SSSE3_rgb_or_bgr_to_gray_8u<true>(
in, out,
w, h);
292 #endif // end of MRPT_HAS_SSE3
#define MRPT_MAX_ALIGN_BYTES
void image_SSSE3_bgr_to_gray_8u(const uint8_t *in, uint8_t *out, int w, int h)
Convert a BGR image (3c8u) into a GRAYSCALE (1c8u) image, using Y=77*R+150*G+29*B.
#define BUILD_128BIT_CONST( _name, B0, B1, B2, B3, B4, B5, B6, B7, B8, B9, B10, B11, B12, B13, B14, B15)
GLubyte GLubyte GLubyte GLubyte w
void image_SSSE3_scale_half_3c8u(const uint8_t *in, uint8_t *out, int w, int h)
Subsample each 2x2 pixel block into 1x1 pixel, taking the first pixel & ignoring the other 3...
void image_SSSE3_rgb_to_gray_8u(const uint8_t *in, uint8_t *out, int w, int h)
Convert an RGB image (3c8u) into a GRAYSCALE (1c8u) image, using Y=77*R+150*G+29*B.
void private_image_SSSE3_rgb_or_bgr_to_gray_8u(const uint8_t *in, uint8_t *out, int w, int h)