MRPT  1.9.9
CImage_SSE2.cpp
Go to the documentation of this file.
1 /* +------------------------------------------------------------------------+
2  | Mobile Robot Programming Toolkit (MRPT) |
3  | https://www.mrpt.org/ |
4  | |
5  | Copyright (c) 2005-2019, Individual contributors, see AUTHORS file |
6  | See: https://www.mrpt.org/Authors - All rights reserved. |
7  | Released under BSD License. See: https://www.mrpt.org/License |
8  +------------------------------------------------------------------------+ */
9 
10 #include "img-precomp.h" // Precompiled headers
11 
12 #include <mrpt/config.h>
13 
14 #if MRPT_HAS_SSE2
15 // ---------------------------------------------------------------------------
16 // This file contains the SSE2 optimized functions for mrpt::img::CImage
17 // See the sources and the doxygen documentation page "sse_optimizations" for
18 // more details.
19 //
20 // Some functions here are derived from sources in libcvd, released
21 // under BSD. https://www.edwardrosten.com/cvd/
22 //
23 // ---------------------------------------------------------------------------
24 
25 #include <mrpt/core/SSE_macros.h>
26 #include <mrpt/core/SSE_types.h>
27 #include <mrpt/img/CImage.h>
28 #include <mrpt/system/memory.h>
29 #include "CImage_SSEx.h"
30 
31 /** \addtogroup sse_optimizations
32  * SSE optimized functions
33  * @{
34  */
35 
36 template <bool MemIsAligned>
38  const uint8_t* in, uint8_t* out, int w, int h, size_t step_in,
39  size_t step_out)
40 {
41  SSE_DISABLE_WARNINGS
42  // clang-format off
43 
44  const __m128i m = _mm_set_epi8(0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff);
45 
46  // clang-format on
47  SSE_RESTORE_SIGN_WARNINGS
48 
49  const int sw = w / 16;
50  const int sh = h / 2;
51  const int rest_w = w - (16 * w);
52 
53  for (int i = 0; i < sh; i++)
54  {
55  auto inp = reinterpret_cast<const __m128i*>(in);
56  uint8_t* outp = out;
57  for (int j = 0; j < sw; j++)
58  {
59  const __m128i x =
60  _mm_and_si128(mm_load_si128<MemIsAligned>(inp++), m);
61  auto o = reinterpret_cast<__m128i*>(outp);
62  _mm_storel_epi64(o, _mm_packus_epi16(x, x));
63  outp += 8;
64  }
65  // Extra pixels? (w mod 16 != 0)
66  if (rest_w != 0)
67  {
68  const uint8_t* in_rest = in + 16 * sw;
69  for (int p = 0; p < rest_w / 2; p++)
70  {
71  *outp++ = in_rest[0];
72  in_rest += 2;
73  }
74  }
75 
76  in += 2 * step_in; // Skip one row
77  out += step_out;
78  }
79 }
80 
81 template <bool MemIsAligned>
83  const uint8_t* in, uint8_t* out, int w, int h, size_t step_in,
84  size_t step_out)
85 {
86  SSE_DISABLE_WARNINGS
87  // clang-format off
88 
89  const __m128i m = _mm_set_epi8(0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff);
90 
91  // clang-format on
92  SSE_RESTORE_SIGN_WARNINGS
93 
94  const int sw = w / 16;
95  const int sh = h / 2;
96  const int rest_w = w - (16 * w);
97 
98  for (int i = 0; i < sh; i++)
99  {
100  auto inp = reinterpret_cast<const __m128i*>(in);
101  auto nextRow = reinterpret_cast<const __m128i*>(in + step_in);
102  uint8_t* outp = out;
103 
104  for (int j = 0; j < sw; j++)
105  {
106  __m128i here = mm_load_si128<MemIsAligned>(inp++);
107  __m128i next = mm_load_si128<MemIsAligned>(nextRow++);
108  here = _mm_avg_epu8(here, next);
109  next = _mm_and_si128(_mm_srli_si128(here, 1), m);
110  here = _mm_and_si128(here, m);
111  here = _mm_avg_epu16(here, next);
112  _mm_storel_epi64(
113  reinterpret_cast<__m128i*>(outp), _mm_packus_epi16(here, here));
114  outp += 8;
115  }
116 
117  // Extra pixels? (w mod 16 != 0)
118  if (rest_w != 0)
119  {
120  const uint8_t* ir = in + 16 * sw;
121  const uint8_t* irr = in + step_in;
122  for (int p = 0; p < rest_w / 2; p++)
123  {
124  *outp++ = (ir[0] + ir[1] + irr[0] + irr[1]) / 4;
125  ir += 2;
126  irr += 2;
127  }
128  }
129 
130  in += 2 * step_in; // Skip one row
131  out += step_out;
132  }
133 }
134 
135 /** Subsample each 2x2 pixel block into 1x1 pixel, taking the first pixel &
136  * ignoring the other 3
137  * - <b>Input format:</b> uint8_t, 1 channel
138  * - <b>Output format:</b> uint8_t, 1 channel
139  * - <b>Preconditions:</b> in & out aligned to 16bytes (faster) or not, step =
140  * k*16 (faster) or not
141  * - <b>Notes:</b>
142  * - <b>Requires:</b> SSE2
143  * - <b>Invoked from:</b> mrpt::img::CImage::scaleHalf()
144  */
146  const uint8_t* in, uint8_t* out, int w, int h, size_t step_in,
147  size_t step_out)
148 {
149  if (mrpt::system::is_aligned<16>(in) && mrpt::system::is_aligned<16>(out) &&
150  is_multiple<16>(step_in) && is_multiple<16>(step_out))
151  {
152  impl_image_SSE2_scale_half_1c8u<true>(in, out, w, h, step_in, step_out);
153  }
154  else
155  {
156  impl_image_SSE2_scale_half_1c8u<false>(
157  in, out, w, h, step_in, step_out);
158  }
159 }
160 
161 /** Average each 2x2 pixels into 1x1 pixel (arithmetic average)
162  * - <b>Input format:</b> uint8_t, 1 channel
163  * - <b>Output format:</b> uint8_t, 1 channel
164  * - <b>Preconditions:</b> in & out aligned to 16bytes (faster) or not, step =
165  * k*16 (faster) or not
166  * - <b>Notes:</b>
167  * - <b>Requires:</b> SSE2
168  * - <b>Invoked from:</b> mrpt::img::CImage::scaleHalfSmooth()
169  */
171  const uint8_t* in, uint8_t* out, int w, int h, size_t step_in,
172  size_t step_out)
173 {
174  if (mrpt::system::is_aligned<16>(in) && mrpt::system::is_aligned<16>(out) &&
175  is_multiple<16>(step_in) && is_multiple<16>(step_out))
176  {
177  impl_image_SSE2_scale_half_smooth_1c8u<true>(
178  in, out, w, h, step_in, step_out);
179  }
180  else
181  {
182  impl_image_SSE2_scale_half_smooth_1c8u<false>(
183  in, out, w, h, step_in, step_out);
184  }
185 }
186 
187 // TODO:
188 // Sum of absolute differences: Use _mm_sad_epu8
189 
190 /** @} */
191 
192 #endif // end if MRPT_HAS_SSE2
void image_SSE2_scale_half_smooth_1c8u(const uint8_t *in, uint8_t *out, int w, int h, size_t step_in, size_t step_out)
Average each 2x2 pixels into 1x1 pixel (arithmetic average)
void impl_image_SSE2_scale_half_smooth_1c8u(const uint8_t *in, uint8_t *out, int w, int h, size_t step_in, size_t step_out)
Definition: CImage_SSE2.cpp:82
void image_SSE2_scale_half_1c8u(const uint8_t *in, uint8_t *out, int w, int h, size_t step_in, size_t step_out)
Subsample each 2x2 pixel block into 1x1 pixel, taking the first pixel & ignoring the other 3...
GLubyte GLubyte GLubyte GLubyte w
Definition: glext.h:4199
unsigned char uint8_t
Definition: rptypes.h:44
GLuint in
Definition: glext.h:7391
GLenum GLint x
Definition: glext.h:3542
GLfloat GLfloat p
Definition: glext.h:6398
void impl_image_SSE2_scale_half_1c8u(const uint8_t *in, uint8_t *out, int w, int h, size_t step_in, size_t step_out)
Definition: CImage_SSE2.cpp:37



Page generated by Doxygen 1.8.14 for MRPT 1.9.9 Git: 8fe78517f Sun Jul 14 19:43:28 2019 +0200 at lun oct 28 02:10:00 CET 2019