Vector Optimized Library of Kernels  2.3
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_real_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
72 #define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 
77 #ifdef LV_HAVE_AVX2
78 #include <immintrin.h>
79 
80 static inline void volk_32fc_deinterleave_real_64f_a_avx2(double* iBuffer,
81  const lv_32fc_t* complexVector,
82  unsigned int num_points)
83 {
84  unsigned int number = 0;
85 
86  const float* complexVectorPtr = (float*)complexVector;
87  double* iBufferPtr = iBuffer;
88 
89  const unsigned int quarterPoints = num_points / 4;
90  __m256 cplxValue;
91  __m128 fVal;
92  __m256d dVal;
93  __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
94  for (; number < quarterPoints; number++) {
95 
96  cplxValue = _mm256_load_ps(complexVectorPtr);
97  complexVectorPtr += 8;
98 
99  // Arrange in i1i2i1i2 format
100  cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
101  fVal = _mm256_extractf128_ps(cplxValue, 0);
102  dVal = _mm256_cvtps_pd(fVal);
103  _mm256_store_pd(iBufferPtr, dVal);
104 
105  iBufferPtr += 4;
106  }
107 
108  number = quarterPoints * 4;
109  for (; number < num_points; number++) {
110  *iBufferPtr++ = (double)*complexVectorPtr++;
111  complexVectorPtr++;
112  }
113 }
114 #endif /* LV_HAVE_AVX2 */
115 
116 #ifdef LV_HAVE_SSE2
117 #include <emmintrin.h>
118 
119 static inline void volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer,
120  const lv_32fc_t* complexVector,
121  unsigned int num_points)
122 {
123  unsigned int number = 0;
124 
125  const float* complexVectorPtr = (float*)complexVector;
126  double* iBufferPtr = iBuffer;
127 
128  const unsigned int halfPoints = num_points / 2;
129  __m128 cplxValue, fVal;
130  __m128d dVal;
131  for (; number < halfPoints; number++) {
132 
133  cplxValue = _mm_load_ps(complexVectorPtr);
134  complexVectorPtr += 4;
135 
136  // Arrange in i1i2i1i2 format
137  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
138  dVal = _mm_cvtps_pd(fVal);
139  _mm_store_pd(iBufferPtr, dVal);
140 
141  iBufferPtr += 2;
142  }
143 
144  number = halfPoints * 2;
145  for (; number < num_points; number++) {
146  *iBufferPtr++ = (double)*complexVectorPtr++;
147  complexVectorPtr++;
148  }
149 }
150 #endif /* LV_HAVE_SSE2 */
151 
152 #ifdef LV_HAVE_GENERIC
153 
154 static inline void volk_32fc_deinterleave_real_64f_generic(double* iBuffer,
155  const lv_32fc_t* complexVector,
156  unsigned int num_points)
157 {
158  unsigned int number = 0;
159  const float* complexVectorPtr = (float*)complexVector;
160  double* iBufferPtr = iBuffer;
161  for (number = 0; number < num_points; number++) {
162  *iBufferPtr++ = (double)*complexVectorPtr++;
163  complexVectorPtr++;
164  }
165 }
166 #endif /* LV_HAVE_GENERIC */
167 
168 #ifdef LV_HAVE_NEONV8
169 #include <arm_neon.h>
170 
/*!
 * \brief Extracts the real part of each complex float sample and widens it to
 *        double precision (ARMv8 NEON).
 *
 * \param iBuffer       Output buffer; receives num_points doubles.
 * \param complexVector Input buffer of num_points complex float samples.
 * \param num_points    Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_real_64f_neon(double* iBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    unsigned int number;
    const unsigned int quarter_points = num_points / 4;

    /* View the complex input as a flat float array: re0, im0, re1, im1, ...
     * Keep the const qualifier; the input is never written. */
    const float* complexVectorPtr = (const float*)complexVector;
    double* iBufferPtr = iBuffer;

    /* Vector body: four complex samples (eight floats) per iteration. */
    for (number = 0; number < quarter_points; number++) {
        /* 4-way de-interleaving load of eight floats:
         *   val[0] = {re0, re2}, val[1] = {im0, im2},
         *   val[2] = {re1, re3}, val[3] = {im1, im3} */
        const float32x2x4_t complexInput = vld4_f32(complexVectorPtr);
        float64x2x2_t iVal;

        /* Single to double precision conversion of the real parts */
        iVal.val[0] = vcvt_f64_f32(complexInput.val[0]); /* {re0, re2} */
        iVal.val[1] = vcvt_f64_f32(complexInput.val[2]); /* {re1, re3} */

        /* The 2-way interleaving store restores sample order:
         * re0, re1, re2, re3 */
        vst2q_f64(iBufferPtr, iVal);

        iBufferPtr += 4;
        complexVectorPtr += 8;
    }

    /* Scalar tail: the remaining num_points % 4 samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr;
        complexVectorPtr += 2; /* skip the imaginary part */
    }
}
207 #endif /* LV_HAVE_NEONV8 */
208 
209 #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */
210 
211 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_u_H
212 #define INCLUDED_volk_32fc_deinterleave_real_64f_u_H
213 
214 #include <inttypes.h>
215 #include <stdio.h>
216 
217 #ifdef LV_HAVE_AVX2
218 #include <immintrin.h>
219 
220 static inline void volk_32fc_deinterleave_real_64f_u_avx2(double* iBuffer,
221  const lv_32fc_t* complexVector,
222  unsigned int num_points)
223 {
224  unsigned int number = 0;
225 
226  const float* complexVectorPtr = (float*)complexVector;
227  double* iBufferPtr = iBuffer;
228 
229  const unsigned int quarterPoints = num_points / 4;
230  __m256 cplxValue;
231  __m128 fVal;
232  __m256d dVal;
233  __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
234  for (; number < quarterPoints; number++) {
235 
236  cplxValue = _mm256_loadu_ps(complexVectorPtr);
237  complexVectorPtr += 8;
238 
239  // Arrange in i1i2i1i2 format
240  cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
241  fVal = _mm256_extractf128_ps(cplxValue, 0);
242  dVal = _mm256_cvtps_pd(fVal);
243  _mm256_storeu_pd(iBufferPtr, dVal);
244 
245  iBufferPtr += 4;
246  }
247 
248  number = quarterPoints * 4;
249  for (; number < num_points; number++) {
250  *iBufferPtr++ = (double)*complexVectorPtr++;
251  complexVectorPtr++;
252  }
253 }
254 #endif /* LV_HAVE_AVX2 */
255 
256 #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_u_H */
volk_32fc_deinterleave_real_64f_generic
static void volk_32fc_deinterleave_real_64f_generic(double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_64f.h:154
volk_32fc_deinterleave_real_64f_a_sse2
static void volk_32fc_deinterleave_real_64f_a_sse2(double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_64f.h:119
lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:70