131 const double* aVector,
132 const double* bVector,
133 unsigned int num_points)
135 unsigned int number = 0;
136 const unsigned int quarter_points = num_points / 4;
138 double* cPtr = cVector;
139 const double* aPtr = aVector;
140 const double* bPtr = bVector;
142 __m256d aVal, bVal, cVal;
143 for (; number < quarter_points; number++) {
145 aVal = _mm256_loadu_pd(aPtr);
146 bVal = _mm256_loadu_pd(bPtr);
148 cVal = _mm256_add_pd(aVal, bVal);
150 _mm256_storeu_pd(cPtr, cVal);
157 number = quarter_points * 4;
158 for (; number < num_points; number++) {
159 *cPtr++ = (*aPtr++) + (*bPtr++);
213 const double* aVector,
214 const double* bVector,
215 unsigned int num_points)
217 unsigned int number = 0;
218 const unsigned int quarter_points = num_points / 4;
220 double* cPtr = cVector;
221 const double* aPtr = aVector;
222 const double* bPtr = bVector;
224 __m256d aVal, bVal, cVal;
225 for (; number < quarter_points; number++) {
227 aVal = _mm256_load_pd(aPtr);
228 bVal = _mm256_load_pd(bPtr);
230 cVal = _mm256_add_pd(aVal, bVal);
232 _mm256_store_pd(cPtr, cVal);
239 number = quarter_points * 4;
240 for (; number < num_points; number++) {
241 *cPtr++ = (*aPtr++) + (*bPtr++);