AceInfinity
Emeritus, Contributor
Here's an example I wrote using GCC compiler intrinsics for the SSE instruction set to operate on multiple data elements simultaneously (SIMD).
Result:
Code:
[NO-PARSE]#include <cstdio>
#include <cassert>
#include <immintrin.h>
// Writes `value` into 16-bit lane `index` of `m`, leaving the other lanes
// as they were.
//
// _mm_insert_pi16 needs its lane selector as a compile-time constant, so a
// runtime index is dispatched to one call per lane. `index` must be in
// [0, 4); this is asserted, and with asserts compiled out an out-of-range
// index leaves `m` unchanged.
void insert(__m64 &m, int value, unsigned int index)
{
    assert(index < 4);

    if (index == 0)
    {
        m = _mm_insert_pi16(m, value, 0);
    }
    else if (index == 1)
    {
        m = _mm_insert_pi16(m, value, 1);
    }
    else if (index == 2)
    {
        m = _mm_insert_pi16(m, value, 2);
    }
    else if (index == 3)
    {
        m = _mm_insert_pi16(m, value, 3);
    }
}
// Returns the value held in 16-bit lane `index` of `m`.
//
// _mm_extract_pi16 needs its lane selector as a compile-time constant, so a
// runtime index is dispatched to one call per lane. `index` must be in
// [0, 4); this is asserted, and with asserts compiled out any index other
// than 0, 1 or 2 reads lane 3.
int extract(const __m64 &m, unsigned int index)
{
    assert(index < 4);

    if (index == 0)
    {
        return _mm_extract_pi16(m, 0);
    }
    if (index == 1)
    {
        return _mm_extract_pi16(m, 1);
    }
    if (index == 2)
    {
        return _mm_extract_pi16(m, 2);
    }
    return _mm_extract_pi16(m, 3);
}
// Demo: packs values derived from an 8-element int array into two __m64
// vectors of four 16-bit lanes each, averages them lane by lane with
// _mm_avg_pu16, and prints the inputs and the average for every lane.
// Returns 0 on success, 1 if the aligned allocation fails.
int main()
{
    // 32-byte aligned memory block for 8 int elements
    const int n = 8;
    const int alignment = 32;
    int *arr = (int *)_mm_malloc(n * sizeof(int), alignment);
    if (!arr)
    {
        fprintf(stderr, "_mm_malloc failed\n");
        return 1;
    }

    // initialize array data
    for (int i = 0; i < n; ++i) arr[i] = i;

    // insert() reads the current vector before replacing a single lane, so
    // both vectors must start from a defined value rather than being left
    // uninitialized.
    __m64 a = _mm_setzero_si64();
    __m64 b = _mm_setzero_si64();

    // lane i of a takes arr[2i]; lane i of b takes arr[2i + 1] * (arr[2i] + 1)
    for (unsigned int i = 0; i < 4; ++i)
    {
        int k = i * 2;
        insert(a, arr[k], i);
        insert(b, arr[k + 1] * (arr[k] + 1), i);
    }
    _mm_free(arr); // free allocated memory

    // per-lane unsigned average, rounded up: (a + b + 1) >> 1
    __m64 result = _mm_avg_pu16(a, b);

    // Copy every lane out to plain ints first so that all __m64 work is
    // finished before calling into the C library.
    int lanes_a[4];
    int lanes_b[4];
    int lanes_avg[4];
    for (unsigned int i = 0; i < 4; ++i)
    {
        lanes_a[i] = extract(a, i);
        lanes_b[i] = extract(b, i);
        lanes_avg[i] = extract(result, i);
    }

    // __m64 operations may use the MMX registers, which alias the x87 FPU
    // stack; clear that state now that the vector work is done.
    _mm_empty();

    for (unsigned int i = 0; i < 4; ++i)
    {
        printf("__m64 average a[%u] : b[%u] = %d : %d = %d\n",
               i, i, lanes_a[i], lanes_b[i],
               lanes_avg[i]
        );
    }
    return 0;
}[/NO-PARSE]
Result:
Code:
[NO-PARSE]__m64 average a[0] : b[0] = 0 : 1 = 1
__m64 average a[1] : b[1] = 2 : 9 = 6
__m64 average a[2] : b[2] = 4 : 25 = 15
__m64 average a[3] : b[3] = 6 : 49 = 28[/NO-PARSE]