Last active
November 29, 2018 15:48
-
-
Save momchil-velikov/faed1b0c56e88447a26db8b4078f4d63 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h> | |
/* Permute the four 32-bit lanes of the __m128 value R with
   _mm_shuffle_epi32, selecting lanes according to the immediate I
   (a compile-time constant, e.g. _MM_SHUFFLE (...)).

   The float vector is reinterpreted as an integer vector and back with the
   cast intrinsics, which only relabel the bit pattern.  Unlike the
   functional-style vector casts `__m128 (...)` / `__m128i (...)`, which are
   accepted only by GNU C++, this form is valid in both C and C++.  */
#define _mm_pshufd(r, i) \
  _mm_castsi128_ps (_mm_shuffle_epi32 (_mm_castps_si128 (r), (i)))
/* Nehalem/Westmere/SandyBridge/IvyBridge insn timings. */
__m128 | |
qmul (__m128 abcd, __m128 xyzw) | |
{ | |
__m128 t0 = _mm_pshufd (abcd, _MM_SHUFFLE (3, 3, 3, 3)); /* 1, 0.5 */ | |
__m128 t1 = _mm_pshufd (xyzw, _MM_SHUFFLE (2, 3, 0, 1)); /* 1, 0.5 */ | |
__m128 t3 = _mm_pshufd (abcd, _MM_SHUFFLE (0, 0, 0, 0)); /* 1, 0.5 */ | |
__m128 t4 = _mm_pshufd (xyzw, _MM_SHUFFLE (1, 0, 3, 2)); /* 1, 0.5 */ | |
__m128 t5 = _mm_pshufd (abcd, _MM_SHUFFLE (1, 1, 1, 1)); /* 1, 0.5 */ | |
__m128 t6 = _mm_pshufd (xyzw, _MM_SHUFFLE (2, 0, 3, 1)); /* 1, 0.5 */ | |
/* [d,d,d,d]*[z,w,x,y] = [dz,dw,dx,dy] */ | |
__m128 m0 = _mm_mul_ps (t0, t1); /* 5/4, 1 */ | |
/* [a,a,a,a]*[y,x,w,z] = [ay,ax,aw,az]*/ | |
__m128 m1 = _mm_mul_ps (t3, t4); /* 5/4, 1 */ | |
/* [b,b,b,b]*[z,x,w,y] = [bz,bx,bw,by]*/ | |
__m128 m2 = _mm_mul_ps (t5, t6); /* 5/4, 1 */ | |
/* [c,c,c,c]*[w,z,x,y] = [cw,cz,cx,cy] */ | |
__m128 t7 = _mm_pshufd (abcd, _MM_SHUFFLE (2, 2, 2, 2)); /* 1, 0.5 */ | |
__m128 t8 = _mm_pshufd (xyzw, _MM_SHUFFLE (3, 2, 0, 1)); /* 1, 0.5 */ | |
__m128 m3 = _mm_mul_ps (t7, t8); /* 5/4, 1 */ | |
/* 1 */ | |
/* [dz,dw,dx,dy]+-[ay,ax,aw,az] = [dz+ay,dw-ax,dx+aw,dy-az] */ | |
__m128 e = _mm_addsub_ps (m0, m1); /* 3, 1 */ | |
/* 2 */ | |
/* [dx+aw,dz+ay,dy-az,dw-ax] */ | |
e = _mm_pshufd (e, _MM_SHUFFLE (1, 3, 0, 2)); /* 1, 0.5 */ | |
/* [dx+aw,dz+ay,dy-az,dw-ax]+-[bz,bx,bw,by] = [dx+aw+bz,dz+ay-bx,dy-az+bw,dw-ax-by]*/ | |
e = _mm_addsub_ps (e, m2); /* 3, 1 */ | |
/* 2 */ | |
/* [dz+ay-bx,dw-ax-by,dy-az+bw,dx+aw+bz] */ | |
e = _mm_pshufd (e, _MM_SHUFFLE (2, 0, 1, 3)); /* 1, 0.5 */ | |
/* [dz+ay-bx,dw-ax-by,dy-az+bw,dx+aw+bz]+-[cw,cz,cx,cy] | |
= [dz+ay-bx+cw,dw-ax-by-cz,dy-az+bw+cx,dx+aw+bz-cy] */ | |
e = _mm_addsub_ps (e, m3); /* 3, 1 */ | |
/* 2 */ | |
/* [dw-ax-by-cz,dz+ay-bx+cw,dy-az+bw+cx,dx+aw+bz-cy] */ | |
e = _mm_pshufd (e, _MM_SHUFFLE (2, 3, 1, 0)); /* 1, 0.5 */ | |
return e; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment