Skip to content

Instantly share code, notes, and snippets.

@xenobrain
Last active September 19, 2024 13:35
Show Gist options
  • Save xenobrain/50082c93444a3a3382ae086245976842 to your computer and use it in GitHub Desktop.
Save xenobrain/50082c93444a3a3382ae086245976842 to your computer and use it in GitHub Desktop.
simd math, no templates
#ifndef ENGINE_TYPES_H
#define ENGINE_TYPES_H
#include <immintrin.h>
// Scalar square root of `s`, computed with the SSE `_mm_sqrt_ss` intrinsic.
// NOTE(review): this defines a global-namespace `sqrt(float)`; confirm it does
// not clash with <cmath>/<math.h> declarations in translation units that include both.
auto inline sqrt(float const s) -> float {
    __m128 const lane = _mm_set_ss(s);       // s in the low lane, upper lanes zeroed
    __m128 const root = _mm_sqrt_ss(lane);   // square root of the low lane only
    return _mm_cvtss_f32(root);              // extract the low lane as a float
}
// Reciprocal square root of `s`, computed with the SSE `_mm_rsqrt_ss` intrinsic.
// `_mm_rsqrt_ss` is a hardware approximation (see the Intel Intrinsics Guide for
// its error bound), so the result is close to, but generally not exactly, 1/sqrt(s).
auto inline rsqrt(float const s) -> float {
    __m128 const lane = _mm_set_ss(s);        // s in the low lane, upper lanes zeroed
    __m128 const approx = _mm_rsqrt_ss(lane); // approximate 1/sqrt of the low lane
    return _mm_cvtss_f32(approx);             // extract the low lane as a float
}
// Plain aggregate of two floats, stored in the order x, y.
struct vector2 {
    float x;
    float y;
};
// Plain aggregate of three floats, stored in the order x, y, z.
struct vector3 {
    float x;
    float y;
    float z;
};
// Plain aggregate of four floats, stored in the order x, y, z, w.
struct vector4 {
    float x;
    float y;
    float z;
    float w;
};
// 4x4 float matrix stored as sixteen consecutive floats.
// Fields are named <row><column> and declared one row after another
// (x row, then y, z, w). The matrix product in this file loads four floats
// starting at xx / yx / zx / wx, so this declaration order must not change.
struct matrix4 {
    float xx, xy, xz, xw;
    float yx, yy, yz, yw;
    float zx, zy, zz, zw;
    float wx, wy, wz, ww;
};
// Component-wise sum of two vector2 values.
auto static inline operator+(vector2 const& a, vector2 const& b) -> vector2 {
    vector2 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    return sum;
}
// Component-wise difference (a minus b) of two vector2 values.
auto static inline operator-(vector2 const& a, vector2 const& b) -> vector2 {
    vector2 difference;
    difference.x = a.x - b.x;
    difference.y = a.y - b.y;
    return difference;
}
// Component-wise (element-by-element) product of two vector2 values.
// This is not a dot or cross product; see dot() and cross() for those.
auto static inline operator*(vector2 const& a, vector2 const& b) -> vector2 {
    vector2 product;
    product.x = a.x * b.x;
    product.y = a.y * b.y;
    return product;
}
// Component-wise sum of two vector3 values.
auto static inline operator+(vector3 const& a, vector3 const& b) -> vector3 {
    vector3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}
// Component-wise difference (a minus b) of two vector3 values.
auto static inline operator-(vector3 const& a, vector3 const& b) -> vector3 {
    vector3 difference;
    difference.x = a.x - b.x;
    difference.y = a.y - b.y;
    difference.z = a.z - b.z;
    return difference;
}
// Component-wise (element-by-element) product of two vector3 values.
// This is not a dot or cross product; see dot() and cross() for those.
auto static inline operator*(vector3 const& a, vector3 const& b) -> vector3 {
    vector3 product;
    product.x = a.x * b.x;
    product.y = a.y * b.y;
    product.z = a.z * b.z;
    return product;
}
// Component-wise sum of two vector4 values.
auto static inline operator+(vector4 const& a, vector4 const& b) -> vector4 {
    vector4 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    sum.w = a.w + b.w;
    return sum;
}
// Component-wise difference (a minus b) of two vector4 values.
auto static inline operator-(vector4 const& a, vector4 const& b) -> vector4 {
    vector4 difference;
    difference.x = a.x - b.x;
    difference.y = a.y - b.y;
    difference.z = a.z - b.z;
    difference.w = a.w - b.w;
    return difference;
}
// Component-wise (element-by-element) product of two vector4 values.
// This is not a dot product; see dot() for that.
auto static inline operator*(vector4 const& a, vector4 const& b) -> vector4 {
    vector4 product;
    product.x = a.x * b.x;
    product.y = a.y * b.y;
    product.z = a.z * b.z;
    product.w = a.w * b.w;
    return product;
}
// Applies matrix `a` to the 3-component vector `b`.
// Each output component combines one column of field names with b:
//   out.x = a.xx*b.x + a.yx*b.y + a.zx*b.z + a.wx   (likewise for y and z).
// The wx/wy/wz terms are added unscaled, i.e. b is treated as if it had w == 1.
// The xw/yw/zw/ww fields of `a` are never read, so no fourth component is
// produced and no divide by w is performed.
// (The original definition ended with a stray ';' after the closing brace,
// an empty declaration that triggers extra-semicolon warnings; it is removed.)
auto static inline operator*(matrix4 const& a, vector3 const& b) -> vector3 {
    vector3 out;
    out.x = a.xx * b.x + a.yx * b.y + a.zx * b.z + a.wx;
    out.y = a.xy * b.x + a.yy * b.y + a.zy * b.z + a.wy;
    out.z = a.xz * b.x + a.yz * b.y + a.zz * b.z + a.wz;
    return out;
}
// Applies matrix `a` to the 4-component vector `b`.
// Each output component combines one column of field names with b:
//   out.x = a.xx*b.x + a.yx*b.y + a.zx*b.z + b.w*a.wx   (likewise for y, z, w).
// Note that operator*(matrix4, matrix4) in this file combines rows of its left
// operand with rows of its right operand, so (a * b) * v equals b * (a * v)
// under these two definitions, up to floating-point rounding.
auto static inline operator*(matrix4 const& a, vector4 const& b) -> vector4 {
    vector4 out;
    out.x = a.xx * b.x + a.yx * b.y + a.zx * b.z + b.w * a.wx;
    out.y = a.xy * b.x + a.yy * b.y + a.zy * b.z + b.w * a.wy;
    out.z = a.xz * b.x + a.yz * b.y + a.zz * b.z + b.w * a.wz;
    out.w = a.xw * b.x + a.yw * b.y + a.zw * b.z + b.w * a.ww;
    return out;
}
// 4x4 matrix product using SSE.
// Treating each group of four consecutive fields (xx..xw, yx..yw, ...) as a row,
// every row of the result is a linear combination of the rows of `b`, weighted
// by the four scalars of the corresponding row of `a`:
//   result.row_r = a.r_x * b.row_x + a.r_y * b.row_y + a.r_z * b.row_z + a.r_w * b.row_w
// Unaligned loads/stores are used, so neither operand needs 16-byte alignment.
auto static inline operator*(matrix4 const& a, matrix4 const& b) -> matrix4 {
    // Rows of b, loaded once and reused for every output row.
    __m128 const b_row_x = _mm_loadu_ps(&b.xx);
    __m128 const b_row_y = _mm_loadu_ps(&b.yx);
    __m128 const b_row_z = _mm_loadu_ps(&b.zx);
    __m128 const b_row_w = _mm_loadu_ps(&b.wx);

    // Builds one output row from the four scalars of a row of `a`.
    // The additions are grouped as (x + z) + (y + w).
    auto const combine = [&](float const cx, float const cy, float const cz, float const cw) -> __m128 {
        __m128 const tx = _mm_mul_ps(_mm_set1_ps(cx), b_row_x);
        __m128 const ty = _mm_mul_ps(_mm_set1_ps(cy), b_row_y);
        __m128 const tz = _mm_mul_ps(_mm_set1_ps(cz), b_row_z);
        __m128 const tw = _mm_mul_ps(_mm_set1_ps(cw), b_row_w);
        return _mm_add_ps(_mm_add_ps(tx, tz), _mm_add_ps(ty, tw));
    };

    matrix4 m{};
    _mm_storeu_ps(&m.xx, combine(a.xx, a.xy, a.xz, a.xw));
    _mm_storeu_ps(&m.yx, combine(a.yx, a.yy, a.yz, a.yw));
    _mm_storeu_ps(&m.zx, combine(a.zx, a.zy, a.zz, a.zw));
    _mm_storeu_ps(&m.wx, combine(a.wx, a.wy, a.wz, a.ww));
    return m;
}
// Dot product of two vector2 values: a.x*b.x + a.y*b.y.
auto static inline dot(vector2 const& a, vector2 const& b) -> float {
    float acc = a.x * b.x;
    acc += a.y * b.y;
    return acc;
}
// Dot product of two vector3 values: a.x*b.x + a.y*b.y + a.z*b.z.
auto static inline dot(vector3 const& a, vector3 const& b) -> float {
    float acc = a.x * b.x;
    acc += a.y * b.y;
    acc += a.z * b.z;
    return acc;
}
// Dot product of two vector4 values: a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w.
auto static inline dot(vector4 const& a, vector4 const& b) -> float {
    float acc = a.x * b.x;
    acc += a.y * b.y;
    acc += a.z * b.z;
    acc += a.w * b.w;
    return acc;
}
// Scalar 2D cross product: a.x*b.y - a.y*b.x.
auto static inline cross(vector2 const& a, vector2 const& b) -> float {
    float const forward = a.x * b.y;
    float const backward = a.y * b.x;
    return forward - backward;
}
// 3D cross product of a and b, returned as a new vector3.
auto static inline cross(vector3 const& a, vector3 const& b) -> vector3 {
    vector3 out;
    out.x = a.y * b.z - a.z * b.y;
    out.y = a.z * b.x - a.x * b.z;
    out.z = a.x * b.y - a.y * b.x;
    return out;
}
// Scales `a` by rsqrt(dot(a, a)), i.e. by the reciprocal of its length.
// NOTE(review): rsqrt() here is built on an approximate intrinsic, so the
// result is only approximately unit length; a zero-length input presumably
// yields non-finite components — confirm callers never pass one.
auto static inline normalize(vector2 const& a) -> vector2 {
    float const inv_length = rsqrt(dot(a, a));
    vector2 unit;
    unit.x = a.x * inv_length;
    unit.y = a.y * inv_length;
    return unit;
}
// Scales `a` by rsqrt(dot(a, a)), i.e. by the reciprocal of its length.
// NOTE(review): rsqrt() here is built on an approximate intrinsic, so the
// result is only approximately unit length; a zero-length input presumably
// yields non-finite components — confirm callers never pass one.
auto static inline normalize(vector3 const& a) -> vector3 {
    float const inv_length = rsqrt(dot(a, a));
    vector3 unit;
    unit.x = a.x * inv_length;
    unit.y = a.y * inv_length;
    unit.z = a.z * inv_length;
    return unit;
}
// Scales `a` by rsqrt(dot(a, a)), i.e. by the reciprocal of its length.
// NOTE(review): rsqrt() here is built on an approximate intrinsic, so the
// result is only approximately unit length; a zero-length input presumably
// yields non-finite components — confirm callers never pass one.
auto static inline normalize(vector4 const& a) -> vector4 {
    float const inv_length = rsqrt(dot(a, a));
    vector4 unit;
    unit.x = a.x * inv_length;
    unit.y = a.y * inv_length;
    unit.z = a.z * inv_length;
    unit.w = a.w * inv_length;
    return unit;
}
// Plain aggregate of four floats, stored in the order r, g, b, a.
// NOTE(review): presumably red/green/blue/alpha channels; value range is not
// established anywhere in this header — confirm against callers.
struct color {
    float r;
    float g;
    float b;
    float a;
};
// Plain aggregate of four floats, stored in the order x, y, w, h.
// NOTE(review): presumably a position (x, y) plus width/height (w, h); which
// corner (x, y) refers to is not established in this header — confirm.
struct rectangle {
    float x;
    float y;
    float w;
    float h;
};
// Aggregate of three vector3 values, stored in the order position, rotation, scale.
// NOTE(review): the rotation representation (e.g. Euler angles, and in which
// units/order) is not established in this header — confirm against callers.
struct transform {
    vector3 position;
    vector3 rotation;
    vector3 scale;
};
#endif // ENGINE_TYPES_H
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment