Skip to content

Instantly share code, notes, and snippets.

@attilaz
Last active February 10, 2020 22:16
Show Gist options
  • Save attilaz/eb629b22346d2d2e3652b965887da215 to your computer and use it in GitHub Desktop.
Save attilaz/eb629b22346d2d2e3652b965887da215 to your computer and use it in GitHub Desktop.
simd function name changes
# For the shuffles: should the name somehow show that the data is handled as 32-bit x 4,
# in case we later want shuffles that operate on 16-bit and 8-bit values?
Ty simd_shuf_xyAB(Ty _a, Ty _b); -> simd_shuf_v32x4_xyAB ???
Ty simd_shuf_ABxy(Ty _a, Ty _b);
Ty simd_shuf_CDzw(Ty _a, Ty _b);
Ty simd_shuf_zwCD(Ty _a, Ty _b);
Ty simd_shuf_xAyB(Ty _a, Ty _b);
Ty simd_shuf_AxBy(Ty _a, Ty _b);
Ty simd_shuf_zCwD(Ty _a, Ty _b);
Ty simd_shuf_CzDw(Ty _a, Ty _b);
float simd_x(Ty _a); -> simd_f32_x
float simd_y(Ty _a); -> simd_f32_y
float simd_z(Ty _a); -> simd_f32_z
float simd_w(Ty _a); -> simd_f32_w
Ty simd_ld(const void* _ptr); #no change
void simd_st(void* _ptr, Ty _a); #no change
void simd_stx(void* _ptr, Ty _a); #no change
void simd_stream(void* _ptr, Ty _a); #no change
Ty simd_ld(float _x, float _y, float _z, float _w); -> simd_f32_ld
Ty simd_ld(float _x, float _y, float _z, float _w, float _a, float _b, float _c, float _d); -> simd_f32_ld
Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w); -> simd_i32_ld or simd_u32_ld ???
Ty simd_ild(uint32_t _x, uint32_t _y, uint32_t _z, uint32_t _w,
uint32_t _a, uint32_t _b, uint32_t _c, uint32_t _d); -> simd_i32_ld or simd_u32_ld ???
Ty simd_splat(const void* _ptr); #no change
Ty simd_splat(float _a); -> simd_f32_splat
Ty simd_isplat(uint32_t _a); -> simd_i32_splat or simd_u32_splat ???
Ty simd_zero(); #no change
Ty simd_itof(Ty _a); -> simd_i32_to_f32 ???
Ty simd_ftoi(Ty _a); -> simd_f32_to_i32 ??
Ty simd_round(Ty _a); -> simd_f32_round
Ty simd_add(Ty _a, Ty _b); -> simd_f32_add
Ty simd_sub(Ty _a, Ty _b); -> simd_f32_sub
Ty simd_mul(Ty _a, Ty _b); -> simd_f32_mul
Ty simd_div(Ty _a, Ty _b); -> simd_f32_div
Ty simd_rcp_est(Ty _a); -> simd_f32_rcp_est
Ty simd_sqrt(Ty _a); -> simd_f32_sqrt
Ty simd_rsqrt_est(Ty _a); -> simd_f32_rsqrt_est
Ty simd_dot3(Ty _a, Ty _b); -> simd_f32_dot3
Ty simd_dot(Ty _a, Ty _b); -> simd_f32_dot
Ty simd_cmpeq(Ty _a, Ty _b); -> simd_f32_cmpeq
Ty simd_cmplt(Ty _a, Ty _b); -> simd_f32_cmplt
Ty simd_cmple(Ty _a, Ty _b); -> simd_f32_cmple
Ty simd_cmpgt(Ty _a, Ty _b); -> simd_f32_cmpgt
Ty simd_cmpge(Ty _a, Ty _b); -> simd_f32_cmpge
Ty simd_min(Ty _a, Ty _b); -> simd_f32_min
Ty simd_max(Ty _a, Ty _b); -> simd_f32_max
Ty simd_and(Ty _a, Ty _b); # no change
Ty simd_andc(Ty _a, Ty _b); # no change
Ty simd_or(Ty _a, Ty _b); # no change
Ty simd_xor(Ty _a, Ty _b); # no change
Ty simd_sll(Ty _a, int _count); -> simd_i32_sll or simd_u32_sll ???
Ty simd_srl(Ty _a, int _count); -> simd_i32_srl or simd_u32_srl ???
Ty simd_sra(Ty _a, int _count); -> simd_i32_sra or simd_u32_sra ???
Ty simd_icmpeq(Ty _a, Ty _b); -> simd_i32_cmpeq
Ty simd_icmplt(Ty _a, Ty _b); -> simd_i32_cmplt
Ty simd_icmpgt(Ty _a, Ty _b); -> simd_i32_cmpgt
Ty simd_imin(Ty _a, Ty _b); -> simd_i32_min
Ty simd_imax(Ty _a, Ty _b); -> simd_i32_max
Ty simd_iadd(Ty _a, Ty _b); -> simd_i32_add
Ty simd_isub(Ty _a, Ty _b); -> simd_i32_sub
Ty simd_shuf_xAzC(Ty _a, Ty _b); -> ??? #no change or simd_shuf_v32x4_xAzC
Ty simd_shuf_yBwD(Ty _a, Ty _b);
Ty simd_rcp(Ty _a); -> simd_f32_rcp
Ty simd_orx(Ty _a); #no change ???
Ty simd_orc(Ty _a, Ty _b); #no change ???
Ty simd_neg(Ty _a); -> simd_f32_neg
Ty simd_madd(Ty _a, Ty _b, Ty _c); -> simd_f32_madd
Ty simd_nmsub(Ty _a, Ty _b, Ty _c); -> simd_f32_nmsub
Ty simd_div_nr(Ty _a, Ty _b); -> simd_f32_div_nr
Ty simd_selb(Ty _mask, Ty _a, Ty _b); #no change or simd_i32_selb
Ty simd_sels(Ty _test, Ty _a, Ty _b); #no change or simd_i32_sels
Ty simd_not(Ty _a); #no change
Ty simd_abs(Ty _a); -> simd_f32_abs
Ty simd_clamp(Ty _a, Ty _min, Ty _max); -> simd_f32_clamp
Ty simd_lerp(Ty _a, Ty _b, Ty _s); -> simd_f32_lerp
Ty simd_rsqrt(Ty _a); -> simd_f32_rsqrt
Ty simd_rsqrt_nr(Ty _a); -> simd_f32_rsqrt_nr
Ty simd_rsqrt_carmack(Ty _a); -> simd_f32_rsqrt_carmack
Ty simd_sqrt_nr(Ty _a); -> simd_f32_sqrt_nr
Ty simd_log2(Ty _a); -> simd_f32_log2
Ty simd_exp2(Ty _a); -> simd_f32_exp2
Ty simd_pow(Ty _a, Ty _b); -> simd_f32_pow
Ty simd_cross3(Ty _a, Ty _b); -> simd_f32_cross3
Ty simd_normalize3(Ty _a); -> simd_f32_normalize3
Ty simd_ceil(Ty _a); -> simd_f32_ceil
Ty simd_floor(Ty _a); -> simd_f32_floor
bool simd_test_any_ni(Ty _a); # no change
bool simd_test_all_ni(Ty _a); # no change
extra functions that nudge has:
sse : _mm_movemask_ps (highest bits of 32 bit components to int 4bit bitmask)
bx: simd_i32_mask or simd_i32_test_mask ??? (assumes that the input is the result of a compare)
bx: simd_i32_sign_mask ???
sse: _mm_castps_si128
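sse: _mm_movemask_epi8 (presumably -- the description below matches this intrinsic; used together with the cast above)
// Create mask from the most significant bit of each 8-bit element in a, and store the result in dst.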
bx: simd_i8_mask
sse: _mm_packs_epi32 (convert 2 4xint32 -> 8xint16 with signed saturation)
bx: simd_pack_i32_to_i16 ???
sse: _mm_packs_epi16 (convert 2 8xint16 -> 16xint8 with signed saturation)
bx: simd_pack_i16_to_i8 ??
sse: _mm_unpacklo_epi16
neon: vzip1q_s16
Unpack and interleave 16 bit integers from the low half of a and b, and store the results in dst.
bx: simd_?????
simd_i16_add
simd_i16_cmpeq
simd_i16_srl
I guess add/sub, cmpXXX and min/max should be added for i16 and i8 (and maybe u32/u16/u8 ?),
and shifts for i16 and i8 as well.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment