B(2, 4)
/**
* @file b24.c
*
* Compiling with MSVC 19.41 or later:
*
* cl.exe /TC /std:clatest /O2 /Oi /GL /GF /Zo- /favor:AMD64 /arch:AVX2 b24.c /Fe: b24.exe /MD
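*
* A roughly equivalent GCC/Clang command (an untested assumption, not part of
* the original gist; requires a compiler with C23 _BitInt support) might be:
*
*   cc -std=gnu2x -O2 -march=x86-64-v3 b24.c -o b24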
*/
#include <errno.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "neuron.h"
#include "popcnt.h"
#include "vec.h"
#if defined(__INTELLISENSE__) || defined(_MSC_VER)
# define BITINT_C(n) n
# define _BitInt(n) int
#else
# define BITINT_C(n) n##WB
#endif
#ifdef _MSC_VER
# pragma intrinsic(_BitScanForward)
static const_inline int __builtin_ctzl(unsigned long x) {
unsigned long y;
return _BitScanForward(&y, x) ? (int)y : (int)sizeof(x) * CHAR_BIT;
}
#endif
pragma_msvc(warning(push))
pragma_msvc(warning(disable: 4710))
pragma_msvc(warning(disable: 4711))
static const_inline char
hexdig (unsigned _BitInt(4) n)
{
return (const char[16]){
'0','1','2','3','4','5','6','7',
'8','9','a','b','c','d','e','f',
}[n];
}
static inline char *
decstr_u5 (char *dst,
unsigned _BitInt(5) src,
bool aln)
{
if (src >= BITINT_C(10U)) {
*dst++ = (char)((unsigned char)'0' + src / BITINT_C(10U));
src %= BITINT_C(10U);
} else if (aln)
*dst++ = ' ';
*dst++ = (char)((unsigned char)'0' + src);
return dst;
}
#if 0
static force_inline char *
decstr_s5 (char *dst,
unsigned _BitInt(5) src,
bool pad)
{
const typeof(src) lt0 = src >> 4;
src = (src - lt0) ^ ((BITINT_C(31U) ^ BITINT_C(31U)) - lt0);
if (lt0) {
if (src >= BITINT_C(10U)) {
*dst++ = '-';
*dst++ = '1';
src -= BITINT_C(10U);
} else {
if (pad)
*dst++ = ' ';
*dst++ = '-';
}
} else {
if (src >= BITINT_C(10U)) {
if (pad)
*dst++ = ' ';
*dst++ = '1';
src -= BITINT_C(10U);
} else if (pad) {
*dst++ = ' ';
*dst++ = ' ';
}
}
*dst++ = (char)((unsigned char)'0' + src);
return dst;
}
#endif
static force_inline char *
decstr_u4 (char *dst,
unsigned _BitInt(4) src,
bool aln)
{
if (src >= BITINT_C(10U)) {
*dst++ = '1';
src -= BITINT_C(10U);
} else if (aln)
*dst++ = ' ';
*dst++ = (char)((unsigned char)'0' + src);
return dst;
}
static force_inline char *
decstr_s4 (char *dst,
unsigned _BitInt(4) src,
bool aln)
{
if (src >= BITINT_C(8U)) {
*dst++ = '-';
*dst++ = (char)((unsigned char)'8' +
(unsigned char)8 - src);
} else {
if (aln)
*dst++ = ' ';
*dst++ = (char)((unsigned char)'0' + src);
}
return dst;
}
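/* Sixteen cyclic de Bruijn sequences B(2, 4), one 16-bit word each: the
 * windows (seq >> i) & 15 for i = 0..15 (with wraparound) visit every 4-bit
 * value exactly once. For 0x9af, e.g., the windows are
 * 15,7,11,5,10,13,6,3,9,4,2,1,0,8,12,14. The same table appears in cortex.h
 * as de_bruijn_seq_2_4[].
 */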
static const uint16_t b24[] = {
0x9afU, 0x9ebU, 0xa6fU, 0xa7bU,
0xb3dU, 0xb4fU, 0xbcdU, 0xbd3U,
0xcbdU, 0xd2fU, 0xd79U, 0xde5U,
0xf2dU, 0xf4bU, 0xf59U, 0xf65U,
};
pragma_msvc(warning(push))
pragma_msvc(warning(disable: 4200))
pragma_msvc(warning(disable: 4820))
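/* Parsed command line: have_opt is a bitmask indexed by enum b24_opt,
 * neuron/rotation/offset hold option arguments, and seq[] is a flexible
 * array of n_seq input sequences.
 */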
struct b24_cfg {
uint32_t have_opt;
uint16_t rotation;
uint16_t offset;
uint64_t neuron;
size_t n_seq;
uint16_t seq[];
};
pragma_msvc(warning(pop))
typedef typeof((struct b24_cfg){0}.have_opt) b24_cfg_flags;
typedef typeof((struct b24_cfg){0}.offset) b24_cfg_offset;
struct trace {
uint64_t cyc;
uint32_t seq;
uint16_t len;
uint16_t map;
};
const_function
static struct trace
b24_trace_init (struct b24_cfg const *const cfg,
unsigned i)
{
uint32_t seq = cfg->seq[i] | ((uint32_t)cfg->seq[i] << 16U);
return (struct trace){
.cyc = 0U,
.seq = cfg->rotation ? (seq >> (16 - cfg->rotation))
| (seq << cfg->rotation) : seq,
.len = 0U,
.map = 0U,
};
}
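/* Follow the successor chain starting at nibble offset `off`: each step reads
 * the 4-bit window of the (doubled) sequence at the current offset and jumps
 * there, packing the visited offsets into cyc (one nibble per step) and
 * marking them in map, until an offset repeats.
 */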
const_function
static struct trace
b24_trace_from (struct trace tc,
b24_cfg_offset off)
{
tc.cyc = off;
tc.len = 1U;
tc.map = 1U << off;
for (;;) {
off = (tc.seq >> off) & (b24_cfg_offset)15;
typeof(tc.map) bit = (typeof(bit))1 << off;
if (tc.map & bit)
break;
tc.map |= bit;
tc.cyc |= (typeof(tc.cyc))off
<< (tc.len++ << 2U);
}
return tc;
}
const_function
static struct trace
b24_trace_next (struct trace tc)
{
if (tc.map == (typeof(tc.map))0xffff)
return (struct trace){0};
b24_cfg_offset off = 0U;
uint_fast16_t map = tc.map;
while (map & (typeof(map))1) {
map >>= 1U;
++off;
}
return b24_trace_from(tc, off);
}
enum b24_opt {
/* The std_in option ('-') is 0 to guarantee all
* argument-accepting options stored in `expect`
* evaluate to true during argument parsing.
*/
b24_opt_std_in , // - (unused for now)
b24_opt_big_hex , // -b
b24_opt_cycles , // -c
b24_opt_debruijn, // -d
b24_opt_entries , // -e
b24_opt_graphviz, // -g
b24_opt_help , // -h
b24_opt_json , // -j
b24_opt_neuron , // -n
b24_opt_one_path, // -o
b24_opt_python , // -p
b24_opt_expand , // -q
b24_opt_rotation, // -r
b24_opt_sig_path, // -s
b24_opt_no_space, // -w
b24_opt_hex_path, // -x
b24_opt_no_align, // -y
b24_opt_no_zeros, // -z
// qualifier flags
ONLY_ONCE = 0x40U,
NEEDS_ARG = 0x80U,
};
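/* Map an option name to its command-line character at compile time: the
 * controlling expression has type int(*)[1U + b24_opt_x], which is distinct
 * for every enumerator, so _Generic selects the matching character.
 */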
#define b24_opt_char(x) \
_Generic(&(int[1U+b24_opt_##x]){0} \
, int(*)[1U+b24_opt_std_in ]: 0 \
, int(*)[1U+b24_opt_big_hex ]: 'b' \
, int(*)[1U+b24_opt_cycles ]: 'c' \
, int(*)[1U+b24_opt_debruijn]: 'd' \
, int(*)[1U+b24_opt_entries ]: 'e' \
, int(*)[1U+b24_opt_graphviz]: 'g' \
, int(*)[1U+b24_opt_help ]: 'h' \
, int(*)[1U+b24_opt_json ]: 'j' \
, int(*)[1U+b24_opt_neuron ]: 'n' \
, int(*)[1U+b24_opt_one_path]: 'o' \
, int(*)[1U+b24_opt_python ]: 'p' \
, int(*)[1U+b24_opt_expand ]: 'q' \
, int(*)[1U+b24_opt_rotation]: 'r' \
, int(*)[1U+b24_opt_sig_path]: 's' \
, int(*)[1U+b24_opt_no_space]: 'w' \
, int(*)[1U+b24_opt_hex_path]: 'x' \
, int(*)[1U+b24_opt_no_align]: 'y' \
, int(*)[1U+b24_opt_no_zeros]: 'z' )
static const char b24_opt_to_char[] = {
#define b24_option(x) [b24_opt_##x] = b24_opt_char(x)
b24_option(std_in ),
b24_option(big_hex ),
b24_option(cycles ),
b24_option(debruijn),
b24_option(entries ),
b24_option(graphviz),
b24_option(help ),
b24_option(json ),
b24_option(neuron ),
b24_option(one_path),
b24_option(python ),
b24_option(expand ),
b24_option(rotation),
b24_option(sig_path),
b24_option(no_space),
b24_option(hex_path),
b24_option(no_align),
b24_option(no_zeros),
#undef b24_option
};
static const unsigned char
b24_char_to_opt[1U << CHAR_BIT] = {
#define b24_option(x) [b24_opt_char(x)] = b24_opt_##x
b24_option(std_in )| ONLY_ONCE,
b24_option(big_hex )| ONLY_ONCE,
b24_option(cycles )| ONLY_ONCE,
b24_option(debruijn)| NEEDS_ARG,
b24_option(entries )| ONLY_ONCE,
b24_option(graphviz)| ONLY_ONCE,
b24_option(help )| ONLY_ONCE,
b24_option(json )| ONLY_ONCE,
b24_option(neuron )| ONLY_ONCE
| NEEDS_ARG,
b24_option(one_path)| ONLY_ONCE
| NEEDS_ARG,
b24_option(python )| ONLY_ONCE,
b24_option(expand )| ONLY_ONCE,
b24_option(rotation)| ONLY_ONCE
| NEEDS_ARG,
b24_option(sig_path)| ONLY_ONCE,
b24_option(no_space)| ONLY_ONCE,
b24_option(hex_path)| ONLY_ONCE,
b24_option(no_align)| ONLY_ONCE,
b24_option(no_zeros)| ONLY_ONCE,
#undef b24_option
};
/**
* @brief Check if option `opt` is present.
*/
static pure_inline bool
b24_has_option (struct b24_cfg const *const cfg,
const enum b24_opt opt)
{
return cfg->have_opt & ((b24_cfg_flags)1 << opt);
}
/**
* @brief Mark option `opt` as present.
*/
static force_inline void
b24_option_add (struct b24_cfg *const cfg,
const enum b24_opt opt)
{
cfg->have_opt |= (b24_cfg_flags)1 << opt;
}
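/* Output punctuation: opening and closing bracket, element separator, and the
 * character emitted after a separator (0 to omit it).
 */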
struct b24_syntax {
char bra;
char ket;
char sep;
char spc;
};
static force_inline char *
hexstr_u4 (char *dst,
unsigned _BitInt(4) src)
{
*dst++ = '0';
*dst++ = 'x';
*dst++ = hexdig(src);
return dst;
}
static inline char *
hexstr_u16 (char *dst,
uint16_t src,
bool pad)
{
*dst++ = '0';
*dst++ = 'x';
if (pad) {
*dst++ = hexdig(src >> 12U );
*dst++ = hexdig(src >> 8U & 15U);
*dst++ = hexdig(src >> 4U & 15U);
} else if (src > UINT16_C(0x000f)) {
if (src > UINT16_C(0x00ff)) {
if (src > UINT16_C(0x0fff))
*dst++ = hexdig(src >> 12U);
*dst++ = hexdig(src >> 8U & 15U);
}
*dst++ = hexdig(src >> 4U & 15U);
}
*dst++ = hexdig(src & 15U);
return dst;
}
static force_inline char *
hexstr_16x4 (char *dst,
uint64_t src,
unsigned len,
struct b24_syntax syn)
{
if (len) {
for (;; src >>= 4) {
dst = hexstr_u4(dst, src & 15U);
if (!--len)
break;
*dst++ = syn.sep;
*dst = syn.spc;
dst+= !!syn.spc;
}
}
return dst;
}
static char *
hexstr_u64 (char *dst,
uint64_t src,
bool pad)
{
char buf[16] = "000000000000000";
char *p = &buf[15];
for (;; --p) {
*p = hexdig(src & 15U);
src >>= 4U;
if (!src)
break;
}
if (pad)
p = &buf[0];
*dst++ = '0';
*dst++ = 'x';
for (;; ++p) {
*dst++ = *p;
if (p == &buf[15])
break;
}
return dst;
}
static force_inline char *
decstr_16x4 (char *dst,
uint64_t src,
unsigned len,
struct b24_syntax syn,
bool aln,
bool sig)
{
if (len) {
bool w = syn.spc;
if (sig) {
for (;; src >>= 4) {
dst = decstr_s4(dst, src & 15U, aln);
if (!--len)
break;
*dst++ = syn.sep;
*dst = syn.spc;
dst += w;
}
} else {
for (;; src >>= 4) {
dst = decstr_u4(dst, src & 15U, aln);
if (!--len)
break;
*dst++ = syn.sep;
*dst = syn.spc;
dst += w;
}
}
}
return dst;
}
static char *
b24_cfg_trace_print (struct b24_cfg const *cfg,
char *dst,
struct trace src,
unsigned idx,
struct b24_syntax syn)
{
bool align = !b24_has_option(cfg, b24_opt_no_align);
bool zeros = !b24_has_option(cfg, b24_opt_no_zeros);
bool expand = b24_has_option(cfg, b24_opt_expand);
bool all = !b24_has_option(cfg, b24_opt_one_path) &&
!expand;
if (all) {
*dst++ = syn.bra;
dst = hexstr_u16(dst, (uint16_t)src.seq, zeros);
*dst++ = syn.sep;
*dst = syn.spc;
dst+= !!syn.spc;
dst = decstr_u5(dst, idx, align);
*dst++ = syn.sep;
*dst = syn.spc;
dst+= !!syn.spc;
dst = hexstr_u16(dst, src.map, zeros);
*dst++ = syn.sep;
*dst = syn.spc;
dst+= !!syn.spc;
dst = decstr_u5(dst, src.len, align);
*dst++ = syn.sep;
*dst = syn.spc;
dst+= !!syn.spc;
}
if (b24_has_option(cfg, b24_opt_big_hex)) {
dst = hexstr_u64(dst, src.cyc, zeros);
} else {
*dst++ = syn.bra;
dst = b24_has_option(cfg, b24_opt_hex_path)
? hexstr_16x4(dst, src.cyc, src.len, syn)
: decstr_16x4(dst, src.cyc, src.len, syn, align,
b24_has_option(cfg, b24_opt_sig_path));
*dst++ = syn.ket;
}
if (all)
*dst++ = syn.ket;
*dst = '\0';
return dst;
}
static struct trace
b24_expand (struct trace tc)
{
struct neuron nrn = neuron(tc.seq, (vec128){
.u8 = {
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0xa7,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
}
});
putchar('\n');
for (;;) {
#ifndef __aarch64__
__m128i simd = neuron_tick(&nrn, _mm_set_epi64x(0, 0));
vec128 x = (vec128){
.u64 = {
[0] = _mm_extract_epi64(simd, 0),
[1] = _mm_extract_epi64(simd, 1),
}
};
#else
uint64x2_t simd = vreinterpretq_u64_u8(
neuron_tick(&nrn, vdupq_n_u8(0))
);
vec128 x;
vst1q_lane_u64(&x.u64[0], simd, 0);
vst1q_lane_u64(&x.u64[1], simd, 1);
#endif // __aarch64__
vec128 y = (vec128){
.u4x16[0] = {
x.u4x2[ 0].x, x.u4x2[ 1].x,
x.u4x2[ 2].x, x.u4x2[ 3].x,
x.u4x2[ 4].x, x.u4x2[ 5].x,
x.u4x2[ 6].x, x.u4x2[ 7].x,
x.u4x2[ 8].x, x.u4x2[ 9].x,
x.u4x2[10].x, x.u4x2[11].x,
x.u4x2[12].x, x.u4x2[13].x,
x.u4x2[14].x, x.u4x2[15].x,
},
.u4x16[1] = {
x.u4x2[ 0].y, x.u4x2[ 1].y,
x.u4x2[ 2].y, x.u4x2[ 3].y,
x.u4x2[ 4].y, x.u4x2[ 5].y,
x.u4x2[ 6].y, x.u4x2[ 7].y,
x.u4x2[ 8].y, x.u4x2[ 9].y,
x.u4x2[10].y, x.u4x2[11].y,
x.u4x2[12].y, x.u4x2[13].y,
x.u4x2[14].y, x.u4x2[15].y,
}
};
printf("%016" PRIx64 " %016" PRIx64 "\n", y.u64[1], y.u64[0]);
if (!y.u64[0])
break;
}
tc.cyc = seq_expand((uint16_t)tc.seq);
tc.len = 16U;
tc.map = 0xffffU;
return tc;
}
static void
b24_cfg_trace (struct b24_cfg const *cfg)
{
struct b24_syntax syn = (struct b24_syntax[]){
{'{', '}', ',', ' '},
{'[', ']', ',', ' '}, // json or python
{'{', '}', ',', 0x0}, // no space
{'[', ']', ',', 0x0}, // js/py, no space
}[b24_has_option(cfg, b24_opt_json) |
b24_has_option(cfg, b24_opt_python) |
(b24_has_option(cfg, b24_opt_no_space) << 1U)];
bool expand = b24_has_option(cfg, b24_opt_expand);
bool once = b24_has_option(cfg, b24_opt_one_path);
bool all = !once && !expand;
bool please = false;
for (unsigned i = 0; i < cfg->n_seq;) {
struct trace tc = b24_trace_init(cfg, i);
unsigned cycles = 0;
for (;; ++cycles) {
struct trace t;
if (once) {
t = b24_trace_from(tc, cfg->offset);
} else if (expand) {
t = b24_expand(tc);
} else {
t = b24_trace_next(tc);
if (!t.map)
break;
tc.cyc |= t.cyc << (tc.len << 2U);
tc.len += t.len;
tc.map |= t.map;
}
char buf[160];
char *p = b24_cfg_trace_print(cfg, buf, t,
cycles, syn);
if (p) {
if (please) {
(void)putchar(syn.sep);
(void)putchar('\n');
}
(void)fputs(buf, stdout);
please = true;
}
if (!all)
break;
}
if (++i == cfg->n_seq && please)
(void)putchar('\n');
}
}
static void
b24_help (void)
{
(void)printf(
"Usage: b24 [OPTIONS]... [<0..65535>]...\n"
"Options:\n"
" -%c Print this help text and exit\n"
" -%c <state> Trace a neuron's state evolution\n"
" -%c <0..15> Trace a single path at offset\n"
" -%c Expand subsequences in place\n"
"\n"
"Input options:\n"
" -%c <0..15> Add distinct B(2, 4) as input\n"
" -%c <0..15> Apply rotation before tracing\n"
"\n"
"Output options:\n"
" -%c Only cycles, no entry paths\n"
" -%c Always find all entry paths\n"
"\n"
"Formatting options:\n"
" -%c Output in Graphviz format\n"
" -%c Output in JSON format\n"
" -%c Output in Python format\n"
" -%c Signed decimal path steps\n"
" -%c Hexadecimal path steps\n"
" -%c Print every sequence as one big hex number\n"
" -%c Don't align output columns\n"
" -%c Don't zero-pad hexadecimals\n"
" -%c Omit non-separator whitespace\n",
b24_opt_char(help), b24_opt_char(neuron),
b24_opt_char(one_path), b24_opt_char(expand),
b24_opt_char(debruijn), b24_opt_char(rotation),
b24_opt_char(cycles), b24_opt_char(entries),
b24_opt_char(graphviz), b24_opt_char(json),
b24_opt_char(python), b24_opt_char(sig_path),
b24_opt_char(hex_path), b24_opt_char(big_hex),
b24_opt_char(no_align), b24_opt_char(no_zeros),
b24_opt_char(no_space)
);
}
struct str7 {
union {
char d[8U];
uint64_t u;
};
};
const_inline
static struct str7
b24_opt_char_str (char ch)
{
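/* Quoted option name for diagnostics, e.g. "-c" including the double quotes;
 * when ch is '\0' (the bare '-' option) the closing quote moves one byte
 * left, yielding "-".
 */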
struct str7 str = {{{'"', '-', ch}}};
str.d[3U - !ch] = '"';
return str;
}
static void
b24_cfg_destroy (struct b24_cfg **cfg)
{
if (cfg && *cfg) {
free(*cfg);
*cfg = NULL;
}
}
_Noreturn static void
b24_helpful_exit (struct b24_cfg **cfg,
int ret)
{
b24_cfg_destroy(cfg);
b24_help();
exit(ret);
}
static struct b24_cfg *
b24_cfg_create (size_t n_seq)
{
struct b24_cfg *cfg = calloc(1, offsetof(struct b24_cfg, seq)
+ n_seq * sizeof cfg->seq[0]);
if (!cfg)
perror("calloc");
return cfg;
}
_Noreturn static void
b24_cosmic_ray (struct b24_cfg **cfg,
char const *what,
char const *func,
int line)
{
size_t n = what ? strlen(what) : 0;
if (!n)
what = "";
(void)fprintf(stderr, "[\033[31mERROR\033[m] %s:%d: %s%s\n",
func, line, what, &".\nFailure caused by the impact "
"of a high-energy cosmic particle or a high-density "
"program author."[n ? what[n-1U] == '.' : 2U]);
b24_cfg_destroy(cfg);
exit(EXIT_FAILURE);
}
static int
b24_options_incompatible (struct b24_cfg *cfg,
enum b24_opt opt,
b24_cfg_flags bad,
const char sep)
{
if (!b24_has_option(cfg, opt))
return 0;
bad &= cfg->have_opt;
if (!bad)
return 0;
cfg->have_opt &= ~((typeof(cfg->have_opt))1 << opt);
int n = popcnt(bad);
if (n >= (int)array_size(b24_opt_to_char)) {
b24_cosmic_ray(&cfg, "Option flag outside expected range",
__func__, __LINE__);
}
char buf[64] = "";
char *dst = &buf[0];
b24_cfg_flags bit = 1U;
for (int prev = 0, i = 0; i < n;) {
int o = __builtin_ctzl(bad);
if (o >= (int)array_size(b24_opt_to_char))
break;
bit <<= o - prev;
bad &= ~bit;
prev = o;
if (++i > 1) {
if (n > 2)
*dst++ = sep;
*dst++ = ' ';
if (i == n) {
*dst++ = 'o';
*dst++ = 'r';
*dst++ = ' ';
}
}
struct str7 s = b24_opt_char_str(b24_opt_to_char[o]);
for (char *src = s.d; *src; ++src) { *dst++ = *src; }
}
*dst = '\0';
(void)fprintf(stderr, "Option %s cannot be combined with %s\n",
b24_opt_char_str(b24_opt_to_char[opt]).d, buf);
return 1;
}
static void
b24_option_assert_once (struct b24_cfg *cfg,
enum b24_opt opt)
{
if (b24_has_option(cfg, opt)) {
(void)fprintf(stderr, "Option %s specified twice\n",
b24_opt_char_str(b24_opt_to_char[opt]).d);
b24_helpful_exit(&cfg, EXIT_FAILURE);
}
}
static inline uint64_t
b24_get_u64_arg (char *arg,
int *err)
{
if (!arg) {
*err = EFAULT;
return 0;
}
if (!*arg) {
*err = EINVAL;
return 0;
}
errno = 0;
uint64_t u64 = 0U;
char *endptr = arg;
int64_t i64 = _Generic(i64, long: strtol,
long long: strtoll)(arg, &endptr, 0);
int e = errno;
if (!e) {
u64 = (uint64_t)i64;
if (*endptr)
e = EINVAL;
} else if (e == ERANGE && i64 == _Generic(i64, long: LONG_MAX,
long long: LLONG_MAX)) {
errno = 0;
endptr = arg;
u64 = _Generic(u64, unsigned long: strtoul,
unsigned long long: strtoull)(arg, &endptr, 0);
e = errno;
if (!e && *endptr)
e = EINVAL;
}
*err = e;
return u64;
}
static uint64_t
b24_get_int_arg (char *arg,
int *err,
uint64_t max)
{
int e = 0;
uint64_t u64 = b24_get_u64_arg(arg, &e);
if (u64 > max) {
u64 = max;
if (!e)
e = ERANGE;
}
*err = e;
return u64;
}
static struct b24_cfg *
b24_parse_args (int argc,
char **argv)
{
struct b24_cfg *cfg = b24_cfg_create(argc > 1 ? argc - 1 : 0);
if (!cfg)
return NULL;
enum b24_opt expect = 0;
char c_ = '\0';
for (int i = 1; i < argc; i++) {
char *a = argv[i];
if (expect) {
if (!*a)
goto missing_arg;
parse_arg: do{}while(0);
pragma_msvc(warning(push))
pragma_msvc(warning(disable: 4061))
uint64_t max = 15U;
switch (expect) {
case b24_opt_neuron:
max = UINT64_MAX;
break;
case b24_opt_debruijn:
case b24_opt_one_path:
case b24_opt_rotation:
break;
default:
b24_cosmic_ray(&cfg, "", __func__, __LINE__);
}
int e = 0;
uint64_t n = b24_get_int_arg(a, &e, max);
if (e) {
(void)fprintf(stderr, "Option %s "
"parse error: %s\n",
b24_opt_char_str(c_).d,
strerror(e));
b24_helpful_exit(&cfg, EXIT_FAILURE);
}
switch (expect) {
case b24_opt_debruijn:
cfg->seq[cfg->n_seq++] = b24[n];
break;
case b24_opt_neuron:
cfg->neuron = n;
break;
case b24_opt_one_path:
cfg->offset = (b24_cfg_offset)n;
break;
case b24_opt_rotation:
cfg->rotation = (uint16_t)n;
break;
default:
break;
}
pragma_msvc(warning(pop))
b24_option_add(cfg, expect);
expect = 0;
continue;
}
if (*a == '-') {
c_ = *++a;
concatenated_option:
unsigned ch_opt = b24_char_to_opt[(unsigned char)c_];
if (!ch_opt) {
(void)fprintf(stderr, "Option %s is unknown"
"\n", b24_opt_char_str(c_).d);
b24_helpful_exit(&cfg, EXIT_FAILURE);
}
enum b24_opt opt = ch_opt & ~(NEEDS_ARG | ONLY_ONCE);
if (ch_opt & ONLY_ONCE)
b24_option_assert_once(cfg, opt);
if (ch_opt & NEEDS_ARG) {
expect = opt;
if (*++a)
goto parse_arg;
} else {
b24_option_add(cfg, opt);
if (c_ && (c_ = *++a))
goto concatenated_option;
}
continue;
}
int e = 0;
uint64_t n = b24_get_int_arg(a, &e, UINT16_MAX);
if (e) {
(void)fprintf(stderr, "Bad sequence: %s\n",
strerror(e));
b24_cfg_destroy(&cfg);
return NULL;
}
cfg->seq[cfg->n_seq++] = (uint16_t)n;
}
int yikes = b24_options_incompatible(cfg, b24_opt_big_hex ,
1U << b24_opt_hex_path |
1U << b24_opt_sig_path , ',')
+ b24_options_incompatible(cfg, b24_opt_cycles ,
1U << b24_opt_entries |
1U << b24_opt_neuron |
1U << b24_opt_expand , ',')
+ b24_options_incompatible(cfg, b24_opt_entries ,
1U << b24_opt_cycles |
1U << b24_opt_neuron |
1U << b24_opt_expand , ',')
+ b24_options_incompatible(cfg, b24_opt_graphviz ,
1U << b24_opt_json |
1U << b24_opt_python , ',')
+ b24_options_incompatible(cfg, b24_opt_json ,
1U << b24_opt_python |
1U << b24_opt_graphviz , ',')
+ b24_options_incompatible(cfg, b24_opt_neuron ,
1U << b24_opt_one_path |
1U << b24_opt_expand , ',')
+ b24_options_incompatible(cfg, b24_opt_one_path ,
1U << b24_opt_expand , ',')
+ b24_options_incompatible(cfg, b24_opt_python ,
1U << b24_opt_json |
1U << b24_opt_graphviz , ',')
+ b24_options_incompatible(cfg, b24_opt_expand ,
1U << b24_opt_cycles |
1U << b24_opt_entries |
1U << b24_opt_one_path , ',')
+ b24_options_incompatible(cfg, b24_opt_sig_path ,
1U << b24_opt_hex_path , ',');
if (yikes)
b24_helpful_exit(&cfg, EXIT_FAILURE);
if (expect)
goto missing_arg;
if (b24_has_option(cfg, b24_opt_help))
b24_helpful_exit(&cfg, EXIT_SUCCESS);
if (!cfg->n_seq) {
(void)fputs("No sequence(s) specified\n", stderr);
b24_helpful_exit(&cfg, EXIT_FAILURE);
}
return cfg;
missing_arg:
(void)fprintf(stderr, "Option %s expects an argument\n",
b24_opt_char_str(c_).d);
b24_helpful_exit(&cfg, EXIT_FAILURE);
}
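/* Emit a Graphviz record node for one 16-bit sequence:
 * S<id> [label="<f>b|<e>b|...|<0>b"]; with one port per bit. Not referenced by
 * main() in this file; presumably intended for the -g (Graphviz) output mode.
 */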
static char *
b24_render_gv_struct (char *dst,
size_t siz,
uint16_t seq,
unsigned _BitInt(4) id)
{
if (siz <
sizeof "S0 [label=\"<f>0|<e>0|<d>0|<c>0|"
"<b>0|<a>0|<9>0|<8>0|"
"<7>0|<6>0|<5>0|<4>0|"
"<3>0|<2>0|<1>0|<0>0\"];\n")
return NULL;
memcpy(dst, "S0 [label=\"", sizeof "S0 [label=\"" - 1U);
*++dst = hexdig(id);
dst += sizeof " [label=\"";
for (unsigned off = 15U;; --off) {
*dst++ = '<';
*dst++ = hexdig(off);
*dst++ = '>';
*dst++ = (char)((unsigned char)'0' + (seq >> off & 1U));
if (!off)
break;
*dst++ = '|';
}
*dst++ = '"';
*dst++ = ']';
*dst++ = ';';
*dst++ = '\n';
return dst;
}
int
main (int c,
char **v)
{
struct b24_cfg *cfg = b24_parse_args(c, v);
if (!cfg)
return EXIT_FAILURE;
b24_cfg_trace(cfg);
b24_cfg_destroy(&cfg);
return EXIT_SUCCESS;
}
pragma_msvc(warning(pop))
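/** @file cortex.c
 *
 * (File boundary inferred from the gist layout; this translation unit defines
 * the cortex_create()/cortex_destroy() functions declared in cortex.h.)
 */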
#include <stdio.h>
#include <stdlib.h>
#include <sys/random.h>
#include "cortex.h"
struct cortex *
cortex_create (bool randomize)
{
vec128 vec[37] = {0};
vec128 *state = NULL;
struct cortex *ctx = malloc(sizeof *ctx);
if (!ctx) {
perror("malloc");
return NULL;
}
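/* Seed the initial state from the kernel RNG; if getrandom() fails or returns
 * short, fall back to zero-initialized neuron states.
 */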
if (randomize) {
ssize_t e = getrandom(&vec[0], sizeof vec, GRND_NONBLOCK);
if (e == (ssize_t)sizeof vec)
state = &vec[0];
else if (e < 0)
perror("getrandom");
}
*ctx = cortex(state, state ? array_size(vec) : 0);
return ctx;
}
void
cortex_destroy (struct cortex **ctx)
{
if (ctx) {
struct cortex *c = *ctx;
*ctx = NULL;
ctx = NULL;
free(c);
c = NULL;
}
}
/** @file cortex.h
*/
#ifndef B24_CORTEX_H_
#define B24_CORTEX_H_
#include "neuron.h"
/**
* @brief Cortex structure.
*
* This is the structure that holds the RNA for all the neurons in the
* network.
*
* More like a resonant bucket than a brain.
*/
struct cortex {
struct rna rna[256];
};
extern struct cortex *
cortex_create (bool randomize);
extern void
cortex_destroy (struct cortex **ctx);
static pure_inline uint8_t
cortex_read_channel (struct cortex *ctx,
uint16_t chan,
bool tock)
{
return ctx->rna[chan >> 4U].tt[tock].u8[chan & 15U];
}
static pure_inline struct cortex
cortex (vec128 const *ini,
size_t len)
{
if (!len)
ini = NULL;
const uint16_t de_bruijn_seq_2_4[] = {
0x9afU, 0x9ebU, 0xa6fU, 0xa7bU,
0xb3dU, 0xb4fU, 0xbcdU, 0xbd3U,
0xcbdU, 0xd2fU, 0xd79U, 0xde5U,
0xf2dU, 0xf4bU, 0xf59U, 0xf65U,
};
struct cortex ctx = {0};
for (uint32_t i = 0, y = 0; y < 16U; ++y) {
uint32_t seq = de_bruijn_seq_2_4[y];
seq |= seq << 16U;
for (uint32_t x = i + 16U; i < x; ++i) {
ctx.rna[i] = rna(
(uint16_t)seq,
ini ? ini[i % len] : (vec128){0}
);
seq >>= 1U;
}
}
return ctx;
}
static force_inline void
cortex_write_rna (struct cortex *ctx,
vec128 data,
uint16_t dest,
bool tock)
{
ctx->rna[dest].tt[tock] = data;
}
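/* fwd()/rev(): bit-interleave the two halves of a W-bit index (and undo it)
 * with BMI2 pdep/pext; M() builds the even- or odd-bit mask for the width.
 */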
#define M(T, W, odd) (((uint##T##_t)-1) / 3U << !!(odd) >> ((8U << ((W > 32U) + 2U)) - W))
#define fwd(T, W, v) (_pdep_u##T((v), M(T, W, 0)) | _pdep_u##T((v) >> W / 2U, M(T, W, 1)))
#define rev(T, W, v) (_pext_u##T((v), M(T, W, 0)) | _pext_u##T((v), M(T, W, 1)) << W / 2U)
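/* One network step: each neuron gathers its 16 input channels from the
 * tt[tock] buffers (the source channel is the bit-interleaved destination
 * address), then stores its updated state in tt[!tock], so reads and writes
 * never touch the same buffer within a step.
 */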
static force_inline void
cortex_tick (struct cortex *ctx,
bool tock)
{
for (uint16_t i = 0; i < (uint16_t)array_size(ctx->rna); ++i) {
// input connections
vec128 in = {0};
uint16_t dst = i << 4U;
for (uint32_t j = 0; j < array_size(in.u8); ++j, ++dst) {
uint32_t src = fwd(32, 12, dst);
in.u8[j] = cortex_read_channel(ctx, src, tock);
}
struct neuron nrn = neuron_from_rna(ctx->rna[i], tock);
cortex_write_rna(ctx, neuron_tock(&nrn, in), i, !tock);
}
}
#endif /* B24_CORTEX_H_ */
/** @file neuron.h
*/
#ifndef B24_NEURON_H_
#define B24_NEURON_H_
#include <limits.h>
#include "seq.h"
#include "vec.h"
/**
* @brief Runtime neuron structure.
*
* This is what is actually used with the SIMD instructions.
*/
struct neuron {
u8x16 paths;
u8x16 state;
};
/**
* @brief Neuron state storage unit.
*
* This is the storage unit for the neuron state. Instead of one state
* vector it has two; the network uses a "tick-tock" execution pattern
* to prevent clobbering the state before it is read.
*/
struct rna {
vec128 paths;
vec128 tt[2];
};
static pure_inline struct neuron
neuron_from_vec128 (vec128 paths,
vec128 state)
{
return (struct neuron){
.paths = u8x16_from_vec128(paths),
.state = u8x16_from_vec128(state)
};
}
static pure_inline struct rna
rna_from_neuron (struct neuron *nrn)
{
const vec128 state = vec128_from_u8x16(nrn->state);
return (struct rna){
.paths = vec128_from_u8x16(nrn->paths),
.tt = {state, state}
};
}
static pure_inline struct neuron
neuron_from_rna (struct rna rna,
bool tock)
{
return neuron_from_vec128(rna.paths, rna.tt[tock]);
}
static pure_inline struct neuron
neuron (uint16_t seq,
vec128 state)
{
return (struct neuron){
.paths = seq_expand_u8x16(seq),
.state = u8x16_from_vec128(state)
};
}
static pure_inline struct rna
rna (uint16_t seq,
vec128 state)
{
return (struct rna){
.paths = seq_expand_vec128(seq),
.tt = {state, state}
};
}
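/* neuron_tick() merges the input, advances the state, and returns the
 * *previous* state as a raw SIMD vector; neuron_tock() performs the same
 * update but returns the *new* state as a vec128 (used by cortex_tick() for
 * the tick-tock double buffer).
 */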
static force_inline u8x16
neuron_tick (struct neuron *nrn,
u8x16 in)
{
u8x16 state = nrn->state;
nrn->state = age_and_mutate(
merge_input(
state,
in
),
nrn->paths
);
return state;
}
static force_inline vec128
neuron_tock (struct neuron *nrn,
vec128 in)
{
nrn->state = age_and_mutate(
merge_input(
nrn->state,
u8x16_from_vec128(in)
),
nrn->paths
);
return vec128_from_u8x16(nrn->state);
}
#endif /* B24_NEURON_H_ */
/** @file popcnt.h
*/
#ifndef B24_POPCNT_H_
#define B24_POPCNT_H_
#include <limits.h>
#include <stdint.h>
#include "util.h"
#undef define_popcnt
#undef popcnt16_impl
#undef popcnt32_impl
#undef popcnt64_impl
#undef popcnt_compat
#ifdef _MSC_VER
# define popcnt16_impl return __popcnt16
# pragma intrinsic(__popcnt16)
# define popcnt32_impl return __popcnt
# pragma intrinsic(__popcnt)
# ifdef _M_IX86
# undef popcnt32x2
# define popcnt64_impl return popcnt32x2
# define popcnt32x2(x) \
(__popcnt((uint32_t)(x >> 32U)) + \
__popcnt((uint32_t)(x & ~(uint32_t)0)))
# else
# define popcnt64_impl return __popcnt64
# pragma intrinsic(__popcnt64)
# endif
# define popcnt_pre_pragma _Pragma("warning(push)") \
_Pragma("warning(disable: 4116)")
# define popcnt_post_pragma _Pragma("warning(pop)")
#else
# if __has_builtin(__builtin_popcount) && (UINT_MAX == UINT32_MAX)
# define popcnt16_impl return (uint16_t)__builtin_popcount
# define popcnt32_impl return (uint32_t)__builtin_popcount
# endif
# if !defined(popcnt32_impl) && \
__has_builtin(__builtin_popcountl) && (ULONG_MAX == UINT32_MAX)
# define popcnt16_impl return (uint16_t)__builtin_popcountl
# define popcnt32_impl return (uint32_t)__builtin_popcountl
# endif
# if __has_builtin(__builtin_popcountl) && (ULONG_MAX == UINT64_MAX)
# define popcnt64_impl return (uint64_t)__builtin_popcountl
# endif
# if !defined(popcnt64_impl) && \
__has_builtin(__builtin_popcountll) && (ULLONG_MAX == UINT64_MAX)
# define popcnt64_impl return (uint64_t)__builtin_popcountll
# endif
# if __has_builtin(__builtin_popcountg)
# if !defined(popcnt16_impl)
# define popcnt16_impl return (uint16_t)__builtin_popcountg
# endif
# if !defined(popcnt32_impl)
# define popcnt32_impl return (uint32_t)__builtin_popcountg
# endif
# if !defined(popcnt64_impl)
# define popcnt64_impl return (uint64_t)__builtin_popcountg
# endif
# endif
#endif
#define define_popcnt(b, ...) const_inline \
static uint##b##_t popcnt##b(uint##b##_t val) \
{ __VA_ARGS__(val); } _Static_assert(b>0, #b)
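/* Portable fallback: the classic SWAR popcount (sum bits in pairs, then
 * nibbles, then bytes, and fold the byte sums with a multiply). The leading
 * _Generic picks a 32-bit accumulator for values of up to 4 bytes and a
 * 64-bit one otherwise.
 */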
#define popcnt_compat(x) typeof(_Generic( \
(char(*)[2 - (sizeof(x) > 4U)])0 \
,char(*)[1]: (uint64_t)0 \
,char(*)[2]: (uint32_t)0)) y = (x); \
y -= (typeof(y))-1/3 & (y >> 1U); \
y = ((typeof(y))-1/5 & (y >> 2U)) \
+ ((typeof(y))-1/5 & y); \
return ((typeof(y))-1/ 17 & ((y >> 4U) + y)) \
* ((typeof(y))-1/255) >> (sizeof y - 1U) \
* CHAR_BIT
#ifndef popcnt16_impl
# define popcnt16_impl popcnt_compat
#endif
#ifndef popcnt32_impl
# define popcnt32_impl popcnt_compat
#endif
#ifndef popcnt64_impl
# define popcnt64_impl popcnt_compat
#endif
#ifndef popcnt_pre_pragma
# define popcnt_pre_pragma
#endif
#ifndef popcnt_post_pragma
# define popcnt_post_pragma
#endif
define_popcnt(16, popcnt16_impl);
define_popcnt(32, popcnt32_impl);
define_popcnt(64, popcnt64_impl);
#undef define_popcnt
#undef popcnt16_impl
#undef popcnt32_impl
#undef popcnt64_impl
#undef popcnt_compat
#if defined(_MSC_VER) && defined(_M_IX86)
# undef popcnt32x2
#endif
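/* popcnt(x): dispatch to popcnt16/32/64 by the type of x, converting signed
 * arguments to their unsigned counterparts first. The nested _Generic over
 * (char)0 names plain `char` (a distinct type from both signed char and
 * unsigned char) without risking a duplicate association; long and unsigned
 * long are resolved by their actual size.
 */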
#define popcnt(x) popcnt_pre_pragma _Generic((x), \
typeof(_Generic((char)0, \
signed char: (struct{int i;}){0}, \
unsigned char: (struct{int i;}){0}, \
default: (char)0)): popcnt16, \
signed char: popcnt16, short: popcnt16, \
unsigned char: popcnt16, int: popcnt32, \
unsigned short: popcnt16, unsigned: popcnt32, \
long long: popcnt64, unsigned long: _Generic( \
&(int[sizeof 1UL]){0}, \
int(*)[sizeof(uint32_t)]: popcnt32, \
int(*)[sizeof(uint64_t)]: popcnt64), \
unsigned long long: popcnt64, long: _Generic( \
&(int[sizeof 1L]){0}, \
int(*)[sizeof(int32_t)]: popcnt32, \
int(*)[sizeof(int64_t)]: popcnt64)) \
(_Generic((x), default: (x), \
long long: (unsigned long long)(x), \
signed char: (unsigned char)(x), \
short: (unsigned short)(x), \
typeof(_Generic((char)0, \
signed char: \
(struct{int i;}){0}, \
unsigned char: \
(struct{int i;}){0}, \
default: (char)0)): \
(unsigned char)(x), \
long: (unsigned long)(x), \
int: (unsigned)(x))) popcnt_post_pragma
#endif /* B24_POPCNT_H_ */
/** @file seq.h
*/
#ifndef B24_SEQ_H_
#define B24_SEQ_H_
#include "vec.h"
#ifdef __aarch64__
/**
* @brief Age and mutate the state.
*
* Decrement each non-zero strength by one and apply the given mutation.
*
* @param state The current state.
* @param paths The mutation descriptor.
* @return The resulting state.
*/
static const_inline u8x16
age_and_mutate (u8x16 state,
u8x16 paths)
{
return vqtbl1q_u8(
vsubq_u8(
state,
vbslq_u8(
vtstq_u8(
state,
vdupq_n_u8(0xf0)
),
vdupq_n_u8(0x10),
state
)
),
paths
);
}
/**
* @brief Merge input into the current state, preferring the input.
*
* For each channel, if the input channel has a non-zero strength,
* use that channel in the output, otherwise use the corresponding
* channel from the current state.
*
* @param state The current state.
* @param input The input to merge into the state.
* @return The merged state.
*/
static const_inline u8x16
merge_input (u8x16 state,
u8x16 input)
{
return vbslq_u8(
vtstq_u8(
input,
vdupq_n_u8(0xf0)
),
input,
state
);
}
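/* seq_expand(): build a 64-bit value whose nibble i is the 4-bit window of
 * the cyclic sequence starting at bit i. seq_expand_u8x16() and
 * seq_expand_vec128() place the same sixteen windows one per byte (in the low
 * nibble), ready for use as the byte-permutation control in age_and_mutate().
 */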
static force_inline uint64_t
seq_expand (uint16_t seq)
{
uint16_t rot = seq << 12U | seq >> 4U;
uint64_t ret = 0;
uint64x2_t x = vreinterpretq_u64_u8(
vqtbl1q_u8(
vreinterpretq_u8_u16(
vld1q_lane_u16(
&rot,
vld1q_lane_u16(
&seq,
vdupq_n_u16(0),
0
),
1
)
),
vld1q_u8(((uint8_t[16]){
0x00, 0x00, 0x00, 0x00,
0x02, 0x02, 0x02, 0x02,
0x01, 0x01, 0x01, 0x01,
0x03, 0x03, 0x03, 0x03})
)
)
);
x = vorrq_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x003c000f003c000f))
),
vshrq_n_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x78001e0078001e00))
),
5
)
);
x = vorrq_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x000000ff000000ff))
),
vshrq_n_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x03fc000003fc0000))
),
10
)
);
vst1q_lane_u64(
&ret,
vreinterpretq_u64_u8(
vqtbl1q_u8(
vreinterpretq_u8_u64(x),
vld1q_u8(((uint8_t[16]){
0x00, 0x01, 0x04, 0x05,
0x08, 0x09, 0x0c, 0x0d,
0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80})
)
)
),
0
);
return ret;
}
static force_inline u8x16
seq_expand_u8x16 (uint16_t seq)
{
uint64x2_t x = vreinterpretq_u64_u8(
vqtbl1q_u8(
vreinterpretq_u8_u16(
vld1q_lane_u16(
(uint16_t[1]){
seq << 12U |
seq >> 4U
},
vld1q_lane_u16(
&seq,
vdupq_n_u16(0),
0
),
1
)
),
vld1q_u8(((uint8_t[16]){
0x00, 0x00, 0x00, 0x00,
0x02, 0x02, 0x02, 0x02,
0x01, 0x01, 0x01, 0x01,
0x03, 0x03, 0x03, 0x03})
)
)
);
x = vorrq_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x003c000f003c000f))
),
vshrq_n_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x78001e0078001e00))
),
5
)
);
x = vorrq_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x000000ff000000ff))
),
vshrq_n_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x03fc000003fc0000))
),
10
)
);
x = vreinterpretq_u64_u8(
vqtbl1q_u8(
vreinterpretq_u8_u64(x),
vld1q_u8(((uint8_t[16]){
0x00, 0x80, 0x01, 0x80,
0x04, 0x80, 0x05, 0x80,
0x08, 0x80, 0x09, 0x80,
0x0c, 0x80, 0x0d, 0x80})
)
)
);
uint64x2_t y = vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x00f000f000f000f0))
);
return vreinterpretq_u8_u64(
vorrq_u64(
veorq_u64(x, y),
vshlq_n_u64(y, 4)
)
);
}
static force_inline vec128
seq_expand_vec128 (uint16_t seq)
{
return vec128_from_u8x16(seq_expand_u8x16(seq));
}
#endif /* __aarch64__ */
#if defined(__x86_64__) || defined(_MSC_VER)
# define pext_mask 0x783c1e0f783c1e0fULL
# define pdep_mask 0x0f0f0f0f0f0f0f0fULL
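/* x86 counterparts: a byte shuffle lines up the overlapping bit windows and
 * BMI2 _pext_u64 compacts them into nibbles (seq_expand); _pdep_u64 then
 * re-spreads them one window per byte for the paths vector
 * (seq_expand_vec128).
 */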
static force_inline uint64_t
seq_expand (uint16_t seq)
{
u8x16 k = _mm_shuffle_epi8(
_mm_set_epi64x(
0, (uint32_t)(seq << 12U | seq >> 4U) << 16U | seq
),
_mm_set_epi64x(0x0303030301010101LL, 0x0202020200000000LL)
);
return _pext_u64((uint64_t)_mm_extract_epi64(k, 1), pext_mask) << 32U
| _pext_u64((uint64_t)_mm_extract_epi64(k, 0), pext_mask);
}
static force_inline vec128
seq_expand_vec128 (uint16_t seq)
{
u8x16 k = _mm_shuffle_epi8(
_mm_set_epi64x(
0, (uint32_t)(seq << 12U | seq >> 4U) << 16U | seq
),
_mm_set_epi64x(0x0303030301010101LL, 0x0202020200000000LL)
);
return (vec128){
.u64[0] = _pdep_u64(
_pext_u64(
(unsigned long long)_mm_extract_epi64(k, 0),
pext_mask
),
pdep_mask
),
.u64[1] = _pdep_u64(
_pext_u64(
(unsigned long long)_mm_extract_epi64(k, 1),
pext_mask
),
pdep_mask
)
};
}
static force_inline u8x16
seq_expand_u8x16 (uint16_t seq)
{
return u8x16_from_vec128(seq_expand_vec128(seq));
}
# undef pdep_mask
# undef pext_mask
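/* x86 versions of the NEON helpers above: age_and_mutate() decrements every
 * strength nibble via a saturating subtract of 0x10 and permutes the bytes
 * with the paths vector; merge_input() keeps an input byte whenever its
 * strength nibble is non-zero.
 */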
static const_inline u8x16
age_and_mutate (u8x16 state,
u8x16 paths)
{
return _mm_shuffle_epi8(
_mm_subs_epu8(
state,
_mm_set_epi64x(
0x1010101010101010LL,
0x1010101010101010LL
)
),
paths
);
}
static const_inline u8x16
merge_input (u8x16 state,
u8x16 input)
{
return _mm_blendv_epi8(
state,
input,
/* Mask construction: adding 0x70 (saturated)
* sets the top bit iff the value is at least
* 0x10, and the result can be used as a mask
* in `_mm_blendv_epi8()`.
*/
_mm_adds_epu8(
input,
_mm_set_epi64x(
0x7070707070707070LL,
0x7070707070707070LL
)
)
);
}
#endif /* __x86_64__ || _MSC_VER */
#endif /* B24_SEQ_H_ */
/** @file util.h
*/
#ifndef B24_UTIL_H_
#define B24_UTIL_H_
#ifdef _MSC_VER
# include <intrin.h>
#elif defined(__aarch64__)
# include <arm_neon.h>
#elif defined(__x86_64__)
# include <immintrin.h>
#endif
#ifdef __aarch64__
typedef uint8x16_t u8x16;
#else
typedef __m128i u8x16;
#endif
#ifndef __has_builtin
# define __has_builtin(...) 0
#endif
#ifdef _MSC_VER
# define __attribute__(...)
# define force_inline __forceinline
# define pragma_msvc(...) _Pragma(#__VA_ARGS__)
# define typeof __typeof__
#else
# define force_inline __attribute__((always_inline)) inline
# define pragma_msvc(...)
#endif
#define array_size(a) (sizeof (a) / sizeof (a)[0])
#define const_function __attribute__((const))
#define const_inline const_function force_inline
#define pure_inline __attribute__((pure)) force_inline
#endif /* B24_UTIL_H_ */
/** @file vec.h
*/
#ifndef B24_VEC_H_
#define B24_VEC_H_
#include <stdint.h>
#include "util.h"
typedef struct vec4x2 {
uint8_t x : 4; uint8_t y : 4;
} vec4x2;
typedef struct vec4x4 {
uint16_t x0 : 4; uint16_t y0 : 4;
uint16_t x1 : 4; uint16_t y1 : 4;
} vec4x4;
typedef struct vec4x8 {
uint32_t x0 : 4; uint32_t y0 : 4;
uint32_t x1 : 4; uint32_t y1 : 4;
uint32_t x2 : 4; uint32_t y2 : 4;
uint32_t x3 : 4; uint32_t y3 : 4;
} vec4x8;
typedef struct vec4x16 {
uint64_t x0 : 4; uint64_t y0 : 4;
uint64_t x1 : 4; uint64_t y1 : 4;
uint64_t x2 : 4; uint64_t y2 : 4;
uint64_t x3 : 4; uint64_t y3 : 4;
uint64_t x4 : 4; uint64_t y4 : 4;
uint64_t x5 : 4; uint64_t y5 : 4;
uint64_t x6 : 4; uint64_t y6 : 4;
uint64_t x7 : 4; uint64_t y7 : 4;
} vec4x16;
typedef struct vec128 {
union {
uint8_t u8[ 16];
vec4x2 u4x2[16];
uint16_t u16[ 8];
vec4x4 u4x4[ 8];
uint32_t u32[ 4];
vec4x8 u4x8[ 4];
uint64_t u64[ 2];
vec4x16 u4x16[2];
};
} vec128;
_Static_assert(sizeof(vec4x2 ) == 1U,"");
_Static_assert(sizeof(vec4x4 ) == 2U,"");
_Static_assert(sizeof(vec4x8 ) == 4U,"");
_Static_assert(sizeof(vec4x16) == 8U,"");
_Static_assert(sizeof(vec128 ) == 16U,"");
#ifdef __aarch64__
static pure_inline u8x16
u8x16_from_vec128 (vec128 v)
{
return vld1q_u8(&v.u8[0]);
}
static pure_inline vec128
vec128_from_u8x16 (u8x16 v)
{
vec128 ret;
vst1q_u8(&ret.u8[0], v);
return ret;
}
#else
static pure_inline u8x16
u8x16_from_vec128 (vec128 v)
{
return _mm_set_epi64x(
(long long)v.u64[1],
(long long)v.u64[0]
);
}
static pure_inline vec128
vec128_from_u8x16 (u8x16 v)
{
return (vec128){
.u64[0] = (uint64_t)_mm_extract_epi64(v, 0),
.u64[1] = (uint64_t)_mm_extract_epi64(v, 1),
};
}
#endif
#endif /* B24_VEC_H_ */