Last active
September 23, 2024 21:32
-
-
Save imaami/b74edbf7c212faa1f40241bda3c55f54 to your computer and use it in GitHub Desktop.
B(2, 4)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * @file b24.c
 *
 * Compiling with MSVC 19.41 or later:
 *
 *   cl.exe /TC /std:clatest /O2 /Oi /GL /GF /Zo- /favor:AMD64 /arch:AVX2 b24.c /Fe: b24.exe /MD
 */
#include <errno.h> | |
#include <inttypes.h> | |
#include <stdbool.h> | |
#include <stddef.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include "neuron.h" | |
#include "popcnt.h" | |
#include "vec.h" | |
/*
 * Bit-precise integer compatibility layer.
 *
 * Under IntelliSense or MSVC, _BitInt(n) is mapped to plain int and
 * BITINT_C() leaves the literal untouched; everywhere else BITINT_C()
 * appends the WB bit-precise literal suffix.
 */
#if !defined(__INTELLISENSE__) && !defined(_MSC_VER)
# define BITINT_C(n) n##WB
#else
# define BITINT_C(n) n
# define _BitInt(n) int
#endif
#ifdef _MSC_VER
# pragma intrinsic(_BitScanForward)
/**
 * @brief MSVC stand-in for the GCC/Clang `__builtin_ctzl` builtin.
 *
 * @param x Value to scan.
 * @return Index of the lowest set bit as reported by _BitScanForward, or
 *         the bit width of `unsigned long` when the scan reports failure.
 */
static const_inline int __builtin_ctzl(unsigned long x) {
	unsigned long idx;
	if (!_BitScanForward(&idx, x))
		return (int)sizeof(x) * CHAR_BIT;
	return (int)idx;
}
#endif
pragma_msvc(warning(push)) | |
pragma_msvc(warning(disable: 4710)) | |
pragma_msvc(warning(disable: 4711)) | |
static const_inline char | |
hexdig (unsigned _BitInt(4) n) | |
{ | |
return (const char[16]){ | |
'0','1','2','3','4','5','6','7', | |
'8','9','a','b','c','d','e','f', | |
}[n]; | |
} | |
static inline char * | |
decstr_u5 (char *dst, | |
unsigned _BitInt(5) src, | |
bool aln) | |
{ | |
if (src >= BITINT_C(10U)) { | |
*dst++ = (char)((unsigned char)'0' + src / BITINT_C(10U)); | |
src %= BITINT_C(10U); | |
} else if (aln) | |
*dst++ = ' '; | |
*dst++ = (char)((unsigned char)'0' + src); | |
return dst; | |
} | |
#if 0
/**
 * @brief Write a 5-bit two's-complement value as signed decimal.
 *
 * Currently compiled out. Bit 4 of `src` is taken as the sign; the
 * magnitude is printed with an optional '-' and an optional leading '1'.
 *
 * @param dst Output cursor; no terminator is written.
 * @param src Raw 5-bit pattern.
 * @param pad When true, shorter renderings are left-padded with spaces
 *            to a width of three characters.
 * @return Pointer just past the last character written.
 */
static force_inline char *
decstr_s5 (char *dst,
           unsigned _BitInt(5) src,
           bool pad)
{
	const typeof(src) lt0 = src >> 4;

	/* Conditional negate: (x - s) ^ (0 - s) is |x| when s is the sign.
	 * 31 ^ 31 is a zero that already has the bit-precise type. */
	src = (src - lt0) ^ ((BITINT_C(31U) ^ BITINT_C(31U)) - lt0);

	const bool neg  = lt0;
	const bool tens = src >= BITINT_C(10U);

	/* "-1d" needs no padding, "-d" and "1d" one space, "d" two. */
	if (pad && !(neg && tens))
		*dst++ = ' ';
	if (pad && !neg && !tens)
		*dst++ = ' ';
	if (neg)
		*dst++ = '-';
	if (tens) {
		*dst++ = '1';
		src -= BITINT_C(10U);
	}

	*dst++ = (char)((unsigned char)'0' + src);
	return dst;
}
#endif
static force_inline char * | |
decstr_u4 (char *dst, | |
unsigned _BitInt(4) src, | |
bool aln) | |
{ | |
if (src >= BITINT_C(10U)) { | |
*dst++ = '1'; | |
src -= BITINT_C(10U); | |
} else if (aln) | |
*dst++ = ' '; | |
*dst++ = (char)((unsigned char)'0' + src); | |
return dst; | |
} | |
static force_inline char * | |
decstr_s4 (char *dst, | |
unsigned _BitInt(4) src, | |
bool aln) | |
{ | |
if (src >= BITINT_C(8U)) { | |
*dst++ = '-'; | |
*dst++ = (char)((unsigned char)'8' + | |
(unsigned char)8 - src); | |
} else { | |
if (aln) | |
*dst++ = ' '; | |
*dst++ = (char)((unsigned char)'0' + src); | |
} | |
return dst; | |
} | |
/*
 * The sixteen 16-bit sequences selectable with the "-d <0..15>" option
 * (described by the help text as "distinct B(2, 4)").
 */
static const uint16_t b24[] = {
	0x9afU, 0x9ebU, 0xa6fU, 0xa7bU, 0xb3dU, 0xb4fU, 0xbcdU, 0xbd3U,
	0xcbdU, 0xd2fU, 0xd79U, 0xde5U, 0xf2dU, 0xf4bU, 0xf59U, 0xf65U,
};
pragma_msvc(warning(push)) | |
pragma_msvc(warning(disable: 4200)) | |
pragma_msvc(warning(disable: 4820)) | |
struct b24_cfg { | |
uint32_t have_opt; | |
uint16_t rotation; | |
uint16_t offset; | |
uint64_t neuron; | |
size_t n_seq; | |
uint16_t seq[]; | |
}; | |
pragma_msvc(warning(pop)) | |
typedef typeof((struct b24_cfg){0}.have_opt) b24_cfg_flags; | |
typedef typeof((struct b24_cfg){0}.offset) b24_cfg_offset; | |
/**
 * @brief State of one path walk through a sequence.
 */
struct trace {
	uint64_t cyc;  /* visited offsets, 4 bits each, first step lowest */
	uint32_t seq;  /* the 16-bit sequence duplicated into both halves */
	uint16_t len;  /* number of steps recorded in cyc */
	uint16_t map;  /* bit n set once offset n has been visited */
};
const_function | |
static struct trace | |
b24_trace_init (struct b24_cfg const *const cfg, | |
unsigned i) | |
{ | |
uint32_t seq = cfg->seq[i] | ((uint32_t)cfg->seq[i] << 16U); | |
return (struct trace){ | |
.cyc = 0U, | |
.seq = cfg->rotation ? (seq >> (16 - cfg->rotation)) | |
| (seq << cfg->rotation) : seq, | |
.len = 0U, | |
.map = 0U, | |
}; | |
} | |
const_function | |
static struct trace | |
b24_trace_from (struct trace tc, | |
b24_cfg_offset off) | |
{ | |
tc.cyc = off; | |
tc.len = 1U; | |
tc.map = 1U << off; | |
for (;;) { | |
off = (tc.seq >> off) & (b24_cfg_offset)15; | |
typeof(tc.map) bit = (typeof(bit))1 << off; | |
if (tc.map & bit) | |
break; | |
tc.map |= bit; | |
tc.cyc |= (typeof(tc.cyc))off | |
<< (tc.len++ << 2U); | |
} | |
return tc; | |
} | |
const_function | |
static struct trace | |
b24_trace_next (struct trace tc) | |
{ | |
if (tc.map == (typeof(tc.map))0xffff) | |
return (struct trace){0}; | |
b24_cfg_offset off = 0U; | |
uint_fast16_t map = tc.map; | |
while (map & (typeof(map))1) { | |
map >>= 1U; | |
++off; | |
} | |
return b24_trace_from(tc, off); | |
} | |
/**
 * @brief Option identifiers, plus qualifier flags for the lookup table.
 *
 * Each identifier doubles as a bit index into b24_cfg.have_opt.
 */
enum b24_opt {
	/* std_in ('-') is deliberately 0: every argument-accepting option
	 * then has a non-zero value, so storing one in `expect` during
	 * argument parsing always tests true.
	 */
	b24_opt_std_in  ,  // -  (unused for now)
	b24_opt_big_hex ,  // -b
	b24_opt_cycles  ,  // -c
	b24_opt_debruijn,  // -d
	b24_opt_entries ,  // -e
	b24_opt_graphviz,  // -g
	b24_opt_help    ,  // -h
	b24_opt_json    ,  // -j
	b24_opt_neuron  ,  // -n
	b24_opt_one_path,  // -o
	b24_opt_python  ,  // -p
	b24_opt_expand  ,  // -q
	b24_opt_rotation,  // -r
	b24_opt_sig_path,  // -s
	b24_opt_no_space,  // -w
	b24_opt_hex_path,  // -x
	b24_opt_no_align,  // -y
	b24_opt_no_zeros,  // -z

	// qualifier flags OR-ed into b24_char_to_opt[] entries
	ONLY_ONCE = 0x40U, // option may appear at most once
	NEEDS_ARG = 0x80U, // option takes an argument
};
/*
 * b24_opt_char(name) - option letter for b24_opt_<name>.
 *
 * The enumerator value is encoded as an array length so that _Generic
 * can select on it; the selected letter is an integer constant
 * expression usable as an array designator.
 */
#define b24_opt_char(x)                              \
	_Generic(&(int[1U+b24_opt_##x]){0}           \
	, int(*)[1U+b24_opt_std_in  ]: 0             \
	, int(*)[1U+b24_opt_big_hex ]: 'b'           \
	, int(*)[1U+b24_opt_cycles  ]: 'c'           \
	, int(*)[1U+b24_opt_debruijn]: 'd'           \
	, int(*)[1U+b24_opt_entries ]: 'e'           \
	, int(*)[1U+b24_opt_graphviz]: 'g'           \
	, int(*)[1U+b24_opt_help    ]: 'h'           \
	, int(*)[1U+b24_opt_json    ]: 'j'           \
	, int(*)[1U+b24_opt_neuron  ]: 'n'           \
	, int(*)[1U+b24_opt_one_path]: 'o'           \
	, int(*)[1U+b24_opt_python  ]: 'p'           \
	, int(*)[1U+b24_opt_expand  ]: 'q'           \
	, int(*)[1U+b24_opt_rotation]: 'r'           \
	, int(*)[1U+b24_opt_sig_path]: 's'           \
	, int(*)[1U+b24_opt_no_space]: 'w'           \
	, int(*)[1U+b24_opt_hex_path]: 'x'           \
	, int(*)[1U+b24_opt_no_align]: 'y'           \
	, int(*)[1U+b24_opt_no_zeros]: 'z'           )
static const char b24_opt_to_char[] = { | |
#define b24_option(x) [b24_opt_##x] = b24_opt_char(x) | |
b24_option(std_in ), | |
b24_option(big_hex ), | |
b24_option(cycles ), | |
b24_option(debruijn), | |
b24_option(entries ), | |
b24_option(graphviz), | |
b24_option(help ), | |
b24_option(json ), | |
b24_option(neuron ), | |
b24_option(one_path), | |
b24_option(python ), | |
b24_option(expand ), | |
b24_option(rotation), | |
b24_option(sig_path), | |
b24_option(no_space), | |
b24_option(hex_path), | |
b24_option(no_align), | |
b24_option(no_zeros), | |
#undef b24_option | |
}; | |
static const unsigned char | |
b24_char_to_opt[1U << CHAR_BIT] = { | |
#define b24_option(x) [b24_opt_char(x)] = b24_opt_##x | |
b24_option(std_in )| ONLY_ONCE, | |
b24_option(big_hex )| ONLY_ONCE, | |
b24_option(cycles )| ONLY_ONCE, | |
b24_option(debruijn)| NEEDS_ARG, | |
b24_option(entries )| ONLY_ONCE, | |
b24_option(graphviz)| ONLY_ONCE, | |
b24_option(help )| ONLY_ONCE, | |
b24_option(json )| ONLY_ONCE, | |
b24_option(neuron )| ONLY_ONCE | |
| NEEDS_ARG, | |
b24_option(one_path)| ONLY_ONCE | |
| NEEDS_ARG, | |
b24_option(python )| ONLY_ONCE, | |
b24_option(expand )| ONLY_ONCE, | |
b24_option(rotation)| ONLY_ONCE | |
| NEEDS_ARG, | |
b24_option(sig_path)| ONLY_ONCE, | |
b24_option(no_space)| ONLY_ONCE, | |
b24_option(hex_path)| ONLY_ONCE, | |
b24_option(no_align)| ONLY_ONCE, | |
b24_option(no_zeros)| ONLY_ONCE, | |
#undef b24_option | |
}; | |
/** | |
* @brief Check if option `opt` is present. | |
*/ | |
static pure_inline bool | |
b24_has_option (struct b24_cfg const *const cfg, | |
const enum b24_opt opt) | |
{ | |
return cfg->have_opt & ((b24_cfg_flags)1 << opt); | |
} | |
/** | |
* @brief Mark option `opt` as present. | |
*/ | |
static force_inline void | |
b24_option_add (struct b24_cfg *const cfg, | |
const enum b24_opt opt) | |
{ | |
cfg->have_opt |= (b24_cfg_flags)1 << opt; | |
} | |
/**
 * @brief Punctuation used when rendering lists.
 */
struct b24_syntax {
	char bra;  /* opening bracket */
	char ket;  /* closing bracket */
	char sep;  /* element separator */
	char spc;  /* character written after sep, or 0 for none */
};
static force_inline char * | |
hexstr_u4 (char *dst, | |
unsigned _BitInt(4) src) | |
{ | |
*dst++ = '0'; | |
*dst++ = 'x'; | |
*dst++ = hexdig(src); | |
return dst; | |
} | |
/**
 * @brief Write a 16-bit value as "0x" followed by hex digits.
 *
 * @param dst Output cursor; no terminator is written.
 * @param src Value to print.
 * @param pad When true all four digits are written; otherwise leading
 *            zero digits are dropped (at least one digit remains).
 * @return Pointer just past the last character written.
 */
static inline char *
hexstr_u16 (char *dst,
            uint16_t src,
            bool pad)
{
	unsigned digits = 4U;

	if (!pad) {
		digits = 1U;
		if (src > UINT16_C(0x000f))
			digits = 2U;
		if (src > UINT16_C(0x00ff))
			digits = 3U;
		if (src > UINT16_C(0x0fff))
			digits = 4U;
	}

	*dst++ = '0';
	*dst++ = 'x';

	/* Most significant selected nibble first. */
	while (digits--)
		*dst++ = hexdig(src >> (digits << 2U) & 15U);

	return dst;
}
static force_inline char * | |
hexstr_16x4 (char *dst, | |
uint64_t src, | |
unsigned len, | |
struct b24_syntax syn) | |
{ | |
if (len) { | |
for (;; src >>= 4) { | |
dst = hexstr_u4(dst, src & 15U); | |
if (!--len) | |
break; | |
*dst++ = syn.sep; | |
*dst = syn.spc; | |
dst+= !!syn.spc; | |
} | |
} | |
return dst; | |
} | |
/**
 * @brief Write a 64-bit value as "0x" followed by hex digits.
 *
 * @param dst Output cursor; no terminator is written.
 * @param src Value to print.
 * @param pad When true all sixteen digits are written; otherwise leading
 *            zero digits are dropped (at least one digit remains).
 * @return Pointer just past the last character written.
 */
static char *
hexstr_u64 (char *dst,
            uint64_t src,
            bool pad)
{
	char     digits[16];
	unsigned first = 15U;  /* index of the first digit to emit */

	for (unsigned i = 0; i < 16U; ++i) {
		const unsigned nib = (unsigned)(src >> (i << 2U)) & 15U;
		digits[15U - i] = hexdig(nib);
		if (nib)
			first = 15U - i;
	}
	if (pad)
		first = 0U;

	*dst++ = '0';
	*dst++ = 'x';
	for (unsigned i = first; i < 16U; ++i)
		*dst++ = digits[i];

	return dst;
}
static force_inline char * | |
decstr_16x4 (char *dst, | |
uint64_t src, | |
unsigned len, | |
struct b24_syntax syn, | |
bool aln, | |
bool sig) | |
{ | |
if (len) { | |
bool w = syn.spc; | |
if (sig) { | |
for (;; src >>= 4) { | |
dst = decstr_s4(dst, src & 15U, aln); | |
if (!--len) | |
break; | |
*dst++ = syn.sep; | |
*dst = syn.spc; | |
dst += w; | |
} | |
} else { | |
for (;; src >>= 4) { | |
dst = decstr_u4(dst, src & 15U, aln); | |
if (!--len) | |
break; | |
*dst++ = syn.sep; | |
*dst = syn.spc; | |
dst += w; | |
} | |
} | |
} | |
return dst; | |
} | |
static char * | |
b24_cfg_trace_print (struct b24_cfg const *cfg, | |
char *dst, | |
struct trace src, | |
unsigned idx, | |
struct b24_syntax syn) | |
{ | |
bool align = !b24_has_option(cfg, b24_opt_no_align); | |
bool zeros = !b24_has_option(cfg, b24_opt_no_zeros); | |
bool expand = b24_has_option(cfg, b24_opt_expand); | |
bool all = !b24_has_option(cfg, b24_opt_one_path) && | |
!expand; | |
if (all) { | |
*dst++ = syn.bra; | |
dst = hexstr_u16(dst, (uint16_t)src.seq, zeros); | |
*dst++ = syn.sep; | |
*dst = syn.spc; | |
dst+= !!syn.spc; | |
dst = decstr_u5(dst, idx, align); | |
*dst++ = syn.sep; | |
*dst = syn.spc; | |
dst+= !!syn.spc; | |
dst = hexstr_u16(dst, src.map, zeros); | |
*dst++ = syn.sep; | |
*dst = syn.spc; | |
dst+= !!syn.spc; | |
dst = decstr_u5(dst, src.len, align); | |
*dst++ = syn.sep; | |
*dst = syn.spc; | |
dst+= !!syn.spc; | |
} | |
if (b24_has_option(cfg, b24_opt_big_hex)) { | |
dst = hexstr_u64(dst, src.cyc, zeros); | |
} else { | |
*dst++ = syn.bra; | |
dst = b24_has_option(cfg, b24_opt_hex_path) | |
? hexstr_16x4(dst, src.cyc, src.len, syn) | |
: decstr_16x4(dst, src.cyc, src.len, syn, align, | |
b24_has_option(cfg, b24_opt_sig_path)); | |
*dst++ = syn.ket; | |
} | |
if (all) | |
*dst++ = syn.ket; | |
*dst = '\0'; | |
return dst; | |
} | |
static struct trace | |
b24_expand (struct trace tc) | |
{ | |
struct neuron nrn = neuron(tc.seq, (vec128){ | |
.u8 = { | |
0x0, 0x0, 0x0, 0x0, | |
0x0, 0x0, 0x0, 0xa7, | |
0x0, 0x0, 0x0, 0x0, | |
0x0, 0x0, 0x0, 0x0, | |
} | |
}); | |
putchar('\n'); | |
for (;;) { | |
#ifndef __aarch64__ | |
__m128i simd = neuron_tick(&nrn, _mm_set_epi64x(0, 0)); | |
vec128 x = (vec128){ | |
.u64 = { | |
[0] = _mm_extract_epi64(simd, 0), | |
[1] = _mm_extract_epi64(simd, 1), | |
} | |
}; | |
#else | |
uint64x2_t simd = vreinterpretq_u64_u8( | |
neuron_tick(&nrn, vdupq_n_u8(0)) | |
); | |
vec128 x; | |
vst1q_lane_u64(&x.u64[0], simd, 0); | |
vst1q_lane_u64(&x.u64[1], simd, 1); | |
#endif // __aarch64__ | |
vec128 y = (vec128){ | |
.u4x16[0] = { | |
x.u4x2[ 0].x, x.u4x2[ 1].x, | |
x.u4x2[ 2].x, x.u4x2[ 3].x, | |
x.u4x2[ 4].x, x.u4x2[ 5].x, | |
x.u4x2[ 6].x, x.u4x2[ 7].x, | |
x.u4x2[ 8].x, x.u4x2[ 9].x, | |
x.u4x2[10].x, x.u4x2[11].x, | |
x.u4x2[12].x, x.u4x2[13].x, | |
x.u4x2[14].x, x.u4x2[15].x, | |
}, | |
.u4x16[1] = { | |
x.u4x2[ 0].y, x.u4x2[ 1].y, | |
x.u4x2[ 2].y, x.u4x2[ 3].y, | |
x.u4x2[ 4].y, x.u4x2[ 5].y, | |
x.u4x2[ 6].y, x.u4x2[ 7].y, | |
x.u4x2[ 8].y, x.u4x2[ 9].y, | |
x.u4x2[10].y, x.u4x2[11].y, | |
x.u4x2[12].y, x.u4x2[13].y, | |
x.u4x2[14].y, x.u4x2[15].y, | |
} | |
}; | |
printf("%016" PRIx64 " %016" PRIx64 "\n", y.u64[1], y.u64[0]); | |
if (!y.u64[0]) | |
break; | |
} | |
tc.cyc = seq_expand((uint16_t)tc.seq); | |
tc.len = 16U; | |
tc.map = 0xffffU; | |
return tc; | |
} | |
static void | |
b24_cfg_trace (struct b24_cfg const *cfg) | |
{ | |
struct b24_syntax syn = (struct b24_syntax[]){ | |
{'{', '}', ',', ' '}, | |
{'[', ']', ',', ' '}, // json or python | |
{'{', '}', ',', 0x0}, // no space | |
{'[', ']', ',', 0x0}, // js/py, no space | |
}[b24_has_option(cfg, b24_opt_json) | | |
b24_has_option(cfg, b24_opt_python) | | |
(b24_has_option(cfg, b24_opt_no_space) << 1U)]; | |
bool expand = b24_has_option(cfg, b24_opt_expand); | |
bool once = b24_has_option(cfg, b24_opt_one_path); | |
bool all = !once && !expand; | |
bool please = false; | |
for (unsigned i = 0; i < cfg->n_seq;) { | |
struct trace tc = b24_trace_init(cfg, i); | |
unsigned cycles = 0; | |
for (;; ++cycles) { | |
struct trace t; | |
if (once) { | |
t = b24_trace_from(tc, cfg->offset); | |
} else if (expand) { | |
t = b24_expand(tc); | |
} else { | |
t = b24_trace_next(tc); | |
if (!t.map) | |
break; | |
tc.cyc |= t.cyc << (tc.len << 2U); | |
tc.len += t.len; | |
tc.map |= t.map; | |
} | |
char buf[160]; | |
char *p = b24_cfg_trace_print(cfg, buf, t, | |
cycles, syn); | |
if (p) { | |
if (please) { | |
(void)putchar(syn.sep); | |
(void)putchar('\n'); | |
} | |
(void)fputs(buf, stdout); | |
please = true; | |
} | |
if (!all) | |
break; | |
} | |
if (++i == cfg->n_seq && please) | |
(void)putchar('\n'); | |
} | |
} | |
static void | |
b24_help (void) | |
{ | |
(void)printf( | |
"Usage: b24 [OPTIONS]... [<0..65535>]...\n" | |
"Options:\n" | |
" -%c Print this help text and exit\n" | |
" -%c <state> Trace a neuron's state evolution\n" | |
" -%c <0..15> Trace a single path at offset\n" | |
" -%c Expand subsequences in place\n" | |
"\n" | |
"Input options:\n" | |
" -%c <0..15> Add distinct B(2, 4) as input\n" | |
" -%c <0..15> Apply rotation before tracing\n" | |
"\n" | |
"Output options:\n" | |
" -%c Only cycles, no entry paths\n" | |
" -%c Always find all entry paths\n" | |
"\n" | |
"Formatting options:\n" | |
" -%c Output in Graphviz format\n" | |
" -%c Output in JSON format\n" | |
" -%c Output in Python format\n" | |
" -%c Signed decimal path steps\n" | |
" -%c Hexadecimal path steps\n" | |
" -%c Print every sequence as one big hex number\n" | |
" -%c Don't align output columns\n" | |
" -%c Don't zero-pad hexadecimals\n" | |
" -%c Omit non-separator whitespace\n", | |
b24_opt_char(help), b24_opt_char(neuron), | |
b24_opt_char(one_path), b24_opt_char(expand), | |
b24_opt_char(debruijn), b24_opt_char(rotation), | |
b24_opt_char(cycles), b24_opt_char(entries), | |
b24_opt_char(graphviz), b24_opt_char(json), | |
b24_opt_char(python), b24_opt_char(sig_path), | |
b24_opt_char(hex_path), b24_opt_char(big_hex), | |
b24_opt_char(no_align), b24_opt_char(no_zeros), | |
b24_opt_char(no_space) | |
); | |
} | |
/**
 * @brief Small by-value string: eight bytes viewable as characters or
 *        as one 64-bit word.
 */
struct str7 {
	union {
		char     d[8U];  /* character view */
		uint64_t u;      /* whole-word view */
	};
};
const_inline | |
static struct str7 | |
b24_opt_char_str (char ch) | |
{ | |
struct str7 str = {{{'"', '-', ch}}}; | |
str.d[3U - !ch] = '"'; | |
return str; | |
} | |
/**
 * @brief Free a configuration and clear the caller's pointer.
 *
 * @param cfg Address of the owning pointer; may be NULL, and may point
 *            to a NULL pointer.
 */
static void
b24_cfg_destroy (struct b24_cfg **cfg)
{
	if (!cfg)
		return;

	/* free(NULL) is a no-op, so no separate check is needed. */
	free(*cfg);
	*cfg = NULL;
}
/**
 * @brief Release the configuration, print the usage text and exit.
 *
 * @param cfg Address of the owning configuration pointer.
 * @param ret Process exit status.
 */
_Noreturn static void
b24_helpful_exit (struct b24_cfg **cfg,
                  int              ret)
{
	b24_cfg_destroy(cfg);
	b24_help();
	exit(ret);
}
static struct b24_cfg * | |
b24_cfg_create (size_t n_seq) | |
{ | |
struct b24_cfg *cfg = calloc(1, offsetof(struct b24_cfg, seq) | |
+ n_seq * sizeof cfg->seq[0]); | |
if (!cfg) | |
perror("calloc"); | |
return cfg; | |
} | |
/**
 * @brief Report an internal "cannot happen" error and exit with failure.
 *
 * The message is the caller's text followed by a fixed blame sentence.
 * The sentence's leading '.' is skipped when `what` already ends in one,
 * and its leading ".\n" is skipped when `what` is empty or NULL.
 *
 * @param cfg  Address of the owning configuration pointer, freed here.
 * @param what Optional description; may be NULL or empty.
 * @param func Name of the reporting function.
 * @param line Source line of the report.
 */
_Noreturn static void
b24_cosmic_ray (struct b24_cfg **cfg,
                char const *what,
                char const *func,
                int line)
{
	static const char blame[] =
		".\nFailure caused by the impact "
		"of a high-energy cosmic particle or a high-density "
		"program author.";
	const size_t n = what ? strlen(what) : 0;
	size_t skip;

	if (n) {
		skip = (what[n-1U] == '.');
	} else {
		what = "";
		skip = 2U;
	}

	(void)fprintf(stderr, "[\033[31mERROR\033[m] %s:%d: %s%s\n",
	              func, line, what, &blame[skip]);
	b24_cfg_destroy(cfg);
	exit(EXIT_FAILURE);
}
static int | |
b24_options_incompatible (struct b24_cfg *cfg, | |
enum b24_opt opt, | |
b24_cfg_flags bad, | |
const char sep) | |
{ | |
if (!b24_has_option(cfg, opt)) | |
return 0; | |
bad &= cfg->have_opt; | |
if (!bad) | |
return 0; | |
cfg->have_opt &= ~((typeof(cfg->have_opt))1 << opt); | |
int n = popcnt(bad); | |
if (n >= (int)array_size(b24_opt_to_char)) { | |
b24_cosmic_ray(&cfg, "Option flag outside expected range", | |
__func__, __LINE__); | |
} | |
char buf[64] = ""; | |
char *dst = &buf[0]; | |
b24_cfg_flags bit = 1U; | |
for (int prev = 0, i = 0; i < n;) { | |
int o = __builtin_ctzl(bad); | |
if (o >= (int)array_size(b24_opt_to_char)) | |
break; | |
bit <<= o - prev; | |
bad &= ~bit; | |
prev = o; | |
if (++i > 1) { | |
if (n > 2) | |
*dst++ = sep; | |
*dst++ = ' '; | |
if (i == n) { | |
*dst++ = 'o'; | |
*dst++ = 'r'; | |
*dst++ = ' '; | |
} | |
} | |
struct str7 s = b24_opt_char_str(b24_opt_to_char[o]); | |
for (char *src = s.d; *src; ++src) { *dst++ = *src; } | |
} | |
*dst = '\0'; | |
(void)fprintf(stderr, "Option %s cannot be combined with %s\n", | |
b24_opt_char_str(b24_opt_to_char[opt]).d, buf); | |
return 1; | |
} | |
static void | |
b24_option_assert_once (struct b24_cfg *cfg, | |
enum b24_opt opt) | |
{ | |
if (b24_has_option(cfg, opt)) { | |
(void)fprintf(stderr, "Option %s specified twice\n", | |
b24_opt_char_str(b24_opt_to_char[opt]).d); | |
b24_helpful_exit(&cfg, EXIT_FAILURE); | |
} | |
} | |
/**
 * @brief Parse a whole string as a 64-bit integer.
 *
 * The text is first parsed as a signed value with base auto-detection
 * (base 0) and converted to unsigned, so negative input wraps around.
 * If that overflows on the positive side it is parsed again as unsigned
 * to cover the upper half of the 64-bit range.
 *
 * @param arg Text to parse; it is only read, never modified.
 * @param err Receives 0 on success, EFAULT for a NULL `arg`, EINVAL for
 *            an empty string or trailing characters, or the errno value
 *            from the conversion (for example ERANGE).
 * @return The parsed value; 0 when `arg` is NULL, empty, or the signed
 *         parse failed without overflowing on the positive side.
 */
static inline uint64_t
b24_get_u64_arg (const char *arg,
                 int        *err)
{
	if (!arg) {
		*err = EFAULT;
		return 0;
	}

	if (!*arg) {
		*err = EINVAL;
		return 0;
	}

	errno = 0;
	uint64_t u64 = 0U;
	char *endptr = NULL;
	/* int64_t is long on some targets and long long on others. */
	int64_t i64 = _Generic(i64, long: strtol,
	                       long long: strtoll)(arg, &endptr, 0);
	int e = errno;

	if (!e) {
		u64 = (uint64_t)i64;
		if (*endptr)
			e = EINVAL;
	} else if (e == ERANGE && i64 == _Generic(i64, long: LONG_MAX,
	                                          long long: LLONG_MAX)) {
		/* Too large for signed; retry as unsigned. */
		errno = 0;
		endptr = NULL;
		u64 = _Generic(u64, unsigned long: strtoul,
		               unsigned long long: strtoull)(arg, &endptr, 0);
		e = errno;
		if (!e && *endptr)
			e = EINVAL;
	}

	*err = e;
	return u64;
}
/**
 * @brief Parse an integer argument and clamp it to `max`.
 *
 * @param arg Text to parse.
 * @param err Receives the parse error, or ERANGE when parsing succeeded
 *            but the value exceeds `max`, or 0.
 * @param max Largest accepted value.
 * @return The parsed value, limited to `max`.
 */
static uint64_t
b24_get_int_arg (char     *arg,
                 int      *err,
                 uint64_t  max)
{
	int status = 0;
	uint64_t value = b24_get_u64_arg(arg, &status);

	if (value > max) {
		value = max;
		/* Keep an earlier parse error if there was one. */
		status = status ? status : ERANGE;
	}

	*err = status;
	return value;
}
static struct b24_cfg * | |
b24_parse_args (int argc, | |
char **argv) | |
{ | |
struct b24_cfg *cfg = b24_cfg_create(argc > 1 ? argc - 1 : 0); | |
if (!cfg) | |
return NULL; | |
enum b24_opt expect = 0; | |
char c_ = '\0'; | |
for (int i = 1; i < argc; i++) { | |
char *a = argv[i]; | |
if (expect) { | |
if (!*a) | |
goto missing_arg; | |
parse_arg: do{}while(0); | |
pragma_msvc(warning(push)) | |
pragma_msvc(warning(disable: 4061)) | |
uint64_t max = 15U; | |
switch (expect) { | |
case b24_opt_neuron: | |
max = UINT64_MAX; | |
break; | |
case b24_opt_debruijn: | |
case b24_opt_one_path: | |
case b24_opt_rotation: | |
break; | |
default: | |
b24_cosmic_ray(&cfg, "", __func__, __LINE__); | |
} | |
int e = 0; | |
uint64_t n = b24_get_int_arg(a, &e, max); | |
if (e) { | |
(void)fprintf(stderr, "Option %s " | |
"parse error: %s\n", | |
b24_opt_char_str(c_).d, | |
strerror(e)); | |
b24_helpful_exit(&cfg, EXIT_FAILURE); | |
} | |
switch (expect) { | |
case b24_opt_debruijn: | |
cfg->seq[cfg->n_seq++] = b24[n]; | |
break; | |
case b24_opt_neuron: | |
cfg->neuron = n; | |
break; | |
case b24_opt_one_path: | |
cfg->offset = (b24_cfg_offset)n; | |
break; | |
case b24_opt_rotation: | |
cfg->rotation = (uint16_t)n; | |
default: | |
break; | |
} | |
pragma_msvc(warning(pop)) | |
b24_option_add(cfg, expect); | |
expect = 0; | |
continue; | |
} | |
if (*a == '-') { | |
c_ = *++a; | |
concatenated_option: | |
unsigned ch_opt = b24_char_to_opt[(unsigned char)c_]; | |
if (!ch_opt) { | |
(void)fprintf(stderr, "Option %s is unknown" | |
"\n", b24_opt_char_str(c_).d); | |
b24_helpful_exit(&cfg, EXIT_FAILURE); | |
} | |
enum b24_opt opt = ch_opt & ~(NEEDS_ARG | ONLY_ONCE); | |
if (ch_opt & ONLY_ONCE) | |
b24_option_assert_once(cfg, opt); | |
if (ch_opt & NEEDS_ARG) { | |
expect = opt; | |
if (*++a) | |
goto parse_arg; | |
} else { | |
b24_option_add(cfg, opt); | |
if (c_ && (c_ = *++a)) | |
goto concatenated_option; | |
} | |
continue; | |
} | |
int e = 0; | |
uint64_t n = b24_get_int_arg(a, &e, UINT16_MAX); | |
if (e) { | |
(void)fprintf(stderr, "Bad sequence: %s\n", | |
strerror(e)); | |
b24_cfg_destroy(&cfg); | |
return NULL; | |
} | |
cfg->seq[cfg->n_seq++] = (uint16_t)n; | |
} | |
int yikes = b24_options_incompatible(cfg, b24_opt_big_hex , | |
1U << b24_opt_hex_path | | |
1U << b24_opt_sig_path , ',') | |
+ b24_options_incompatible(cfg, b24_opt_cycles , | |
1U << b24_opt_entries | | |
1U << b24_opt_neuron | | |
1U << b24_opt_expand , ',') | |
+ b24_options_incompatible(cfg, b24_opt_entries , | |
1U << b24_opt_cycles | | |
1U << b24_opt_neuron | | |
1U << b24_opt_expand , ',') | |
+ b24_options_incompatible(cfg, b24_opt_graphviz , | |
1U << b24_opt_json | | |
1U << b24_opt_python , ',') | |
+ b24_options_incompatible(cfg, b24_opt_json , | |
1U << b24_opt_python | | |
1U << b24_opt_graphviz , ',') | |
+ b24_options_incompatible(cfg, b24_opt_neuron , | |
1U << b24_opt_one_path | | |
1U << b24_opt_expand , ',') | |
+ b24_options_incompatible(cfg, b24_opt_one_path , | |
1U << b24_opt_expand , ',') | |
+ b24_options_incompatible(cfg, b24_opt_python , | |
1U << b24_opt_json | | |
1U << b24_opt_graphviz , ',') | |
+ b24_options_incompatible(cfg, b24_opt_expand , | |
1U << b24_opt_cycles | | |
1U << b24_opt_entries | | |
1U << b24_opt_one_path , ',') | |
+ b24_options_incompatible(cfg, b24_opt_sig_path , | |
1U << b24_opt_hex_path , ','); | |
if (yikes) | |
b24_helpful_exit(&cfg, EXIT_FAILURE); | |
if (expect) | |
goto missing_arg; | |
if (b24_has_option(cfg, b24_opt_help)) | |
b24_helpful_exit(&cfg, EXIT_SUCCESS); | |
if (!cfg->n_seq) { | |
(void)fputs("No sequence(s) specified\n", stderr); | |
b24_helpful_exit(&cfg, EXIT_FAILURE); | |
} | |
return cfg; | |
missing_arg: | |
(void)fprintf(stderr, "Option %s expects an argument\n", | |
b24_opt_char_str(c_).d); | |
b24_helpful_exit(&cfg, EXIT_FAILURE); | |
} | |
static char * | |
b24_render_gv_struct (char *dst, | |
size_t siz, | |
uint16_t seq, | |
unsigned _BitInt(4) id) | |
{ | |
if (siz < | |
sizeof "S0 [label=\"<f>0|<e>0|<d>0|<c>0|" | |
"<b>0|<a>0|<9>0|<8>0|" | |
"<7>0|<6>0|<5>0|<4>0|" | |
"<3>0|<2>0|<1>0|<0>0\"];\n") | |
return NULL; | |
memcpy(dst, "S0 [label=\"", sizeof "S0 [label=\"" - 1U); | |
*++dst = hexdig(id); | |
dst += sizeof " [label=\""; | |
for (unsigned off = 15U;; --off) { | |
*dst++ = '<'; | |
*dst++ = hexdig(off); | |
*dst++ = '>'; | |
*dst++ = (char)((unsigned char)'0' + (seq >> off & 1U)); | |
if (!off) | |
break; | |
*dst++ = '|'; | |
} | |
*dst++ = '"'; | |
*dst++ = ']'; | |
*dst++ = ';'; | |
*dst++ = '\n'; | |
return dst; | |
} | |
/**
 * @brief Program entry point: parse the arguments, then trace.
 *
 * @param c Argument count.
 * @param v Argument vector.
 * @return EXIT_SUCCESS, or EXIT_FAILURE when argument parsing yields no
 *         configuration.
 */
int
main (int    c,
      char **v)
{
	struct b24_cfg *cfg = b24_parse_args(c, v);

	if (cfg == NULL)
		return EXIT_FAILURE;

	b24_cfg_trace(cfg);
	b24_cfg_destroy(&cfg);

	return EXIT_SUCCESS;
}
pragma_msvc(warning(pop)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <sys/random.h> | |
#include "cortex.h" | |
struct cortex * | |
cortex_create (bool randomize) | |
{ | |
vec128 vec[37] = {0}; | |
vec128 *state = NULL; | |
struct cortex *ctx = malloc(sizeof *ctx); | |
if (!ctx) { | |
perror("malloc"); | |
return NULL; | |
} | |
if (randomize) { | |
ssize_t e = getrandom(&vec[0], sizeof vec, GRND_NONBLOCK); | |
if (e == (ssize_t)sizeof vec) | |
state = &vec[0]; | |
else if (e < 0) | |
perror("getrandom"); | |
} | |
*ctx = cortex(state, state ? array_size(vec) : 0); | |
return ctx; | |
} | |
/**
 * @brief Free a cortex and clear the caller's pointer.
 *
 * @param ctx Address of the owning pointer; may be NULL, and may point
 *            to a NULL pointer.
 */
void
cortex_destroy (struct cortex **ctx)
{
	if (ctx == NULL)
		return;

	free(*ctx);
	*ctx = NULL;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** @file cortex.h
 */
#ifndef B24_CORTEX_H_ | |
#define B24_CORTEX_H_ | |
#include "neuron.h" | |
/** | |
* @brief Cortex structure. | |
* | |
* This is the structure that holds the RNA for all the neurons in the | |
* network. | |
* | |
* More like a resonant bucket than a brain. | |
*/ | |
struct cortex { | |
struct rna rna[256]; | |
}; | |
/** Allocate a cortex, optionally with a randomized initial state. */
extern struct cortex *
cortex_create (bool randomize);

/** Free a cortex and set the caller's pointer to NULL. */
extern void
cortex_destroy (struct cortex **ctx);
static pure_inline uint8_t | |
cortex_read_channel (struct cortex *ctx, | |
uint16_t chan, | |
bool tock) | |
{ | |
return ctx->rna[chan >> 4U].tt[tock].u8[chan & 15U]; | |
} | |
static pure_inline struct cortex | |
cortex (vec128 const *ini, | |
size_t len) | |
{ | |
if (!len) | |
ini = NULL; | |
const uint16_t de_bruijn_seq_2_4[] = { | |
0x9afU, 0x9ebU, 0xa6fU, 0xa7bU, | |
0xb3dU, 0xb4fU, 0xbcdU, 0xbd3U, | |
0xcbdU, 0xd2fU, 0xd79U, 0xde5U, | |
0xf2dU, 0xf4bU, 0xf59U, 0xf65U, | |
}; | |
struct cortex ctx = {0}; | |
for (uint32_t i = 0, y = 0; y < 16U; ++y) { | |
uint32_t seq = de_bruijn_seq_2_4[y]; | |
seq |= seq << 16U; | |
for (uint32_t x = i + 16U; i < x; ++i) { | |
ctx.rna[i] = rna( | |
(uint16_t)seq, | |
ini ? ini[i % len] : (vec128){0} | |
); | |
seq >>= 1U; | |
} | |
} | |
return ctx; | |
} | |
static force_inline void | |
cortex_write_rna (struct cortex *ctx, | |
vec128 data, | |
uint16_t dest, | |
bool tock) | |
{ | |
ctx->rna[dest].tt[tock] = data; | |
} | |
/*
 * Bit-interleave helpers built on the BMI2 deposit/extract intrinsics.
 *
 * M(T, W, odd)  - W-bit mask of alternating bits in a uintT_t: the even
 *                 bit positions when `odd` is 0, the odd ones otherwise.
 *                 The shift assumes a 32-bit word for W <= 32 and a
 *                 64-bit word above that.
 * fwd(T, W, v)  - deposit the low half of `v` into the even bits and the
 *                 high half into the odd bits.
 * rev(T, W, v)  - the inverse: gather the even bits into the low half
 *                 and the odd bits into the high half.
 *
 * W is parenthesized everywhere so that expression arguments such as
 * `4 + 8` expand correctly. `v` is evaluated twice.
 */
#define M(T, W, odd) (((uint##T##_t)-1) / 3U << !!(odd) >> ((8U << (((W) > 32U) + 2U)) - (W)))
#define fwd(T, W, v) (_pdep_u##T((v), M(T, W, 0)) | _pdep_u##T((v) >> (W) / 2U, M(T, W, 1)))
#define rev(T, W, v) (_pext_u##T((v), M(T, W, 0)) | _pext_u##T((v), M(T, W, 1)) << (W) / 2U)
static force_inline void | |
cortex_tick (struct cortex *ctx, | |
bool tock) | |
{ | |
for (uint16_t i = 0; i < (uint16_t)array_size(ctx->rna); ++i) { | |
// input connections | |
vec128 in = {0}; | |
uint16_t dst = i << 4U; | |
for (uint32_t j = 0; j < array_size(in.u8); ++j, ++dst) { | |
uint32_t src = fwd(32, 12, dst); | |
in.u8[j] = cortex_read_channel(ctx, src, tock); | |
} | |
struct neuron nrn = neuron_from_rna(ctx->rna[i], tock); | |
cortex_write_rna(ctx, neuron_tock(&nrn, in), i, !tock); | |
} | |
} | |
#endif /* B24_CORTEX_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** @file neuron.h
 */
#ifndef B24_NEURON_H_ | |
#define B24_NEURON_H_ | |
#include <limits.h> | |
#include "seq.h" | |
#include "vec.h" | |
/** | |
* @brief Runtime neuron structure. | |
* | |
* This is what is actually used with the SIMD instructions. | |
*/ | |
struct neuron { | |
u8x16 paths; | |
u8x16 state; | |
}; | |
/** | |
* @brief Neuron state storage unit. | |
* | |
* This is the storage unit for the neuron state. Instead of one state | |
* vector it has two; the network uses a "tick-tock" execution pattern | |
* to prevent clobbering the state before it is read. | |
*/ | |
struct rna { | |
vec128 paths; | |
vec128 tt[2]; | |
}; | |
static pure_inline struct neuron | |
neuron_from_vec128 (vec128 paths, | |
vec128 state) | |
{ | |
return (struct neuron){ | |
.paths = u8x16_from_vec128(paths), | |
.state = u8x16_from_vec128(state) | |
}; | |
} | |
static pure_inline struct rna | |
rna_from_neuron (struct neuron *nrn) | |
{ | |
const vec128 state = vec128_from_u8x16(nrn->state); | |
return (struct rna){ | |
.paths = vec128_from_u8x16(nrn->paths), | |
.tt = {state, state} | |
}; | |
} | |
static pure_inline struct neuron | |
neuron_from_rna (struct rna rna, | |
bool tock) | |
{ | |
return neuron_from_vec128(rna.paths, rna.tt[tock]); | |
} | |
static pure_inline struct neuron | |
neuron (uint16_t seq, | |
vec128 state) | |
{ | |
return (struct neuron){ | |
.paths = seq_expand_u8x16(seq), | |
.state = u8x16_from_vec128(state) | |
}; | |
} | |
static pure_inline struct rna | |
rna (uint16_t seq, | |
vec128 state) | |
{ | |
return (struct rna){ | |
.paths = seq_expand_vec128(seq), | |
.tt = {state, state} | |
}; | |
} | |
static force_inline u8x16 | |
neuron_tick (struct neuron *nrn, | |
u8x16 in) | |
{ | |
u8x16 state = nrn->state; | |
nrn->state = age_and_mutate( | |
merge_input( | |
state, | |
in | |
), | |
nrn->paths | |
); | |
return state; | |
} | |
static force_inline vec128 | |
neuron_tock (struct neuron *nrn, | |
vec128 in) | |
{ | |
nrn->state = age_and_mutate( | |
merge_input( | |
nrn->state, | |
u8x16_from_vec128(in) | |
), | |
nrn->paths | |
); | |
return vec128_from_u8x16(nrn->state); | |
} | |
#endif /* B24_NEURON_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** @file popcnt.h
 */
#ifndef B24_POPCNT_H_ | |
#define B24_POPCNT_H_ | |
#include <limits.h> | |
#include <stdint.h> | |
#include "util.h" | |
#undef define_popcnt | |
#undef popcnt16_impl | |
#undef popcnt32_impl | |
#undef popcnt64_impl | |
#undef popcnt_compat | |
/*
 * Select a population-count backend for each width.
 *
 * Each popcntNN_impl macro expands to `return <callable>`, so that
 * `popcntNN_impl(val);` forms a complete function body. Widths left
 * undefined here fall back to the portable implementation.
 */
#ifdef _MSC_VER
# define popcnt16_impl return __popcnt16
# pragma intrinsic(__popcnt16)
# define popcnt32_impl return __popcnt
# pragma intrinsic(__popcnt)
# ifdef _M_IX86
/* 32-bit x86 builds: count the two 32-bit halves separately. The
 * argument is parenthesized so expression arguments expand correctly. */
#  undef popcnt32x2
#  define popcnt64_impl return popcnt32x2
#  define popcnt32x2(x) \
	(__popcnt((uint32_t)((x) >> 32U)) + \
	 __popcnt((uint32_t)((x) & ~(uint32_t)0)))
# else
#  define popcnt64_impl return __popcnt64
#  pragma intrinsic(__popcnt64)
# endif
# define popcnt_pre_pragma _Pragma("warning(push)") \
                           _Pragma("warning(disable: 4116)")
# define popcnt_post_pragma _Pragma("warning(pop)")
#else
# if __has_builtin(__builtin_popcount) && (UINT_MAX == UINT32_MAX)
#  define popcnt16_impl return (uint16_t)__builtin_popcount
#  define popcnt32_impl return (uint32_t)__builtin_popcount
# endif
# if !defined(popcnt32_impl) && \
     __has_builtin(__builtin_popcountl) && (ULONG_MAX == UINT32_MAX)
#  define popcnt16_impl return (uint16_t)__builtin_popcountl
#  define popcnt32_impl return (uint32_t)__builtin_popcountl
# endif
# if __has_builtin(__builtin_popcountl) && (ULONG_MAX == UINT64_MAX)
#  define popcnt64_impl return (uint64_t)__builtin_popcountl
# endif
# if !defined(popcnt64_impl) && \
     __has_builtin(__builtin_popcountll) && (ULLONG_MAX == UINT64_MAX)
#  define popcnt64_impl return (uint64_t)__builtin_popcountll
# endif
# if __has_builtin(__builtin_popcountg)
#  if !defined(popcnt16_impl)
#   define popcnt16_impl return (uint16_t)__builtin_popcountg
#  endif
#  if !defined(popcnt32_impl)
#   define popcnt32_impl return (uint32_t)__builtin_popcountg
#  endif
#  if !defined(popcnt64_impl)
#   define popcnt64_impl return (uint64_t)__builtin_popcountg
#  endif
# endif
#endif
/*
 * define_popcnt(bits, impl): emit `static uint<bits>_t popcnt<bits>(val)`
 * whose body is `impl(val);`. The trailing _Static_assert absorbs the
 * semicolon written after the macro invocation.
 */
#define define_popcnt(bits, ...) \
	const_inline static uint##bits##_t \
	popcnt##bits(uint##bits##_t val) \
	{ \
		__VA_ARGS__(val); \
	} \
	_Static_assert(bits>0, #bits)

/*
 * popcnt_compat(x): portable bit-twiddling population count, expanding
 * to a declaration plus statements that end in `return <count>`.
 *
 * The accumulator is uint64_t when sizeof(x) > 4 and uint32_t otherwise.
 * Adjacent bit counts are summed pairwise into 2-bit, 4-bit and 8-bit
 * fields; the multiplication then adds all byte fields into the top
 * byte, which the final shift moves down to bit 0.
 */
#define popcnt_compat(x) \
	typeof(_Generic((char(*)[2 - (sizeof(x) > 4U)])0, \
	                char(*)[1]: (uint64_t)0, \
	                char(*)[2]: (uint32_t)0)) acc = (x); \
	acc = acc - (((typeof(acc))-1 / 3) & (acc >> 1U)); \
	acc = (((typeof(acc))-1 / 5) & (acc >> 2U)) \
	    + (((typeof(acc))-1 / 5) & acc); \
	acc = ((typeof(acc))-1 / 17) & ((acc >> 4U) + acc); \
	return (acc * ((typeof(acc))-1 / 255)) \
	       >> ((sizeof acc - 1U) * CHAR_BIT)
/* The pragma brackets expand to nothing unless defined earlier. */
#if !defined(popcnt_pre_pragma)
# define popcnt_pre_pragma
#endif
#if !defined(popcnt_post_pragma)
# define popcnt_post_pragma
#endif

/* Widths without a native primitive use the portable implementation. */
#if !defined(popcnt16_impl)
# define popcnt16_impl popcnt_compat
#endif
#if !defined(popcnt32_impl)
# define popcnt32_impl popcnt_compat
#endif
#if !defined(popcnt64_impl)
# define popcnt64_impl popcnt_compat
#endif

/* Emit popcnt16(), popcnt32() and popcnt64(). */
define_popcnt(16, popcnt16_impl);
define_popcnt(32, popcnt32_impl);
define_popcnt(64, popcnt64_impl);

/* The helper macros are no longer needed once the functions exist. */
#undef popcnt_compat
#undef popcnt64_impl
#undef popcnt32_impl
#undef popcnt16_impl
#undef define_popcnt
#ifdef _MSC_VER
# ifdef _M_IX86
#  undef popcnt32x2
# endif
#endif
/*
 * popcnt_char_t_: the type used for the plain-char association below.
 * It is `char` when _Generic treats char as distinct from signed char and
 * unsigned char; if (char)0 matches one of those instead, it becomes a
 * throwaway anonymous struct type (presumably so the plain-char entry
 * cannot clash with the signed/unsigned char entries).
 */
#define popcnt_char_t_ typeof(_Generic((char)0, \
		signed char:   (struct{int i;}){0}, \
		unsigned char: (struct{int i;}){0}, \
		default:       (char)0))

/* popcnt_fn_(x): choose popcnt16/32/64 from the type of x. long and
 * unsigned long are dispatched on their size. */
#define popcnt_fn_(x) _Generic((x), \
		popcnt_char_t_:     popcnt16, \
		signed char:        popcnt16, \
		unsigned char:      popcnt16, \
		short:              popcnt16, \
		unsigned short:     popcnt16, \
		int:                popcnt32, \
		unsigned:           popcnt32, \
		long: _Generic( \
			&(int[sizeof 1L]){0}, \
			int(*)[sizeof(int32_t)]:  popcnt32, \
			int(*)[sizeof(int64_t)]:  popcnt64), \
		unsigned long: _Generic( \
			&(int[sizeof 1UL]){0}, \
			int(*)[sizeof(uint32_t)]: popcnt32, \
			int(*)[sizeof(uint64_t)]: popcnt64), \
		long long:          popcnt64, \
		unsigned long long: popcnt64)

/* popcnt_arg_(x): cast signed (and plain char) arguments to the unsigned
 * type of the same rank; every other type is passed through unchanged. */
#define popcnt_arg_(x) _Generic((x), \
		popcnt_char_t_: (unsigned char)(x), \
		signed char:    (unsigned char)(x), \
		short:          (unsigned short)(x), \
		int:            (unsigned)(x), \
		long:           (unsigned long)(x), \
		long long:      (unsigned long long)(x), \
		default:        (x))

/* popcnt(x): type-generic population count for the integer types listed
 * above; any other argument type has no matching association. */
#define popcnt(x) \
	popcnt_pre_pragma popcnt_fn_(x)(popcnt_arg_(x)) popcnt_post_pragma
#endif /* B24_POPCNT_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** @file seq.h | |
*/ | |
#ifndef B24_SEQ_H_ | |
#define B24_SEQ_H_ | |
#include "vec.h" | |
#ifdef __aarch64__ | |
/** | |
* @brief Age and mutate the state. | |
* | |
* Decrement each non-zero strength by one and apply the given mutation. | |
* | |
* @param state The current state. | |
* @param paths The mutation descriptor. | |
* @return The resulting state. | |
*/ | |
static const_inline u8x16 | |
age_and_mutate (u8x16 state, | |
u8x16 paths) | |
{ | |
return vqtbl1q_u8( | |
vsubq_u8( | |
state, | |
vbslq_u8( | |
vtstq_u8( | |
state, | |
vdupq_n_u8(0xf0) | |
), | |
vdupq_n_u8(0x10), | |
state | |
) | |
), | |
paths | |
); | |
} | |
/** | |
* @brief Merge input into the current state, preferring the input. | |
* | |
* For each channel, if the input channel has a non-zero strength, | |
* use that channel in the output, otherwise use the corresponding | |
* channel from the current state. | |
* | |
* @param state The current state. | |
* @param input The input to merge into the state. | |
* @return The merged state. | |
*/ | |
static const_inline u8x16 | |
merge_input (u8x16 state, | |
u8x16 input) | |
{ | |
return vbslq_u8( | |
vtstq_u8( | |
input, | |
vdupq_n_u8(0xf0) | |
), | |
input, | |
state | |
); | |
} | |
static force_inline uint64_t | |
seq_expand (uint16_t seq) | |
{ | |
uint16_t rot = seq << 12U | seq >> 4U; | |
uint64_t ret = 0; | |
uint64x2_t x = vreinterpretq_u64_u8( | |
vqtbl1q_u8( | |
vreinterpretq_u8_u16( | |
vld1q_lane_u16( | |
&rot, | |
vld1q_lane_u16( | |
&seq, | |
vdupq_n_u16(0), | |
0 | |
), | |
1 | |
) | |
), | |
vld1q_u8(((uint8_t[16]){ | |
0x00, 0x00, 0x00, 0x00, | |
0x02, 0x02, 0x02, 0x02, | |
0x01, 0x01, 0x01, 0x01, | |
0x03, 0x03, 0x03, 0x03}) | |
) | |
) | |
); | |
x = vorrq_u64( | |
vandq_u64( | |
x, | |
vdupq_n_u64(UINT64_C(0x003c000f003c000f)) | |
), | |
vshrq_n_u64( | |
vandq_u64( | |
x, | |
vdupq_n_u64(UINT64_C(0x78001e0078001e00)) | |
), | |
5 | |
) | |
); | |
x = vorrq_u64( | |
vandq_u64( | |
x, | |
vdupq_n_u64(UINT64_C(0x000000ff000000ff)) | |
), | |
vshrq_n_u64( | |
vandq_u64( | |
x, | |
vdupq_n_u64(UINT64_C(0x03fc000003fc0000)) | |
), | |
10 | |
) | |
); | |
vst1q_lane_u64( | |
&ret, | |
vreinterpretq_u64_u8( | |
vqtbl1q_u8( | |
vreinterpretq_u8_u64(x), | |
vld1q_u8(((uint8_t[16]){ | |
0x00, 0x01, 0x04, 0x05, | |
0x08, 0x09, 0x0c, 0x0d, | |
0x80, 0x80, 0x80, 0x80, | |
0x80, 0x80, 0x80, 0x80}) | |
) | |
) | |
), | |
0 | |
); | |
return ret; | |
} | |
static force_inline u8x16 | |
seq_expand_u8x16 (uint16_t seq) | |
{ | |
uint64x2_t x = vreinterpretq_u64_u8( | |
vqtbl1q_u8( | |
vreinterpretq_u8_u16( | |
vld1q_lane_u16( | |
(uint16_t[1]){ | |
seq << 12U | | |
seq >> 4U | |
}, | |
vld1q_lane_u16( | |
&seq, | |
vdupq_n_u16(0), | |
0 | |
), | |
1 | |
) | |
), | |
vld1q_u8(((uint8_t[16]){ | |
0x00, 0x00, 0x00, 0x00, | |
0x02, 0x02, 0x02, 0x02, | |
0x01, 0x01, 0x01, 0x01, | |
0x03, 0x03, 0x03, 0x03}) | |
) | |
) | |
); | |
x = vorrq_u64( | |
vandq_u64( | |
x, | |
vdupq_n_u64(UINT64_C(0x003c000f003c000f)) | |
), | |
vshrq_n_u64( | |
vandq_u64( | |
x, | |
vdupq_n_u64(UINT64_C(0x78001e0078001e00)) | |
), | |
5 | |
) | |
); | |
x = vorrq_u64( | |
vandq_u64( | |
x, | |
vdupq_n_u64(UINT64_C(0x000000ff000000ff)) | |
), | |
vshrq_n_u64( | |
vandq_u64( | |
x, | |
vdupq_n_u64(UINT64_C(0x03fc000003fc0000)) | |
), | |
10 | |
) | |
); | |
x = vreinterpretq_u64_u8( | |
vqtbl1q_u8( | |
vreinterpretq_u8_u64(x), | |
vld1q_u8(((uint8_t[16]){ | |
0x00, 0x80, 0x01, 0x80, | |
0x04, 0x80, 0x05, 0x80, | |
0x08, 0x80, 0x09, 0x80, | |
0x0c, 0x80, 0x0d, 0x80}) | |
) | |
) | |
); | |
uint64x2_t y = vandq_u64( | |
x, | |
vdupq_n_u64(UINT64_C(0x00f000f000f000f0)) | |
); | |
return vreinterpretq_u8_u64( | |
vorrq_u64( | |
veorq_u64(x, y), | |
vshlq_n_u64(y, 4) | |
) | |
); | |
} | |
static force_inline vec128 | |
seq_expand_vec128 (uint16_t seq) | |
{ | |
return vec128_from_u8x16(seq_expand_u8x16(seq)); | |
} | |
#endif /* __aarch64__ */ | |
#if defined(__x86_64__) || defined(_MSC_VER) | |
# define pext_mask 0x783c1e0f783c1e0fULL | |
# define pdep_mask 0x0f0f0f0f0f0f0f0fULL | |
static force_inline uint64_t | |
seq_expand (uint16_t seq) | |
{ | |
u8x16 k = _mm_shuffle_epi8( | |
_mm_set_epi64x( | |
0, (uint32_t)(seq << 12U | seq >> 4U) << 16U | seq | |
), | |
_mm_set_epi64x(0x0303030301010101LL, 0x0202020200000000LL) | |
); | |
return _pext_u64((uint64_t)_mm_extract_epi64(k, 1), pext_mask) << 32U | |
| _pext_u64((uint64_t)_mm_extract_epi64(k, 0), pext_mask); | |
} | |
static force_inline vec128 | |
seq_expand_vec128 (uint16_t seq) | |
{ | |
u8x16 k = _mm_shuffle_epi8( | |
_mm_set_epi64x( | |
0, (uint32_t)(seq << 12U | seq >> 4U) << 16U | seq | |
), | |
_mm_set_epi64x(0x0303030301010101LL, 0x0202020200000000LL) | |
); | |
return (vec128){ | |
.u64[0] = _pdep_u64( | |
_pext_u64( | |
(unsigned long long)_mm_extract_epi64(k, 0), | |
pext_mask | |
), | |
pdep_mask | |
), | |
.u64[1] = _pdep_u64( | |
_pext_u64( | |
(unsigned long long)_mm_extract_epi64(k, 1), | |
pext_mask | |
), | |
pdep_mask | |
) | |
}; | |
} | |
static force_inline u8x16 | |
seq_expand_u8x16 (uint16_t seq) | |
{ | |
return u8x16_from_vec128(seq_expand_vec128(seq)); | |
} | |
# undef pdep_mask | |
# undef pext_mask | |
static const_inline u8x16 | |
age_and_mutate (u8x16 state, | |
u8x16 paths) | |
{ | |
return _mm_shuffle_epi8( | |
_mm_subs_epu8( | |
state, | |
_mm_set_epi64x( | |
0x1010101010101010LL, | |
0x1010101010101010LL | |
) | |
), | |
paths | |
); | |
} | |
static const_inline u8x16 | |
merge_input (u8x16 state, | |
u8x16 input) | |
{ | |
return _mm_blendv_epi8( | |
state, | |
input, | |
/* Mask construction: adding 0x70 (saturated) | |
* sets the top bit iff the value is at least | |
* 0x10, and the result can be used as a mask | |
* in `_mm_blendv_epi8()`. | |
*/ | |
_mm_adds_epu8( | |
input, | |
_mm_set_epi64x( | |
0x7070707070707070LL, | |
0x7070707070707070LL | |
) | |
) | |
); | |
} | |
#endif /* __x86_64__ || _MSC_VER */ | |
#endif /* B24_SEQ_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** @file util.h | |
*/ | |
#ifndef B24_UTIL_H_ | |
#define B24_UTIL_H_ | |
#ifdef _MSC_VER | |
# include <intrin.h> | |
#elif defined(__aarch64__) | |
# include <arm_neon.h> | |
#elif defined(__x86_64__) | |
# include <immintrin.h> | |
#endif | |
/* u8x16: the platform's 128-bit vector type, uint8x16_t on AArch64 and
 * __m128i everywhere else. */
#ifndef __aarch64__
typedef __m128i u8x16;
#else
typedef uint8x16_t u8x16;
#endif
/* Without __has_builtin, every builtin query evaluates to 0. */
#if !defined(__has_builtin)
# define __has_builtin(...) 0
#endif

/* Compiler shims: under _MSC_VER, __attribute__ expands to nothing,
 * typeof maps to __typeof__ and pragma_msvc() emits a _Pragma; elsewhere
 * pragma_msvc() expands to nothing. */
#if defined(_MSC_VER)
# define __attribute__(...)
# define typeof __typeof__
# define pragma_msvc(...) _Pragma(#__VA_ARGS__)
# define force_inline __forceinline
#else
# define pragma_msvc(...)
# define force_inline __attribute__((always_inline)) inline
#endif

/* Function attribute shorthands built on force_inline. */
#define const_function __attribute__((const))
#define pure_inline __attribute__((pure)) force_inline
#define const_inline const_function force_inline

/* Element count of an array object (not valid for pointers). */
#define array_size(a) (sizeof (a) / sizeof (a)[0])
#endif /* B24_UTIL_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** @file vec.h | |
*/ | |
#ifndef B24_VEC_H_ | |
#define B24_VEC_H_ | |
#include <stdint.h> | |
#include "util.h" | |
/** Two 4-bit fields declared on a uint8_t. */
typedef struct vec4x2 {
	uint8_t x : 4;
	uint8_t y : 4;
} vec4x2;
/** Four 4-bit fields (two x/y pairs) declared on a uint16_t. */
typedef struct vec4x4 {
	uint16_t x0 : 4;
	uint16_t y0 : 4;
	uint16_t x1 : 4;
	uint16_t y1 : 4;
} vec4x4;
/** Eight 4-bit fields (four x/y pairs) declared on a uint32_t. */
typedef struct vec4x8 {
	uint32_t x0 : 4;
	uint32_t y0 : 4;
	uint32_t x1 : 4;
	uint32_t y1 : 4;
	uint32_t x2 : 4;
	uint32_t y2 : 4;
	uint32_t x3 : 4;
	uint32_t y3 : 4;
} vec4x8;
/** Sixteen 4-bit fields (eight x/y pairs) declared on a uint64_t. */
typedef struct vec4x16 {
	uint64_t x0 : 4;
	uint64_t y0 : 4;
	uint64_t x1 : 4;
	uint64_t y1 : 4;
	uint64_t x2 : 4;
	uint64_t y2 : 4;
	uint64_t x3 : 4;
	uint64_t y3 : 4;
	uint64_t x4 : 4;
	uint64_t y4 : 4;
	uint64_t x5 : 4;
	uint64_t y5 : 4;
	uint64_t x6 : 4;
	uint64_t y6 : 4;
	uint64_t x7 : 4;
	uint64_t y7 : 4;
} vec4x16;
typedef struct vec128 { | |
union { | |
uint8_t u8[ 16]; | |
vec4x2 u4x2[16]; | |
uint16_t u16[ 8]; | |
vec4x4 u4x4[ 8]; | |
uint32_t u32[ 4]; | |
vec4x8 u4x8[ 4]; | |
uint64_t u64[ 2]; | |
vec4x16 u4x16[2]; | |
}; | |
} vec128; | |
_Static_assert(sizeof(vec4x2 ) == 1U,""); | |
_Static_assert(sizeof(vec4x4 ) == 2U,""); | |
_Static_assert(sizeof(vec4x8 ) == 4U,""); | |
_Static_assert(sizeof(vec4x16) == 8U,""); | |
_Static_assert(sizeof(vec128 ) == 16U,""); | |
#ifdef __aarch64__ | |
static pure_inline u8x16 | |
u8x16_from_vec128 (vec128 v) | |
{ | |
return vld1q_u8(&v.u8[0]); | |
} | |
static pure_inline vec128 | |
vec128_from_u8x16 (u8x16 v) | |
{ | |
vec128 ret; | |
vst1q_u8(&ret.u8[0], v); | |
return ret; | |
} | |
#else | |
static pure_inline u8x16 | |
u8x16_from_vec128 (vec128 v) | |
{ | |
return _mm_set_epi64x( | |
(long long)v.u64[1], | |
(long long)v.u64[0] | |
); | |
} | |
static pure_inline vec128 | |
vec128_from_u8x16 (u8x16 v) | |
{ | |
return (vec128){ | |
.u64[0] = (uint64_t)_mm_extract_epi64(v, 0), | |
.u64[1] = (uint64_t)_mm_extract_epi64(v, 1), | |
}; | |
} | |
#endif | |
#endif /* B24_VEC_H_ */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment