Skip to content

Instantly share code, notes, and snippets.

@imaami
Last active September 15, 2024 11:25
Show Gist options
  • Save imaami/a4bf6ad52fa51360432250f45b53299c to your computer and use it in GitHub Desktop.
Save imaami/a4bf6ad52fa51360432250f45b53299c to your computer and use it in GitHub Desktop.
Expand subsequences
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef __aarch64__
#include <arm_neon.h>
#endif // __aarch64__
#ifdef __x86_64__
#include <immintrin.h>
#endif // __x86_64__
/*
 * Parse `arg` as an unsigned integer that must fit in 16 bits.
 * `*err` is set to 0 on success or to an errno value on failure.
 */
static uint16_t
parse_uint16 (char *arg,
int *err);
/*
 * Expand the 16-bit value `seq` into a 64-bit value.
 * Defined further down, once per supported architecture.
 */
static uint64_t
b24_expand (uint16_t seq);
/**
 * Program entry point: expand every command-line argument.
 *
 * Each argument is parsed with parse_uint16(). A valid value is passed to
 * b24_expand() and the result is printed to stdout as a zero-padded 64-bit
 * hexadecimal number. For an argument that fails to parse, the strerror()
 * text of the error is printed to stderr and processing continues with the
 * next argument.
 *
 * @param argc  Number of entries in argv.
 * @param argv  Argument vector; argv[1] onward are the values to expand.
 * @return EXIT_SUCCESS when every argument was accepted, EXIT_FAILURE when
 *         at least one argument was rejected.
 */
int
main (int argc,
      char **argv)
{
    int status = EXIT_SUCCESS;

    for (int i = 1; i < argc; ++i) {
        int e = 0;
        /* The loop bounds already guarantee 1 <= i < argc, so argv[i]
         * is always a real argument; no NULL substitution is needed. */
        uint16_t seq = parse_uint16(argv[i], &e);

        if (e) {
            fprintf(stderr, "%s\n", strerror(e));
            /* Remember the failure so the exit status reflects it. */
            status = EXIT_FAILURE;
            continue;
        }
        printf("0x%016" PRIx64 "\n", b24_expand(seq));
    }
    return status;
}
#ifdef __aarch64__
/*
 * NEON implementation of b24_expand().
 *
 * Returns a 64-bit value assembled from 4-bit fields taken out of `seq`
 * and out of `seq` rotated right by 4 bits. The mask-and-shift steps below
 * gather those fields without a parallel-bit-extract instruction.
 *
 * NOTE(review): the byte numbering in the comments below assumes the usual
 * little-endian AArch64 lane layout - confirm if big-endian matters.
 */
static uint64_t
b24_expand (uint16_t seq)
{
/* seq rotated right by 4 bits (the result is truncated back to 16 bits). */
uint16_t rot = seq << 12U | seq >> 4U;
uint64_t ret = 0;
/*
 * Start from a zeroed vector with seq in 16-bit lane 0 and rot in lane 1,
 * then use a byte table lookup to replicate source bytes 0, 2, 1, 3 four
 * times each, so that every 32-bit group holds four copies of one byte.
 */
uint64x2_t x = vreinterpretq_u64_u8(
vqtbl1q_u8(
vreinterpretq_u8_u16(
vld1q_lane_u16(
&rot,
vld1q_lane_u16(
&seq,
vdupq_n_u16(0),
0
),
1
)
),
vld1q_u8(((uint8_t[16]){
0x00, 0x00, 0x00, 0x00,
0x02, 0x02, 0x02, 0x02,
0x01, 0x01, 0x01, 0x01,
0x03, 0x03, 0x03, 0x03})
)
)
);
/*
 * Within each 32-bit group: keep the 4-bit fields at bits 0-3 and 18-21
 * in place, and move the fields at bits 9-12 and 27-30 down by 5 so they
 * land at bits 4-7 and 22-25.
 */
x = vorrq_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x003c000f003c000f))
),
vshrq_n_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x78001e0078001e00))
),
5
)
);
/*
 * Within each 32-bit group: keep bits 0-7 and move bits 18-25 down by 10
 * to bits 8-15, leaving 16 packed bits in the low half of the group.
 */
x = vorrq_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x000000ff000000ff))
),
vshrq_n_u64(
vandq_u64(
x,
vdupq_n_u64(UINT64_C(0x03fc000003fc0000))
),
10
)
);
/*
 * Gather the low two bytes of each 32-bit group (bytes 0,1,4,5,8,9,12,13)
 * into the low 8 bytes; the 0x80 indices are out of range and therefore
 * produce zero bytes. Lane 0 of the result is stored into ret.
 */
vst1q_lane_u64(
&ret,
vreinterpretq_u64_u8(
vqtbl1q_u8(
vreinterpretq_u8_u64(x),
vld1q_u8(((uint8_t[16]){
0x00, 0x01, 0x04, 0x05,
0x08, 0x09, 0x0c, 0x0d,
0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80})
)
)
),
0
);
return ret;
}
#endif // __aarch64__
/*
 * b24_expand() for everything except aarch64 (which has its own NEON
 * implementation in this file).
 *
 * The result holds sixteen 4-bit fields: field n (bits 4n .. 4n+3) is the
 * 4-bit window of `seq` that starts at bit n, wrapping around from bit 15
 * back to bit 0.
 *
 * The SIMD variant needs SSSE3 (_mm_shuffle_epi8), SSE4.1
 * (_mm_extract_epi64) and BMI2 (_pext_u64). It is only compiled when the
 * compiler has all three enabled; otherwise a portable scalar version that
 * produces the same value is used, so the file also builds on plain x86_64
 * and on other architectures.
 */
#if defined(__x86_64__) && defined(__SSSE3__) \
 && defined(__SSE4_1__) && defined(__BMI2__)
static uint64_t
b24_expand (uint16_t seq)
{
    /* seq rotated right by 4 bits. */
    const uint16_t rot = (uint16_t)(seq << 12U | seq >> 4U);
    /* Bytes 0-1 hold seq, bytes 2-3 hold rot. */
    const uint32_t packed = (uint32_t)rot << 16U | seq;
    /* Replicate source bytes 0, 2, 1, 3 four times each. */
    const __m128i k = _mm_shuffle_epi8(
        _mm_set_epi64x(0, packed),
        _mm_set_epi64x(UINT64_C(0x0303030301010101),
                       UINT64_C(0x0202020200000000)));
    /* Per 32-bit group: bits 0-3, 9-12, 18-21 and 27-30, i.e. the four
     * overlapping 4-bit windows of the low 7 bits of the replicated byte. */
    const uint64_t pext_mask = UINT64_C(0x783c1e0f783c1e0f);

    return _pext_u64((uint64_t)_mm_extract_epi64(k, 1), pext_mask) << 32U
         | _pext_u64((uint64_t)_mm_extract_epi64(k, 0), pext_mask);
}
#elif !defined(__aarch64__)
static uint64_t
b24_expand (uint16_t seq)
{
    /* Two copies of seq back to back so a window may run past bit 15. */
    const uint32_t cyc = (uint32_t)seq << 16U | seq;
    uint64_t out = 0;

    for (unsigned n = 0; n < 16U; ++n) {
        out |= (uint64_t)((cyc >> n) & 0xfU) << (4U * n);
    }
    return out;
}
#endif // x86_64 SIMD / portable fallback
/**
 * Parse a string as an unsigned integer that fits in 16 bits.
 *
 * The text is converted with strtoul()/strtoull() using base 0, so decimal,
 * octal ("0...") and hexadecimal ("0x...") notations are accepted, as is
 * leading whitespace.
 *
 * @param arg  NUL-terminated string to parse; may be NULL.
 * @param err  Receives 0 on success, otherwise one of:
 *             - EFAULT  arg is NULL
 *             - ENODATA arg is the empty string
 *             - EINVAL  arg contains characters that are not part of the number
 *             - ERANGE  the value does not fit in 16 bits (including any
 *                       non-zero negative value)
 *             or whatever errno value the conversion function reported.
 * @return The low 16 bits of the converted value. Only meaningful when
 *         *err is 0.
 */
static uint16_t
parse_uint16 (char *arg,
              int *err)
{
    uint64_t u64 = 0;
    int e = !arg ? EFAULT : !*arg ? ENODATA : 0;

    if (!e) {
        char *end = arg;

        errno = 0;
        /* Pick the conversion function whose return type is uint64_t. */
        u64 = _Generic(
            u64, unsigned long: strtoul,
                 unsigned long long: strtoull)(arg, &end, 0);
        e = errno;

        if (!e && *end) {
            e = EINVAL;
        }
        if (!e && u64 > UINT16_MAX) {
            e = ERANGE;
        }
        /*
         * strtoul() accepts a leading '-' and returns the negated value in
         * the unsigned type, so e.g. "-18446744073709551615" wraps around
         * to 1 without any error. A non-zero negative number can never be
         * a valid uint16_t, so reject it explicitly. The whitespace set is
         * what isspace() matches in the "C" locale, i.e. what strtoul()
         * skips before the sign.
         */
        if (!e && u64 && arg[strspn(arg, " \t\n\v\f\r")] == '-') {
            e = ERANGE;
        }
    }
    *err = e;
    return (uint16_t)(u64 & UINT64_C(0xffff));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment