Last active
August 21, 2017 01:09
-
-
Save magurosan/be2ae5a435941b67863d26a2dbb8f94b to your computer and use it in GitHub Desktop.
C++/Xbyak strlen generator for AVX512BW
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h>
#include <locale.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <wchar.h>

#include <xbyak/xbyak.h>
#include <xbyak/xbyak_util.h>
class StrlenGenerator : Xbyak::CodeGenerator { | |
public: | |
// | |
// e.g. | |
// StrlenGenerator gen(sizeof(char), true) => strlen_s | |
// | |
StrlenGenerator(uint32_t n_scale) { | |
using namespace Xbyak; | |
// assume charsize | |
assert(n_scale == 1 || n_scale == 2 || n_scale == 4); | |
auto cmpeq_insn = [&](const Opmask& mask, const Zmm& z, const Operand& op) { | |
if (n_scale == 1) vpcmpeqb(mask, z, op); | |
else if (n_scale == 2) vpcmpeqw(mask, z, op); | |
else vpcmpeqd(mask, z, op); | |
}; | |
auto kortest_insn = [&](const Opmask& mask1, const Opmask& mask2) { | |
if (n_scale == 1) kortestq(mask1, mask2); | |
else if (n_scale == 2) kortestd(mask1, mask2); | |
else kortestw(mask1, mask2); | |
}; | |
auto kmov_mask_to_reg = [&](const Reg64& reg, const Opmask& mask) { | |
if (n_scale == 1) kmovq(reg, mask); | |
else if (n_scale == 2) kmovd(Reg32(reg.getIdx()), mask); | |
else kmovw(Reg32(reg.getIdx()), mask); | |
}; | |
auto kmov_reg_to_mask = [&](const Opmask& mask, const Reg64& reg) { | |
if (n_scale == 1) kmovq(mask, reg); | |
else if (n_scale == 2) kmovd(mask, Reg32(reg.getIdx())); | |
else kmovw(mask, Reg32(reg.getIdx())); | |
}; | |
auto tzcnt_opt = [&](const Reg64& reg1, const Reg64& reg2) { | |
if (n_scale == 1) tzcnt(reg1, reg2); | |
else tzcnt(Reg32(reg1.getIdx()), Reg32(reg2.getIdx())); | |
}; | |
#if defined(_WIN64) //assume Win64 x64 ABI | |
mov(rdx, rcx); | |
#else //assume x86-64 SysV ABI | |
mov(rdx, rdi); | |
mov(rcx, rdi); | |
#endif | |
vpxor(xmm0, xmm0, xmm0); // zmm0 all zero | |
// misalign offset | |
and(ecx, 0x3F); | |
if (n_scale != 1) shr(ecx, n_scale); | |
//gen mask | |
xor_(eax, eax); | |
if (n_scale == 1) { | |
neg(rax); | |
shl(rax, cl); | |
}else { | |
neg(eax); | |
shl(rax, cl); | |
} | |
kmov_reg_to_mask(k1, rax); | |
neg(rcx); | |
cmpeq_insn(k1|k1, zmm0, ptr[rdx + rcx*n_scale]); | |
kortest_insn(k1, k1); | |
jng("k1_end"); | |
add(rcx, 64 / n_scale); | |
lea(rax, ptr[rdx + rcx*n_scale]); | |
test(rax, 128 - 1); | |
jz("strlen_loop"); | |
cmpeq_insn(k1, zmm0, ptr[rdx + rcx*n_scale]); | |
kortest_insn(k1, k1); | |
jng("k1_end"); | |
add(rcx, 64 / n_scale); | |
jmp("strlen_loop"); | |
align(16); | |
L("strlen_loop"); { | |
cmpeq_insn(k1, zmm0, ptr[rdx + rcx*n_scale]); | |
cmpeq_insn(k2, zmm0, ptr[rdx + rcx*n_scale + 64]); | |
kortest_insn(k1, k2); | |
jnz("strlen_loop_end"); | |
sub(rcx, -128 / n_scale); | |
jmp("strlen_loop"); | |
} | |
L("strlen_loop_end"); | |
ktestq(k1, k1); | |
jz("k2_end"); | |
L("k1_end"); | |
kmov_mask_to_reg(rdx, k1); | |
tzcnt_opt(rdx, rdx); | |
lea(rax, ptr[rcx + rdx]); | |
ret(); | |
L("k2_end"); | |
kmov_mask_to_reg(rdx, k2); | |
tzcnt_opt(rdx, rdx); | |
lea(rax, ptr[rcx + rdx + 64/n_scale]); | |
ret(); | |
} | |
const uint8_t *getCode() const { | |
return Xbyak::CodeGenerator::getCode(); | |
} | |
size_t getSize() { | |
return Xbyak::CodeGenerator::getSize(); | |
} | |
}; | |
#if defined(_DEBUG) | |
#include <stdio.h> | |
int main() | |
{ | |
typedef size_t(*strlen_func)(char *); | |
typedef size_t(*wcslen_func)(wchar_t *); | |
StrlenGenerator strlenGen(sizeof(char)), wcslenGen(sizeof(wchar_t)); | |
auto my_strlen = reinterpret_cast<strlen_func>(strlenGen.getCode()); | |
auto my_wcslen = reinterpret_cast<wcslen_func>(wcslenGen.getCode()); | |
my_strlen(""); | |
my_wcslen(L""); | |
char* str = "abcde"; | |
printf("%s: %d, %d", str, strlen(str), my_strlen(str)); | |
wchar_t* wstr = L"5000兆円欲しい!"; | |
wprintf(L"%s: %d, %d", wstr, wcslen(wstr), my_wcslen(wstr)); | |
return 0; | |
} | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment