Created
May 15, 2023 20:34
-
-
Save amirshukayev/9f603359e6c92730572ce8bebf220b7f to your computer and use it in GitHub Desktop.
Count uint8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <unistd.h> | |
#include <sys/mman.h> | |
#include <cstdint> | |
#include <immintrin.h> | |
using namespace std; | |
// Counts the bytes equal to 127 in the file attached to stdin and prints the
// total, followed by a newline, on stdout.
//
// stdin must be a seekable file (e.g. `./count < data.bin`): the input is
// memory-mapped rather than read, so when lseek or mmap fails a diagnostic is
// written to stderr and the exit status is 1. An empty file prints 0.
// The vector loop uses AVX2 and POPCNT instructions.
int main() {
    const off_t fsize = lseek(0, 0, SEEK_END);
    if (fsize < 0) {
        std::cerr << "lseek on stdin failed; input must be a seekable file\n";
        return 1;
    }
    if (fsize == 0) {
        // mmap rejects zero-length mappings, and an empty file has no matches.
        std::cout << 0 << '\n';
        return 0;
    }

    const size_t length = static_cast<size_t>(fsize);
    void* const mapping =
        mmap(nullptr, length, PROT_READ, MAP_PRIVATE | MAP_POPULATE, 0, 0);
    if (mapping == MAP_FAILED) {
        std::cerr << "mmap of stdin failed\n";
        return 1;
    }
    const char* const buffer = static_cast<const char*>(mapping);

    // Every byte lane holds the value being searched for.
    const __m256i needle = _mm256_set1_epi8(127);

    // Returns how many of the 32 bytes starting at `offset` equal 127.
    // The aligned load is valid because mmap returns a page-aligned address
    // and every offset passed in is a multiple of 32.
    const auto matches_at = [&](size_t offset) -> std::uint32_t {
        const __m256i chunk =
            _mm256_load_si256(reinterpret_cast<const __m256i*>(buffer + offset));
        const __m256i equal = _mm256_cmpeq_epi8(needle, chunk);
        // movemask packs one bit per byte lane; popcount of it is the match count.
        const std::uint32_t mask =
            static_cast<std::uint32_t>(_mm256_movemask_epi8(equal));
        return static_cast<std::uint32_t>(_mm_popcnt_u32(mask));
    };

    // 64-bit counter and size_t index so inputs of 2 GiB and larger neither
    // overflow the index nor wrap the total.
    std::uint64_t count = 0;
    size_t i = 0;

    // Main loop: four 32-byte lanes (128 bytes) per iteration.
    for (; i + 128 <= length; i += 128) {
        count += matches_at(i) + matches_at(i + 32) +
                 matches_at(i + 64) + matches_at(i + 96);
    }

    // Process the remaining (< 128) bytes one at a time.
    for (; i < length; ++i) {
        if (buffer[i] == 127) {
            ++count;
        }
    }

    munmap(mapping, length);

    std::cout << count << '\n';
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <unistd.h> | |
#include <sys/mman.h> | |
#include <cstdint> | |
#include <immintrin.h> | |
using namespace std; | |
// Counts the bytes equal to 127 in the file attached to stdin and prints the
// total, followed by a newline, on stdout. This variant processes 256 bytes
// (eight 32-byte lanes) per main-loop iteration.
//
// stdin must be a seekable file (e.g. `./count < data.bin`): the input is
// memory-mapped rather than read, so when lseek or mmap fails a diagnostic is
// written to stderr and the exit status is 1. An empty file prints 0.
// The vector loop uses AVX2 and POPCNT instructions.
int main() {
    const off_t fsize = lseek(0, 0, SEEK_END);
    if (fsize < 0) {
        std::cerr << "lseek on stdin failed; input must be a seekable file\n";
        return 1;
    }
    if (fsize == 0) {
        // mmap rejects zero-length mappings, and an empty file has no matches.
        std::cout << 0 << '\n';
        return 0;
    }

    const size_t length = static_cast<size_t>(fsize);
    void* const mapping =
        mmap(nullptr, length, PROT_READ, MAP_PRIVATE | MAP_POPULATE, 0, 0);
    if (mapping == MAP_FAILED) {
        std::cerr << "mmap of stdin failed\n";
        return 1;
    }
    const char* const buffer = static_cast<const char*>(mapping);

    // Every byte lane holds the value being searched for.
    const __m256i needle = _mm256_set1_epi8(127);

    // Returns how many of the 32 bytes starting at `offset` equal 127.
    // The aligned load is valid because mmap returns a page-aligned address
    // and every offset passed in is a multiple of 32.
    const auto matches_at = [&](size_t offset) -> std::uint32_t {
        const __m256i chunk =
            _mm256_load_si256(reinterpret_cast<const __m256i*>(buffer + offset));
        const __m256i equal = _mm256_cmpeq_epi8(needle, chunk);
        // movemask packs one bit per byte lane; popcount of it is the match count.
        const std::uint32_t mask =
            static_cast<std::uint32_t>(_mm256_movemask_epi8(equal));
        return static_cast<std::uint32_t>(_mm_popcnt_u32(mask));
    };

    // 64-bit counter and size_t index so inputs of 2 GiB and larger neither
    // overflow the index nor wrap the total.
    std::uint64_t count = 0;
    size_t i = 0;

    // Main loop: eight 32-byte lanes (256 bytes) per iteration, accumulated
    // as two groups of four.
    for (; i + 256 <= length; i += 256) {
        count += matches_at(i) + matches_at(i + 32) +
                 matches_at(i + 64) + matches_at(i + 96);
        count += matches_at(i + 128) + matches_at(i + 160) +
                 matches_at(i + 192) + matches_at(i + 224);
    }

    // Process the remaining (< 256) bytes one at a time.
    for (; i < length; ++i) {
        if (buffer[i] == 127) {
            ++count;
        }
    }

    munmap(mapping, length);

    std::cout << count << '\n';
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment