Skip to content

Instantly share code, notes, and snippets.

@youkidearitai
Created October 2, 2023 13:30
Show Gist options
  • Save youkidearitai/0018dee27353c00aebaff3bf57c5b8c6 to your computer and use it in GitHub Desktop.
Save youkidearitai/0018dee27353c00aebaff3bf57c5b8c6 to your computer and use it in GitHub Desktop.
trim function for multibyte support
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string_view>
/**
 * Strip every leading and trailing occurrence of a set of byte sequences
 * from a heap-allocated buffer, in place.
 *
 * Each trim entry is an arbitrary byte sequence (for example a multi-byte
 * UTF-8 code point), so matching is done on whole sequences rather than on
 * single bytes. At each step the first entry (in table order) that matches
 * is removed, and the scan restarts until no entry matches any more.
 *
 * @param strlen           In: number of bytes in *str. Out: number of bytes
 *                         left after trimming (always updated).
 * @param str              In/out: buffer that must be resizable with
 *                         realloc(). May be replaced by a new pointer.
 * @param trim_lengths_num Number of entries in trim_lengths / trim_chars.
 * @param trim_lengths     Byte length of each trim sequence.
 * @param trim_chars       The trim sequences; only trim_lengths[i] bytes of
 *                         trim_chars[i] are compared.
 *
 * The function is a no-op when strlen, str or *str is null. A null table,
 * zero-length entries and null entries are ignored (a zero-length entry
 * would otherwise match forever without consuming anything).
 *
 * On return the buffer holds the trimmed bytes followed by a terminating
 * NUL whenever room for it exists: that is always the case when realloc()
 * succeeds or when at least one byte was trimmed. If realloc() fails the
 * original allocation is kept, so *str stays valid and still has to be
 * released by the caller.
 */
void mb_trim(size_t *strlen, char **str, const size_t trim_lengths_num,
             const size_t *trim_lengths, const char **trim_chars)
{
    if (strlen == nullptr || str == nullptr || *str == nullptr)
    {
        return;
    }

    const size_t original_len = *strlen;
    char *window = *str;            // start of the bytes that are kept
    size_t window_len = original_len; // number of bytes that are kept

    // A missing table simply means there is nothing to trim.
    const size_t table_size =
        (trim_lengths != nullptr && trim_chars != nullptr) ? trim_lengths_num : 0;

    // Length of the first trim sequence found at the chosen edge of the
    // window, or 0 when none matches.
    const auto match_at = [&](const bool at_end) -> size_t {
        for (size_t i = 0; i < table_size; ++i)
        {
            const size_t len = trim_lengths[i];
            if (len == 0 || len > window_len || trim_chars[i] == nullptr)
            {
                continue;
            }
            const char *candidate = at_end ? window + (window_len - len) : window;
            if (std::memcmp(candidate, trim_chars[i], len) == 0)
            {
                return len;
            }
        }
        return 0;
    };

    // Peel sequences off the front, then off the back.
    for (size_t n = match_at(false); n != 0; n = match_at(false))
    {
        window += n;
        window_len -= n;
    }
    for (size_t n = match_at(true); n != 0; n = match_at(true))
    {
        window_len -= n;
    }

    // Source and destination may overlap, hence memmove.
    if (window != *str && window_len != 0)
    {
        std::memmove(*str, window, window_len);
    }

    // The buffer content is already trimmed, so the length must be reported
    // even if shrinking the allocation fails below.
    *strlen = window_len;
    if (window_len < original_len)
    {
        // At least one trimmed byte is free to hold the terminator.
        (*str)[window_len] = '\0';
    }

    // Always request room for the terminator: this also avoids realloc(p, 0),
    // whose behaviour is implementation-defined and may free the buffer.
    char *resized = static_cast<char *>(std::realloc(*str, window_len + 1));
    if (resized != nullptr)
    {
        resized[window_len] = '\0';
        *str = resized;
    }
}
/**
 * Demo driver: prints a test string with its byte length, trims ASCII and
 * multi-byte UTF-8 sequences from both ends with mb_trim(), then prints the
 * result again.
 *
 * @return 0 on success, 1 if the test string could not be allocated.
 */
int main()
{
    // Byte sequences to strip; each entry is one complete sequence.
    const char *trim_chars[] = {
        " ",
        "!",
        // utf8 whitespace:
        "\xE2\x80\x80", // EN QUAD
        "\xE2\x80\x81", // EM QUAD
        "\xE2\x80\x82", // EN SPACE
        "\xE2\x80\x83", // EM SPACE
        "\xE2\x80\x84", // THREE-PER-EM SPACE
        "\xE2\x80\x85", // FOUR-PER-EM SPACE
        "\xE2\x80\x86", // SIX-PER-EM SPACE
    };

    // Derive the lengths from the table so the two can never drift apart.
    constexpr size_t trim_lengths_num = sizeof(trim_chars) / sizeof(trim_chars[0]);
    size_t trim_lengths[trim_lengths_num];
    for (size_t i = 0; i < trim_lengths_num; ++i)
    {
        trim_lengths[i] = strlen(trim_chars[i]);
    }

    // mb_trim() resizes the buffer with realloc(), so it must live on the heap.
    char *teststr = strdup(" ! \xE2\x80\x80\xE2\x80\x81\xE2\x80\x82Hello World ! \xE2\x80\x83\xE2\x80\x84\xE2\x80\x85\xE2\x80\x86 !");
    // char *teststr = strdup(" ! Hello World ! ! ");
    if (teststr == nullptr)
    {
        std::cerr << "out of memory" << '\n';
        return 1;
    }
    size_t teststrlen = strlen(teststr);

    std::cout << teststrlen << ": \"" << std::string_view(teststr, teststrlen) << "\"" << '\n';
    mb_trim(&teststrlen, &teststr, trim_lengths_num, trim_lengths, trim_chars);
    // Print through an explicit length: the output must not depend on the
    // buffer being NUL-terminated after trimming.
    std::cout << teststrlen << ": \"" << std::string_view(teststr, teststrlen) << "\"" << '\n';

    free(teststr);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment