Skip to content

Instantly share code, notes, and snippets.

@fire-eggs
Last active July 15, 2020 20:17
Show Gist options
  • Save fire-eggs/efe75c4e059338a23d3554ac61102453 to your computer and use it in GitHub Desktop.
Save fire-eggs/efe75c4e059338a23d3554ac61102453 to your computer and use it in GitHub Desktop.
Timing test for alternative implementations of fl_filename_isdir.
#define _CRT_SECURE_NO_WARNINGS
#include <string>
#include <windows.h>
#include <fileapi.h> // GetFileAttributes
#include <chrono>
#define FL_PATH_MAX 2048
/*
 * Decode a single UTF-8 sequence starting at p and return its code point.
 *
 * p   - first byte of the sequence.
 * end - when non-null, one past the last readable byte; a sequence whose
 *       trail bytes would reach or pass end is treated as malformed. When
 *       null, no bound is applied to the trail-byte reads.
 * len - when non-null, receives the number of bytes consumed (1 to 4).
 *
 * Malformed input always consumes exactly one byte and yields 0xfffd
 * (or the raw byte when ERRORS_TO_ISO8859_1 is set).
 *
 * The ERRORS_TO_CP1252, STRICT_RFC3629 and ERRORS_TO_ISO8859_1 switches are
 * not defined anywhere in the visible source, so unless the build defines
 * them those regions are compiled out. The cp1252 table used by the
 * ERRORS_TO_CP1252 region is likewise not defined in the visible source.
 *
 * Note that the #if regions deliberately straddle the closing brace of the
 * preceding branch, and the UTF8_3 / UTF8_4 / FAIL labels live inside
 * else-bodies that other branches jump into.
 */
unsigned fl_utf8decode(const char* p, const char* end, int* len)
{
/* Work on the lead byte as unsigned so the range comparisons below are valid. */
unsigned char c = *(const unsigned char*)p;
if (c < 0x80) {
/* Plain ASCII: one byte, value is the byte itself. */
if (len) *len = 1;
return c;
#if ERRORS_TO_CP1252
}
else if (c < 0xa0) {
if (len) *len = 1;
return cp1252[c - 0x80];
#endif
}
else if (c < 0xc2) {
/* 0x80..0xc1 can never start a valid sequence (stray trail byte or overlong 2-byte lead). */
goto FAIL;
}
/* Every remaining lead byte needs at least one trail byte of the form 10xxxxxx. */
if ((end && p + 1 >= end) || (p[1] & 0xc0) != 0x80) goto FAIL;
if (c < 0xe0) {
/* Two-byte sequence: 5 payload bits from the lead, 6 from the trail. */
if (len) *len = 2;
return
((p[0] & 0x1f) << 6) +
((p[1] & 0x3f));
}
else if (c == 0xe0) {
/* Lead 0xe0 with a second byte below 0xa0 would be an overlong encoding. */
if (((const unsigned char*)p)[1] < 0xa0) goto FAIL;
goto UTF8_3;
#if STRICT_RFC3629
}
else if (c == 0xed) {
/* RFC 3629 says surrogate chars are illegal. */
if (((const unsigned char*)p)[1] >= 0xa0) goto FAIL;
goto UTF8_3;
}
else if (c == 0xef) {
/* 0xfffe and 0xffff are also illegal characters */
/* NOTE(review): p[2] is read here before the end check done at UTF8_3 - confirm this is acceptable. */
if (((const unsigned char*)p)[1] == 0xbf &&
((const unsigned char*)p)[2] >= 0xbe) goto FAIL;
goto UTF8_3;
#endif
}
else if (c < 0xf0) {
UTF8_3:
/* Three-byte sequence: validate the third byte, then combine 4 + 6 + 6 payload bits. */
if ((end && p + 2 >= end) || (p[2] & 0xc0) != 0x80) goto FAIL;
if (len) *len = 3;
return
((p[0] & 0x0f) << 12) +
((p[1] & 0x3f) << 6) +
((p[2] & 0x3f));
}
else if (c == 0xf0) {
/* Lead 0xf0 with a second byte below 0x90 would be an overlong encoding. */
if (((const unsigned char*)p)[1] < 0x90) goto FAIL;
goto UTF8_4;
}
else if (c < 0xf4) {
UTF8_4:
/* Four-byte sequence: validate bytes three and four, then combine 3 + 6 + 6 + 6 payload bits. */
if ((end && p + 3 >= end) || (p[2] & 0xc0) != 0x80 || (p[3] & 0xc0) != 0x80) goto FAIL;
if (len) *len = 4;
#if STRICT_RFC3629
/* RFC 3629 says all codes ending in fffe or ffff are illegal: */
if ((p[1] & 0xf) == 0xf &&
((const unsigned char*)p)[2] == 0xbf &&
((const unsigned char*)p)[3] >= 0xbe) goto FAIL;
#endif
return
((p[0] & 0x07) << 18) +
((p[1] & 0x3f) << 12) +
((p[2] & 0x3f) << 6) +
((p[3] & 0x3f));
}
else if (c == 0xf4) {
if (((const unsigned char*)p)[1] > 0x8f) goto FAIL; /* after 0x10ffff */
goto UTF8_4;
}
else {
/* Lead bytes 0xf5..0xff fall through to here; every goto FAIL above lands here too. */
FAIL:
/* Consume a single byte so the caller can resynchronise on the next one. */
if (len) *len = 1;
#if ERRORS_TO_ISO8859_1
return c;
#else
return 0xfffd; /* Unicode REPLACEMENT CHARACTER */
#endif
}
}
/*
 * Convert srclen bytes of UTF-8 at src into UTF-16 code units in dst.
 *
 * dst/dstlen describe the output buffer in 16-bit units. The output is
 * always NUL-terminated when dstlen is non-zero. Code points at or above
 * 0x10000 are written as a surrogate pair.
 *
 * Returns the number of UTF-16 units the full conversion needs, not
 * counting the terminator. When the return value is >= dstlen the output
 * was truncated. Passing dstlen == 0 (dst is then never touched) just
 * measures the required length.
 */
unsigned fl_utf8toUtf16(const char* src, unsigned srclen,
                        unsigned short* dst, unsigned dstlen)
{
  const char* cur = src;
  const char* const stop = src + srclen;
  unsigned units = 0;

  if (dstlen != 0) {
    while (true) {
      if (cur >= stop) {
        /* Whole input consumed and it fit: terminate and report. */
        dst[units] = 0;
        return units;
      }
      if (*cur & 0x80) {
        int seqLen;
        const unsigned cp = fl_utf8decode(cur, stop, &seqLen);
        cur += seqLen;
        if (cp >= 0x10000) {
          /* Needs a surrogate pair; bail out if pair plus terminator won't fit. */
          if (units + 2 >= dstlen) {
            dst[units] = 0;
            units += 2;
            break;
          }
          dst[units] = (((cp - 0x10000u) >> 10) & 0x3ff) | 0xd800;
          ++units;
          dst[units] = (cp & 0x3ff) | 0xdc00;
        } else {
          dst[units] = cp;
        }
      } else {
        /* ascii */
        dst[units] = *cur;
        ++cur;
      }
      ++units;
      if (units == dstlen) {
        /* Buffer is full: overwrite the last unit with the terminator. */
        dst[units - 1] = 0;
        break;
      }
    }
  }

  /* Output is full (or absent); keep counting what the rest would need. */
  while (cur < stop) {
    if (*cur & 0x80) {
      int seqLen;
      const unsigned cp = fl_utf8decode(cur, stop, &seqLen);
      cur += seqLen;
      if (cp >= 0x10000) ++units;
    } else {
      ++cur;
    }
    ++units;
  }
  return units;
}
// Shared conversion buffer, grown on demand by utf8_to_wchar() and never freed.
static wchar_t* wbuf = nullptr;

// Convert a UTF-8 string to a NUL-terminated wide string stored in wbuf.
//
// utf8 - input bytes.
// wbuf - buffer to (re)allocate and fill; updated in place via realloc.
// lg   - number of input bytes, or a negative value to use strlen(utf8).
//
// Returns wbuf. The realloc result is not checked for failure.
static wchar_t* utf8_to_wchar(const char* utf8, wchar_t*& wbuf, int lg = -1) {
  unsigned nbytes;
  if (lg < 0)
    nbytes = (unsigned)strlen(utf8);
  else
    nbytes = (unsigned)lg;

  // A first pass with no destination only measures; add room for the terminator.
  const unsigned capacity = fl_utf8toUtf16(utf8, nbytes, nullptr, 0) + 1;
  wbuf = (wchar_t*)realloc(wbuf, capacity * sizeof(wchar_t));

  // Second pass performs the real conversion into the resized buffer.
  const unsigned written = fl_utf8toUtf16(utf8, nbytes, (unsigned short*)wbuf, capacity);
  wbuf[written] = 0;
  return wbuf;
}
int isdirGFAW(const char* n)
{
utf8_to_wchar(n, wbuf, (int)strlen(n));
DWORD res = GetFileAttributesW(wbuf);
return (res & FILE_ATTRIBUTE_DIRECTORY);
}
// Returns 1 when c is a path separator ('/' or '\\'), otherwise 0.
inline int isdirsep(char c) {
  switch (c) {
    case '/':
    case '\\':
      return 1;
    default:
      return 0;
  }
}
// Directory test built on _stat(), with two path fix-ups applied first.
//
// n - path to test; it is handed to _stat() unchanged apart from the
//     fix-ups below.
//
// Returns 1 when _stat() succeeds and reports a directory, 0 otherwise.
int isdirORIG(const char* n)
{
  struct _stat s;
  char fn[FL_PATH_MAX];
  int length;
  length = (int)strlen(n);
  // This workaround brought to you by the fine folks at Microsoft!
  // (read lots of sarcasm in that...)
  // Paths too long for the scratch buffer are passed through untouched.
  if (length < (int)(sizeof(fn) - 1)) {
    // isalpha() requires a value representable as unsigned char (or EOF);
    // a plain char holding a non-ASCII byte may be negative, so cast first.
    if (length < 4 && isalpha((unsigned char)n[0]) && n[1] == ':' &&
        (isdirsep(n[2]) || !n[2])) {
      // Always use D:/ for drive letters
      fn[0] = n[0];
      strcpy(fn + 1, ":/");
      n = fn;
    }
    else if (length > 0 && isdirsep(n[length - 1])) {
      // Strip trailing slash from name...
      length--;
      memcpy(fn, n, length);
      fn[length] = '\0';
      n = fn;
    }
  }
  return !_stat(n, &s) && (s.st_mode & _S_IFDIR);
}
// Text file that drives the benchmark: one full file path per line.
const char* filename = "Z:/maid_filtered.txt";

// Time one directory-test implementation over every path listed in `filename`.
//
// func - the implementation under test; called once per line with that
//        line's path (line terminator removed).
//
// Returns the elapsed time in microseconds, or -1 if the list file cannot
// be opened. The measured interval includes reading the list file.
long long testOne(int (*func)(const char *))
{
  char buffer[FL_PATH_MAX];
  FILE* fptr = fopen(filename, "r");
  if (!fptr)
    return -1LL;

  auto start = std::chrono::high_resolution_clock::now();
  while (fgets(buffer, sizeof(buffer), fptr) != NULL)
  {
    // Remove only an actual line terminator: the final line may lack one,
    // and blindly dropping the last character would corrupt that path.
    size_t len = strlen(buffer);
    while (len > 0 && (buffer[len - 1] == '\n' || buffer[len - 1] == '\r'))
      buffer[--len] = '\0';

    // Test the path just read, not the name of the list file itself.
    (void)(*func)(buffer);
  }
  auto stop = std::chrono::high_resolution_clock::now();
  fclose(fptr);

  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
  return duration.count();
}
int main()
{
long long res2 = testOne(isdirGFAW);
double res2secs = res2 / 1000.0 / 1000.0;
printf("GFAW time: %lld (%.2f seconds)\n", res2, res2secs);
long long res1 = testOne(isdirORIG);
double res1secs = res1 / 1000.0 / 1000.0;
printf("Original time: %lld (%.2f seconds)\n", res1, res1secs);
}
@fire-eggs
Copy link
Author

The key here is this line:
const char* filename = "Z:/maid_filtered.txt";

This is a path to a text file. Each line of the text file is a full file path.

@Albrecht-S
Copy link

Thanks, test looks good (viewing code only, no time to test yet).

It would be interesting to count the results (true vs. false) of each isdirXXX test for comparison. isdirORIG() would likely not find all directories with non-ASCII names correctly, so you could see the difference if there are such dirs.

A third test with my fixed version (as posted in fltk.general) would reveal differences between your implementation and mine with all the special handling still included.

@fire-eggs
Copy link
Author

My unit test version is over here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment