Skip to content

Instantly share code, notes, and snippets.

@fire-eggs
Last active July 17, 2020 01:02
Show Gist options
  • Save fire-eggs/f9668bc1315f7a002d90032efec2f489 to your computer and use it in GitHub Desktop.
Save fire-eggs/f9668bc1315f7a002d90032efec2f489 to your computer and use it in GitHub Desktop.
Unit test program: variations on fl_filename_isdir implementations for Windows
/*
1. modify the const BASE to the drive letter you want to test on.
1a. To build, the additional option "/utf-8" needs to be added to the compiler options.
See project settings, compiler, "command line".
2. execute the following commands in a console window at the root of said drive
chcp 65001
mkdir isdirtest
cd isdirtest
mkdir testǼ
mkdir test♥
echo. 2> file♥.txt
echo. 2> file.txt
cd test♥
echo. 2> file♥.txt
echo. 2> file.txt
3. Run the program.
*/
#define _CRT_SECURE_NO_WARNINGS
#include <string>
#include <windows.h>
#include <fileapi.h> // GetFileAttributes
#include <algorithm> // replace
const std::string BASE = "E:";
#define FL_PATH_MAX 2048
unsigned fl_utf8decode(const char* p, const char* end, int* len)
{
unsigned char c = *(const unsigned char*)p;
if (c < 0x80) {
if (len) *len = 1;
return c;
#if ERRORS_TO_CP1252
}
else if (c < 0xa0) {
if (len) *len = 1;
return cp1252[c - 0x80];
#endif
}
else if (c < 0xc2) {
goto FAIL;
}
if ((end && p + 1 >= end) || (p[1] & 0xc0) != 0x80) goto FAIL;
if (c < 0xe0) {
if (len) *len = 2;
return
((p[0] & 0x1f) << 6) +
((p[1] & 0x3f));
}
else if (c == 0xe0) {
if (((const unsigned char*)p)[1] < 0xa0) goto FAIL;
goto UTF8_3;
#if STRICT_RFC3629
}
else if (c == 0xed) {
/* RFC 3629 says surrogate chars are illegal. */
if (((const unsigned char*)p)[1] >= 0xa0) goto FAIL;
goto UTF8_3;
}
else if (c == 0xef) {
/* 0xfffe and 0xffff are also illegal characters */
if (((const unsigned char*)p)[1] == 0xbf &&
((const unsigned char*)p)[2] >= 0xbe) goto FAIL;
goto UTF8_3;
#endif
}
else if (c < 0xf0) {
UTF8_3:
if ((end && p + 2 >= end) || (p[2] & 0xc0) != 0x80) goto FAIL;
if (len) *len = 3;
return
((p[0] & 0x0f) << 12) +
((p[1] & 0x3f) << 6) +
((p[2] & 0x3f));
}
else if (c == 0xf0) {
if (((const unsigned char*)p)[1] < 0x90) goto FAIL;
goto UTF8_4;
}
else if (c < 0xf4) {
UTF8_4:
if ((end && p + 3 >= end) || (p[2] & 0xc0) != 0x80 || (p[3] & 0xc0) != 0x80) goto FAIL;
if (len) *len = 4;
#if STRICT_RFC3629
/* RFC 3629 says all codes ending in fffe or ffff are illegal: */
if ((p[1] & 0xf) == 0xf &&
((const unsigned char*)p)[2] == 0xbf &&
((const unsigned char*)p)[3] >= 0xbe) goto FAIL;
#endif
return
((p[0] & 0x07) << 18) +
((p[1] & 0x3f) << 12) +
((p[2] & 0x3f) << 6) +
((p[3] & 0x3f));
}
else if (c == 0xf4) {
if (((const unsigned char*)p)[1] > 0x8f) goto FAIL; /* after 0x10ffff */
goto UTF8_4;
}
else {
FAIL:
if (len) *len = 1;
#if ERRORS_TO_ISO8859_1
return c;
#else
return 0xfffd; /* Unicode REPLACEMENT CHARACTER */
#endif
}
}
/*
 * Convert srclen bytes of UTF-8 at src into UTF-16 code units in dst.
 *
 *   dst     output buffer; never touched when dstlen is 0 (may be NULL then).
 *   dstlen  capacity of dst in 16-bit units, including room for the NUL.
 *
 * Code points of 0x10000 and above are written as a surrogate pair. Whatever
 * is written to dst is always NUL-terminated. The return value is the number
 * of UTF-16 units the whole input requires, not counting the terminator; a
 * result >= dstlen therefore means the output was truncated. Calling with
 * dstlen == 0 only measures.
 */
unsigned fl_utf8toUtf16(const char* src, unsigned srclen,
                        unsigned short* dst, unsigned dstlen)
{
  const char* cur = src;
  const char* const stop = src + srclen;
  unsigned count = 0;

  if (dstlen != 0) {
    while (1) {
      if (cur >= stop) {
        /* Whole input converted and it fitted: terminate and finish. */
        dst[count] = 0;
        return count;
      }

      if ((*cur & 0x80) == 0) {
        /* ASCII byte maps to itself. */
        dst[count] = *cur;
        ++cur;
      }
      else {
        int used;
        const unsigned ucs = fl_utf8decode(cur, stop, &used);
        cur += used;
        if (ucs >= 0x10000) {
          /* Needs a surrogate pair; it must fit together with the NUL. */
          if (count + 2 >= dstlen) {
            dst[count] = 0;
            count += 2;
            break;
          }
          dst[count] = (((ucs - 0x10000u) >> 10) & 0x3ff) | 0xd800;
          ++count;
          dst[count] = (ucs & 0x3ff) | 0xdc00;
        }
        else {
          dst[count] = ucs;
        }
      }

      ++count;
      if (count == dstlen) {
        /* Buffer full: the last unit written gives way to the terminator. */
        dst[count - 1] = 0;
        break;
      }
    }
  }

  /* Output is full (or absent): keep counting what the rest would need. */
  while (cur < stop) {
    if ((*cur & 0x80) == 0) {
      ++cur;
    }
    else {
      int used;
      const unsigned ucs = fl_utf8decode(cur, stop, &used);
      cur += used;
      if (ucs >= 0x10000)
        ++count; /* second half of a surrogate pair */
    }
    ++count;
  }
  return count;
}
// Scratch buffer shared by the isdir*() variants. utf8_to_wchar() resizes it
// on every call; nothing in this file frees it.
static wchar_t* wbuf = NULL;

/*
 * Convert a UTF-8 string to a NUL-terminated wide string held in `wbuf`.
 *
 *   utf8  source bytes.
 *   wbuf  caller-owned buffer, passed by reference and resized with
 *         realloc(). It must be NULL or a buffer previously filled by this
 *         function. (The parameter shadows the file-scope `wbuf`.)
 *   lg    number of bytes of `utf8` to convert, or a negative value to
 *         convert up to the terminating NUL.
 *
 * Returns `wbuf` on success. If the buffer cannot be resized, NULL is
 * returned; the previous buffer (if any) is kept and set to the empty string
 * so a caller that ignores the return value sees "" rather than the text of
 * an earlier conversion.
 *
 * The buffer is handed to fl_utf8toUtf16() as unsigned short*, which relies
 * on wchar_t being 16 bits wide, as it is on Windows.
 */
static wchar_t* utf8_to_wchar(const char* utf8, wchar_t*& wbuf, int lg = -1) {
  unsigned len = (lg >= 0) ? (unsigned)lg : (unsigned)strlen(utf8);
  unsigned wn = fl_utf8toUtf16(utf8, len, NULL, 0) + 1; // Query length (+1 for the NUL)
  // Resize through a temporary: assigning realloc()'s result straight to
  // wbuf would leak the old block and leave wbuf NULL on failure.
  wchar_t* resized = (wchar_t*)realloc(wbuf, sizeof(wchar_t) * wn);
  if (!resized) {
    if (wbuf) wbuf[0] = 0;
    return NULL;
  }
  wbuf = resized;
  wn = fl_utf8toUtf16(utf8, len, (unsigned short*)wbuf, wn); // Convert string
  wbuf[wn] = 0;
  return wbuf;
}
int isdirGFAW(const char* n)
{
utf8_to_wchar(n, wbuf);
DWORD res = GetFileAttributesW(wbuf);
return res != INVALID_FILE_ATTRIBUTES && (res & FILE_ATTRIBUTE_DIRECTORY);
}
// Returns 1 when c is a path separator ('/' or '\'), 0 otherwise.
inline int isdirsep(char c)
{
  switch (c) {
    case '/':
    case '\\':
      return 1;
    default:
      return 0;
  }
}
/*
 * Baseline under test: the FLTK 1.4 directory check, built on the
 * narrow-character _stat().
 *
 * Returns 1 if _stat() succeeds on the (possibly adjusted) name and reports
 * a directory, 0 otherwise.
 *
 * Name adjustments, applied only when the name fits in the local buffer:
 *   - "X:", "X:/" and "X:\" are replaced by "X:/";
 *   - otherwise one trailing separator is removed. A lone "/" or "\" is
 *     thereby reduced to the empty string; this is left as it is on purpose
 *     so the program keeps reporting the baseline's real behaviour.
 *
 * The only change from the original is the unsigned char cast for isalpha():
 * a UTF-8 lead byte is negative when char is signed, and passing a negative
 * value to isalpha() is undefined behaviour.
 */
int isdirORIG(const char* n)
{
  struct _stat s;
  char fn[FL_PATH_MAX];
  int length;
  length = (int)strlen(n);
  // This workaround brought to you by the fine folks at Microsoft!
  // (read lots of sarcasm in that...)
  if (length < (int)(sizeof(fn) - 1)) {
    if (length < 4 && isalpha((unsigned char)n[0]) && n[1] == ':' &&
        (isdirsep(n[2]) || !n[2])) {
      // Always use D:/ for drive letters
      fn[0] = n[0];
      strcpy(fn + 1, ":/");
      n = fn;
    }
    else if (length > 0 && isdirsep(n[length - 1])) {
      // Strip trailing slash from name...
      length--;
      memcpy(fn, n, length);
      fn[length] = '\0';
      n = fn;
    }
  }
  return !_stat(n, &s) && (s.st_mode & _S_IFDIR);
}
/*
 * Directory check built on the wide-character _wstat().
 *
 * The UTF-8 name is trimmed, converted to wide characters and passed to
 * _wstat(). Returns 1 if that succeeds and reports a directory, 0 otherwise
 * (including for an empty name or a failed conversion).
 *
 * Name adjustments:
 *   - one trailing separator is ignored, unless the name is a single
 *     separator: a lone "/" or "\" is passed to _wstat() unchanged instead
 *     of being reduced to the empty string and rejected;
 *   - a bare drive ("X:", or "X:/" after trimming) is replaced by "X:/".
 */
int isdirWStat(const char* n)
{
  char fn[4]; // used for drive letter only: "X:/"
  int length = (int)strlen(n);

  // Strip trailing slash from name, but keep a lone separator intact.
  if (length > 1 && isdirsep(n[length - 1]))
    length--;
  if (length < 1)
    return 0;

  // This workaround brought to you by the fine folks at Microsoft!
  // (read lots of sarcasm in that...)
  // The cast keeps isalpha() defined for bytes >= 0x80 (UTF-8 lead bytes),
  // which are negative when char is signed.
  if (length == 2 && isalpha((unsigned char)n[0]) && n[1] == ':') {
    fn[0] = n[0];
    strcpy(fn + 1, ":/");
    n = fn;
    length = 3;
  }

  // convert filename to wide chars using *length*, so that a stripped
  // trailing separator is left out without copying the name
  if (!utf8_to_wchar(n, wbuf, length))
    return 0;

  struct _stat s;
  return (!_wstat(wbuf, &s) && (s.st_mode & _S_IFDIR));
}
// Test table: each entry pairs a UTF-8 path with the expected result of the
// isdir*() functions (1 = directory, 0 = not a directory).
// NOTE: these paths should only use the '/' separator. The test driver
// repeats each path with the backslash separator.
std::pair<std::string, int> testVals [] =
{
// drive root and paths relative to the current directory
{BASE, 1},
{BASE + "/", 1},
{"/", 1},
{".", 1},
{"..", 1},
// variations on an ASCII folder
{BASE + "/isdirtest", 1},
{BASE + "/isdirtest/", 1},
{BASE + "/isdirtest/.", 1},
{BASE + "/isdirtest/..", 1},
{BASE + "/isdirtest/./", 1},
{BASE + "/isdirtest/../", 1},
// variations on a UTF-8 folder
{BASE + "/isdirtest/testǼ", 1},
{BASE + "/isdirtest/testǼ/", 1},
{BASE + "/isdirtest/testǼ/.", 1},
{BASE + "/isdirtest/testǼ/..", 1},
{BASE + "/isdirtest/testǼ/./", 1},
{BASE + "/isdirtest/testǼ/../", 1},
// variations on a different UTF-8 folder [utf-8 value larger than 255]
{BASE + "/isdirtest/test♥", 1},
{BASE + "/isdirtest/test♥/", 1},
{BASE + "/isdirtest/test♥/.", 1},
{BASE + "/isdirtest/test♥/..", 1},
{BASE + "/isdirtest/test♥/./", 1},
{BASE + "/isdirtest/test♥/../", 1},
// files within an ASCII folder
{BASE + "/isdirtest/file.txt", 0},
{BASE + "/isdirtest/file♥.txt", 0},
// files within a UTF-8 folder.
{BASE + "/isdirtest/test♥/file.txt", 0},
{BASE + "/isdirtest/test♥/file♥.txt", 0},
};
/*
 * Run all three directory checks on one path and print a line for each
 * implementation whose answer differs from `expect`. Nothing is printed
 * when an implementation agrees.
 */
void testOne(const char* name, int expect)
{
  const int fromOrig = isdirORIG(name);
  if (fromOrig != expect) {
    printf("isdirORIG fail: %s\n", name);
  }

  const int fromGfaw = isdirGFAW(name);
  if (fromGfaw != expect) {
    printf("isdirGFAW fail: %s\n", name);
  }

  const int fromWstat = isdirWStat(name);
  if (fromWstat != expect) {
    printf("isdirWStat fail: %s\n", name);
  }
}
/*
 * Test `name` as given and, if it contains any '/', test it a second time
 * with every '/' replaced by '\'.
 *
 * `name` is taken by value because the replacement is done in place on the
 * local copy.
 */
void testBothSlashes(std::string name, int expect)
{
  testOne(name.c_str(), expect);

  if (name.find('/') != std::string::npos)
  {
    std::replace(name.begin(), name.end(), '/', '\\');
    testOne(name.c_str(), expect);
  }
}
int main()
{
SetConsoleOutputCP(65001); // make sure we can display unicode
SetCurrentDirectory((BASE + "/").c_str());
char buf[2048];
GetCurrentDirectory(2048, buf);
printf("cwd = %s\n", buf);
int count = sizeof(testVals) / sizeof(testVals[0]);
for (int i = 0; i < count; i++)
{
auto name = testVals[i].first;
int expect = testVals[i].second;
testBothSlashes(name, expect);
}
}
@fire-eggs
Copy link
Author

fire-eggs commented Jul 15, 2020

My results on Windows 10. As expected, the _stat() based implementation fails with UTF-8 folders.

cwd = E:\
isdirORIG fail: /
isdirWStat fail: /
isdirORIG fail: \
isdirWStat fail: \
isdirORIG fail: E:/isdirtest/testǼ
isdirORIG fail: E:\isdirtest\testǼ
isdirORIG fail: E:/isdirtest/testǼ/
isdirORIG fail: E:\isdirtest\testǼ\
isdirORIG fail: E:/isdirtest/testǼ/.
isdirORIG fail: E:\isdirtest\testǼ\.
isdirORIG fail: E:/isdirtest/testǼ/./
isdirORIG fail: E:\isdirtest\testǼ\.\
isdirORIG fail: E:/isdirtest/test♥
isdirORIG fail: E:\isdirtest\test♥
isdirORIG fail: E:/isdirtest/test♥/
isdirORIG fail: E:\isdirtest\test♥\
isdirORIG fail: E:/isdirtest/test♥/.
isdirORIG fail: E:\isdirtest\test♥\.
isdirORIG fail: E:/isdirtest/test♥/./
isdirORIG fail: E:\isdirtest\test♥\.\

@fire-eggs
Copy link
Author

fire-eggs commented Jul 15, 2020

My results on Windows 7. [After I fixed a mixup between Æ and Ǽ ! grrr...]

cwd = E:\
isdirORIG fail: /
isdirWStat fail: /
isdirORIG fail: \
isdirWStat fail: \
isdirORIG fail: E:/isdirtest/testǼ
isdirORIG fail: E:\isdirtest\testǼ
isdirORIG fail: E:/isdirtest/testǼ/
isdirORIG fail: E:\isdirtest\testǼ\
isdirORIG fail: E:/isdirtest/testǼ/.
isdirORIG fail: E:\isdirtest\testǼ\.
isdirORIG fail: E:/isdirtest/testǼ/./
isdirORIG fail: E:\isdirtest\testǼ\.\
isdirORIG fail: E:/isdirtest/test♥
isdirORIG fail: E:\isdirtest\test♥
isdirORIG fail: E:/isdirtest/test♥/
isdirORIG fail: E:\isdirtest\test♥\
isdirORIG fail: E:/isdirtest/test♥/.
isdirORIG fail: E:\isdirtest\test♥\.
isdirORIG fail: E:/isdirtest/test♥/./
isdirORIG fail: E:\isdirtest\test♥\.\

@fire-eggs
Copy link
Author

Three variants tested:
isdirORIG : FLTK 1.4 version
isdirWStat: isdirORIG modified to use wstat, as posted in fltk.general
isdirGFAW: GetFileAttributesW version

Not certain how to interpret the failures with "/".

Feel free to suggest any more test cases.

@fire-eggs
Copy link
Author

fire-eggs commented Jul 15, 2020

Another test case is network paths. Kind of difficult to perform in a generic manner (i.e. the user must have a shared network drive).

E.g. \\machine\drive\folder\file. I'm pretty sure the alternate (forward) slashes are not valid for the \\machine\drive portion.

@Albrecht-S
Copy link

Thanks for this update, I was distracted by PR stuff. I'll check it and decide later. Sorry.

@fire-eggs
Copy link
Author

No apologies necessary! I'm sure you are juggling plenty of plates, please don't feel you need to devote any special attention this way!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment