Skip to content

Instantly share code, notes, and snippets.

@rdlowrey
Last active July 14, 2023 12:51
Show Gist options
  • Save rdlowrey/5f56cc540099de9d5006 to your computer and use it in GitHub Desktop.
Save rdlowrey/5f56cc540099de9d5006 to your computer and use it in GitHub Desktop.
Remove dot segments from a URI path according to RFC3986 Section 5.2.4
<?php
/**
 * Resolve "." and ".." segments in a URI path using the
 * remove_dot_segments procedure of RFC 3986, Section 5.2.4.
 *
 * The path is consumed from the left; every segment that survives is
 * collected and the collected pieces are concatenated for the result.
 *
 * @param string $path URI path that may contain dot segments
 * @return string the path with the dot segments removed
 * @link http://www.ietf.org/rfc/rfc3986.txt
 */
function removeDotPathSegments($path) {
    // A path without any dot cannot contain a dot segment: return it as given.
    if (false === strpos($path, '.')) {
        return $path;
    }

    $remaining = $path; // the RFC's "input buffer"
    $kept = [];         // the RFC's "output buffer", one entry per moved segment

    // Step 2: keep going until the input buffer has been consumed.
    while ($remaining != '') {
        // Step A: a leading "./" or "../" is discarded.
        if (strncmp($remaining, './', 2) === 0) {
            $remaining = substr($remaining, 2);
            continue;
        }
        if (strncmp($remaining, '../', 3) === 0) {
            $remaining = substr($remaining, 3);
            continue;
        }

        // Steps B and C, terminal form: the buffer is exactly "/." or "/..".
        // Both leave a lone "/" behind; "/.." additionally drops the segment
        // that was kept last. Nothing remains to be read afterwards.
        if ($remaining === '/.' || $remaining === '/..') {
            if ($remaining === '/..') {
                array_pop($kept);
            }
            $kept[] = '/';
            break;
        }

        // Step B, prefix form: "/./" collapses to "/".
        if (strncmp($remaining, '/./', 3) === 0) {
            $remaining = substr($remaining, 2);
            continue;
        }

        // Step C, prefix form: "/../" collapses to "/" and the segment kept
        // last (if there is one) is removed.
        if (strncmp($remaining, '/../', 4) === 0) {
            array_pop($kept);
            $remaining = substr($remaining, 3);
            continue;
        }

        // Step D: a buffer consisting solely of "." or ".." is dropped.
        if (in_array($remaining, ['.', '..'], true)) {
            break;
        }

        // Step E: move the first segment, together with its leading "/" if
        // present, to the output. The search starts at offset 1 so that a
        // leading slash stays attached to the segment it introduces.
        $nextSlash = strpos($remaining, '/', 1);
        if ($nextSlash === false) {
            // Last segment: it is moved as a whole and the loop is done.
            $kept[] = $remaining;
            break;
        }
        $kept[] = substr($remaining, 0, $nextSlash);
        $remaining = substr($remaining, $nextSlash);
    }

    return implode('', $kept);
}
// --- Test ---
// Each entry pairs the expected result with the input path: [expected, input].
$expectations = [
    ['bar', '../bar'],
    ['bar', './bar'],
    ['bar', '.././bar'],
    // Reverse ordering of the previous case (this slot used to hold an exact
    // duplicate of it, which added no coverage).
    ['bar', './../bar'],
    ['/foo/bar', '/foo/./bar'],
    ['/bar/', '/bar/./'],
    ['/', '/.'],
    ['/bar/', '/bar/.'],
    ['/bar', '/foo/../bar'],
    ['/', '/bar/../'],
    ['/', '/..'],
    ['/', '/bar/..'],
    ['/foo/', '/foo/bar/..'],
    ['', '.'],
    ['', '..'],
];

// Run every vector through removeDotPathSegments() and print one line per
// case: "Success" on an exact (===) match, otherwise both values.
foreach ($expectations as list($expected, $input)) {
    $result = removeDotPathSegments($input);
    echo ($expected === $result) ? "Success\n" : "Failure: {$expected} !== {$result}\n";
}
@geryogam
Copy link

Very useful, thanks!

@frugan-dev
Copy link

PHP 8.2 version:

/**
 * https://gist.github.com/rdlowrey/5f56cc540099de9d5006.
 *
 * Remove dot segments from a URI path according to RFC3986 Section 5.2.4
 *
 * The path is consumed from the left as the RFC's "input buffer"; segments
 * that survive are pushed onto a stack (the "output buffer") and joined for
 * the result. A path containing no "." at all is returned unchanged.
 *
 * @param string $path URI path that may contain "." and ".." segments
 *
 * @return string the path with the dot segments removed
 *
 * @see http://www.ietf.org/rfc/rfc3986.txt
 */
function normalizePath(string $path): string
{
    // No dot anywhere means no dot segment: nothing to do.
    if (!str_contains($path, '.')) {
        return $path;
    }

    $inputBuffer = $path;
    $outputStack = [];

    // 2.  While the input buffer is not empty, loop as follows:
    while ('' !== $inputBuffer) {
        /*
         * A.  If the input buffer begins with a prefix of "../" or "./",
         *     then remove that prefix from the input buffer; otherwise,
         */
        if (str_starts_with($inputBuffer, './')) {
            $inputBuffer = substr($inputBuffer, 2);

            continue;
        }
        if (str_starts_with($inputBuffer, '../')) {
            $inputBuffer = substr($inputBuffer, 3);

            continue;
        }

        /*
         * B.  if the input buffer begins with a prefix of "/./" or "/.",
         *     where "." is a complete path segment, then replace that
         *     prefix with "/" in the input buffer; otherwise,
         */
        if ('/.' === $inputBuffer) {
            // Only "/" would remain in the buffer, so emit it and stop.
            $outputStack[] = '/';

            break;
        }
        if (str_starts_with($inputBuffer, '/./')) {
            // Dropping the first two bytes leaves the second "/" in place.
            $inputBuffer = substr($inputBuffer, 2);

            continue;
        }

        /*
         * C.  if the input buffer begins with a prefix of "/../" or "/..",
         *     where ".." is a complete path segment, then replace that
         *     prefix with "/" in the input buffer and remove the last
         *     segment and its preceding "/" (if any) from the output
         *     buffer; otherwise,
         */
        if ('/..' === $inputBuffer) {
            // array_pop() on an empty stack is a harmless no-op.
            array_pop($outputStack);
            $outputStack[] = '/';

            break;
        }
        if (str_starts_with($inputBuffer, '/../')) {
            array_pop($outputStack);
            // Dropping the first three bytes leaves the trailing "/" in place.
            $inputBuffer = substr($inputBuffer, 3);

            continue;
        }

        /*
         * D.  if the input buffer consists only of "." or "..", then remove
         *     that from the input buffer; otherwise,
         */
        if ('.' === $inputBuffer || '..' === $inputBuffer) {
            break;
        }

        /*
         * E.  move the first path segment in the input buffer to the end of
         *     the output buffer, including the initial "/" character (if
         *     any) and any subsequent characters up to, but not including,
         *     the next "/" character or the end of the input buffer.
         */
        // Search from offset 1 so a leading "/" stays with its segment.
        // "/" has no letter case, so the case-sensitive strpos() suffices.
        $slashPos = strpos($inputBuffer, '/', 1);
        if (false === $slashPos) {
            $outputStack[] = $inputBuffer;

            break;
        }
        $outputStack[] = substr($inputBuffer, 0, $slashPos);
        $inputBuffer = substr($inputBuffer, $slashPos);
    }

    return implode('', $outputStack);
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment