Skip to content

Instantly share code, notes, and snippets.

@nberlette
Last active February 24, 2024 09:11
Show Gist options
  • Save nberlette/df980c0d3741092c60525dc727f3a6a9 to your computer and use it in GitHub Desktop.
Save nberlette/df980c0d3741092c60525dc727f3a6a9 to your computer and use it in GitHub Desktop.
Outdent: highly configurable, modern re-implementation of the popular outdent module

Outdent

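Outdent removes the common leading indentation from multi-line strings and tagged template literals, and also provides a type-level `Outdent<S>` counterpart for string literal types.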

Usage

import { outdent } from "https://gist.githubusercontent.com/nberlette";
/**
 * Space-like characters, listed as single-character strings.
 *
 * Tab (`U+0009`) and `U+FEFF` are intentionally not part of this list; the
 * `whitespace` constant appends them separately.
 */
export const whitespaceLike = [
  "\u{B}", // U+000B vertical tab
  "\u{C}", // U+000C form feed
  "\u{20}", // U+0020 space
  "\u{A0}", // U+00A0 no-break space
  "\u{2000}", // U+2000 en quad
  "\u{2001}", // U+2001 em quad
  "\u{2002}", // U+2002 en space
  "\u{2003}", // U+2003 em space
  "\u{2004}", // U+2004 three-per-em space
  "\u{2005}", // U+2005 four-per-em space
  "\u{2006}", // U+2006 six-per-em space
  "\u{2007}", // U+2007 figure space
  "\u{2008}", // U+2008 punctuation space
  "\u{2009}", // U+2009 thin space
  "\u{200A}", // U+200A hair space
  "\u{202F}", // U+202F narrow no-break space
  "\u{205F}", // U+205F medium mathematical space
  "\u{3000}", // U+3000 ideographic space
] as const;
/** Characters that terminate a line, listed as single-character strings. */
export const lineTerminators = [
  "\u{A}", // U+000A line feed
  "\u{D}", // U+000D carriage return
  "\u{2028}", // U+2028 line separator
  "\u{2029}", // U+2029 paragraph separator
] as const;
/**
 * Every character this module treats as whitespace: the space-like
 * characters, then the line terminators, then tab and `U+FEFF`.
 */
export const whitespace = [
  ...whitespaceLike,
  ...lineTerminators,
  "\u{9}", // U+0009 character tabulation (tab)
  "\u{FEFF}", // U+FEFF zero-width no-break space
] as const;
/** Tuple type of the `whitespace` constant. */
export type whitespace = typeof whitespace;

/** Union of every single-character string contained in `whitespace`. */
export type Whitespace = (typeof whitespace)[number];

/** Union of the line-terminator characters (LF, CR, U+2028, U+2029). */
export type LineTerminators = "\n" | "\r" | "\u2028" | "\u2029";
/**
 * Removes up to `N` leading whitespace characters from the string type `S`.
 *
 * `A` is a tuple used purely as a counter: one element is appended for each
 * character removed, and recursion stops once its length equals `N` or the
 * next character is not a `Whitespace` member.
 */
export type TrimLeft<
  S extends string,
  N extends number = 1,
  A extends readonly 0[] = [],
> = [N] extends [A["length"]]
  ? S
  : S extends `${Whitespace}${infer Rest}`
    ? TrimLeft<Rest, N, [...A, 0]>
    : S;
/**
 * Counts the leading whitespace characters of the string type `S`.
 *
 * `A` is a tuple used as the running counter. The count is reported when the
 * first non-whitespace character is reached; if `S` runs out of characters
 * first (empty or whitespace-only input), the result is `0`.
 */
export type MeasureIndentation<
  S extends string,
  A extends readonly 0[] = [],
> = S extends `${infer Head}${infer Tail}`
  ? Head extends `${Whitespace}${string}`
    ? MeasureIndentation<Tail, [...A, 0]>
    : A["length"]
  : 0;
/**
 * Recursive type-level outdent over the string type `S`.
 *
 * `N` is the number of leading whitespace characters to strip from each line.
 * It defaults to `MeasureIndentation<S>`, i.e. the indentation measured at the
 * very start of `S` (the first line), not a minimum across all lines.
 *
 * Lines are split on `"\n"` only; each line is trimmed with `TrimLeft<L, N>`,
 * which removes at most `N` whitespace characters.
 */
export type OutdentHelper<
S extends string,
N extends number = MeasureIndentation<S>,
// A non-literal `string` cannot be processed character by character.
> = string extends S ? string
: S extends "" ? ""
// A leading line terminator is dropped from the result, and the recursive call
// omits `N`, so the indentation is re-measured on the remainder.
: S extends `${LineTerminators}${infer R}` ? OutdentHelper<R>
// Otherwise split at the first "\n": `L` is the current line, `R` the rest.
: S extends `${infer L}\n${infer R}`
? L extends `${Whitespace}${string}`
// NOTE(review): both branches of this check produce the same type, so the
// comparison of `MeasureIndentation<L>` against `N` has no effect on the result.
? MeasureIndentation<L> extends N
? `${TrimLeft<L, N>}\n${OutdentHelper<R, N>}`
: `${TrimLeft<L, N>}\n${OutdentHelper<R, N>}`
// A line that does not start with whitespace ends processing: the whole
// remaining input `S` (this line and everything after it) is returned as-is.
: S
// Final line (no "\n" left): trim it if it is indented, otherwise keep it.
: S extends `${Whitespace}${string}` ? `${TrimLeft<S, N>}`
: S;
The MIT License (MIT)
Copyright © 2023-2024 Nicholas Berlette (https://github.com/nberlette)
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
export { outdent, outdent as default } from "./outdent.ts";
export type { Outdent, OutdentOptions } from "./types.ts";
/*! MIT. Copyright (c) 2024+ Nicholas Berlette. All rights reserved. */
import { whitespaceLike } from "./_internal.ts";
import type { Outdent, OutdentOptions } from "./types.ts";
/**
 * A value that may be interpolated into an `outdent` tagged template: any
 * ordinary value, an options object, or the `outdent` function itself (used as
 * an indentation marker).
 */
type TemplateValue<T> = T | OutdentOptions | typeof outdent;

/**
 * Removes common leading whitespace from each line in the template string,
 * while respecting indentation levels beyond the shared whitespace.
 *
 * Blank lines are ignored when measuring the shared indentation and are left
 * untouched in the output.
 *
 * NOTE(review): the literal type computed by `Outdent<S>` drops leading line
 * terminators, whereas this function keeps them in the returned string, so the
 * static type and the runtime value can disagree for such inputs.
 *
 * @example
 * ```ts
 * const s = outdent(`
 *     const foo = 1;
 *     if (foo) {
 *       console.log(foo);
 *     }
 * `);
 * // s === "\nconst foo = 1;\nif (foo) {\n  console.log(foo);\n}\n"
 * ```
 */
export function outdent<S extends string>(string: S): Outdent<S>;
/**
 * Removes common leading whitespace from each line in the template string,
 * while respecting indentation levels beyond the shared whitespace.
 *
 * Object values are treated as options and functions are dropped from the
 * output; every other value is interpolated as-is. The raw (unescaped)
 * template strings are used.
 */
export function outdent<const T, const V extends readonly TemplateValue<T>[]>(
  strings: TemplateStringsArray,
  ...values: [...V]
): string;
export function outdent(string: string, options?: OutdentOptions): string;
export function outdent<T>(
  template: TemplateStringsArray,
  ...values: TemplateValue<T>[]
): string;
export function outdent<T>(
  template: TemplateStringsArray | string,
  ...values: TemplateValue<T>[]
): string {
  const options: OutdentOptions = {};
  let str: string;

  if (typeof template === "string") {
    str = template;
    Object.assign(options, values[0] ?? {});
  } else {
    const strings = template;
    const parts = values.map((value, i) => {
      if (value && typeof value === "object") {
        // Option objects configure the call and contribute no text.
        Object.assign(options, value);
        return "";
      }
      if (typeof value === "function") {
        if (value === outdent) {
          // The marker records the width of the indentation right before it.
          // (The previous `parseInt` of the whitespace itself produced NaN.)
          const before = strings[i] ?? strings.raw[i] ?? "";
          options.spaces = (/[ \t]*$/.exec(before)?.[0] ?? "").length;
        }
        return "";
      }
      return value;
    });
    str = strings.raw.reduce(
      (acc, chunk, i) => `${acc}${chunk}${parts[i] ?? ""}`,
      "",
    );
  }

  const {
    useTabs = false,
    // Documented default; previously `undefined` unless `useTabs` was set,
    // which made the tab-to-space conversion delete tabs outright.
    tabWidth = 2,
    trimStart = false,
    trimEnd = false,
    lineWidth = 80,
    preserveTabsAndSpaces = true,
    normalizeEndings = true,
    normalizeWhitespace = !preserveTabsAndSpaces,
    // NOTE(review): only `true` is acted upon below; numeric limits (including
    // this default) are accepted but currently have no effect.
    removeEmptyLines = 2,
    spaces = useTabs ? undefined : 2,
    eol = "\n",
    wordwrap = false,
  } = options;

  // Patterns are built only where they are used, so options that are not in
  // effect (e.g. `lineWidth: 0`) can no longer make RegExp construction throw.
  if (normalizeWhitespace) {
    const leadingIrregular = new RegExp(
      `^([${whitespaceLike.join("")}]+)`,
      "gum",
    );
    str = str.replace(leadingIrregular, (run) => " ".repeat(run.length));
  }
  if (normalizeEndings) {
    // deno-lint-ignore no-control-regex
    str = str.replace(/\r\n|\r|\u2028|\u2029|\u000B|\u000C/g, () => eol);
  }
  if (removeEmptyLines === true) {
    // Escape `eol` so separators such as "|" or "." are matched literally.
    const escapedEol = eol.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    str = str.replace(
      new RegExp(`(?:${escapedEol}){3,}`, "g"),
      () => eol + eol,
    );
  }

  let lines = str.split(eol);
  // Only possible for an empty input combined with an empty `eol`.
  if (lines.length === 0) return "";

  if (trimStart) lines[0] = (lines[0] ?? "").trimStart();
  if (trimEnd) {
    const last = lines.length - 1;
    lines[last] = (lines[last] ?? "").trimEnd();
  }

  // Convert between tabs and spaces inside the leading indentation only;
  // whitespace in the middle of a line is content and must not be rewritten.
  const tabSize = Number.isFinite(tabWidth) && tabWidth >= 1
    ? Math.floor(tabWidth)
    : 2;
  const spaceRun = " ".repeat(tabSize);
  const toTabs = spaces === undefined && useTabs;
  lines = lines.map((line) =>
    line.replace(
      /^[ \t]+/,
      (lead) =>
        toTabs
          ? lead.split(spaceRun).join("\t")
          : lead.split("\t").join(spaceRun),
    )
  );

  // Smallest indentation (any whitespace) across non-blank lines.
  const minIndent = lines.reduce((min, line) => {
    if (/^\s*$/.test(line)) return min;
    return Math.min(min, /^\s*/.exec(line)?.[0].length ?? 0);
  }, Infinity);

  // Smallest space/tab indentation, capped at MAX_INDENT_LENGTH. Measured on
  // the converted lines so that tab expansion above does not skew the result.
  const MAX_INDENT_LENGTH = 100;
  let indent = MAX_INDENT_LENGTH;
  let sawPlainIndent = false;
  for (const line of lines) {
    for (const lead of line.match(/^[ \t]*(?=\S)/gm) ?? []) {
      sawPlainIndent = true;
      indent = Math.min(indent, lead.length);
    }
  }
  if (!sawPlainIndent) indent = 0;

  const stripIndent = new RegExp(
    `^[ \\t]{${Math.min(minIndent, indent)}}`,
    "gm",
  );
  lines = lines.map((line) =>
    // Blank lines and lines without indentation are returned untouched.
    line.search(/\S/) < 1 ? line : line.replace(stripIndent, "")
  );

  const width = Math.floor(lineWidth);
  if (wordwrap !== false && Number.isFinite(width) && width >= 1) {
    lines = wordwrap === "hard"
      ? hardWrapLines(lines, width)
      : softWrapLines(lines, width, eol);
  }

  return lines.join(eol);
}

/**
 * Cuts every line at exactly `width` characters. The overflow of a line is
 * inserted after the indentation of the following line; overflow of the last
 * line produces additional lines instead of being discarded.
 */
function hardWrapLines(lines: readonly string[], width: number): string[] {
  const queue = [...lines];
  const wrapped: string[] = [];
  // `queue` may grow while iterating, hence the index-based loop. Each pass
  // either emits a short line or removes `width` characters, so it terminates.
  for (let i = 0; i < queue.length; i++) {
    const line = queue[i] ?? "";
    if (line.length <= width) {
      wrapped.push(line);
      continue;
    }
    wrapped.push(line.slice(0, width));
    const leftover = line.slice(width).trimEnd();
    if (leftover.length > 0) {
      const next = queue[i + 1] ?? "";
      const nextIndent = /^\s*/.exec(next)?.[0] ?? "";
      queue[i + 1] = nextIndent + leftover + next.slice(nextIndent.length);
    }
  }
  return wrapped;
}

/**
 * Applies the (unstable) soft-wrap pass to every line.
 *
 * NOTE(review): the per-chunk logic is carried over unchanged from the
 * previous implementation; its first loop branch discards `part`, which looks
 * unintended and should be confirmed before this mode is stabilised.
 */
function softWrapLines(
  lines: readonly string[],
  width: number,
  eol: string,
): string[] {
  const pattern = new RegExp(
    `^(.{1,${width}}(?=(?:(?![\\r\\n])\\s+)|$\\n?))|(.{${width}}\\b)`,
    "gum",
  );
  return lines.map((line) =>
    line.replace(pattern, (match: string, p1?: string, p2?: string) => {
      let chunk = p1 ?? p2 ?? match;
      chunk = chunk.replace(
        /$\n?|\s+$/m,
        (tail) => tail.length > 1 ? eol : "",
      );
      if (chunk.length <= width) return chunk;
      const parts = chunk.split(/(\s+)/);
      let result = "";
      let current = "";
      for (const part of parts) {
        if ((current + part).length > width) {
          result += current + eol;
          current = "";
        } else if (current.length > width) {
          result += current + eol;
          current = part;
        } else if (part.trim().length || current.length) {
          current += part;
        } else if (result.length) {
          result += part;
        } else {
          current += part;
        }
      }
      return result + current;
    })
  );
}
import type { OutdentHelper } from "./_internal.ts";
/**
 * Type-level counterpart of the {@linkcode outdent} function: applies
 * `OutdentHelper` to the string literal type {@linkcode S}. Should the helper
 * ever yield something that is not a string type, the result widens to
 * `string`.
 *
 * @example
 * ```ts
 * type T = Outdent<"\n  a\n    b\n">;
 * // type T = "a\n  b\n"
 * ```
 */
export type Outdent<S extends string> =
  OutdentHelper<S> extends infer Result extends string ? Result : string;
/** Configuration accepted by the `outdent` function. */
export interface OutdentOptions {
  /**
   * The string used to split the input into lines and to join the result.
   *
   * Default is `"\n"`.
   */
  eol?: string;
  /**
   * Replaces irregular line endings with the {@linkcode eol} string, which is
   * a line feed (`U+000A`) by default.
   *
   * The following sequences are replaced:
   * - `U+000D` + `U+000A` (carriage return + line feed)
   * - `U+000D` (carriage return)
   * - `U+2028` (line separator)
   * - `U+2029` (paragraph separator)
   * - `U+000B` (vertical tab)
   * - `U+000C` (form feed)
   *
   * Default is `true`.
   */
  normalizeEndings?: boolean;
  /**
   * Replaces each irregular whitespace character at the start of a line with
   * a standard space character (`U+0020`).
   *
   * The following characters are replaced:
   * - `U+000B` (vertical tab)
   * - `U+000C` (form feed)
   * - `U+0020` (space)
   * - `U+00A0` (no-break space)
   * - `U+2000` (en quad)
   * - `U+2001` (em quad)
   * - `U+2002` (en space)
   * - `U+2003` (em space)
   * - `U+2004` (three-per-em space)
   * - `U+2005` (four-per-em space)
   * - `U+2006` (six-per-em space)
   * - `U+2007` (figure space)
   * - `U+2008` (punctuation space)
   * - `U+2009` (thin space)
   * - `U+200A` (hair space)
   * - `U+202F` (narrow no-break space)
   * - `U+205F` (medium mathematical space)
   * - `U+3000` (ideographic space)
   *
   * Tab (`U+0009`) and `U+FEFF` (zero-width no-break space) are not affected.
   *
   * Default is the opposite of {@link preserveTabsAndSpaces}.
   */
  normalizeWhitespace?: boolean;
  /**
   * If `true`, mixed tabs and spaces are meant to be preserved as-is, with
   * {@link tabWidth} used to attempt to outdent the text while keeping the
   * existing indentation characters. This option is experimental and may
   * produce mixed results. It is ignored if {@link useTabs} or {@link spaces}
   * are set.
   *
   * Default is `true`.
   */
  preserveTabsAndSpaces?: boolean;
  /**
   * If `true`, runs of consecutive empty lines are collapsed. A number is
   * intended to set the maximum count of consecutive empty lines to allow.
   *
   * NOTE: the current implementation only acts on `true`, collapsing three or
   * more consecutive {@link eol} sequences into two; numeric values are
   * accepted but not applied yet.
   */
  removeEmptyLines?: boolean | number;
  /**
   * The number of spaces to use for indentation. If not provided, the value
   * is determined by the minimum amount of leading whitespace that is common
   * to all non-empty lines. With the tagged template literal syntax you may
   * also pass the `outdent` function itself as a placeholder value, to mark
   * the position where the indentation level should be determined.
   *
   * @example
   * ```ts
   * const result = outdent`
   *   ${outdent} // <- use the outdent function to mark the indent level (2)
   *     if (true) {
   *       console.log('Hello, world!');
   *     }
   * `;
   *
   * console.log(result);
   * // Output:
   * // if (true) {
   * //   console.log('Hello, world!');
   * // }
   * ```
   */
  spaces?: number;
  /**
   * If `true`, leading whitespace is removed from the start of the string.
   * Otherwise it is preserved.
   *
   * Default is `false`.
   */
  trimStart?: boolean;
  /**
   * If `true`, trailing whitespace is removed from the end of the string.
   * Otherwise it is preserved.
   *
   * Default is `false`.
   */
  trimEnd?: boolean;
  /** The width of a tab character, in number of spaces. Default is `2`. */
  tabWidth?: number;
  /**
   * If `true`, tabs are used for indentation rather than spaces. If `false`,
   * spaces are used instead.
   *
   * Default is `false`.
   */
  useTabs?: boolean;
  /**
   * The maximum line width to use for word wrapping. This is only used if the
   * {@link wordwrap} option is set to `true`, `"hard"`, or `"soft"`.
   *
   * Default is `80`.
   */
  lineWidth?: number;
  /**
   * UNSTABLE: Enable/disable/control word-wrapping behavior.
   *
   * If `true`, the text is wrapped to fit within the {@link lineWidth} limit.
   * If `"hard"`, the text is wrapped at the breakpoint closest to the exact
   * width, and whitespace is not preserved. If `"soft"`, the text is wrapped
   * at the nearest whitespace character, while respecting existing whitespace
   * and line terminators.
   *
   * Default is `false`.
   */
  wordwrap?: boolean | "hard" | "soft";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment