Last active
February 4, 2020 15:56
-
-
Save diegomanuel/8dd3ebf0b1c2de2a48e70e3f5e5eda91 to your computer and use it in GitHub Desktop.
Quick introduction to regular expressions to make a short class =]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Regular expressions: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
// Cheatsheet: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet
// Most languages have a "native/direct" (literal) syntax to construct regular expressions
const re_native = /abc/;
// And they also have a functional/modular constructor that builds the same pattern from a string
const re_obj = new RegExp("abc");
// Elixir :: https://hexdocs.pm/elixir/Regex.html
// regexp = ~r/abc/
// regexp = Regex.compile("abc")
// Python :: https://docs.python.org/3/howto/regex.html
// regexp = re.compile("abc")
// Demo: how the "g", "i", "m" and "u" modifiers change matching and replacing.
// Regular expressions have "modifiers" (also called flags) that affect the matching behaviour
// https://www.regular-expressions.info/modifiers.html
// g: global search (match or replace all occurrences instead of only the first one)
console.log("the A letter".replace(/e/, "")); // Output: "th A letter"
console.log("the A letter".replace(/e/g, "")); // Output: "th A lttr"
// i: case insensitive
console.log("the A".match(/a/)); // Output: null
console.log("the A".match(/a/i)); // Output: [ 'A', index: 4, input: 'the A' ]
// With "g", match() returns only the list of matched strings (no index/input details)
console.log("the A".match(/a/gi)); // Output: [ 'A' ]
// m: multiline strings (ie: matching an entire text file string with new lines [only affects the behavior of ^ and $])
// u: unicode strings (matching strings containing unicode characters)
// And you can mix them
console.log("A NICE STRING IS nicer NOW 👌👌👌!".replace(/nice[r]?|👌|!/igu, "*")); // Output: "A * STRING IS * NOW ****"
// Demo: the most commonly used character classes, each shown with a replace() call and its output.
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes
// Character: "\" (escapes the next character so it is matched literally)
// Note: inside the replacement STRING, "\$" is just "$", and "$D" is not a special replacement pattern
console.log("$1234".replace(/\$/g, "U\$D")); // Output: "U$D1234"
console.log("the [match] (here)!".replace(/the \[match\] \(here\)/g, "-")); // Output: "-!"
// Character: "\d" (matches any digit)
console.log("the number is $1234".replace(/\d/g, "*")); // Output: "the number is $****"
// Character: "\D" (matches any character that is NOT a digit)
console.log("the number is $1234".replace(/\D/g, "*")); // Output: "***************1234"
// Character: "\w" (matches any alphanumeric character, including the underscore)
// Here "\w+ is \$\w" consumes "number is $1", so only "234" is left of the amount
console.log("the number is $1234".replace(/\w+ is \$\w/g, "*")); // Output: "the *234"
// Character: "\W" (matches any character that is NOT alphanumeric or underscore)
console.log("the number is $1234! right???".replace(/\W/g, "*")); // Output: "the*number*is**1234**right***"
// Character: "\s" (matches any whitespace character)
console.log("the number is $1234".replace(/\s/g, "-")); // Output: "the-number-is-$1234"
// Character: "\S" (the opposite: matches any character that is NOT whitespace)
console.log("the number is $1234".replace(/\S/g, "-")); // Output: "--- ------ -- -----"
// Demo: the most commonly used patterns: anchors, the "." wildcard and quantifiers.
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Assertions
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Groups_and_Ranges
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Quantifiers
// Pattern: "^" and "$" (match the start and the end of a string, respectively)
console.log("the start of the art".replace(/art/g, "*")); // Output: "the st* of the *"
console.log("the start of the art".replace(/^the/g, "*")); // Output: "* start of the art"
console.log("the start of the art".replace(/art$/g, "*")); // Output: "the start of the *"
// Pattern: "." (matches any single character except line terminators)
console.log("no last letters".replace(/.s/g, "*s")); // Output: "no l*st lette*s"
// Pattern: "+" (matches the previous item one or more times)
console.log("ac abc aabbcc aaabbbccc".replace(/ab+c/g, "*")); // Output: "ac * a*c aa*cc" (note the last result "aa*cc")
// Pattern: "*" (matches the previous item zero or more times)
console.log("ac abc aabbcc aaabbbccc".replace(/ab*c/g, "*")); // Output: "* * a*c aa*cc"
// Pattern: "?" (the previous character or group may or may not be present)
console.log("ac abc abbc".replace(/ab?c/g, "*")); // Output: "* * abbc"
// Pattern: "x{n}", "x{n,}", "x{n,m}" (matches exactly "n" times, at least "n" times, or from "n" to "m" times)
console.log("aaabbbccc".replace(/ab{3}c/g, "*")); // Output: "aa*cc"
console.log("aaabbbccc".replace(/ab{2}c/g, "no matches")); // Output: "aaabbbccc" (no matches)
console.log("aaabbbccc".replace(/ab{2,}c/g, "*")); // Output: "aa*cc"
console.log("aaabccc".replace(/ab{1,3}c/g, "*")); // Output: "aa*cc"
console.log("aaabccc".replace(/ab{2,3}c/g, "no matches")); // Output: "aaabccc" (no matches)
// Demo: alternation ("|") and character sets ("[]"), including negated sets and ranges.
// Pattern: "x|y" (matches "x" or "y")
console.log("green apples and red ones".replace(/green|red/g, "*")); // Output: "* apples and * ones"
// Pattern: "[]" (matches any one of the enclosed characters; a leading "^" negates the set)
console.log("abcdefghi".replace(/[fed]/g, "*")); // Output: "abc***ghi"
console.log("abcdefghi".replace(/[^fed]/g, "*")); // Output: "***def***"
// The "-" sign acts as a range (for letters and numbers); the order of the input characters does not matter
console.log("abcdefghi".replace(/[b-h]/g, "*")); // Output: "a*******i"
console.log("ahgcedbfi".replace(/[b-h]/g, "*")); // Output: "a*******i"
// A set followed by "?" matches at most ONE of the enclosed characters
console.log("a".replace(/a[bcd]?/g, "*")); // Output: "*"
console.log("abcd".replace(/a[bcd]?/g, "*")); // Output: "*cd" (note that it is NOT "a*")
console.log("abcd".replace(/a[bc]?d/g, "no matches")); // Output: "abcd" (no matches)
// Demo: capturing groups, backreferences and non-capturing groups.
// Pattern: "()" (groups the patterns and captures the matching strings)
// In the replacement string, "$1" inserts the text captured by the first group
console.log("Total: $500".replace(/\w+:\s.(\d+)/g, "You owe me U\$D$1!")); // Output: "You owe me U$D500!"
console.log("12ab34cd56".replace(/(\D)/g, "-$1-")); // Output: "12-a--b-34-c--d-56"
console.log("12ab34cd56".replace(/(\D+)/g, "-$1-")); // Output: "12-ab-34-cd-56"
// Inside the pattern, "\1" and "\2" are backreferences: they must match the same text captured by groups 1 and 2
console.log("111 xxx 111 and xxxyy".replace(/(\d+) (x+) \1 and \2/g, "-$1-")); // Output: "-111-yy"
console.log("111 xxx 222".replace(/(\d+) x+ \1/g, "no matches")); // Output: "111 xxx 222" (no matches)
// Pattern: "(?:)" (groups the pattern WITHOUT capturing it, so it gets no "$n" number)
console.log("aaa bbb ccc".replace(/(a+) (?:b+) (c+)/g, "1: $1\n2: $2")); // Output: "1: aaa\n2: ccc" ($2 is NOT "bbb")
// Example: Match the site <title> from https://fiqus.coop
const https = require("https");
/**
 * Extracts the text of the first <title> element from an HTML document.
 *
 * The pattern uses "[\s\S]*?" instead of ".*" because "." does not match line
 * terminators (so a title spanning several lines would be missed) and because a
 * greedy quantifier would run up to the LAST "</title>" found on the same line.
 *
 * @param {string} html - The HTML source to search.
 * @returns {string|null} The trimmed title text, or null when there is no <title>.
 */
function extractTitle(html) {
  const match = /<title\b[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  return match ? match[1].trim() : null;
}

https.get("https://fiqus.coop", (resp) => {
  // Decode as UTF-8 at the stream level so multi-byte characters that are
  // split across two chunks are not corrupted by the string concatenation.
  resp.setEncoding("utf8");
  let data = "";
  resp.on("data", (chunk) => {
    data += chunk;
  });
  resp.on("end", () => {
    const title = extractTitle(data);
    // match() returns null when nothing matches, so guard before using the result
    if (title === null) {
      console.error("No <title> found at Fiqus site");
      return;
    }
    console.log("The title at Fiqus site is: "+title);
  });
}).on("error", (err) => {
  // Without this handler a network failure is an unhandled 'error' event that crashes the process
  console.error("Request to Fiqus site failed: " + err.message);
});
// For detailed information about finite state machines:
// https://brilliant.org/wiki/regular-languages/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment