Last active
February 10, 2021 01:43
-
-
Save amieres/00b6c75451ce729ecdf37d22c2dfd5b9 to your computer and use it in GitHub Desktop.
Regex.fs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Regex = | |
open System.Text.RegularExpressions | |
/// A regular-expression fragment: a single-case union wrapping the pattern text.
type REx =
    | REx of string
    /// The raw pattern text carried by this fragment.
    member r.txt =
        match r with
        | REx t -> t
    /// Concatenates two fragments by joining their pattern texts, left then right.
    static member (+) (left: REx, right: REx) =
        match left, right with
        | REx a, REx b -> REx (a + b)

/// Unwraps a `REx`, returning the pattern text it carries.
let rex (pattern: REx) : string =
    match pattern with
    | REx v -> v
/// Partial active pattern: runs the pattern carried by the `REx` against `input`.
/// On success it yields the values of the match groups with the first group
/// (the whole match) dropped. Yields `None` when `input` is null, when there is
/// no match, or when matching raises an exception.
let (|Regex|_|) (REx pattern) input =
    match input with
    | null -> None
    | text ->
        try
            let m = Regex.Match(text, pattern)
            if not m.Success then
                None
            else
                // The head of the group list is the whole match; only the tail is returned.
                [ for g in m.Groups -> g.Value ] |> List.tail |> Some
        with _ -> None
/// Total active pattern: collects every match of the pattern in `input`.
/// Each array element is the list of group values of one match, with the first
/// group (the whole match) dropped. Yields an empty array when `input` is null
/// or when matching raises an exception.
let (|Regexs|) (REx pattern) input =
    match input with
    | null -> [| |]
    | text ->
        try
            // Enumerated inside the `try` so that failures during matching are caught too.
            [| for m in Regex.Matches(text, pattern) ->
                [ for g in m.Groups -> g.Value ] |> List.tail |]
        with _ -> [| |]
/// Patterns for constructing your regex | |
module p = | |
/// Builds a fragment from the text `v`, prefixing every symbol or punctuation
/// character with a backslash. The underscore is deliberately left unescaped;
/// all other characters are copied through unchanged.
let lit(v:string) =
    let needsEscape c =
        c <> '_' && (System.Char.IsSymbol c || System.Char.IsPunctuation c)
    let escaped =
        v |> Seq.collect (fun c -> if needsEscape c then [ '\\' ; c ] else [ c ])
    REx (System.String(Seq.toArray escaped))
/// Fragment `\r`.
let cr = REx @"\r"
/// Fragment `\n`.
let nl = REx @"\n"
/// Fragment `.`.
let anychar = REx "."
/// Fragment `\t`.
let tab = REx @"\t"
/// Fragment `\s`.
let blank = REx @"\s"
/// Fragment `\S`.
let noblank = REx @"\S"
/// Wraps a fragment in a capturing group: `(...)`.
let capt (pattern: REx) = REx (sprintf "(%s)" (rex pattern))
/// Wraps a fragment in a non-capturing group: `(?:...)`.
let nocapt (pattern: REx) = REx (sprintf "(?:%s)" (rex pattern))
/// Makes a fragment optional: it is grouped without capturing and followed by `?`.
let opt (pattern: REx) = REx (sprintf "%s?" (rex (nocapt pattern)))
/// Generic builders parameterised by a quantifier function (`mny` / `mny0`),
/// so the same shapes can be produced with different quantifiers.
module g =
    /// The quantifier applied to `anychar`.
    let inline anything mny : REx = anychar |> mny
    /// The quantifier applied to `blank`.
    let inline blanks mny : REx = blank |> mny
    /// The quantifier applied to `noblank`.
    let inline noblanks mny : REx = noblank |> mny
    /// `x` followed by quantified blanks.
    let inline spaceaft mny x : REx =
        let trailing = blanks mny
        x + trailing
    /// `x` followed by `mny0` repetitions of `sep` + `x`.
    let inline listof1 mny0 x sep : REx =
        let more = mny0 (sep + x)
        x + more
/// Quantifiers built by wrapping the operand in a non-capturing group and
/// appending `*` or `+`.
module e =
    /// Non-capturing group of `a` followed by `*`.
    let inline many0 a = (a |> nocapt) + REx "*"
    /// Non-capturing group of `a` followed by `+`.
    let inline many1 a = (a |> nocapt) + REx "+"
/// The generic builders from `g`, specialised with `e.many1` as the quantifier.
module e1 =
    let many = e.many1
    let anything = many |> g.anything
    let blanks = many |> g.blanks
    let noblanks = many |> g.noblanks
    let spaceaft = many |> g.spaceaft
    /// `x` followed by `e.many0` repetitions of `sep` + `x`.
    let inline listof x sep = g.listof1 e.many0 x sep
/// The generic builders from `g`, specialised with `e.many0` as the quantifier.
module e0 =
    let many = e.many0
    let anything = many |> g.anything
    let blanks = many |> g.blanks
    let noblanks = many |> g.noblanks
    let spaceaft = many |> g.spaceaft
    /// The `e1.listof` list made optional as a whole.
    let inline listof x sep = opt (e1.listof x sep)
/// Quantifiers from `e` with an extra `?` appended (`*?` and `+?`).
module l =
    /// `e.many0 a` followed by `?`.
    let many0 a =
        let quantified = e.many0 a
        quantified + REx "?"
    /// `e.many1 a` followed by `?`.
    let many1 a =
        let quantified = e.many1 a
        quantified + REx "?"
/// The generic builders from `g`, specialised with `l.many1` as the quantifier.
module l1 =
    let many = l.many1
    let anything = many |> g.anything
    let blanks = many |> g.blanks
    let noblanks = many |> g.noblanks
    let spaceaft = many |> g.spaceaft
    /// `x` followed by `l.many0` repetitions of `sep` + `x`.
    let inline listof x sep = g.listof1 l.many0 x sep
/// The generic builders from `g`, specialised with `l.many0` as the quantifier.
module l0 =
    let many = l.many0
    let anything = many |> g.anything
    let blanks = many |> g.blanks
    let noblanks = many |> g.noblanks
    let spaceaft = many |> g.spaceaft
    /// The `l1.listof` list made optional as a whole.
    let inline listof x sep = opt (l1.listof x sep)
/// The pattern text of the fragment repeated `n` times back to back.
let repl n (REx r) = REx (String.concat "" (Seq.replicate n r))
/// Alternation: the pattern texts joined with `|`, wrapped in a non-capturing group.
let either patterns =
    let alternatives = patterns |> Seq.map rex
    nocapt (REx (String.concat "|" alternatives))
/// The fragment surrounded by `\b` on both sides.
let word (REx r) = REx (sprintf "\\b%s\\b" r)
/// Character class `[...]` built from the fragment's text.
let oneof (REx r) = REx (sprintf "[%s]" r)
/// Negated character class `[^...]` built from the fragment's text.
let noneof (REx r) = REx (sprintf "[^%s]" r)
/// The fragment wrapped between the inline options `(?i)` and `(?-i)`.
let caseins (REx r) = REx (sprintf "(?i)%s(?-i)" r)
/// `l.many0` applied to the alternation of `cr`, `nl` and `anychar`.
let multiline = l.many0 (either [ cr ; nl ; anychar ])
/// Character class `[_a-zA-Z]`.
let letter = oneof (REx "_a-zA-Z")
/// Character class `[0-9]`.
let digit = oneof (REx "0-9")
/// `e.many1` applied to `letter`.
let letters = letter |> e.many1
/// `e.many1` applied to `digit`.
let digits = digit |> e.many1
/// A `letter` followed by `l.many0` of letters or digits, wrapped by `word`.
let ident =
    let rest = l.many0 (either [letter ; digit])
    word (letter + rest)
/// `ident`s separated by an escaped dot that may be followed by blanks.
let dottedid =
    let dot = e0.spaceaft (REx "\.")
    e1.listof ident dot
/// A `dottedid`, optionally followed by a comma-separated list of `dottedid`s
/// between angle brackets.
let typename =
    let comma = e0.spaceaft (REx ",")
    let typeArgs = REx @"<" + e1.listof dottedid comma + REx "\>"
    dottedid + opt typeArgs
/// Anchors the fragment so that it must match the whole string: `^...$`.
let full (REx r) = REx (sprintf "^%s$" r)
/// Function form of the `Regex` active pattern: `Some groups` when the input
/// matches `regex`, `None` otherwise.
let regmatch regex =
    fun input ->
        match input with
        | Regex regex ms -> Some ms
        | _ -> None
/// Ready-made (pattern, replacement text) pairs.
module r =
    open p
    /// Pairs the alternation of `patterns` with the empty replacement.
    let remove patterns = (p.either patterns, "")
    /// Pairs the tab fragment with a replacement string.
    // NOTE(review): the replacement is a single space as seen here; the source may
    // have had its whitespace collapsed - confirm the intended width.
    let tabs = (p.tab, " ")
    /// Pairs `//` followed by `e1.anything` with a newline replacement.
    let comments = (REx @"\/\/" + e1.anything, "\n")
/// Replaces every match of `regex` in `input` with the result of applying `f`
/// to the matched text.
let regexReplaceFunc f (regex:string) (input:string) =
    let compiled = System.Text.RegularExpressions.Regex(regex)
    let evaluator = MatchEvaluator(fun m -> f m.Value)
    compiled.Replace(input, evaluator)
/// Replaces every match of `regex` in `input` with the result of applying `f`
/// to the list of that match's group values (the whole match comes first).
let regexReplaceGroups f (regex:string) (input:string) =
    let compiled = System.Text.RegularExpressions.Regex(regex)
    let evaluator =
        MatchEvaluator(fun m ->
            let groupValues = [ for g in m.Groups -> g.Value ]
            f groupValues)
    compiled.Replace(input, evaluator)
/// Replaces every match of `regex` in `input` with the replacement pattern `repl`.
let regexReplace (regex:string) (repl:string) (input:string) =
    System.Text.RegularExpressions.Regex(regex).Replace(input, repl)

/// Turns a (regex, replacement) pair into a text -> text function.
let replaceToF (regex, repl) = regexReplace regex repl

/// Turns a sequence of (regex, replacement) pairs into a sequence of text -> text functions.
let replacesToFs rs = rs |> Seq.map replaceToF

/// Applies several (regex, replacement) pairs to `inp`, one after another,
/// each replacement working on the output of the previous one.
/// An empty sequence returns `inp` unchanged.
let replace0 repls inp =
    // The text being rewritten is the LAST argument of regexReplace; the pattern is the first.
    repls |> Seq.fold (fun txt (regex, repl) -> regexReplace regex repl txt) inp

/// Same as replace0, but built by composing the individual replacement functions.
/// An empty sequence yields the identity function (folding from `id` rather than
/// reducing, which would throw when there is nothing to compose).
let replace1 v = replacesToFs v |> Seq.fold (>>) id
/// Rewrites `inp` line by line (split on '\n') while tracking a "current section".
/// `st` inspects each line: when it returns `Some name` the line is kept as it is
/// and `name` becomes the current section; otherwise the line is replaced by
/// `f section line`. `init` is the section in effect before any match.
/// The resulting lines are joined back with '\n'.
let byLine init st f (inp:string) =
    let step section line =
        match st line with
        | Some name -> line, name
        | None -> f section line, section
    let rewritten, _ = Array.mapFold step init (inp.Split '\n')
    String.concat "\n" rewritten
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment