Last active
March 6, 2021 20:03
-
-
Save yanndegat/3790c3ac97823fad7242ba589e3e8a17 to your computer and use it in GitHub Desktop.
std-pregexp.rkt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket | |
(require net/url) | |
(define ipv6-hex "[0-9a-fA-F:]*:[0-9a-fA-F:]*") | |
(define url-regexp | |
(pregexp (string-append | |
"^" | |
"(?:" ; / scheme-colon-opt | |
"([^:/?#]*)" ; | #1 = scheme-opt | |
":)?" ; \ | |
"(?://" ; / slash-slash-authority-opt | |
"(?:" ; | / user-at-opt | |
"([^/?#@]*)" ; | | #2 = user-opt | |
"@)?" ; | \ | |
"(?:" ; | |
"(?:\\[" ; | / #3 = ipv6-host-opt | |
"(" ipv6-hex ")" ; | | hex-addresses | |
"\\])|" ; | \ | |
"([^/?#:]*)" ; | #4 = host-opt | |
")?" ; | |
"(?::" ; | / colon-port-opt | |
"([0-9]*)" ; | | #5 = port-opt | |
")?" ; | \ | |
")?" ; \ | |
"([^?#]*)" ; #6 = path | |
"(?:\\?" ; / question-query-opt | |
"([^#]*)" ; | #7 = query-opt | |
")?" ; \ | |
"(?:#" ; / hash-fragment-opt | |
"(.*)" ; | #8 = fragment-opt | |
")?" ; \ | |
"$"))) | |
(define urls (list | |
"https://github.com/foo/bar.git" | |
"https://github.com/foo/bar.git?ref=master&foo=bar" | |
"ssh://git@github.com:22/foo/bar.git" | |
"ssh://git@github.com:22/foo/bar.git?ref=master&foo=bar" | |
)) | |
(map (curry regexp-match url-regexp) urls) | |
=> '(("https://github.com/foo/bar.git" "https" #f #f "github.com" #f "/foo/bar.git" #f #f) ("https://github.com/foo/bar.git?ref=master&foo=bar" "https" #f #f "github.com" #f "/foo/bar.git" "ref=master&foo=bar" #f) ("ssh://git@github.com:22/foo/bar.git" "ssh" "git" #f "github.com" "22" "/foo/bar.git" #f #f) ("ssh://git@github.com:22/foo/bar.git?ref=master&foo=bar" "ssh" "git" #f "github.com" "22" "/foo/bar.git" "ref=master&foo=bar" #f)) | |
-- | |
good match | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(import :std/pregexp) | |
(define ipv6-hex "[0-9a-fA-F:]*:[0-9a-fA-F:]*") | |
(define url-regexp | |
(pregexp (string-append | |
"^" | |
"(?:" ; / scheme-colon-opt | |
"([^:/?#]*)" ; | #1 = scheme-opt | |
":)?" ; \ | |
"(?://" ; / slash-slash-authority-opt | |
"(?:" ; | / user-at-opt | |
"([^/?#@]*)" ; | | #2 = user-opt | |
"@)?" ; | \ | |
"(?:" ; | |
"(?:\\[" ; | / #3 = ipv6-host-opt | |
"(" ipv6-hex ")" ; | | hex-addresses | |
"\\])|" ; | \ | |
"([^/?#:]*)" ; | #4 = host-opt | |
")?" ; | |
"(?::" ; | / colon-port-opt | |
"([0-9]*)" ; | | #5 = port-opt | |
")?" ; | \ | |
")?" ; \ | |
"([^?#]*)" ; #6 = path | |
"(?:\\?" ; / question-query-opt | |
"([^#]*)" ; | #7 = query-opt | |
")?" ; \ | |
"(?:#" ; / hash-fragment-opt | |
"(.*)" ; | #8 = fragment-opt | |
")?" ; \ | |
"$"))) | |
(define urls (list | |
"https://github.com/foo/bar.git" | |
"https://github.com/foo/bar.git?ref=master&foo=bar" | |
"ssh://git@github.com:22/foo/bar.git" | |
"ssh://git@github.com:22/foo/bar.git?ref=master&foo=bar" | |
)) | |
(displayln (map (lambda (url) (pregexp-match url-regexp url)) urls)) | |
=> ((https://github.com/foo/bar.git https github.com #f github.com #f /foo/bar.git #f #f) (https://github.com/foo/bar.git?ref=master&foo=bar https github.com #f github.com #f /foo/bar.git ref=master&foo=bar #f) (ssh://git@github.com:22/foo/bar.git ssh git #f github.com 22 /foo/bar.git #f #f) (ssh://git@github.com:22/foo/bar.git?ref=master&foo=bar ssh git #f github.com 22 /foo/bar.git ref=master&foo=bar #f)) | |
=> ----------- | |
=> wrong match |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment