Created
August 29, 2020 02:47
-
-
Save Des-Nerger/ac52fe7e6b12945983bba177964de4dc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
USAGE:
$ go run generate_multibyte_range_for_grep.go '"[0-9] 123-345"' # regex for grepping numbers from 123 to 345 inclusive
2[0-9][0-9]|1[3-9][0-9]|12[3-9]|3[0-3][0-9]|34[0-5]
$ go run generate_multibyte_range_for_grep.go '"[\x80-\xBF] \u4E00-\u9FFC"' # regex for grep -P matching kanji from 一 to 鿼
[\xE5-\xE8][\x80-\xBF][\x80-\xBF]|\xE4[\xB8-\xBF][\x80-\xBF]|\xE9[\x80-\xBE][\x80-\xBF]|\xE9\xBF[\x80-\xBC]
*/
package main | |
import ( | |
"fmt" | |
"os" | |
"strconv" | |
"strings" | |
. "unsafe" | |
) | |
// boolToByte returns 1 when b is true and 0 when b is false.
//
// It uses a plain branch instead of reading the bool's memory through an
// unsafe pointer, so it does not depend on how the compiler lays out bool.
func boolToByte(b bool) byte {
	if b {
		return 1
	}
	return 0
}
// containsOnly reports whether every byte of s equals b.
// An empty s yields true, since there is no byte that differs.
func containsOnly(s string, b byte) bool {
	for i := 0; i < len(s); i++ {
		if s[i] != b {
			return false
		}
	}
	return true
}
// sign reinterprets b as a signed 8-bit value and returns its sign:
// -1 when the top bit of b is set, 0 when b is zero, and +1 otherwise.
func sign(b byte) int8 {
	// Position of the sign bit, derived from the operand's size at compile
	// time instead of a hard-coded bit width.
	const signShift = Sizeof(b)*8 - 1

	// An arithmetic right shift by signShift smears the sign bit across the
	// whole value, giving -1 for negative inputs and 0 for everything else.
	negative := int8(b) >> signShift

	var nonZero int8
	if b != 0 {
		nonZero = 1
	}

	// -1|1 == -1, 0|1 == 1, 0|0 == 0.
	return negative | nonZero
}
var (
	// byteRange is the bracket expression used for every "any byte" position
	// of the generated regex. main assigns it before calling f; f reads
	// byteRange[1] as the lowest byte of the class and byteRange[3] as the
	// highest, so it is expected to have the form "[X-Y]".
	byteRange string

	// Repeat is a package-local shorthand for strings.Repeat.
	Repeat = strings.Repeat
)
func f(p, a, b string) string {d := [2]byte{} | |
for len(a)>=2{ if a[0]==b[0]{p+=a[:1]; a,b=a[1:],b[1:]; continue} | |
d[0], d[1] = boolToByte(!containsOnly(a[1:], byteRange[1])), boolToByte(!containsOnly(b[1:], byteRange[3])); break } | |
return p + func()string{ ad,bd:=a[0]+d[0],b[0]-d[1] | |
switch sign(bd-ad) { | |
case +1: return "["+escapeNotPrintableASCII(ad)+"-"+escapeNotPrintableASCII(bd)+"]" + Repeat(byteRange,len(a)-1) | |
case 0: return escapeNotPrintableASCII(ad) + Repeat(byteRange,len(a)-1) | |
}; return "" | |
} () + | |
func() string {if d[0]==1 {return "|" + f(p+a[:1], a[1:], Repeat(byteRange[3:4], len(a[1:])))}; return ""} () + | |
func() string {if d[1]==1 {return "|" + f(p+b[:1], Repeat(byteRange[1:2], len(b[1:])), b[1:])}; return ""} () | |
} | |
func main() { | |
panicCheck := func(e error) {if e!=nil {panic(e)}} | |
a := func()[]string { | |
a,e:=strconv.Unquote(os.Args[1]); panicCheck(e) | |
b := strings.SplitN(a, " ", 2) | |
byteRange,a = func()string{ | |
if len(b)==2 {return b[0]} | |
return "[\x80-\xBF]" | |
} (),b[len(b)-1] | |
return strings.SplitN(a, "-", 2) | |
} () | |
fmt.Println(quoteToASCII(strings.TrimPrefix(f("", a[0], a[1]), "|"))) | |
} | |
// escapeNotPrintableASCII returns b unchanged as a one-character string when
// it is printable ASCII (0x20 through 0x7E), and otherwise as a backslash
// escape of the form \xHH with two uppercase hexadecimal digits.
func escapeNotPrintableASCII(b byte) string {
	if b < 0x20 || b > 0x7E {
		return fmt.Sprintf("\\x%02X", b)
	}
	return string(rune(b))
}
func quoteToASCII(s string) string{ sb := &strings.Builder{} | |
for i:=0; i<len(s); i++ {sb.WriteString(escapeNotPrintableASCII(s[i]))}; return sb.String() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment