Skip to content

Instantly share code, notes, and snippets.

@Des-Nerger
Created August 29, 2020 02:47
Show Gist options
  • Save Des-Nerger/ac52fe7e6b12945983bba177964de4dc to your computer and use it in GitHub Desktop.
Save Des-Nerger/ac52fe7e6b12945983bba177964de4dc to your computer and use it in GitHub Desktop.
/*
USAGE:
$ go run generate_multibyte_range_for_grep.go '"[0-9] 123-345"' # regex for grepping numbers from 123 to 345 inclusive
2[0-9][0-9]|1[3-9][0-9]|12[3-9]|3[0-3][0-9]|34[0-5]
$ go run generate_multibyte_range_for_grep.go '"[\x80-\xBF] \u4E00-\u9FFC"' # regex for grep -P matching kanji from 一 to 鿼
[\xE5-\xE8][\x80-\xBF][\x80-\xBF]|\xE4[\xB8-\xBF][\x80-\xBF]|\xE9[\x80-\xBE][\x80-\xBF]|\xE9\xBF[\x80-\xBC]
*/
package main
import (
"fmt"
"os"
"strconv"
"strings"
. "unsafe"
)
// boolToByte returns 1 when b is true and 0 when b is false.
//
// The result is produced with an ordinary branch instead of reading the bool's
// memory through an unsafe pointer, so it does not depend on how the compiler
// happens to represent bool values.
func boolToByte(b bool) byte {
	if b {
		return 1
	}
	return 0
}
// containsOnly reports whether every byte of s equals b.
// An empty s yields true.
func containsOnly(s string, b byte) bool {
	for _, c := range []byte(s) {
		if c != b {
			return false
		}
	}
	return true
}
// sign interprets b as a two's-complement int8 and returns its sign:
// -1 when the top bit is set, 0 when b is zero, and +1 otherwise.
func sign(b byte) int8 {
	const topBit = 7
	nonZero := b != 0
	// The arithmetic shift smears the sign bit: -1 for negative values, else 0.
	negative := int8(b) >> topBit
	// Reads the bool's storage as an int8; assumes true is stored as 1 and
	// false as 0 (compiler-dependent representation).
	return negative | *(*int8)(Pointer(&nonZero))
}
// byteRange is the regex character class used for every unconstrained byte
// position. f reads the lowest allowed byte from index 1 and the highest from
// index 3, i.e. it expects the form "[l-h]". It is assigned by main.
var byteRange string

// Repeat is a package-local shorthand for strings.Repeat.
var Repeat = strings.Repeat
func f(p, a, b string) string {d := [2]byte{}
for len(a)>=2{ if a[0]==b[0]{p+=a[:1]; a,b=a[1:],b[1:]; continue}
d[0], d[1] = boolToByte(!containsOnly(a[1:], byteRange[1])), boolToByte(!containsOnly(b[1:], byteRange[3])); break }
return p + func()string{ ad,bd:=a[0]+d[0],b[0]-d[1]
switch sign(bd-ad) {
case +1: return "["+escapeNotPrintableASCII(ad)+"-"+escapeNotPrintableASCII(bd)+"]" + Repeat(byteRange,len(a)-1)
case 0: return escapeNotPrintableASCII(ad) + Repeat(byteRange,len(a)-1)
}; return ""
} () +
func() string {if d[0]==1 {return "|" + f(p+a[:1], a[1:], Repeat(byteRange[3:4], len(a[1:])))}; return ""} () +
func() string {if d[1]==1 {return "|" + f(p+b[:1], Repeat(byteRange[1:2], len(b[1:])), b[1:])}; return ""} ()
}
// main reads one Go-quoted command-line argument of the form
// "[l-h] FROM-TO" or "FROM-TO", stores the byte class in byteRange (default
// "[\x80-\xBF]" when the argument has no space-separated class part), and
// prints the ASCII-escaped alternation produced by f for FROM..TO.
//
// Malformed input is reported on standard error with exit status 1 instead of
// surfacing as an index-out-of-range panic.
func main() {
	fail := func(msg string) {
		fmt.Fprintln(os.Stderr, "error: "+msg)
		os.Exit(1)
	}

	if len(os.Args) < 2 {
		fail(`missing argument; expected a Go-quoted string such as '"[0-9] 123-345"'`)
	}
	spec, err := strconv.Unquote(os.Args[1])
	if err != nil {
		fail("argument is not a valid Go-quoted string: " + err.Error())
	}

	// An optional byte class precedes the bounds, separated by the first space.
	byteRange = "[\x80-\xBF]"
	bounds := spec
	if parts := strings.SplitN(spec, " ", 2); len(parts) == 2 {
		byteRange, bounds = parts[0], parts[1]
	}

	ends := strings.SplitN(bounds, "-", 2)
	if len(ends) != 2 {
		fail("range bounds must be written as FROM-TO")
	}
	from, to := ends[0], ends[1]

	switch {
	case len(byteRange) < 4:
		// f indexes byteRange[1] and byteRange[3].
		fail(`byte class must have the form "[l-h]"`)
	case from == "" || len(from) != len(to):
		fail("range bounds must be non-empty and have the same number of bytes")
	case from > to:
		fail("range start must not be greater than range end")
	}

	fmt.Println(quoteToASCII(strings.TrimPrefix(f("", from, to), "|")))
}
// escapeNotPrintableASCII returns b unchanged as a one-character string when
// it lies in the printable ASCII range 0x20..0x7E, and as a "\xHH" escape with
// two upper-case hex digits otherwise.
func escapeNotPrintableASCII(b byte) string {
	if b < 0x20 || b > 0x7E {
		return fmt.Sprintf("\\x%02X", b)
	}
	return string(b)
}
// quoteToASCII returns s with every byte passed through
// escapeNotPrintableASCII, so the result consists of printable ASCII only.
func quoteToASCII(s string) string {
	var out strings.Builder
	for _, c := range []byte(s) {
		out.WriteString(escapeNotPrintableASCII(c))
	}
	return out.String()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment