Skip to content

Instantly share code, notes, and snippets.

@CarsonSlovoka
Last active July 2, 2024 11:27
Show Gist options
  • Save CarsonSlovoka/15e12d87069af4e50a108b1a61ae5137 to your computer and use it in GitHub Desktop.
Save CarsonSlovoka/15e12d87069af4e50a108b1a61ae5137 to your computer and use it in GitHub Desktop.
EquivalentUnifiedIdeograph.txt
package main
import (
"fmt"
"regexp"
"strconv"
"strings"
)
// main is the program entry point. It delegates all work to
// EquivalentUnifiedIdeograph, which prints the mapping table to stdout.
func main() {
	EquivalentUnifiedIdeograph()
}
// EquivalentUnifiedIdeograph parses an embedded sample of mapping lines of the
// form
//
//	SOURCE ; TARGET # comment
//	FIRST..LAST ; TARGET # comment
//
// where every field before '#' is a hexadecimal code point, and prints one
// tab-separated row per source code point to stdout:
//
//	<source char> (<source hex>)	<target char> (<target hex>)	<comment>
//
// A FIRST..LAST entry is expanded into one row per code point in the inclusive
// range. Blank lines, comment lines (starting with '#'), and lines that do not
// match the expected shape are skipped. The function panics (via str2hex) if a
// matched field is not a valid hexadecimal number.
func EquivalentUnifiedIdeograph() {
	const data = `
2E81 ; 5382 # CJK RADICAL CLIFF
2E82 ; 4E5B # CJK RADICAL SECOND ONE
# ...
2ED5 ; 28E0F # CJK RADICAL MOUND ONE
2E8C..2E8D ; 5C0F # [2] CJK RADICAL SMALL ONE..CJK RADICAL SMALL TWO
`
	// Groups: 1 = source code point or "first..last" range, 2 = target code
	// point, 3 = trailing comment text.
	re := regexp.MustCompile(`(\w+(?:\.\.\w+)?)\s*;\s*(\w+)\s*#\s*(.+)`)

	for _, line := range strings.Split(data, "\n") {
		// Trim so that indented comments and stray trailing whitespace (for
		// example a '\r' from CRLF input) are handled like their clean forms.
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			// Skip comments and empty lines.
			continue
		}

		m := re.FindStringSubmatch(line)
		if m == nil {
			// Not a mapping line; indexing m would panic, so skip it.
			continue
		}

		// A single code point is treated as a range of length one, so both
		// forms share the same printing path.
		first, last := m[1], m[1]
		if i := strings.Index(m[1], ".."); i >= 0 {
			// The entry is a range.
			first, last = m[1][:i], m[1][i+len(".."):]
		}
		start, end := str2hex(first), str2hex(last)
		target := str2hex(m[2])

		for codepoint := start; codepoint <= end; codepoint++ {
			// %04X must receive the numeric code point: given a string it
			// would print the hex encoding of the string's bytes instead.
			fmt.Printf("%c (%04X)\t%c (%04X)\t%s\n",
				rune(codepoint), codepoint,
				rune(target), target,
				m[3],
			)
		}
	}
}
// str2hex interprets str as a hexadecimal number (no "0x" prefix) and returns
// its value. The value must fit in a signed 32-bit integer; str2hex panics
// with the strconv error if str is empty, malformed, or out of range.
func str2hex(str string) int64 {
	const (
		base    = 16 // the string is written in base 16
		bitSize = 32 // 32 bits is enough for the values handled here
	)

	value, parseErr := strconv.ParseInt(str, base, bitSize)
	if parseErr == nil {
		return value
	}
	panic(parseErr)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment