Skip to content

Instantly share code, notes, and snippets.

@hhsprings
Last active August 4, 2024 11:57
Show Gist options
  • Save hhsprings/07d5fab81590aa016c8b43ad51aea3cf to your computer and use it in GitHub Desktop.
Save hhsprings/07d5fab81590aa016c8b43ad51aea3cf to your computer and use it in GitHub Desktop.
Unpacking/repacking zip and tar archives, written in Go
package main
import (
"archive/zip"
"archive/tar"
"compress/gzip"
//"compress/bzip2" // bzip2 package of standard library in Go 1.16 has no compression ability.
"github.com/dsnet/compress/bzip2"
"fmt"
"io"
"io/fs"
//"log"
"os"
"os/exec"
"path/filepath"
"strings"
"bytes"
"reflect"
"text/template"
"github.com/hellflame/argparse"
"github.com/axgle/mahonia"
"github.com/ulikunitz/xz"
"github.com/ulikunitz/xz/lzma"
"github.com/clbanning/mxj/v2"
"github.com/danwakefield/fnmatch"
)
/*
* =================================================
* constants, global variables, etc.
* =================================================
*/
const (
// CONF_DEFAULT is the configuration text written to the user's config file
// when that file does not exist yet (see setupConfs). The text is first run
// through text/template with a single field, .Src (the escaped source path),
// and the result is then parsed as JSON. The {{/* ... */}} sections are
// template comments, so they disappear before JSON parsing.
//
// "unpackers" maps an archive type to the external command line used to
// unpack it; "ignoredirpatterns" lists directory-name patterns that are
// skipped while filtering entries.
CONF_DEFAULT = `{{/*
configuration of zip2tar.
basically this file is a json-like format.
*/}}
{
"unpackers": {
"zip": ["7za", "x", "{{.Src}}"], {{/* if zip is encrypted, use this via --prefer_external_unpacker */}}
"lzh": ["lha", "x", "{{.Src}}"],
"7z": ["7za", "x", "{{.Src}}"],
"rar": ["py", "-m", "rarfile", "-e", "{{.Src}}", "."] {{/* python binding of libunrar */}}
},
"ignoredirpatterns": [
"__MACOSX",
".git",
".hg",
".svn"
]
}
`
)
var (
// lzmaDictCapExps maps preset values (index 0 through 9) to the exponent
// for dictionary capacity sizes: the capacity used is 1 << lzmaDictCapExps[preset].
// Only the last entry (preset 9) is used when creating xz/lzma writers in this file.
lzmaDictCapExps = []uint{18, 20, 21, 22, 22, 23, 23, 24, 25, 26}
)
/*
* =================================================
* helper functions
* =================================================
*/
// abspath returns the absolute form of p as computed by filepath.Abs.
// The error from filepath.Abs is deliberately dropped; on failure the caller
// simply receives whatever string filepath.Abs returned alongside the error.
func abspath(p string) (abs string) {
	abs, _ = filepath.Abs(p)
	return abs
}
// pathJoin joins subdir onto topdir and reports whether the result stays
// inside topdir. It is the guard against "Zip Slip" style archive entries
// such as "../../etc/passwd".
//
// Both arguments are cleaned with filepath.Clean first. The returned path is
// filepath.Join of the cleaned arguments (it is NOT made absolute). The
// returned bool is true when that path, once made absolute, is topdir itself
// or lies beneath it, and false when it escapes topdir or when an absolute
// path cannot be computed.
//
// Containment is decided from the relative path between the two absolute
// paths rather than by string-prefix comparison, so that a topdir of the
// filesystem root and an entry that resolves to topdir itself (e.g. ".")
// are both accepted.
func pathJoin(topdir, subdir string) (string, bool) {
	topdir = filepath.Clean(topdir)
	subdir = filepath.Clean(subdir)
	resdir := filepath.Join(topdir, subdir)

	absTop, err := filepath.Abs(topdir)
	if err != nil {
		return resdir, false
	}
	absRes, err := filepath.Abs(resdir)
	if err != nil {
		return resdir, false
	}
	rel, err := filepath.Rel(absTop, absRes)
	if err != nil {
		return resdir, false
	}
	// Only a leading ".." path component means "outside"; a name that merely
	// starts with two dots (such as "..foo") is an ordinary child.
	escapes := rel == ".." || strings.HasPrefix(rel, ".."+string(os.PathSeparator))
	return resdir, !escapes
}
// getatype classifies the archive named fn from its (lower-cased) base name
// and returns (archive type, compression suffix).
//
//   - no "." in the base name            -> ("", "")
//   - second-to-last component is "tar"  -> ("tar", last component)
//   - ".tar"                             -> ("tar", "")
//   - ".tgz" / ".tbz2" / ".txz" / ".tlzma" -> ("tar", "gz" / "bz2" / "xz" / "lzma")
//   - ".zip" / ".egg"                    -> ("zip", "")
//
// For any other name, when forread is true the file is probed with
// zip.OpenReader and reported as ("zip", "") if it opens successfully.
// Otherwise the last component is returned as the type with no compression.
func getatype(fn string, forread bool) (string, string) {
	base := strings.ToLower(filepath.Base(fn))
	parts := strings.Split(base, ".")
	n := len(parts)
	if n < 2 {
		return "", ""
	}

	inner, outer := parts[n-2], parts[n-1]
	if inner == "tar" {
		return inner, outer
	}
	switch outer {
	case "tar":
		return "tar", ""
	case "tgz":
		return "tar", "gz"
	case "tbz2":
		return "tar", "bz2"
	case "txz":
		return "tar", "xz"
	case "tlzma":
		return "tar", "lzma"
	case "zip", "egg":
		return "zip", ""
	}

	// Unknown suffix: when reading, let the content decide whether it is a zip.
	if forread {
		if zr, err := zip.OpenReader(fn); err == nil {
			zr.Close()
			return "zip", ""
		}
	}
	return outer, ""
}
// runExtnProc runs the external command args[0] with arguments args[1:],
// wiring the child's stdin/stdout/stderr to this process's own.
//
// It returns an error when args is empty, when the program cannot be found
// on PATH, when the process cannot be started or waited for, or when it
// exits with a non-zero status. It returns nil only on a zero exit status.
func runExtnProc(args []string) error {
	if len(args) == 0 {
		return fmt.Errorf("external process: empty command line")
	}
	progpath, err := exec.LookPath(args[0])
	if err != nil {
		return err
	}
	cmd := exec.Command(progpath, args[1:]...)
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		// An *exec.ExitError only means "non-zero exit"; that case is reported
		// below with the status code. Anything else (e.g. failure to start)
		// carries the real cause and must not be lost.
		if _, exited := err.(*exec.ExitError); !exited {
			return fmt.Errorf("running %s: %w", progpath, err)
		}
	}
	excd := cmd.ProcessState.ExitCode()
	if excd != 0 {
		return fmt.Errorf("external process had ended with status=%d", excd)
	}
	return nil
}
// runExtnUnpacker runs the external unpacker command args inside a freshly
// created temporary directory, so that whatever the tool extracts ends up
// there, and returns the absolute path of that directory.
//
// The process working directory is switched to the temporary directory for
// the duration of the command and switched back afterwards. On any failure
// the temporary directory is removed and ("", err) is returned; on success
// the caller owns the directory and is responsible for removing it.
func runExtnUnpacker(args []string) (string, error) {
	if len(args) == 0 {
		return "", fmt.Errorf("external unpacker: empty command line")
	}
	tdir, err := os.MkdirTemp("", "_tmp_"+filepath.Base(args[0]))
	if err != nil {
		return "", err
	}
	cdir, err := filepath.Abs(".")
	if err == nil {
		tdir, err = filepath.Abs(tdir)
	}
	if err == nil {
		// If this fails the unpacker must not run: it would extract into
		// the current directory instead of the temporary one.
		err = os.Chdir(tdir)
	}
	if err != nil {
		os.RemoveAll(tdir)
		return "", err
	}

	runErr := runExtnProc(args)
	// Always return to the original directory, even when the command failed.
	if err := os.Chdir(cdir); err != nil && runErr == nil {
		runErr = err
	}
	if runErr != nil {
		os.RemoveAll(tdir)
		return "", runErr
	}
	return tdir, nil
}
/*
* =================================================
* unarch components
* =================================================
*/
// UnarchArgs bundles the command-line options and configuration values that
// drive one unpack/repack run. The pointer fields are the values handed back
// by the argument parser in setupConfs.
type UnarchArgs struct {
// src is the source archive or directory (positional argument "src");
// setupConfs converts it to an absolute path.
src *string
// dst is the destination (positional argument "dst"); its file name
// decides which kind of writer main creates.
dst *string
// dirheadEncoding is the character encoding used to decode zip entry names
// that are flagged as non-UTF-8 (option -e/--encoding, default "shiftjis").
dirheadEncoding *string
// unpackers_conf maps an archive type (e.g. "7z") to the external command
// line used to unpack it, loaded from the "unpackers" section of the config.
unpackers_conf map[string][]string
// ignoredirpatterns holds fnmatch patterns, loaded from the
// "ignoredirpatterns" section of the config; an entry is skipped when any
// directory component of its path matches one of them.
ignoredirpatterns []string
// prefer_external_unpacker (option -U) makes main use a configured external
// unpacker even for zip and tar sources.
prefer_external_unpacker *bool
// pattern_match (option -m/--fnmatch): when non-empty, only entries whose
// path matches this fnmatch pattern are kept.
pattern_match *string
// pattern_exclude_match (option -x/--fnmatch_exclude): when non-empty,
// entries whose path matches this fnmatch pattern are dropped.
pattern_exclude_match *string
// do_list (option -l/--list) makes main print entry paths instead of
// writing them anywhere.
do_list *bool
}
// Filter reports whether ent should be processed.
//
// The entry's path (relative to ".") is tested in three steps, and the entry
// is rejected as soon as one of them fails:
//  1. if pattern_match is set, the path must match it;
//  2. if pattern_exclude_match is set, the path must not match it;
//  3. no directory component of the path may match any of ignoredirpatterns.
//
// An error from ent.Path is ignored; the path it returned is used as is.
func (arg *UnarchArgs) Filter(ent Entry) bool {
	path, _ := ent.Path(".")
	flag := fnmatch.FNM_PATHNAME | fnmatch.FNM_LEADING_DIR

	if want := *arg.pattern_match; want != "" && !fnmatch.Match(want, path, flag) {
		return false
	}
	if skip := *arg.pattern_exclude_match; skip != "" && fnmatch.Match(skip, path, flag) {
		return false
	}

	// Only the directory part is checked against the ignore list; the final
	// path element itself is not.
	dir, _ := filepath.Split(path)
	for _, comp := range strings.Split(filepath.ToSlash(dir), "/") {
		name := filepath.Clean(comp)
		for _, pat := range arg.ignoredirpatterns {
			if fnmatch.Match(pat, name, flag) {
				return false
			}
		}
	}
	return true
}
// Entry is one item (file or directory) coming out of a source: a zip
// archive, a tar stream, or a directory on disk.
type Entry interface {
// Path returns the entry's path joined onto destdir. Implementations may
// return an error when the resulting path is considered illegal.
Path(destdir string) (string, error)
// FileInfo returns the entry's metadata (mode, size, modification time, ...).
FileInfo() fs.FileInfo
// Open returns a reader for the entry's content. The declared type is
// io.Reader, but some implementations return a value that also
// implements io.Closer.
Open() (/*io.ReadCloser*/io.Reader, error)
}
// EntryWriter is a destination for entries: it consumes one Entry per call
// (listing it, extracting it to disk, or adding it to an archive).
type EntryWriter interface {
// Write processes a single entry and returns any error encountered.
Write(ent Entry) error
}
/*
*
*/
// ZipEntry adapts a *zip.File to the Entry interface.
//
// The embedded Entry is not set by Unzip (so it is nil there); all three
// interface methods are implemented explicitly below.
type ZipEntry struct {
	Entry
	f *zip.File
	// dirheadEncoding names the charset used to decode f.Name when the
	// archive flags the name as non-UTF-8.
	dirheadEncoding string
}

// Path returns the entry's name joined onto destdir.
//
// Names flagged as non-UTF-8 are first converted from dirheadEncoding. An
// error is returned when that encoding is not known to the decoder library,
// or when the joined path would escape destdir (Zip Slip).
func (ent *ZipEntry) Path(destdir string) (string, error) {
	name := ent.f.Name
	if ent.f.NonUTF8 {
		dec := mahonia.NewDecoder(ent.dirheadEncoding)
		if dec == nil {
			// NewDecoder yields nil for an unrecognized charset name; calling
			// through it would panic.
			return "", fmt.Errorf("%s: unsupported file name encoding", ent.dirheadEncoding)
		}
		name = dec.ConvertString(name)
	}
	// Join, with checking for ZipSlip. More Info: http://bit.ly/2MsjAWE
	fpath, valid := pathJoin(destdir, name)
	if !valid {
		return "", fmt.Errorf("%s: illegal file path", fpath)
	}
	return fpath, nil
}

// FileInfo returns the metadata recorded in the zip file header.
func (ent *ZipEntry) FileInfo() fs.FileInfo {
	return ent.f.FileInfo()
}

// Open returns a reader for the entry's decompressed content. The value
// returned by zip.File.Open also implements io.Closer.
func (ent *ZipEntry) Open() (/*io.ReadCloser*/ io.Reader, error) {
	return ent.f.Open()
}
/*
*
*/
// TarEntry adapts the current entry of a tar stream to the Entry interface.
//
// f is the header returned by tar.Reader.Next and tr is the reader it came
// from. The embedded Entry is not set by Untar; every interface method is
// implemented explicitly below.
type TarEntry struct {
	Entry
	f  *tar.Header
	tr *tar.Reader
}

// Path returns the header's name joined onto destdir, or an error when the
// joined path is not accepted by pathJoin.
func (ent *TarEntry) Path(destdir string) (string, error) {
	joined, ok := pathJoin(destdir, ent.f.Name)
	if ok {
		return joined, nil
	}
	return "", fmt.Errorf("%s: illegal file path", joined)
}

// FileInfo returns the metadata derived from the tar header.
func (ent *TarEntry) FileInfo() fs.FileInfo {
	info := ent.f.FileInfo()
	return info
}

// Open returns the underlying tar.Reader itself; it never fails.
func (ent *TarEntry) Open() (/*io.ReadCloser*/ io.Reader, error) {
	var content io.Reader = ent.tr
	return content, nil
}
/*
*
*/
// FsdiskEntry adapts a file or directory found while walking the local
// filesystem to the Entry interface.
type FsdiskEntry struct {
	Entry
	// fpath is the path used to open the file on disk.
	fpath string
	// vpath is the path reported for the entry (Dirwalk sets it relative to
	// the walk root).
	vpath string
	// st is the file's metadata as supplied by the walk.
	st os.FileInfo
}

// Path returns vpath joined onto destdir. Unlike the archive-backed entries,
// the validity flag from pathJoin is ignored and no error is ever returned.
func (ent *FsdiskEntry) Path(destdir string) (string, error) {
	joined, _ := pathJoin(destdir, ent.vpath)
	return joined, nil
}

// FileInfo returns the metadata captured during the walk.
func (ent *FsdiskEntry) FileInfo() fs.FileInfo {
	return ent.st
}

// Open opens the file at fpath for reading and returns the result of os.Open.
func (ent *FsdiskEntry) Open() (/*io.ReadCloser*/ io.Reader, error) {
	f, err := os.Open(ent.fpath)
	return f, err
}
/*
*
*/
// EntryListWriter is an EntryWriter that only lists entries: it prints each
// non-directory entry's path on standard output and never reads its content.
type EntryListWriter struct {
	EntryWriter
	// destdir is the directory the printed paths are joined onto.
	destdir string
}

// Write prints the slash-separated path of ent, one per line. Directories
// are skipped silently; an error from ent.Path is returned unchanged.
func (wr *EntryListWriter) Write(ent Entry) error {
	fpath, err := ent.Path(wr.destdir)
	switch {
	case err != nil:
		return err
	case ent.FileInfo().IsDir():
		return nil
	}
	fmt.Println(filepath.ToSlash(fpath))
	return nil
}
/*
*
*/
// EntryDiskWriter is an EntryWriter that extracts entries onto the local
// filesystem below destdir.
type EntryDiskWriter struct {
	EntryWriter
	// destdir is the directory entries are extracted into.
	destdir string
}

// Write extracts ent below destdir.
//
// A directory entry is created with os.MkdirAll. For any other entry the
// parent directories are created, the file is created (truncating an
// existing one) with the entry's mode, its content is copied, and the
// slash-separated path is printed on standard output.
//
// The first error encountered is returned, including failures to create a
// directory and a failure reported when closing the output file.
func (wr *EntryDiskWriter) Write(ent Entry) (err error) {
	fpath, err := ent.Path(wr.destdir)
	if err != nil {
		return err
	}
	if ent.FileInfo().IsDir() {
		return os.MkdirAll(fpath, os.ModePerm)
	}
	if err = os.MkdirAll(filepath.Dir(fpath), os.ModePerm); err != nil {
		return err
	}
	rc, err := ent.Open()
	if err != nil {
		return err
	}
	// Entry.Open is declared to return io.Reader, but zip- and disk-backed
	// entries hand out closable readers; release them so that large runs do
	// not leak file descriptors.
	if c, ok := rc.(io.Closer); ok {
		defer c.Close()
	}
	outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, ent.FileInfo().Mode())
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean the data never reached the disk.
		if cerr := outFile.Close(); err == nil {
			err = cerr
		}
	}()
	if _, err = io.Copy(outFile, rc); err != nil {
		return err
	}
	fmt.Println(filepath.ToSlash(fpath))
	return nil
}
/*
*
*/
// EntryZipWriter is an EntryWriter that adds entries to a zip archive.
type EntryZipWriter struct {
	EntryWriter
	// dst is the zip archive being written.
	dst *zip.Writer
}

// Write appends ent to the zip archive using Deflate compression and prints
// the stored name on standard output. Directory entries are skipped.
//
// The name stored in the archive is the entry's path (relative to the empty
// destination) converted to forward slashes, as the zip format requires,
// regardless of the host OS path separator.
func (wr *EntryZipWriter) Write(ent Entry) error {
	fpath, err := ent.Path("")
	if err != nil {
		return err
	}
	if ent.FileInfo().IsDir() {
		return nil
	}
	header, err := zip.FileInfoHeader(ent.FileInfo())
	if err != nil {
		return err
	}
	name := filepath.ToSlash(fpath)
	header.Name = name
	header.Method = zip.Deflate
	rc, err := ent.Open()
	if err != nil {
		return err
	}
	// Entry.Open is declared to return io.Reader, but zip- and disk-backed
	// entries hand out closable readers; release them once copied.
	if c, ok := rc.(io.Closer); ok {
		defer c.Close()
	}
	hdrwrt, err := wr.dst.CreateHeader(header)
	if err != nil {
		return err
	}
	if _, err = io.Copy(hdrwrt, rc); err != nil {
		return err
	}
	fmt.Println(name)
	return nil
}
/*
*
*/
// EntryTarWriter is an EntryWriter that adds entries to a tar archive.
type EntryTarWriter struct {
	EntryWriter
	// dst is the tar archive being written.
	dst *tar.Writer
}

// Write appends ent to the tar archive as a PAX-format record carrying the
// entry's mode, size and modification time, then prints the stored name on
// standard output. Directory entries are skipped.
//
// The name stored in the archive is the entry's path (relative to the empty
// destination) converted to forward slashes, so archives created on hosts
// with a different path separator still contain portable member names.
func (wr *EntryTarWriter) Write(ent Entry) error {
	fpath, err := ent.Path("")
	if err != nil {
		return err
	}
	info := ent.FileInfo()
	if info.IsDir() {
		return nil
	}
	rc, err := ent.Open()
	if err != nil {
		return err
	}
	// Entry.Open is declared to return io.Reader, but zip- and disk-backed
	// entries hand out closable readers; release them once copied.
	if c, ok := rc.(io.Closer); ok {
		defer c.Close()
	}
	name := filepath.ToSlash(fpath)
	hdr := &tar.Header{
		Name:    name,
		Mode:    int64(info.Mode()),
		Size:    int64(info.Size()),
		ModTime: info.ModTime(),
		Format:  tar.FormatPAX,
	}
	if err := wr.dst.WriteHeader(hdr); err != nil {
		return err
	}
	if _, err = io.Copy(wr.dst, rc); err != nil {
		return err
	}
	fmt.Println(name)
	return nil
}
/*
* ========================
* Unzip
* ========================
*/
// Unzip reads the zip archive at *arg.src and hands every entry accepted by
// arg.Filter to writer.
//
// A failure to open the archive is returned. A failure on an individual
// entry is reported on standard error and processing continues with the next
// entry, so the function returns nil once the archive has been opened.
func Unzip(arg UnarchArgs, writer EntryWriter) error {
	zr, err := zip.OpenReader(*arg.src)
	if err != nil {
		return err
	}
	defer zr.Close()

	for i := range zr.File {
		ent := &ZipEntry{f: zr.File[i], dirheadEncoding: *arg.dirheadEncoding}
		if !arg.Filter(ent) {
			continue
		}
		if werr := writer.Write(ent); werr != nil {
			// Best effort: report and carry on with the remaining entries.
			fmt.Fprintf(os.Stderr, "%s: %s\n", ent.f.Name, werr)
		}
	}
	return nil
}
/*
* ========================
* Untar
* ========================
*/
// uncompReader opens fn and returns a reader yielding its decompressed
// content, choosing the decompressor from the compression suffix reported by
// getatype: none (the file itself is returned), "gz", "bz2", "xz" or "lzma".
//
// An error is returned when the file cannot be opened, when the decompressor
// rejects the stream, or when the suffix is not one of the supported
// formats. On every error path the file is closed again and the returned
// reader is nil.
func uncompReader(fn string) (io.Reader, error) {
	_, ext2 := getatype(fn, true)
	f, err := os.Open(fn)
	if err != nil {
		return nil, err
	}

	var dr io.Reader
	switch ext2 {
	case "":
		return f, nil
	case "gz":
		dr, err = gzip.NewReader(f)
	case "bz2":
		dr, err = bzip2.NewReader(f, nil)
	case "xz":
		dr, err = xz.NewReader(f)
	case "lzma":
		dr, err = lzma.NewReader(f)
	default:
		err = fmt.Errorf("%s: unknown compression format %q", fn, ext2)
	}
	if err != nil {
		// Do not hand back a half-initialised decompressor, and do not leak
		// the file handle it would have wrapped.
		f.Close()
		return nil, err
	}
	return dr, nil
}
// Untar reads the (optionally compressed) tar archive at *arg.src and hands
// every entry accepted by arg.Filter to writer.
//
// A failure to open the archive or to read the tar stream is returned. A
// failure on an individual entry is reported on standard error and
// processing continues with the next entry. nil is returned when the end of
// the archive is reached.
func Untar(arg UnarchArgs, writer EntryWriter) error {
	r, err := uncompReader(*arg.src)
	if err != nil {
		return err
	}
	// The source is typed as io.Reader; close it when the concrete value
	// supports that, so the stream is released once reading is done.
	if c, ok := r.(io.Closer); ok {
		defer c.Close()
	}

	tr := tar.NewReader(r)
	for {
		header, err := tr.Next()
		switch {
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		case header == nil:
			continue
		}
		ent := TarEntry{tr: tr, f: header}
		if !arg.Filter(&ent) {
			continue
		}
		if err = writer.Write(&ent); err != nil {
			// Best effort: report and carry on with the remaining entries.
			fmt.Fprintf(os.Stderr, "%s: %s\n", ent.f.Name, err)
		}
	}
}
/*
* ========================
* Invoke external unpacker
* ========================
*/
// Unany unpacks an archive with the external command cmd and then feeds the
// extracted files to writer.
//
// The command is run by runExtnUnpacker, which extracts into a temporary
// directory; that directory is walked with Dirwalk and removed before
// returning. An error from the unpacker or from the walk is returned.
func Unany(cmd []string, arg UnarchArgs, writer EntryWriter) error {
	tdir, err := runExtnUnpacker(cmd)
	if err != nil {
		return err
	}
	defer os.RemoveAll(tdir)
	// arg is a copy, but its src field points at the caller's string. Point
	// the copy at a local value instead of writing through that pointer, so
	// the caller's source path is not replaced by a directory that is about
	// to be deleted.
	arg.src = &tdir
	return Dirwalk(arg, writer)
}
/*
* ========================
* Dir walk
* ========================
*/
// Dirwalk walks the directory tree rooted at *arg.src and hands every file
// and directory accepted by arg.Filter to writer, using the path relative to
// the root as the entry's name.
//
// A traversal error (unreadable directory, missing root, ...) stops the walk
// and is returned. A failure of writer.Write on an individual entry is
// reported on standard error and the walk continues.
func Dirwalk(arg UnarchArgs, writer EntryWriter) error {
	root := *arg.src
	return filepath.Walk(root, func(fn string, fi os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		vpath, err := filepath.Rel(root, fn)
		if err != nil {
			return err
		}
		ent := FsdiskEntry{fpath: fn, vpath: vpath, st: fi}
		if !arg.Filter(&ent) {
			return nil
		}
		if err = writer.Write(&ent); err != nil {
			// Best effort: report and carry on with the remaining entries.
			fmt.Fprintf(os.Stderr, "%s: %s\n", ent.fpath, err)
		}
		return nil
	})
}
/*
* =================================================
* MAIN
* =================================================
*/
// setupConfs parses the command line and loads the user configuration,
// returning the combined settings.
//
// The configuration file is ".zip2tar.go.conf.json" in the directory named
// by USERPROFILE (or HOME when USERPROFILE is empty). When it cannot be
// stat'ed it is created from CONF_DEFAULT. Its content is executed as a
// text/template with .Src set to the absolute source path (backslashes and
// double quotes escaped for embedding in a JSON string) and the output is
// parsed as JSON:
//
//   - "unpackers": object mapping an archive type to a non-empty array
//     forming an external command line;
//   - "ignoredirpatterns": optional array of directory-name patterns.
//
// An error is returned for command-line errors, for a configuration file
// that cannot be created or read, for template or JSON errors, and for
// configuration sections of the wrong shape.
func setupConfs() (UnarchArgs, error) {
	getconfpath := func() string {
		home := os.Getenv("USERPROFILE")
		if home == "" {
			home = os.Getenv("HOME")
		}
		return filepath.Join(home, ".zip2tar.go.conf.json")
	}
	getconf := func() (string, error) {
		confpath := getconfpath()
		if _, err := os.Stat(confpath); err != nil {
			// First run (or unreadable path): seed the file with the defaults.
			if err := os.WriteFile(confpath, []byte(CONF_DEFAULT+"\n"), 0744); err != nil {
				return "", err
			}
		}
		cont, err := os.ReadFile(confpath)
		if err != nil {
			return "", err
		}
		return string(cont), nil
	}

	parser := argparse.NewParser(os.Args[0], "zip2tar", nil)
	arg := UnarchArgs{}
	arg.dirheadEncoding = parser.String(
		"e", "encoding",
		&argparse.Option{
			Default: "shiftjis",
			Help:    "specify encoding of directory header (only for zip)",
		})
	arg.prefer_external_unpacker = parser.Flag(
		"U", "prefer_external_unpacker", &argparse.Option{})
	arg.pattern_match = parser.String(
		"m", "fnmatch",
		&argparse.Option{})
	arg.pattern_exclude_match = parser.String(
		"x", "fnmatch_exclude",
		&argparse.Option{})
	arg.do_list = parser.Flag(
		"l", "list", &argparse.Option{})
	arg.src = parser.String("", "src", &argparse.Option{Positional: true})
	arg.dst = parser.String("", "dst", &argparse.Option{Positional: true})
	if err := parser.Parse(os.Args[1:]); err != nil {
		return arg, err
	}
	abssrc, err := filepath.Abs(*arg.src)
	if err != nil {
		return arg, err
	}
	*arg.src = abssrc

	conftext, err := getconf()
	if err != nil {
		return arg, err
	}
	conftmpl, err := template.New("zip2tarconf").Parse(conftext)
	if err != nil {
		return arg, err
	}
	var tmplout bytes.Buffer
	// The path is substituted inside JSON string literals, so escape the two
	// characters that would break them.
	esrc := strings.ReplaceAll(strings.ReplaceAll(*arg.src, `\`, `\\`), `"`, `\"`)
	if err = conftmpl.Execute(&tmplout, struct{ Src string }{Src: esrc}); err != nil {
		return arg, err
	}

	conf, err := mxj.NewMapJson(tmplout.Bytes())
	if err != nil {
		return arg, err
	}

	arg.unpackers_conf = map[string][]string{}
	if raw := conf["unpackers"]; raw != nil {
		unpackers, ok := raw.(map[string]interface{})
		if !ok {
			return arg, fmt.Errorf("config: \"unpackers\" must be an object")
		}
		for k, v := range unpackers {
			cmdi := reflect.ValueOf(v)
			if cmdi.Kind() != reflect.Slice || cmdi.Len() == 0 {
				return arg, fmt.Errorf("config: unpacker %q must be a non-empty array", k)
			}
			cmd := make([]string, 0, cmdi.Len())
			for i := 0; i < cmdi.Len(); i++ {
				cmd = append(cmd, fmt.Sprint(cmdi.Index(i)))
			}
			arg.unpackers_conf[k] = cmd
		}
	}
	if raw := conf["ignoredirpatterns"]; raw != nil {
		igndirpats, ok := raw.([]interface{})
		if !ok {
			return arg, fmt.Errorf("config: \"ignoredirpatterns\" must be an array")
		}
		for _, v := range igndirpats {
			pati := reflect.ValueOf(v)
			arg.ignoredirpatterns = append(arg.ignoredirpatterns, fmt.Sprint(pati))
		}
	}
	return arg, nil
}
func main() {
arg, err := setupConfs()
if err != nil {
panic(err)
}
st, err := os.Stat(*arg.src)
if err != nil {
panic(err)
}
oext1, oext2 := getatype(*arg.dst, false)
var writer EntryWriter
if *arg.do_list {
writer = &EntryListWriter{destdir: *arg.dst}
} else if oext1 == "zip" {
wr, err := os.OpenFile(*arg.dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0744)
if err == nil {
zwr := zip.NewWriter(wr)
writer = &EntryZipWriter{dst: zwr}
defer zwr.Close()
}
} else if oext1 == "tar" {
wr, err := os.OpenFile(*arg.dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0744)
if err == nil {
var twr *tar.Writer
if oext2 == "gz" {
gzwr, _ := gzip.NewWriterLevel(wr, 9)
defer gzwr.Close()
twr = tar.NewWriter(gzwr)
} else if oext2 == "bz2" {
bz2wr, _ := bzip2.NewWriter(wr, &bzip2.WriterConfig{Level: 9})
defer bz2wr.Close()
twr = tar.NewWriter(bz2wr)
} else if oext2 == "xz" {
cfg := xz.WriterConfig{DictCap: 1 << lzmaDictCapExps[9]}
xzwr, _ := cfg.NewWriter(wr)
defer xzwr.Close()
twr = tar.NewWriter(xzwr)
} else if oext2 == "lzma" {
// sample code copied from https://github.com/ulikunitz/xz/blob/v0.5.10/cmd/gxz/file.go
cfg := lzma.WriterConfig{
Properties: &lzma.Properties{LC: 3, LP: 0, PB: 2},
DictCap: 1 << lzmaDictCapExps[9],
}
xzwr, _ := cfg.NewWriter(wr)
defer xzwr.Close()
twr = tar.NewWriter(xzwr)
} else {
twr = tar.NewWriter(wr)
}
writer = &EntryTarWriter{dst: twr}
defer twr.Close()
}
} else {
writer = &EntryDiskWriter{destdir: *arg.dst}
}
if err == nil {
iext1, _ := getatype(*arg.src, true)
if st.IsDir() {
err = Dirwalk(arg, writer)
} else {
cmd, ke := arg.unpackers_conf[iext1]
if ke && (*arg.prefer_external_unpacker ||
(iext1 != "zip" && iext1 != "tar")) {
err = Unany(cmd, arg, writer)
} else {
if iext1 == "zip" {
err = Unzip(arg, writer)
} else {
err = Untar(arg, writer)
}
}
}
}
if err != nil {
panic(err)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment