Skip to content

Instantly share code, notes, and snippets.

@yunginnanet
Created April 22, 2024 19:24
Show Gist options
  • Save yunginnanet/98cdc52223a40a99ebe766e532e9f601 to your computer and use it in GitHub Desktop.
Save yunginnanet/98cdc52223a40a99ebe766e532e9f601 to your computer and use it in GitHub Desktop.
Extracts corrupt or broken base64-encoded attachments from .eml message files.
package main
import (
"bufio"
"bytes"
"encoding/base64"
"encoding/hex"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
)
var (
// seen holds every output filename that has already been claimed by a
// dumped attachment; extract consults it to avoid reusing a name.
seen = map[string]struct{}{}
// seenMu guards seen, which extract reads and writes and which may be
// running on several goroutines at once.
seenMu sync.RWMutex
// concurrent selects one goroutine per input file. main sets it to false
// when the --slow argument is given; extract reads it to decide whether to
// call wg.Done.
concurrent = true
)
// main extracts base64 attachments from every .eml path given on the command
// line and writes the decoded files into ./output.
//
// Passing --slow anywhere in the argument list processes the files one at a
// time instead of starting one goroutine per file.
func main() {
	// Resolve the mode before any work starts. extract reads the package-level
	// concurrent flag when it returns, so flipping it after goroutines have
	// been launched (e.g. "a.eml --slow b.eml") would be a data race, would
	// make those goroutines skip wg.Done, and would make main skip wg.Wait and
	// exit while they are still writing.
	paths := make([]string, 0, len(os.Args))
	for _, arg := range os.Args[1:] {
		if arg == "--slow" {
			concurrent = false
			continue
		}
		paths = append(paths, arg)
	}

	// Without the output directory every later write would fail, so report
	// the cause up front instead of ignoring it.
	if err := os.MkdirAll("output", 0755); err != nil {
		println(err.Error())
		os.Exit(1)
	}

	wg := &sync.WaitGroup{}
	for _, path := range paths {
		f := open(path)
		if concurrent {
			wg.Add(1)
			go extract(f, wg)
			continue
		}
		extract(f, wg)
	}
	// Waiting on a WaitGroup whose counter is zero returns immediately, so
	// this is safe in --slow mode as well.
	wg.Wait()
}
func extract(f io.ReadCloser, wg *sync.WaitGroup) {
defer func() {
_ = f.Close()
if concurrent {
wg.Done()
}
}()
xerox := bufio.NewScanner(f)
boundaries := map[string]struct{}{}
msgID := ""
contentType := ""
count := 0
inFile := false
totalFound := 0
fileBuf := bytes.Buffer{}
dumpFile := func() {
countStr := ""
if count > 1 {
countStr = "-" + strconv.Itoa(count)
}
fname := msgID + countStr + "." + contentType
fname = strings.TrimSuffix(fname, "\"")
dupStr := ""
dupInt := 0
for {
if dupInt > 0 {
dupStr = strconv.Itoa(dupInt)
if !strings.Contains(fname, ".") {
fname += "-" + dupStr
} else {
spl := strings.Split(fname, ".")
fname = spl[0] + "-" + dupStr + spl[1]
}
}
seenMu.RLock()
if _, ok := seen[fname]; !ok {
seenMu.RUnlock()
seenMu.Lock()
seen[fname] = struct{}{}
seenMu.Unlock()
break
}
seenMu.RUnlock()
dupInt++
}
datStr := string(fileBuf.Bytes())
dat, err := base64.StdEncoding.DecodeString(datStr)
if err != nil {
println("\tbase64: " + err.Error())
if corruption, ok := err.(base64.CorruptInputError); ok {
println("\tdumping remainder after corruption detected...")
_, _ = os.Stderr.WriteString(hex.Dump(fileBuf.Bytes()[int64(corruption):]))
}
}
totalFound += len(fileBuf.Bytes())
if err := os.WriteFile(filepath.Join("output", fname), dat, 0666); err != nil {
panic(err.Error())
}
println("\twrote " + fname)
fileBuf.Reset()
}
totalLen := 0
for xerox.Scan() {
totalLen += len(xerox.Bytes())
switch {
case inFile:
blankLine := strings.TrimSpace(xerox.Text()) == ""
_, boundaryFound := boundaries[strings.ReplaceAll(xerox.Text(), "-", "")]
if !boundaryFound {
for b := range boundaries {
if strings.Contains(xerox.Text(), b) {
boundaryFound = true
}
}
}
if blankLine || boundaryFound || strings.Contains(xerox.Text(), "--") {
inFile = false
count++
dumpFile()
} else {
_, _ = fileBuf.WriteString(xerox.Text())
}
case strings.Contains(xerox.Text(), "Content-Type"):
txt := strings.TrimPrefix(strings.TrimSpace(strings.ToLower(xerox.Text())), "content-type: ")
txt = strings.TrimSpace(strings.TrimSuffix(txt, ";"))
if !strings.Contains(txt, "text/plain") && strings.Contains(txt, "/") {
contentType = strings.Split(txt, "/")[1]
}
case strings.Contains(strings.ToLower(xerox.Text()), "message-id"):
txt := strings.TrimPrefix(strings.TrimSpace(xerox.Text()), "Message-Id: <")
txt = strings.TrimPrefix(txt, "Message-ID: <")
txt = strings.ReplaceAll(txt, "\"", "")
txt = strings.ReplaceAll(txt, "=", "")
txt = strings.ReplaceAll(txt, ";", "")
switch {
case strings.Contains(txt, "@"):
msgID = strings.Split(txt, "@")[0]
default:
msgID = strings.Split(txt, ">")[0]
}
case strings.Contains(strings.ToLower(xerox.Text()), "boundary="):
bnd := strings.ReplaceAll(strings.Split(xerox.Text(), "boundary=")[1], "\"", "")
bnd = strings.TrimSpace(bnd)
if len(bnd) > 0 {
boundaries[bnd] = struct{}{}
}
case strings.Contains(strings.ToLower(xerox.Text()), ": base64"):
inFile = true
for {
if !xerox.Scan() || xerox.Err() != nil {
if xerox.Err() == nil {
panic("eof while looking for newline after attachment start!")
}
println("\t" + xerox.Err().Error() + "while waiting for newline after attachment start")
break
}
if txt := strings.TrimSpace(xerox.Text()); txt != "" {
println("\twarn: missing newline after boundary?")
println("\tinstead of newline: " + txt)
// _, _ = fileBuf.WriteString(txt)
} else {
break
}
}
}
}
if inFile || fileBuf.Len() != 0 {
println("\terr: ohp! still in file but never got boundary or blank line")
count++
dumpFile()
}
if totalLen-totalFound > 20000 {
println(msgID + " lots of leftover!!")
}
}
// open announces the path it is about to read and returns the opened file.
// If the file cannot be opened, the error is printed and the process exits
// with status 1.
func open(fs string) io.ReadCloser {
	println("open: " + fs)
	handle, openErr := os.Open(fs)
	if openErr == nil {
		return handle
	}
	println(openErr.Error())
	os.Exit(1)
	return nil // not reached; os.Exit does not return
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment