Skip to content

Instantly share code, notes, and snippets.

@adjam
Created January 17, 2019 20:10
Show Gist options
  • Save adjam/f1e54514075788fc079adb44e8beceb1 to your computer and use it in GitHub Desktop.
Save adjam/f1e54514075788fc079adb44e8beceb1 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"io"
"bytes"
"os"
"log"
"encoding/xml"
)
// ParserState is the bookkeeping carried along while the XML input is
// scanned token by token. It records enough context to report where the
// scan was when a problem occurred.
type ParserState struct {
	// File is the input file; LastRecord seeks and reads it directly.
	File *os.File

	// LastSeenId is the character data most recently captured while
	// InSubfieldA was set (the "catkey" printed in diagnostics).
	LastSeenId string

	// In918 is set when a <datafield> start tag with tag="918" is seen.
	// InSubfieldA is set when a <subfield> start tag with code="a" is seen
	// while In918 is set.
	In918, InSubfieldA bool

	// LastRecordStart is the byte offset in File at which the most recent
	// <record> start tag began.
	LastRecordStart int64

	// Error holds the I/O error, if any, that LastRecord ran into.
	Error error
}
// LastRecord re-reads the input file starting at state.LastRecordStart and
// returns the raw text from that offset up to and including the first
// "</record>" end tag.
//
// Return values:
//   - (true, text) when an end tag is found within the first 20000 bytes
//     after LastRecordStart. LastRecordStart and the file position are then
//     moved to just past the returned text, so a following call continues
//     with whatever comes after this record.
//   - (false, "<n/a>") when no end tag is found in that window.
//   - (false, "encountered non-recoverable error") when seeking or reading
//     fails (including when LastRecordStart is at or beyond end of file);
//     the underlying error is stored in state.Error.
//
// The method moves the file's read position, so it must not be interleaved
// with another reader that is consuming the same *os.File.
func (state *ParserState) LastRecord() (bool, string) {
	const (
		maxRecordLen = 20000 // upper bound on how much of the file is inspected
		endTag       = "</record>"
		ioFailure    = "encountered non-recoverable error"
	)

	start, err := state.File.Seek(state.LastRecordStart, io.SeekStart)
	if err != nil {
		state.Error = err
		return false, ioFailure
	}

	// ReadFull keeps reading until the buffer is full or the file ends, so a
	// short Read cannot truncate the record. Hitting end of file after at
	// least one byte (io.ErrUnexpectedEOF) is expected for the final records.
	buf := make([]byte, maxRecordLen)
	n, err := io.ReadFull(state.File, buf)
	if err != nil && err != io.ErrUnexpectedEOF {
		state.Error = err
		return false, ioFailure
	}

	// Only the bytes actually read are searched.
	end := bytes.Index(buf[:n], []byte(endTag))
	if end == -1 {
		return false, "<n/a>"
	}
	end += len(endTag)

	// Advance to the end of the record itself, not to the end of the chunk
	// that happened to be read.
	state.LastRecordStart = start + int64(end)
	if _, err := state.File.Seek(state.LastRecordStart, io.SeekStart); err != nil {
		// The record was read successfully; remember the failure but still
		// hand the text back to the caller.
		state.Error = err
	}
	return true, string(buf[:end])
}
// dumpState writes a diagnostic report about state to l: the last catkey
// seen, the offset of the last <record> start tag, and, when it can be
// re-read via state.LastRecord, the text of that record framed by dashed
// rules.
//
// It returns 0 when the record text was retrieved and 1 when it was not;
// main passes this value to os.Exit. state is received by value, so changes
// LastRecord makes to it are not visible to the caller.
func dumpState(l *log.Logger, state ParserState) int {
	l.Printf("Last seen catkey was %s\n", state.LastSeenId)
	l.Printf("Last record element seen at %d\n", state.LastRecordStart)

	ok, record := state.LastRecord()
	if !ok {
		l.Println("Unable to retrieve record where error was encountered")
		return 1
	}

	const rule = "-----------"
	report := []string{
		"Record we were processing at the time",
		rule,
		record,
		rule,
	}
	for _, line := range report {
		l.Println(line)
	}
	return 0
}
// check is a fail-fast helper: when e is nil it does nothing; otherwise it
// prints the last catkey recorded in state to standard output and panics
// with e.
func check(e error, state ParserState) {
	if e == nil {
		return
	}
	fmt.Printf("Last seen catkey was %s\n", state.LastSeenId)
	panic(e)
}
// main scans the XML file named by the first command-line argument and, if
// the XML decoder reports an error, writes a diagnostic dump (last catkey
// seen, offset and text of the record being processed) to standard error.
//
// Exit behaviour:
//   - input read to the end without a decoder error: returns normally;
//   - decoder error: exits with the status returned by dumpState;
//   - missing file argument: exits with status 2;
//   - file cannot be opened: check panics with the open error.
func main() {
	l := log.New(os.Stderr, "", 0)
	if len(os.Args) < 2 {
		l.Println("usage: pass the path of the XML file to scan as the first argument")
		os.Exit(2)
	}

	file, err := os.Open(os.Args[1])
	state := ParserState{file, "<not started>", false, false, 0, nil}
	check(err, state)
	defer file.Close()

	decoder := xml.NewDecoder(file)
	for {
		// Taken before Token so that it is the offset at which the token
		// about to be returned begins.
		currentPos := decoder.InputOffset()
		t, err := decoder.Token()
		// At end of input Token returns (nil, io.EOF): the signal is in err,
		// never in the token.
		if err == io.EOF {
			break
		}
		if err != nil {
			l.Printf("XML error: %v", err)
			exitStatus := dumpState(l, state)
			// os.Exit does not run deferred calls, so close explicitly.
			file.Close()
			os.Exit(exitStatus)
		}
		trackToken(&state, t, currentPos)
	}
}

// trackToken updates state for one decoded token. offset is the byte offset
// at which the token started.
//
//   - <record> start: remember offset as LastRecordStart.
//   - <datafield tag="918"> start: enter the 918 field.
//   - <subfield code="a"> start inside a 918 field: enter subfield a.
//   - character data inside subfield a: store it as LastSeenId.
//   - </subfield>: leave subfield a; </datafield>: leave the 918 field.
//
// Only the matching end tag leaves a context, so subfield a is recognised
// wherever it appears inside the 918 field, not just as its first subfield.
func trackToken(state *ParserState, t xml.Token, offset int64) {
	switch v := t.(type) {
	case xml.StartElement:
		switch v.Name.Local {
		case "record":
			state.LastRecordStart = offset
		case "datafield":
			if hasAttr(v, "tag", "918") {
				state.In918 = true
			}
		case "subfield":
			if state.In918 && hasAttr(v, "code", "a") {
				state.InSubfieldA = true
			}
		}
	case xml.CharData:
		if state.InSubfieldA {
			// string(v) copies the bytes, which the decoder reuses for
			// later tokens.
			state.LastSeenId = string(v)
		}
	case xml.EndElement:
		switch v.Name.Local {
		case "datafield":
			state.In918 = false
			state.InSubfieldA = false
		case "subfield":
			state.InSubfieldA = false
		}
	}
}

// hasAttr reports whether el carries an attribute whose local name is name
// and whose value is exactly value.
func hasAttr(el xml.StartElement, name, value string) bool {
	for _, attr := range el.Attr {
		if attr.Name.Local == name && attr.Value == value {
			return true
		}
	}
	return false
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment