Created
January 14, 2014 01:52
-
-
Save JodiTheTigger/8411686 to your computer and use it in GitHub Desktop.
gdbBacktraceToJson.py parses the output of the command "thread apply all bt full" and turns it into a json array. Useful for automating the analysis of coredump files generated when an application crashes. Use the tokenised json to search a database of crashes for similar crashes or make a nice web interface for viewing back traces. You could ma…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2 | |
# | |
# gdbBacktraceToJson.py. Parses gdb backtraces into json. | |
# Copyright (C) 2014 Richard Maxwell <jodi.the.tigger@gmail.com> | |
# | |
# This program is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program. If not, see <http://www.gnu.org/licenses/> | |
# | |
# Description: | |
# gdbBacktraceToJson.py parses the output of the command "thread apply all bt full" and turns it into a json array. | |
# Useful for automating the analysis of coredump files generated when an application crashes. Use the tokenised json | |
# to search a database of crashes for similar crashes or make a nice web interface for viewing back traces. You could
# make a backtrace diff tool. It's much easier to use and write tools using a standard data format. | |
# Usage: | |
# python2 gdbBacktraceToJson.py <backtrace file> | |
# It will parse the file and output the backtrace as a json array to std out. | |
# you can get the backtrace file from a core dump file by running gdb in the following way: | |
# gdb [app with debug symbols] [core file] --eval-command "thread apply all bt full" --eval-command "quit" > mybacktrace.txt | |
import sys | |
import os | |
import re | |
import json | |
import string | |
import datetime | |
def parseLocals(lines):
    """Parse the local-variable lines of one "bt full" frame into a dict.

    Each line is expected to look like ``name = value``.  A value whose
    braces are balanced on its own line (``p = {x = 1, y = 2}``) is stored
    verbatim as a string.  A line that leaves a brace open (``s = {``)
    starts a multi-line block: the lines up to (not including) the line
    that closes it are parsed recursively and stored as a nested dict.

    Args:
        lines: list of strings, one per line, without the frame header.

    Returns:
        dict mapping variable names to value strings (surrounding
        whitespace and commas stripped) or to nested dicts for multi-line
        blocks.  If a block is never closed (reported on stderr), or a
        block opens on a line with no ``name =`` part, parsing stops and
        whatever was collected so far is returned.
    """
    result = {}
    total = len(lines)
    index = 0

    while index < total:
        line = lines[index]

        # Non-greedy name: split on the FIRST " = " so that values which
        # contain " = " themselves (inline structs) stay in the value.
        assignment = re.match(r'\s*(.*?)\s=\s(.*)', line)

        # Count braces instead of only testing for their presence, so a
        # line that opens and closes the same struct is not mistaken for
        # the start of a multi-line block.
        depth = line.count('{') - line.count('}')

        if depth < 1:
            if assignment:
                result[assignment.group(1)] = assignment.group(2).strip().strip(',')
            index += 1
            continue

        # find the line on which the block opened above is closed again.
        closingIndex = index + 1
        while closingIndex < total:
            depth += lines[closingIndex].count('{') - lines[closingIndex].count('}')
            if depth < 1:
                break
            closingIndex += 1
        else:
            # Ran off the end of the input without balancing the braces.
            # Diagnostics go to stderr so they never mix with the json
            # this script writes to stdout.
            sys.stderr.write("*ERROR* Coreline: parseLocals: Can't find closing brace.\n")
            return result

        if not assignment:
            # A block without a name cannot be stored under a key.
            return result

        # deal with nested braces using recursion.
        result[assignment.group(1)] = parseLocals(lines[index + 1:closingIndex])
        index = closingIndex + 1

    return result
def coreLinesToObject(coreLine):
    """Turn the text of one stack frame into a dict.

    Args:
        coreLine: string holding the frame header line (starting with
            ``#<frame number>``) followed by any further lines that belong
            to the frame, separated by newlines.

    Returns:
        dict with the keys 'frame', 'address' (None when the header has no
        address), 'function', 'source' (None when not found) and
        'arguments' (dict of name -> value).  When non-blank lines follow
        the header a 'locals' key is added, holding the result of
        parseLocals() on those lines.  An empty dict is returned, and the
        problem reported on stderr, when the header cannot be parsed.
    """
    coreObject = {}

    # line format is:
    # #frame [0x12345678] in (<function>) [from|at] [library|file]
    # (?:....) means don't capture that group (?:)
    matchResult = re.match(r'\#(\d+)\s+(?:(0x(?:[0-9A-F])*) in |)(\S+) (\((?:.|\n|\r)*\))(?: (?:at|from) (.*)|$)', coreLine, re.I|re.M)

    if not matchResult:
        # Diagnostics go to stderr so they never mix with the json this
        # script writes to stdout.
        sys.stderr.write("*ERROR* Coreline mismatch:  %s\n" % (coreLine,))
        return coreObject

    # matches are:
    # 1: frame
    # 2: address or no match
    # 3: function name
    # 4: argument list (including braces)
    # 5: source / library
    coreObject['frame'] = matchResult.group(1)
    coreObject['address'] = matchResult.group(2)
    coreObject['function'] = matchResult.group(3)
    coreObject['source'] = matchResult.group(5)
    coreObject['arguments'] = {}

    # right, parse in the argument list
    # arguments can have the @ symbol in them 'this@entry=0x12345678'
    argSearch = re.findall(r'([\w@]+)=(\w+|<optimized out>)', matchResult.group(4), re.I|re.M)
    for (argKey, argValue) in argSearch:
        coreObject['arguments'][argKey] = argValue

    # bt full stuff comes here: a wrapped "at <file>" line and the stack
    # variables.  Blank lines carry no information and are dropped.
    followingLines = [text for text in coreLine.split('\n')[1:] if text.strip()]

    if followingLines:
        if coreObject['source'] is None:
            # The header was wrapped and the source sits on the next line.
            sourceMatch = re.match(r'\s+(?:at|from) (.*)', followingLines[0], re.I|re.M)
            if sourceMatch:
                coreObject['source'] = sourceMatch.group(1).strip()
                # Only a line consumed as the source is kept out of the
                # locals; otherwise the first local would be lost.
                followingLines = followingLines[1:]

        # parse the local variables.
        if followingLines:
            coreObject['locals'] = parseLocals(followingLines)
        else:
            coreObject['locals'] = {}

    return coreObject
def textToList(filePath, fileText):
    """Build the dict describing a whole backtrace file.

    Args:
        filePath: path of the backtrace file; stored as 'filePath' and,
            reduced to its base name without extension, as 'fileName'.
        fileText: iterable of the lines of the file (line endings may be
            present or already removed).

    Returns:
        dict with the keys 'filePath', 'fileName', 'jsonCreationTimeUtc'
        and 'threads', plus 'commandLine' and 'coreReason' when the
        matching lines are present.  Every entry of 'threads' has a
        'stackTrace' list of frame dicts (see coreLinesToObject) and a
        'threadId'; 'threadNumber' is added when the thread header could
        be parsed, otherwise 'threadId' holds the raw header text and the
        problem is reported on stderr.
    """
    core = {}
    core['filePath'] = filePath
    core['fileName'] = os.path.splitext(os.path.basename(filePath))[0]
    core['threads'] = []
    core['jsonCreationTimeUtc'] = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")

    currentTrace = None  # 'stackTrace' list of the thread being read
    pendingFrame = ""    # text of the frame collected so far

    for line in fileText:
        # Search for core dump global meta
        # (command line and termination reason)
        #   Core was generated by `.....'.
        #   Program terminated with ...
        # ---------------------------------------
        if 'commandLine' not in core and line.startswith("Core was generated by"):
            # keep what is between the quotes.
            quoted = re.match(r"Core was generated by `(.*)'\.", line)
            if quoted:
                core['commandLine'] = quoted.group(1)
            else:
                core['commandLine'] = line[len("Core was generated by"):].strip()

        if 'coreReason' not in core and line.startswith("Program terminated with"):
            # Strip only the line ending, whichever form it has.
            core['coreReason'] = line.rstrip('\r\n')

        # Parse core dumps per thread.
        # ---------------------------------------
        if line.startswith("Thread"):
            # make sure the last frame of the previous thread is stored.
            if pendingFrame:
                currentTrace.append(coreLinesToObject(pendingFrame))
                pendingFrame = ""

            thread = {'stackTrace': []}
            threadId = line.rstrip().rstrip(':')

            # Accepts both "Thread 1 (LWP 123)" and
            # "Thread 1 (Thread 0x7f... (LWP 123))".
            threadResult = re.match(r'Thread\s+(\d+)\s+\(.*?LWP\s+(\d+)\)', threadId, re.I|re.M)
            if threadResult:
                thread['threadId'] = threadResult.group(2)
                thread['threadNumber'] = threadResult.group(1)
            else:
                # Diagnostics go to stderr so they never mix with the
                # json this script writes to stdout.
                sys.stderr.write("*ERROR* ThreadId mismatch:  %s\n" % (threadId,))
                thread['threadId'] = threadId

            core['threads'].append(thread)
            currentTrace = thread['stackTrace']
            continue

        if currentTrace is None:
            # Nothing before the first thread header belongs to a frame.
            continue

        if not line:
            # A completely empty line ends the frame being collected.
            if pendingFrame:
                currentTrace.append(coreLinesToObject(pendingFrame))
                pendingFrame = ""
        elif line[0] == '#':
            # A new frame starts; store the one collected so far.
            if pendingFrame:
                currentTrace.append(coreLinesToObject(pendingFrame))
            pendingFrame = line
        elif pendingFrame:
            pendingFrame += line

    # The last frame of the last thread has no following line to trigger
    # its storage, so store it here.
    if pendingFrame:
        currentTrace.append(coreLinesToObject(pendingFrame))

    return core
def process(argList):
    """Convert one backtrace file to json and print it on stdout.

    Args:
        argList: argument list laid out like sys.argv; argList[1] is the
            name of the file holding the textual output of gdb's
            "thread apply all bt".

    Raises:
        IndexError: when argList holds no file name.
    """
    fileName = argList[1]

    # 'with' closes the file even when reading it fails.
    with open(fileName, 'r') as coreDump:
        lines = coreDump.readlines()

    coreDumpObject = textToList(fileName, lines)

    # right, dump the json
    print(json.dumps(coreDumpObject, sort_keys=True, indent=4))
# decode the first passed filename, but only when run as a script, so that
# importing this module does not start parsing sys.argv.
if __name__ == "__main__":
    process(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment