Created
October 16, 2013 21:02
-
-
Save tingley/7014806 to your computer and use it in GitHub Desktop.
Scala script to parse and analyze large numbers of jstack dumps in order to create an extremely basic profiler. This is currently set up to be used with GlobalSight, but it could be adapted for other uses as well.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File | |
import scala.collection.mutable | |
// A tool to digest and visualize jstack traces, written poorly in Scala.
// Usage:
//   scala jstack.scala -t [regex] [files]
// where
//   [regex] is a regular expression applied to thread names in the jstack
//           dumps. Matching threads will be processed. The -t option
//           can be specified multiple times.
//   [files] is one or more files or directories to process. Files should be
//           jstack dumps; directories are expected to contain only jstack
//           dumps.
// Output is written to stdout. It will be very wide, so capturing via
// redirection is recommended.
//
// The stack data will be merged into a call tree that tracks how often
// each execution point is seen in a stack. The options below
// (not exposed via the UI) allow you to trim the tree in both directions
// to only capture calls bracketed by a certain codebase (in this case,
// com.globalsight). The entire tree is dumped as output. Following that,
// leaf nodes with more than a configurable percentage of the observed
// data will be identified as "hot spots".
//
// This could go a lot further. In particular:
// - Find out how to better merge stacks at the bottom as well
// - Figure out how to identify hot nodes in the middle of the tree
// - I probably need a better trimming mechanism.
// - Expose options via the command line
// - etc.
// Tuning knobs for the analysis. None of these are settable from the
// command line; edit the values here.

// When true, parse() passes each collected stack through trimFrames().
val OPTION_TRIM_FROM_BOTTOM : Boolean = true

// When true, parse() passes each stack through cleanThread() and discards
// threads whose stack ends up empty.
val OPTION_CLEAN_FROM_TOP : Boolean = true

// Debugging aid: when true, lines the parser does not recognize are printed.
val WARN_UNRECOGNIZED : Boolean = false

// Hot-spot cutoff as a fraction (percentage / 100) of the thread count
// handed to dumpHotLeaves().
val HOTSPOT_THRESHOLD : Double = 0.02

// Package prefix of the code we care about; used by both trimming passes.
val PACKAGE_PREFIX : String = "com.globalsight"
// One thread entry taken from a jstack dump.
//   n - the thread name as captured from the thread header line
class StackThread(n : String) {
  // Thread name, used later to select threads by regex.
  val name : String = n

  // Frames belonging to this thread. Starts out null and is assigned by
  // parse() once the thread's stack has been read.
  var stackFrames : List[StackFrame] = null

  override def toString() = s"Thread '$name'"
}
// A single stack frame.
//   method   - text before the parenthesis on an "at ..." line
//   location - text inside the parentheses (e.g. "Foo.java:123")
case class StackFrame(method : String, location : String) {
  // Renders as "method:suffix", where suffix is whatever follows the first
  // ':' in location, or the whole location when it contains no ':'.
  override def toString() = {
    val suffix = location.indexOf(":") match {
      case -1  => location
      case idx => location.substring(idx + 1)
    }
    method + ":" + suffix
  }
}
// States of the line-oriented parser in parse(). Declaration order is kept
// as-is because Enumeration assigns ids in that order.
object State extends Enumeration {
  type State = Value
  val Initial       = Value
  val SeenDate      = Value
  val Body          = Value
  val SeenHeader    = Value
  val InThread      = Value
  val SeenThread    = Value
  val Synchronizers = Value
}
import State._ | |
// Patterns for the lines that open a thread entry.
// ThreadHeaderRegex captures (thread name, tid, remainder of the line).
val ThreadHeaderRegex : scala.util.matching.Regex =
  """"(.+)" (?:daemon )?prio=\d+ tid=(0x[0-9a-f]+) nid=0x[0-9a-f]+ (.*)""".r
// ThreadStateRegex captures the state word after "java.lang.Thread.State:".
val ThreadStateRegex : scala.util.matching.Regex =
  """\s*java.lang.Thread.State: (\w+)""".r

// Patterns for lines inside a thread's stack.
// StackFrameRegex captures (method, text inside the parentheses).
val StackFrameRegex : scala.util.matching.Regex =
  """\s*at ([^(]+)\((.+)\)""".r
// Monitor/lock annotations that appear between frames.
val WaitingRegex : scala.util.matching.Regex =
  """\s+- waiting on (.*)""".r
val LockedRegex : scala.util.matching.Regex =
  """\s+- locked (.*)""".r
val ParkingRegex : scala.util.matching.Regex =
  """\s+- parking (.*)""".r
// Every thread kept by parse(), across all input files. New threads are
// prepended, so the list is in reverse order of discovery.
var threads : List[StackThread] = Nil
// Parses each file as a jstack dump and prepends every thread that survives
// trimming to the global `threads` list.
//
// The parser is a line-driven state machine:
//   Initial, SeenDate - each consumes one line unconditionally (the state
//                       names suggest the timestamp and banner lines).
//   Body              - skips blank lines; the first non-blank line must be
//                       a thread header or a MatchError is thrown.
//   SeenHeader        - consumes the line after the header.
//   InThread          - collects stack frames until a blank line.
//   SeenThread        - waits for "Locked ownable synchronizers:".
//   Synchronizers     - waits for a blank line, then records the thread.
//
// Note that a thread is only recorded once the blank line that closes its
// synchronizers section has been seen.
//
//   files - the dump files to read; each is opened, read fully and closed.
def parse(files : Seq[File]) : Unit = {
  for (file <- files) {
    //println("Parsing " + file)
    var currentThread : StackThread = null
    var state : State = Initial
    var frames : List[StackFrame] = List()
    val source = scala.io.Source.fromFile(file)
    // Close the file handle even if a line fails to parse.
    try {
      for (line <- source.getLines()) {
        state match {
          case Initial => {
            state = _state(SeenDate, line)
          }
          case SeenDate => {
            state = _state(Body, line)
            frames = List()
          }
          case Body => {
            if (line.trim != "") {
              // Deliberately strict: a non-header line here is a format error.
              val ThreadHeaderRegex(name, _, _) = line
              state = _state(SeenHeader, line)
              require (currentThread == null)
              // TODO: filter by name so we don't collect useless data
              currentThread = new StackThread(name)
            }
          }
          case SeenHeader => {
            // The line after the header is consumed without being parsed.
            state = _state(InThread, line)
          }
          case InThread => {
            if (line.trim == "") {
              // A blank line terminates the stack for this thread.
              state = _state(SeenThread, line)
              currentThread.stackFrames =
                if (OPTION_TRIM_FROM_BOTTOM) trimFrames(frames) else frames
              if (OPTION_CLEAN_FROM_TOP) {
                currentThread.stackFrames = cleanThread(currentThread.stackFrames)
                if (currentThread.stackFrames.length == 0) {
                  // Pretend this thread never happened
                  currentThread = null
                }
                // Special case: strip automatic import threads that are
                // sleeping
                else if (currentThread.stackFrames.last.method.contains("AutomaticImportMonitor.sleepUntilNextPoll")) {
                  currentThread = null
                }
              }
              frames = List()
            }
            else if (StackFrameRegex.findFirstIn(line) != None) {
              // findFirstIn is an unanchored search while makeStackFrame needs
              // the whole line to match, so it can still return null. Never
              // store that null: it would blow up later in the trimming code.
              val frame = makeStackFrame(line)
              if (frame != null) {
                // Prepending reverses the order in which frames were read.
                frames ::= frame
              }
              else if (WARN_UNRECOGNIZED) {
                println("* Unrecognized: [" + line + "]")
              }
            }
            else if (WaitingRegex.findFirstIn(line) != None) {
              // no-op
            }
            else if (LockedRegex.findFirstIn(line) != None) {
              // no-op
            }
            else if (ParkingRegex.findFirstIn(line) != None) {
              // no-op
            }
            else {
              if (WARN_UNRECOGNIZED) println("* Unrecognized: [" + line + "]")
            }
          }
          case SeenThread => {
            val t = line.trim
            if (t == "Locked ownable synchronizers:") {
              state = _state(Synchronizers, line)
            }
            else if (t != "") {
              if (WARN_UNRECOGNIZED) println("* Unrecognized: [" + line + "]")
            }
          }
          case Synchronizers => {
            // TODO: handle cases where this is not trivial
            if (line.trim == "") {
              state = _state(Body, line)
              // currentThread is null when the thread was discarded above.
              if (currentThread != null) {
                threads ::= currentThread
              }
              currentThread = null
            }
          }
          case _ => ; //println(line)
        }
      }
    }
    finally {
      source.close()
    }
  }
}
// Drops frames from the front of the list until one whose method starts
// with PACKAGE_PREFIX is found; that frame and everything after it are kept.
// Returns an empty list when no frame qualifies.
def cleanThread(frames : List[StackFrame]) : List[StackFrame] =
  frames.dropWhile(frame => !frame.method.startsWith(PACKAGE_PREFIX))
// Drops frames from the END of the list until one whose method starts with
// PACKAGE_PREFIX is found; that frame and everything before it are kept in
// their original order. Returns an empty list when no frame qualifies.
def trimFrames(frames : List[StackFrame]) : List[StackFrame] = {
  val isOurs = (frame : StackFrame) => frame.method.startsWith(PACKAGE_PREFIX)
  // Reverse, strip the leading run of foreign frames, then restore the order.
  frames.reverse.dropWhile(frame => !isOurs(frame)).reverse
}
// Tracing hook for state transitions: hands back the new state unchanged.
// Re-enable the println to log each transition with the line that caused it.
//   s - the state being entered
//   l - the input line that triggered the transition (only used for tracing)
def _state(s : State, l : String) : State = {
  //println(s + "<-- [" + l + "]")
  val next = s
  next
}
// Builds a StackFrame from an "at method(location)" line.
// The whole line has to match StackFrameRegex; when it does not, null is
// returned, so callers must be prepared for a null result.
def makeStackFrame(line : String) : StackFrame =
  line match {
    case StackFrameRegex(method, location) => new StackFrame(method, location)
    case _                                 => null
  }
// A node of the merged call tree.
//   f - the frame this node stands for (analyzeThreads passes null for the
//       synthetic root)
class StackTreeNode(f : StackFrame) {
  val stackFrame : StackFrame = f

  // Number of stacks that passed through this node; a node is created for
  // the first such stack, hence the initial value of 1.
  var count : Int = 1

  // Child nodes keyed by their frame.
  var children : mutable.Map[StackFrame, StackTreeNode] = mutable.Map.empty

  override def toString() = s"[$stackFrame, $count]"
}
// Build a huge tree: merges the stacks of all selected threads into a single
// call tree, counting how many stacks pass through each node.
//
//   threads     - parsed threads; each thread's stackFrames are walked in
//                 list order from the root downwards
//   threadNames - Java regexes; a thread is selected when its whole name
//                 matches at least one of them. An empty set selects nothing.
//
// Returns the synthetic root node (its frame is null) together with the
// number of threads that were selected.
def analyzeThreads(threads : Iterable[StackThread],
                   threadNames : Set[String]) : (StackTreeNode, Int) = {
  val root = new StackTreeNode(null)
  // Compile each pattern once rather than once per thread.
  val patterns = threadNames.toList.map(p => java.util.regex.Pattern.compile(p))
  var threadCount = 0
  // Select on "matches any pattern" so that a thread matching several
  // patterns is merged (and counted) exactly once.
  for (t <- threads if patterns.exists(_.matcher(t.name).matches())) {
    threadCount += 1
    var node = root
    for (f <- t.stackFrames) {
      node = node.children.get(f) match {
        case Some(existing) =>
          existing.count += 1
          existing
        case None =>
          // New nodes start with count 1, which accounts for this stack.
          val created = new StackTreeNode(f)
          node.children += (f -> created)
          created
      }
    }
  }
  (root, threadCount)
}
// Prints a thread followed by its frames, one tab-indented frame per line.
def dumpThread(t : StackThread) : Unit = {
  println(t)
  t.stackFrames.foreach(frame => println("\t" + frame))
}
// Recursively prints the subtree rooted at n, one node per line, as
// "<indent> <frame> [<count>]". Each level adds one space to the indent.
def dump(n : StackTreeNode, indent : String) : Unit = {
  println(s"$indent ${n.stackFrame} [${n.count}]")
  val childIndent = indent + " "
  n.children.values.foreach(child => dump(child, childIndent))
}
// Prints every leaf of the tree whose hit count reaches the hot-spot cutoff.
//   n         - root of the tree to search
//   leafCount - total used as the 100% baseline for the cutoff
// The cutoff is leafCount * HOTSPOT_THRESHOLD truncated to an Int, so for
// small totals it becomes 0 and every leaf is printed.
def dumpHotLeaves(n : StackTreeNode, leafCount : Int) : Unit = {
  val threshold = (leafCount * HOTSPOT_THRESHOLD).toInt
  println(s"$leafCount leaf nodes; printing those with >= $threshold hits")

  // Depth-first walk; only nodes without children are candidates.
  def search(node : StackTreeNode) : Unit =
    if (node.children.isEmpty) {
      if (node.count >= threshold) println(node)
    }
    else node.children.values.foreach(search)

  search(n)
}
//
// Here's the actual program
// Notes:
// - Thread names can be (Java) regexes
// - Items in the file list can be directories
val usage = """
Usage: ProcessJStack [-t thread1 -t thread2 ...] [files]
"""
// `args` is the documented name for a script's command-line arguments.
// With no arguments at all there is nothing to do: show the usage and stop.
if (args.length == 0) {
  println(usage)
  sys.exit()
}
// Splits the command line into thread-name patterns and file names.
//   threads - patterns collected so far
//   files   - file names collected so far
//   list    - arguments still to be processed
// "-t X" contributes X as a pattern; any other argument (including a
// trailing "-t" with nothing after it) is taken as a file name. Items are
// prepended, so both results are in reverse order of appearance.
def parseArg(threads : List[String], files : List[String],
             list : List[String]) : (List[String], List[String]) =
  list match {
    case "-t" :: pattern :: rest => parseArg(pattern :: threads, files, rest)
    case path :: rest            => parseArg(threads, path :: files, rest)
    case Nil                     => (threads, files)
  }
// Split the command line into thread-name patterns and input paths.
// `args` is the documented name for a script's command-line arguments.
val (threadNames, fileNames) = parseArg(List(), List(), args.toList)

// Expand the paths into the files to parse: a directory contributes its
// entries (appended), a plain file is prepended.
val fileList = fileNames.foldLeft(List[File]()) { (collected, fileName) =>
  val f = new File(fileName)
  if (f.isDirectory()) {
    // listFiles() returns null when the directory cannot be read; treat
    // that as "no entries" instead of failing with a NullPointerException.
    collected ++ Option(f.listFiles()).map(_.toList).getOrElse(Nil)
  }
  else f :: collected
}

parse(fileList)
println("Found " + threads.size + " threads")

// Merge the selected threads into a call tree, print the tree, then print
// the hot leaves measured against the number of selected threads.
val (root, threadCount) = analyzeThreads(threads, threadNames.toSet)
for (n <- root.children.values)
  dump(n, "")
dumpHotLeaves(root, threadCount)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment