tingley · October 16, 2013 21:02
diff --git a/jstack.scala b/jstack.scala
 import java.io.File
 import scala.collection.mutable

 // A tool to digest and visualize jstack traces, written poorly in scala.

 // Usage
 //   scala jstack.scala -t [regex] [files]
 // where
 //   [regex] is a regular expression applied to thread names in the jstack 
 //           dumps.  Matching threads will be processed.  The -t option
 //           can be specified multiple times.
 //   [files] is one or more files or directories to process.  Files should be
 //           jstack dumps; directories are expected to contain only jstack
 //           dumps.
 // Output is written to stdout. It will be very wide, so capturing via 
 // redirection is recommended.
 // 
 // The stack data will be merged into a call tree that tracks how often
 // each execution point is seen in a stack.  The options below
 // (not exposed via the UI) allow you to trim the tree in both directions
 // to only capture calls bracketed by a certain codebase (in this case,
 // com.globalsight).  The entire tree is dumped as output.  Following that,
 // leaf nodes with more than a configurable percentage of the observed
 // data will be identified as "hot spots".
 //
 // This could go a lot further.  In particular:
 // - Find out how better to merge stacks at the bottom as well
 // - Figure out how to identify hot nodes in the middle of the tree
 // - I probably need a better trimming mechanism.
 // - Expose options via command line
 // - etc

 // Tuning knobs (not yet exposed on the command line).

 // When true, parse() runs every collected stack through trimFrames().
 val OPTION_TRIM_FROM_BOTTOM : Boolean = true
 // When true, parse() runs every collected stack through cleanThread().
 val OPTION_CLEAN_FROM_TOP : Boolean = true
 // Debug aid: when true, parse() prints the lines it does not recognize.
 val WARN_UNRECOGNIZED : Boolean = false
 // Fraction (0.02 == 2%) of the processed thread count that a leaf node
 // must reach before dumpHotLeaves() prints it.
 val HOTSPOT_THRESHOLD : Double = 0.02

 // Method-name prefix that trimFrames() and cleanThread() look for.
 val PACKAGE_PREFIX : String = "com.globalsight"

 // One thread seen in a jstack dump.
 //   n - the thread name captured from the thread header line
 class StackThread(n : String) {
   val name : String = n
   // Starts out empty rather than null so that a thread whose frames were
   // never assigned can still be iterated (e.g. by dumpThread) without an NPE.
   var stackFrames : List[StackFrame] = Nil

   override def toString() = "Thread '" + name + "'"
 }

 // A single "at method(location)" entry of a stack trace.
 //   method   - the text before the opening parenthesis
 //   location - the text between the parentheses
 case class StackFrame(method : String, location : String) {
   // Renders as "method:<text after the first colon of location>"; when the
   // location has no colon, the whole location is used instead.
   override def toString() = {
     val suffix = location.indexOf(":") match {
       case -1  => location
       case idx => location.substring(idx + 1)
     }
     s"$method:$suffix"
   }
 }

 // Parser states used by parse(), listed in the order a dump is read.
 object State extends Enumeration {
   type State = Value
   val Initial       = Value // nothing read yet
   val SeenDate      = Value // first line of the file consumed
   val Body          = Value // between threads, waiting for a thread header
   val SeenHeader    = Value // thread header consumed
   val InThread      = Value // reading the lines of a thread's stack
   val SeenThread    = Value // blank line after the stack consumed
   val Synchronizers = Value // inside "Locked ownable synchronizers:"
 }

 import State._

 // Thread header regexes
 // Header line of a thread. Groups: (1) thread name, (2) tid, (3) rest of the
 // line. The "#N" sequence number and "os_prio=N" field that some JVM versions
 // print are optional, so headers without them keep matching.
 val ThreadHeaderRegex = """"(.+)" (?:#\d+ )?(?:daemon )?prio=\d+ (?:os_prio=-?\d+ )?tid=(0x[0-9a-f]+) nid=0x[0-9a-f]+ (.*)""".r
 // The thread state line. Dots are escaped so they match literal dots only.
 val ThreadStateRegex = """\s*java\.lang\.Thread\.State: (\w+)""".r
 // Thread body regexes
 // "at method(location)": group 1 is the method, group 2 the location.
 val StackFrameRegex = """\s*at ([^(]+)\((.+)\)""".r
 // Lock annotations that may appear between frames; group 1 is the rest of the line.
 val WaitingRegex = """\s+- waiting on (.*)""".r
 val LockedRegex = """\s+- locked (.*)""".r
 val ParkingRegex = """\s+- parking (.*)""".r

 // Threads kept by parse(), accumulated across all files (most recent first).
 var threads : List[StackThread] = List.empty[StackThread]

 // Parses every jstack dump in `files` with a line-driven state machine and
 // prepends each retained thread to the script-level `threads` list.
 //
 // Layout expected in each file: two preamble lines, then repeated thread
 // sections made of a header line, one line that is skipped, the stack lines,
 // a blank line, a "Locked ownable synchronizers:" section and another blank
 // line. A thread is only recorded once the blank line that ends its
 // synchronizers section has been read.
 //
 // Throws MatchError when a non-blank line found where a thread header is
 // expected does not match ThreadHeaderRegex.
 def parse(files : Seq[File]) : Unit = {
   for (file <- files) {
     var currentThread : StackThread = null
     var state : State = Initial
     var frames : List[StackFrame] = List()
     val source = scala.io.Source.fromFile(file)
     // try/finally so the file handle is released even when a malformed line
     // aborts parsing.
     try {
       for (line <- source.getLines()) {
         state match {
           case Initial => {
             state = _state(SeenDate, line)
           }
           case SeenDate => {
             state = _state(Body, line)
             frames = List()
           }
           case Body => {
             if (line.trim != "") {
               // Single-case match: a line that is not a thread header raises
               // MatchError.
               line match {
                 case ThreadHeaderRegex(name, _, _) => {
                   state = _state(SeenHeader, line)
                   require (currentThread == null)
                   // TODO: filter by name so we don't collect useless data
                   currentThread = new StackThread(name)
                 }
               }
             }
           }
           case SeenHeader => {
             // The line following the header is skipped without inspection.
             state = _state(InThread, line)
           }
           case InThread => {
             if (line.trim == "") {
               // Blank line: the stack of the current thread is complete.
               state = _state(SeenThread, line)
               currentThread.stackFrames =
                   if (OPTION_TRIM_FROM_BOTTOM) trimFrames(frames) else frames
               if (OPTION_CLEAN_FROM_TOP) {
                 currentThread.stackFrames = cleanThread(currentThread.stackFrames)
                 if (currentThread.stackFrames.isEmpty) {
                   // Pretend this thread never happened
                   currentThread = null
                 }
                 // Special case: strip automatic import threads that are
                 // sleeping
                 else if (currentThread.stackFrames.last.method.contains("AutomaticImportMonitor.sleepUntilNextPoll")) {
                   currentThread = null
                 }
               }
               frames = List()
             }
             else {
               // Test and extraction use the same unanchored match, so a
               // recognized frame line can never produce a null frame.
               StackFrameRegex.findFirstMatchIn(line) match {
                 case Some(m) =>
                   // Prepending: the list ends up in reverse file order.
                   frames ::= StackFrame(m.group(1), m.group(2))
                 case None =>
                   // Lock annotations are recognized but ignored.
                   val isLockNote = WaitingRegex.findFirstIn(line).isDefined ||
                                    LockedRegex.findFirstIn(line).isDefined ||
                                    ParkingRegex.findFirstIn(line).isDefined
                   if (!isLockNote && WARN_UNRECOGNIZED) println("* Unrecognized: [" + line + "]")
               }
             }
           }
           case SeenThread => {
             val t = line.trim
             if (t == "Locked ownable synchronizers:") {
               state = _state(Synchronizers, line)
             }
             else if (t != "") {
               if (WARN_UNRECOGNIZED) println("* Unrecognized: [" + line + "]")
             }
           }
           case Synchronizers => {
             // TODO: handle cases where this is not trivial
             if (line.trim == "") {
               state = _state(Body, line)
               // currentThread is null when the thread was discarded above.
               if (currentThread != null) {
                 threads ::= currentThread
               }
               currentThread = null
             }
           }
           case _ => () //println(line)
         }
       }
     }
     finally {
       source.close()
     }
   }
 }

 // Drops leading frames until the first one whose method starts with
 // PACKAGE_PREFIX; that frame and everything after it are returned. The
 // result is empty when no frame matches.
 def cleanThread(frames : List[StackFrame]) : List[StackFrame] =
   frames.dropWhile(frame => !frame.method.startsWith(PACKAGE_PREFIX))

 // Keeps the frames up to and including the last one whose method starts
 // with PACKAGE_PREFIX and discards whatever follows it. The result is empty
 // when no frame matches.
 def trimFrames(frames : List[StackFrame]) : List[StackFrame] = {
   // -1 when nothing matches, which makes take() return an empty list.
   val lastOurs = frames.lastIndexWhere(_.method.startsWith(PACKAGE_PREFIX))
   frames.take(lastOurs + 1)
 }

 // Single choke point for state transitions: hands back `s` untouched. `l` is
 // the line that caused the transition and is only read by the trace
 // statement below when it is uncommented.
 //   println(s + "<-- [" + l + "]")
 def _state(s : State, l : String) : State = s

 // Builds a StackFrame from a line containing "at method(location)".
 //
 // The regex is applied unanchored, the same way the findFirstIn guard in
 // parse() applies it, so every line accepted by that guard yields a frame.
 // An anchored extractor would return null for such a line when it carries
 // extra characters around the frame.
 //
 // Returns the parsed frame, or null when the line contains no stack frame.
 def makeStackFrame(line : String) : StackFrame =
   StackFrameRegex.findFirstMatchIn(line) match {
     case Some(m) => StackFrame(m.group(1), m.group(2))
     case None    => null
   }

 // Node of the merged call tree.
 //   f - the frame this node stands for (analyzeThreads passes null for the root)
 class StackTreeNode(f : StackFrame) {
   val stackFrame = f
   // Hit counter; starts at 1 and is incremented by analyzeThreads each time
   // another stack reaches this node.
   var count : Int = 1
   // Child nodes keyed by their frame.
   var children = mutable.Map[StackFrame, StackTreeNode]()

   override def toString() = s"[$stackFrame, $count]"
 }

 // Build a huge tree: merges the stacks of all matching threads into one call
 // tree.
 //
 // A thread is merged exactly once when its whole name matches at least one
 // of the regexes in `threadNames`; matching several patterns does not count
 // it several times.
 //
 //   threads     - the parsed threads
 //   threadNames - Java regexes matched against complete thread names
 // Returns the root node (whose frame is null) and the number of threads merged.
 def analyzeThreads(threads : Iterable[StackThread],
                    threadNames : Set[String]) : (StackTreeNode, Int) = {
   val root = new StackTreeNode(null)
   var threadCount = 0
   for (t <- threads if threadNames.exists(pattern => t.name.matches(pattern))) {
     threadCount += 1
     var node = root
     // A thread whose frames were never assigned contributes no nodes.
     val stack = if (t.stackFrames == null) Nil else t.stackFrames
     for (f <- stack) {
       node = node.children.get(f) match {
         case Some(child) =>
           // Path already known: bump its hit count and descend.
           child.count += 1
           child
         case None =>
           // First stack to reach this frame from here (new nodes start at 1).
           val fresh = new StackTreeNode(f)
           node.children += (f -> fresh)
           fresh
       }
     }
   }
   (root, threadCount)
 }

 // Prints the thread itself, then one tab-indented line per stack frame.
 def dumpThread(t : StackThread) : Unit = {
   println(t)
   t.stackFrames.foreach(frame => println("\t" + frame))
 }

 // Prints `n` as "<indent> <frame> [<count>]" and then recurses into its
 // children with a deeper indent.
 def dump(n : StackTreeNode, indent : String) : Unit = {
   println(s"$indent ${n.stackFrame} [${n.count}]")
   val deeper = indent + "  "
   n.children.values.foreach(child => dump(child, deeper))
 }

 // Prints every leaf below `n` whose hit count reaches the hot-spot threshold.
 //   n         - the node to start searching from (the tree root in this script)
 //   leafCount - the total the threshold is derived from; the script passes
 //               the number of merged threads
 def dumpHotLeaves(n : StackTreeNode, leafCount : Int) : Unit = {
   // Minimum hit count: the HOTSPOT_THRESHOLD fraction of leafCount,
   // truncated to an Int (so it is 0 for small inputs).
   val threshold = (leafCount * HOTSPOT_THRESHOLD).toInt
   println(leafCount + " leaf nodes; printing those with >= "
           + threshold + " hits")
   def search(node : StackTreeNode) : Unit = {
     if (node.children.isEmpty) {
       // A childless node without a frame is the root of an empty tree, not
       // a real leaf, so it is never reported.
       if (node.stackFrame != null && node.count >= threshold) {
         println(node)
       }
     }
     else {
       node.children.values.foreach(search)
     }
   }
   search(n)
 }

 // 
 // Here's the actual program
 // Notes:
 // - My scala sucks
 // - Threadnames can be (Java) regexes
 // - items in the file list can be directories
 // Text shown when the script is started without any argument.
 val usage = """
 Usage: ProcessJStack [-t thread1 -t thread2 ...] [files]
 """
 // Nothing to do without arguments: print the usage text and stop.
 if (argv.isEmpty) {
   println(usage)
   sys.exit()
 }
 // Splits command-line arguments into thread-name patterns and file names.
 //   threads - patterns collected so far
 //   files   - file names collected so far
 //   list    - arguments still to be examined
 // Every "-t <value>" pair adds <value> to the patterns; any other argument
 // (including a trailing "-t" without a value) is taken as a file name. Both
 // results are built by prepending, so they come out in reverse order.
 def parseArg(threads : List[String], files : List[String],
               list : List[String]) : (List[String], List[String]) = {
   var patterns = threads
   var paths = files
   var remaining = list
   while (remaining.nonEmpty) {
     remaining match {
       case "-t" :: pattern :: rest =>
         patterns = pattern :: patterns
         remaining = rest
       case other :: rest =>
         paths = other :: paths
         remaining = rest
       case Nil => () // not reachable: the loop guard excludes the empty list
     }
   }
   (patterns, paths)
 }
 // Split the command line into thread-name patterns and file/directory names.
 val (threadNames, fileNames) = parseArg(Nil, Nil, argv.toList)

 // Expand the names into files: the entries of a directory are appended,
 // a plain file is prepended.
 var fileList = List[File]()
 fileNames.foreach { fileName =>
   val candidate = new File(fileName)
   fileList =
     if (candidate.isDirectory()) fileList ++ candidate.listFiles().toList
     else candidate +: fileList
 }

 parse(fileList)
 println("Found " + threads.size + " threads")

 // Merge the matching threads into a tree, print each top-level subtree,
 // then print the hot leaves.
 val (root, threadCount) = analyzeThreads(threads, threadNames.toSet)
 root.children.values.foreach(child => dump(child, ""))
 dumpHotLeaves(root, threadCount)
	import java.io.File
	import scala.collection.mutable

	// A tool to digest and visualize jstack traces, written poorly in scala.

	// Usage
	// scala jstack.scala -t [regex] [files]
	// where
	// [regex] is a regular expression applied to thread names in the jstack
	// dumps. Matching threads will be processed. The -t option
	// can be specified multiple times.
	// [files] is one or more files or directories to process. Files should be
	// jstack dumps; directories are expected to contain only jstack
	// dumps.
	// Output is written to stdout. It will be very wide, so capturing via
	// redirection is recommended.
	//
	// The stack data will be merged into a call tree that tracks how often
	// each execution point is seen in a stack. The options below
	// (not exposed via the UI) allow you to trim the tree in both directions
	// to only capture calls bracketed by a certain codebase (in this case,
	// com.globalsight). The entire tree is dumped as output. Following that,
	// leaf nodes with more than a configurable percentage of the observed
	// data will be identified as "hot spots".
	//
	// This could go a lot further. In particular:
	// - Find out how better to merge stacks at the bottom as well
	// - Figure out how to identify hot nodes in the middle of the tree
	// - I probably need a better trimming mechanism.
	// - Expose options via command line
	// - etc

	// Tuning knobs (not yet exposed on the command line).

	// When true, parse() runs every collected stack through trimFrames().
	val OPTION_TRIM_FROM_BOTTOM : Boolean = true
	// When true, parse() runs every collected stack through cleanThread().
	val OPTION_CLEAN_FROM_TOP : Boolean = true
	// Debug aid: when true, parse() prints the lines it does not recognize.
	val WARN_UNRECOGNIZED : Boolean = false
	// Fraction (0.02 == 2%) of the processed thread count that a leaf node
	// must reach before dumpHotLeaves() prints it.
	val HOTSPOT_THRESHOLD : Double = 0.02

	// Method-name prefix that trimFrames() and cleanThread() look for.
	val PACKAGE_PREFIX : String = "com.globalsight"

	// One thread seen in a jstack dump.
	//   n - the thread name captured from the thread header line
	class StackThread(n : String) {
	  val name : String = n
	  // Starts out empty rather than null so that a thread whose frames were
	  // never assigned can still be iterated (e.g. by dumpThread) without an NPE.
	  var stackFrames : List[StackFrame] = Nil

	  override def toString() = "Thread '" + name + "'"
	}

	// A single "at method(location)" entry of a stack trace.
	//   method   - the text before the opening parenthesis
	//   location - the text between the parentheses
	case class StackFrame(method : String, location : String) {
	  // Renders as "method:<text after the first colon of location>"; when the
	  // location has no colon, the whole location is used instead.
	  override def toString() = {
	    val suffix = location.indexOf(":") match {
	      case -1  => location
	      case idx => location.substring(idx + 1)
	    }
	    s"$method:$suffix"
	  }
	}

	// Parser states used by parse(), listed in the order a dump is read.
	object State extends Enumeration {
	  type State = Value
	  val Initial       = Value // nothing read yet
	  val SeenDate      = Value // first line of the file consumed
	  val Body          = Value // between threads, waiting for a thread header
	  val SeenHeader    = Value // thread header consumed
	  val InThread      = Value // reading the lines of a thread's stack
	  val SeenThread    = Value // blank line after the stack consumed
	  val Synchronizers = Value // inside "Locked ownable synchronizers:"
	}

	import State._

	// Thread header regexes
	// Header line of a thread. Groups: (1) thread name, (2) tid, (3) rest of the
	// line. The "#N" sequence number and "os_prio=N" field that some JVM versions
	// print are optional, so headers without them keep matching.
	val ThreadHeaderRegex = """"(.+)" (?:#\d+ )?(?:daemon )?prio=\d+ (?:os_prio=-?\d+ )?tid=(0x[0-9a-f]+) nid=0x[0-9a-f]+ (.*)""".r
	// The thread state line. Dots are escaped so they match literal dots only.
	val ThreadStateRegex = """\s*java\.lang\.Thread\.State: (\w+)""".r
	// Thread body regexes
	// "at method(location)": group 1 is the method, group 2 the location.
	val StackFrameRegex = """\s*at ([^(]+)\((.+)\)""".r
	// Lock annotations that may appear between frames; group 1 is the rest of the line.
	val WaitingRegex = """\s+- waiting on (.*)""".r
	val LockedRegex = """\s+- locked (.*)""".r
	val ParkingRegex = """\s+- parking (.*)""".r

	// Threads kept by parse(), accumulated across all files (most recent first).
	var threads : List[StackThread] = List.empty[StackThread]

	// Parses every jstack dump in `files` with a line-driven state machine and
	// prepends each retained thread to the script-level `threads` list.
	//
	// Layout expected in each file: two preamble lines, then repeated thread
	// sections made of a header line, one line that is skipped, the stack lines,
	// a blank line, a "Locked ownable synchronizers:" section and another blank
	// line. A thread is only recorded once the blank line that ends its
	// synchronizers section has been read.
	//
	// Throws MatchError when a non-blank line found where a thread header is
	// expected does not match ThreadHeaderRegex.
	def parse(files : Seq[File]) : Unit = {
	  for (file <- files) {
	    var currentThread : StackThread = null
	    var state : State = Initial
	    var frames : List[StackFrame] = List()
	    val source = scala.io.Source.fromFile(file)
	    // try/finally so the file handle is released even when a malformed line
	    // aborts parsing.
	    try {
	      for (line <- source.getLines()) {
	        state match {
	          case Initial => {
	            state = _state(SeenDate, line)
	          }
	          case SeenDate => {
	            state = _state(Body, line)
	            frames = List()
	          }
	          case Body => {
	            if (line.trim != "") {
	              // Single-case match: a line that is not a thread header raises
	              // MatchError.
	              line match {
	                case ThreadHeaderRegex(name, _, _) => {
	                  state = _state(SeenHeader, line)
	                  require (currentThread == null)
	                  // TODO: filter by name so we don't collect useless data
	                  currentThread = new StackThread(name)
	                }
	              }
	            }
	          }
	          case SeenHeader => {
	            // The line following the header is skipped without inspection.
	            state = _state(InThread, line)
	          }
	          case InThread => {
	            if (line.trim == "") {
	              // Blank line: the stack of the current thread is complete.
	              state = _state(SeenThread, line)
	              currentThread.stackFrames =
	                  if (OPTION_TRIM_FROM_BOTTOM) trimFrames(frames) else frames
	              if (OPTION_CLEAN_FROM_TOP) {
	                currentThread.stackFrames = cleanThread(currentThread.stackFrames)
	                if (currentThread.stackFrames.isEmpty) {
	                  // Pretend this thread never happened
	                  currentThread = null
	                }
	                // Special case: strip automatic import threads that are
	                // sleeping
	                else if (currentThread.stackFrames.last.method.contains("AutomaticImportMonitor.sleepUntilNextPoll")) {
	                  currentThread = null
	                }
	              }
	              frames = List()
	            }
	            else {
	              // Test and extraction use the same unanchored match, so a
	              // recognized frame line can never produce a null frame.
	              StackFrameRegex.findFirstMatchIn(line) match {
	                case Some(m) =>
	                  // Prepending: the list ends up in reverse file order.
	                  frames ::= StackFrame(m.group(1), m.group(2))
	                case None =>
	                  // Lock annotations are recognized but ignored.
	                  val isLockNote = WaitingRegex.findFirstIn(line).isDefined ||
	                                   LockedRegex.findFirstIn(line).isDefined ||
	                                   ParkingRegex.findFirstIn(line).isDefined
	                  if (!isLockNote && WARN_UNRECOGNIZED) println("* Unrecognized: [" + line + "]")
	              }
	            }
	          }
	          case SeenThread => {
	            val t = line.trim
	            if (t == "Locked ownable synchronizers:") {
	              state = _state(Synchronizers, line)
	            }
	            else if (t != "") {
	              if (WARN_UNRECOGNIZED) println("* Unrecognized: [" + line + "]")
	            }
	          }
	          case Synchronizers => {
	            // TODO: handle cases where this is not trivial
	            if (line.trim == "") {
	              state = _state(Body, line)
	              // currentThread is null when the thread was discarded above.
	              if (currentThread != null) {
	                threads ::= currentThread
	              }
	              currentThread = null
	            }
	          }
	          case _ => () //println(line)
	        }
	      }
	    }
	    finally {
	      source.close()
	    }
	  }
	}

	// Drops leading frames until the first one whose method starts with
	// PACKAGE_PREFIX; that frame and everything after it are returned. The
	// result is empty when no frame matches.
	def cleanThread(frames : List[StackFrame]) : List[StackFrame] =
	  frames.dropWhile(frame => !frame.method.startsWith(PACKAGE_PREFIX))

	// Keeps the frames up to and including the last one whose method starts
	// with PACKAGE_PREFIX and discards whatever follows it. The result is empty
	// when no frame matches.
	def trimFrames(frames : List[StackFrame]) : List[StackFrame] = {
	  // -1 when nothing matches, which makes take() return an empty list.
	  val lastOurs = frames.lastIndexWhere(_.method.startsWith(PACKAGE_PREFIX))
	  frames.take(lastOurs + 1)
	}

	// Single choke point for state transitions: hands back `s` untouched. `l` is
	// the line that caused the transition and is only read by the trace
	// statement below when it is uncommented.
	//   println(s + "<-- [" + l + "]")
	def _state(s : State, l : String) : State = s

	// Builds a StackFrame from a line containing "at method(location)".
	//
	// The regex is applied unanchored, the same way the findFirstIn guard in
	// parse() applies it, so every line accepted by that guard yields a frame.
	// An anchored extractor would return null for such a line when it carries
	// extra characters around the frame.
	//
	// Returns the parsed frame, or null when the line contains no stack frame.
	def makeStackFrame(line : String) : StackFrame =
	  StackFrameRegex.findFirstMatchIn(line) match {
	    case Some(m) => StackFrame(m.group(1), m.group(2))
	    case None    => null
	  }

	// Node of the merged call tree.
	//   f - the frame this node stands for (analyzeThreads passes null for the root)
	class StackTreeNode(f : StackFrame) {
	  val stackFrame = f
	  // Hit counter; starts at 1 and is incremented by analyzeThreads each time
	  // another stack reaches this node.
	  var count : Int = 1
	  // Child nodes keyed by their frame.
	  var children = mutable.Map[StackFrame, StackTreeNode]()

	  override def toString() = s"[$stackFrame, $count]"
	}

	// Build a huge tree: merges the stacks of all matching threads into one call
	// tree.
	//
	// A thread is merged exactly once when its whole name matches at least one
	// of the regexes in `threadNames`; matching several patterns does not count
	// it several times.
	//
	//   threads     - the parsed threads
	//   threadNames - Java regexes matched against complete thread names
	// Returns the root node (whose frame is null) and the number of threads merged.
	def analyzeThreads(threads : Iterable[StackThread],
	                   threadNames : Set[String]) : (StackTreeNode, Int) = {
	  val root = new StackTreeNode(null)
	  var threadCount = 0
	  for (t <- threads if threadNames.exists(pattern => t.name.matches(pattern))) {
	    threadCount += 1
	    var node = root
	    // A thread whose frames were never assigned contributes no nodes.
	    val stack = if (t.stackFrames == null) Nil else t.stackFrames
	    for (f <- stack) {
	      node = node.children.get(f) match {
	        case Some(child) =>
	          // Path already known: bump its hit count and descend.
	          child.count += 1
	          child
	        case None =>
	          // First stack to reach this frame from here (new nodes start at 1).
	          val fresh = new StackTreeNode(f)
	          node.children += (f -> fresh)
	          fresh
	      }
	    }
	  }
	  (root, threadCount)
	}

	// Prints the thread itself, then one tab-indented line per stack frame.
	def dumpThread(t : StackThread) : Unit = {
	  println(t)
	  t.stackFrames.foreach(frame => println("\t" + frame))
	}

	// Prints `n` as "<indent> <frame> [<count>]" and then recurses into its
	// children with a deeper indent.
	def dump(n : StackTreeNode, indent : String) : Unit = {
	  println(s"$indent ${n.stackFrame} [${n.count}]")
	  val deeper = indent + " "
	  n.children.values.foreach(child => dump(child, deeper))
	}

	// Prints every leaf below `n` whose hit count reaches the hot-spot threshold.
	//   n         - the node to start searching from (the tree root in this script)
	//   leafCount - the total the threshold is derived from; the script passes
	//               the number of merged threads
	def dumpHotLeaves(n : StackTreeNode, leafCount : Int) : Unit = {
	  // Minimum hit count: the HOTSPOT_THRESHOLD fraction of leafCount,
	  // truncated to an Int (so it is 0 for small inputs).
	  val threshold = (leafCount * HOTSPOT_THRESHOLD).toInt
	  println(leafCount + " leaf nodes; printing those with >= "
	          + threshold + " hits")
	  def search(node : StackTreeNode) : Unit = {
	    if (node.children.isEmpty) {
	      // A childless node without a frame is the root of an empty tree, not
	      // a real leaf, so it is never reported.
	      if (node.stackFrame != null && node.count >= threshold) {
	        println(node)
	      }
	    }
	    else {
	      node.children.values.foreach(search)
	    }
	  }
	  search(n)
	}

	//
	// Here's the actual program
	// Notes:
	// - My scala sucks
	// - Threadnames can be (Java) regexes
	// - items in the file list can be directories
	// Text shown when the script is started without any argument.
	val usage = """
	Usage: ProcessJStack [-t thread1 -t thread2 ...] [files]
	"""
	// Nothing to do without arguments: print the usage text and stop.
	if (argv.isEmpty) {
	  println(usage)
	  sys.exit()
	}
	// Splits command-line arguments into thread-name patterns and file names.
	//   threads - patterns collected so far
	//   files   - file names collected so far
	//   list    - arguments still to be examined
	// Every "-t <value>" pair adds <value> to the patterns; any other argument
	// (including a trailing "-t" without a value) is taken as a file name. Both
	// results are built by prepending, so they come out in reverse order.
	def parseArg(threads : List[String], files : List[String],
	             list : List[String]) : (List[String], List[String]) = {
	  var patterns = threads
	  var paths = files
	  var remaining = list
	  while (remaining.nonEmpty) {
	    remaining match {
	      case "-t" :: pattern :: rest =>
	        patterns = pattern :: patterns
	        remaining = rest
	      case other :: rest =>
	        paths = other :: paths
	        remaining = rest
	      case Nil => () // not reachable: the loop guard excludes the empty list
	    }
	  }
	  (patterns, paths)
	}
	// Split the command line into thread-name patterns and file/directory names.
	val (threadNames, fileNames) = parseArg(Nil, Nil, argv.toList)

	// Expand the names into files: the entries of a directory are appended,
	// a plain file is prepended.
	var fileList = List[File]()
	fileNames.foreach { fileName =>
	  val candidate = new File(fileName)
	  fileList =
	    if (candidate.isDirectory()) fileList ++ candidate.listFiles().toList
	    else candidate +: fileList
	}

	parse(fileList)
	println("Found " + threads.size + " threads")

	// Merge the matching threads into a tree, print each top-level subtree,
	// then print the hot leaves.
	val (root, threadCount) = analyzeThreads(threads, threadNames.toSet)
	root.children.values.foreach(child => dump(child, ""))
	dumpHotLeaves(root, threadCount)