This gist contains my first attempt at parsing IIS (W3C extended format) log files. The specification of the log format can be found here: http://www.w3.org/TR/WD-logfile.html
Created
January 3, 2016 21:29
-
-
Save anonymous/686f79615e76164e3edc to your computer and use it in GitHub Desktop.
First try to parse IIS (W3C extended format) log files with Scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Script entry point: reads "testlog.log" from the working directory and prints
// every parsed log record (one column-name -> value map per line) to stdout.
// http://stackoverflow.com/questions/1284423/read-entire-file-in-scala
//val source = scala.io.Source.fromFile("G:\\test\\2015-12 Logging Eventhub\\W3SVC916189047\\u_ex151130.log")
val source = scala.io.Source.fromFile("testlog.log")
// getLines() is lazy: lines are pulled from the file only while the records are consumed below.
val lines = source.getLines()
try {
  //getLogItems(lines).take(5).foreach { line => println(line) }
  getLogItems(lines).foreach { line => println(line) }
} finally {
  // Release the underlying file handle even if parsing or printing fails.
  source.close()
}
/**
 * Iterator over the data records of a log made of `#`-prefixed directive lines
 * and space-separated data lines.
 *
 * Each data line is split on single spaces and paired positionally with the column
 * names taken from the most recent `#Fields: ` directive, producing one mutable map
 * per record. A later `#Fields: ` directive replaces the column names for the records
 * that follow it.
 *
 * Handling of the remaining line types:
 *  - blank (empty or whitespace-only) lines are skipped;
 *  - every line starting with `#` is echoed to standard output prefixed with `#: `
 *    and is never returned as a record;
 *  - when a data line and the current column list differ in length, the surplus
 *    values (or columns) are ignored; a data line read before any `#Fields: `
 *    directive therefore yields an empty map.
 *
 * The iterator keeps one record of look-ahead, so `lines` is always read one record
 * beyond the element most recently returned by `next()`.
 *
 * @param lines the raw lines of the log, in file order
 */
class LogItemsIterator(lines : Iterator[String]) extends Iterator[collection.mutable.Map[String, String]] {
  /** Buffered look-ahead record: the element the following `next()` call returns, if any. */
  var _nextLine : Option[collection.mutable.Map[String, String]] = None
  /** Column names from the most recent `#Fields: ` directive; empty until one has been read. */
  var _columns : Array[String] = Array.empty[String]
  /** `None` until the first look-ahead has run; afterwards whether a record is buffered. */
  var _hasNext : Option[Boolean] = None

  /** @return `true` if another record is available; triggers the first look-ahead if needed. */
  def hasNext : Boolean = _hasNext.getOrElse(nextLine().isDefined)

  /**
   * Reads `lines` up to and including the next data line and buffers the resulting record.
   *
   * Updates `_columns` for every `#Fields: ` directive met on the way, and sets
   * `_nextLine` / `_hasNext` to reflect the outcome.
   *
   * @return the newly buffered record, or `None` when `lines` is exhausted
   */
  def nextLine() : Option[collection.mutable.Map[String, String]] = {
    var record : Option[collection.mutable.Map[String, String]] = None
    while (record.isEmpty && lines.hasNext) {
      val line = lines.next()
      if (line.trim.isEmpty) {
        // Blank lines carry no record; skipping them also keeps charAt(0) below safe.
      } else if (line.charAt(0) == '#') {
        println("#: " + line)
        if (line.startsWith("#Fields: ")) {
          _columns = line.split(" ").drop(1) // remove "#Fields:"
        }
      } else {
        val row = new collection.mutable.HashMap[String, String]
        // zip stops at the shorter side, so a column/value count mismatch cannot
        // index out of bounds.
        row ++= _columns.zip(line.split(" "))
        record = Some(row)
      }
    }
    _hasNext = Some(record.isDefined)
    _nextLine = record
    record
  }

  /**
   * Returns the buffered record and advances the look-ahead by one record.
   *
   * @throws NoSuchElementException if no record is left
   */
  def next() : collection.mutable.Map[String, String] = {
    if (!hasNext) {
      throw new NoSuchElementException("next on empty iterator")
    }
    val current = _nextLine.getOrElse(throw new NoSuchElementException("next on empty iterator"))
    nextLine()
    current
  }
}
/**
 * Wraps raw log lines in a `LogItemsIterator`.
 *
 * @param lines the lines of the log, in order
 * @return an iterator producing one column-name -> value map per record
 */
def getLogItems(lines : Iterator[String]) : Iterator[collection.mutable.Map[String, String]] =
  new LogItemsIterator(lines)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Software: Microsoft Internet Information Services 7.5 | |
#Version: 1.0 | |
#Date: 2015-11-30 00:00:00 | |
#Fields: date time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus sc-win32-status time-taken | |
2015-11-30 00:00:00 145.85.27.38 EERSTE / - 80 - 145.85.27.38 - 200 0 0 2 | |
2015-11-30 00:00:02 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 2 | |
2015-11-30 00:00:04 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 3 | |
2015-11-30 00:00:06 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 14 | |
2015-11-30 00:00:08 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 2 | |
2015-11-30 00:18:56 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 1 | |
2015-11-30 00:18:58 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 2 | |
2015-11-30 00:19:03 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 503 0 64 3778 | |
2015-11-30 00:19:03 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 503 0 64 3380 | |
#Software: Microsoft Internet Information Services 7.5 | |
#Version: 1.0 | |
#Date: 2015-11-30 00:55:26 | |
#Fields: date2 time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus sc-win32-status time-taken | |
2015-11-30 00:55:26 145.85.27.38 GET / - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 1 2148074254 308 | |
2015-11-30 00:55:26 145.85.27.38 GET / - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 456 | |
2015-11-30 00:55:26 145.85.27.38 GET /_vti_bin/sitedata.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 1 2148074254 340 | |
2015-11-30 00:55:26 145.85.27.38 GET /_vti_bin/sitedata.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 1 2148074254 340 | |
2015-11-30 00:55:26 145.85.27.38 POST /_vti_bin/publishingservice.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 340 | |
2015-11-30 00:55:26 145.85.27.38 GET /_vti_bin/sitedata.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 1 2148074254 289 | |
2015-11-30 00:55:26 145.85.27.38 POST /_vti_bin/publishingservice.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 365 | |
2015-11-30 00:55:26 145.85.27.38 GET /_vti_bin/sitedata.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 471 | |
2015-11-30 00:55:26 127.0.0.1 GET /_vti_bin/sitedata.asmx - 80 - 127.0.0.1 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 458 | |
2015-11-30 00:55:26 145.85.27.38 GET / - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 1 2148074254 282 | |
2015-11-30 00:55:26 145.85.27.38 GET /_vti_bin/sitedata.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 424 | |
2015-11-30 00:55:26 127.0.0.1 GET / - 80 - 127.0.0.1 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 471 | |
2015-11-30 00:55:26 145.85.27.38 LAATSTE /_vti_bin/publishingservice.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 471 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment