Last active
March 29, 2017 20:40
-
-
Save brockpalen/b4e59efb94ade080d17d to your computer and use it in GitHub Desktop.
Lustre Logstash Files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
#Brock Palen | |
# brockp@umich.edu | |
# | |
''' | |
Takes data in the form of: | |
metric number | |
snapshot_time 1396141904.951010 secs.usecs | |
open 28540765918 samples [reqs] | |
close 10256936166 samples [reqs] | |
mknod 30061 samples [reqs] | |
and creates a json version: | |
metric: number | |
''' | |
import sys | |
try: #rhel5 doesn't have json | |
import json | |
except ImportError: | |
import simplejson as json | |
def dictify(filename):
    '''
    Read a Lustre stats file and print it to stdout as one JSON object.

    Each non-empty line is split on whitespace; the first field becomes the
    key.  The value is the last field when that field is all digits (the
    OST byte-counter layout), otherwise the second field.  All values are
    kept as strings.  The path of the file is stored under the 'source' key.

    filename -- path of the stats file to read.

    If the file cannot be opened a message is written to stderr and the
    process exits with status -1.
    '''
    try:
        f = open(filename, 'r')
    except (IOError, OSError):
        sys.stderr.write("failed to open " + filename + "\n")
        sys.exit(-1)

    data = {'source': f.name}
    # Plain try/finally (not "with") and sys.stdout.write (not print) so the
    # same code runs unchanged on old and new Python interpreters.
    try:
        # read the structure a line at a time and build a dict out of it
        for line in f:
            words = line.split()
            # blank lines or lines with a name but no value carry no metric
            if len(words) < 2:
                continue
            # OST's use formats where the last number is the one you want
            # read_bytes 100121201 samples [bytes] 0 1048576 54023523712987
            if words[-1].isdigit():
                data[words[0]] = words[-1]
            else:
                data[words[0]] = words[1]
    finally:
        f.close()

    sys.stdout.write(json.JSONEncoder().encode(data) + "\n")
# Emit one JSON line for every stats file named on the command line
# (sys.argv[0] is the script itself, so it is skipped).
for filename in sys.argv[1:]:
    dictify(filename)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#collection of loggrabs for lustre nodes | |
#Brock Palen brockp@umich.edu | |
input {
  # Metadata server inputs: the exec commands below read the MDT stats
  # files under /proc/fs/lustre/mdt through the JSON helper script.

  # Lines fed on stdin are tagged "lustre-stdin" and parsed by the grok
  # in the filter section.
  stdin {
    type => "lustre-stdin"
  }

  exec {
    # These are the per client stats (one stats file per export). There
    # are lots of them, so if things get overloaded this is the first
    # place to look.
    type => "lustre-client-stats"
    command => "/root/logstash.git/helpers/json-stats-wrapper.py /proc/fs/lustre/mdt/*/exports/*/stats"
    # the helper prints one JSON object per line, one line per file
    codec => json_lines
    interval => 120
  }

  exec {
    # These are the per MDT stats; these are probably the most interesting.
    type => "lustre-server-stats"
    command => "/root/logstash.git/helpers/json-stats-wrapper.py /proc/fs/lustre/mdt/*/md_stats"
    codec => json_lines
    interval => 10
  }
}
filter {
  if [type] == "lustre-stdin" {
    grok {
      # Grab the filesystem name, target type (MDT/OST), device id, metric
      # name and count from a "<path>/<metric> <count>" style line.
      # Both the "mds" and "mdt" directory names are accepted so the same
      # pattern works on the MDS and OSS configs.
      match => [ "message", "(mds|mdt|obdfilter)/%{WORD:fsname}-(?<ltype>(MDT|OST))%{BASE16NUM:ldevid}%{GREEDYDATA}/%{WORD:metric} %{NUMBER:count}"]
    }
  }

  if [type] == "lustre-client-stats" or [type] == "lustre-server-stats" {
    # Example of the data produced by the helper script:
    #{'rename': '6703220', 'sync': '7016939', 'llog_init': '30', 'mknod': '30071', 'connect': '2266', 'reconnect': '780', 'close': '10260863572', 'open': '28550349796', 'disconnect': '2109', 'create': '3186', 'quotactl': '4609', 'mkdir': '24263000', 'source': '/proc/fs/lustre/mds/scratch-MDT0000/stats', 'getattr': '4903414744', 'rmdir': '17575276', 'destroy': '3147', 'snapshot_time': '1396145902.593900', 'getxattr': '5825862', 'link': '4331053', 'unlink': '208715050', 'process_config': '2', 'setattr': '284966835', 'statfs': '222', 'notify': '163'}
    json {
      source => "message"
      # these two fields are not wanted downstream
      remove_field => [ "command", "snapshot_time" ]
    }

    if [type] == "lustre-client-stats" {
      # The stats file path ("source") encodes filesystem, target type,
      # device id and the client the export belongs to.
      grok {
        match => [ "source", "(mds|mdt|obdfilter)/%{WORD:fsname}-(?<ltype>(MDT|OST))%{BASE16NUM:ldevid}/exports/%{IP:client}"]
      }
      # Clients are identified by ip@nid, e.g. 10.255.255.1@o2ib; only the
      # IP part is captured above. Graphite splits metric names on dots,
      # which makes grouping by client hard, so replace the dots with
      # dashes. A lookup to a host name would be nicer.
      mutate {
        gsub => [
          "client", "\.", "-"
        ]
      }
    } else if [type] == "lustre-server-stats" {
      # Server stats only need filesystem, target type and device id.
      grok {
        match => [ "source", "(mds|mdt|obdfilter)/%{WORD:fsname}-(?<ltype>(MDT|OST))%{BASE16NUM:ldevid}"]
      }
    }
  }
}
output {
  if [type] == "lustre-client-stats" {
    # uncomment to dump every event while debugging
    #stdout { codec => rubydebug }
    graphite {
      host => "GRAPHITE.SERVER.EDU"
      # Metric names look like:
      #   lustre.scratch.MDT.0000.<client>.open
      #   lustre.scratch.OST.00a1.<client>.*
      # The list is flat: metric name, then the event field holding its value.
      metrics => [
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.open", "%{open}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.close", "%{close}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.rename", "%{rename}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.samedir_rename", "%{samedir_rename}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.crossdir_rename", "%{crossdir_rename}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.sync", "%{sync}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.mknod", "%{mknod}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.mkdir", "%{mkdir}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.rmdir", "%{rmdir}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.getattr", "%{getattr}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.setattr", "%{setattr}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.getxattr", "%{getxattr}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.setxattr", "%{setxattr}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.link", "%{link}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.unlink", "%{unlink}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.statfs", "%{statfs}"
      ]
    }
  }

  if [type] == "lustre-server-stats" {
    # uncomment to dump every event while debugging
    #stdout { codec => rubydebug }
    graphite {
      host => "GRAPHITE.SERVER.EDU"
      # Metric names look like:
      #   lustre.scratch.MDT.0000.open
      #   lustre.scratch.OST.00a1.*
      metrics => [
        "lustre.%{fsname}.%{ltype}.%{ldevid}.open", "%{open}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.close", "%{close}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.rename", "%{rename}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.samedir_rename", "%{samedir_rename}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.crossdir_rename", "%{crossdir_rename}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.sync", "%{sync}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.mknod", "%{mknod}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.mkdir", "%{mkdir}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.rmdir", "%{rmdir}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.getattr", "%{getattr}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.setattr", "%{setattr}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.getxattr", "%{getxattr}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.setxattr", "%{setxattr}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.link", "%{link}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.unlink", "%{unlink}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.statfs", "%{statfs}"
      ]
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#collection of loggrabs for lustre nodes | |
#Brock Palen brockp@umich.edu | |
input {
  # Object storage server inputs: the exec commands below read the OST
  # stats files under /proc/fs/lustre/obdfilter through the JSON helper
  # script.

  # Lines fed on stdin are tagged "lustre-stdin" and parsed by the grok
  # in the filter section.
  stdin {
    type => "lustre-stdin"
  }

  exec {
    # These are the per client stats (one stats file per export). There
    # are lots of them, so if things get overloaded this is the first
    # place to look.
    type => "lustre-client-stats"
    command => "/root/logstash.git/helpers/json-stats-wrapper.py /proc/fs/lustre/obdfilter/*/exports/*/stats"
    # the helper prints one JSON object per line, one line per file
    codec => json_lines
    interval => 120
  }

  exec {
    # These are the per OST stats; these are probably the most interesting.
    type => "lustre-server-stats"
    command => "/root/logstash.git/helpers/json-stats-wrapper.py /proc/fs/lustre/obdfilter/*/stats"
    codec => json_lines
    interval => 10
  }
}
filter {
  if [type] == "lustre-stdin" {
    grok {
      # Grab the filesystem name, target type (MDT/OST), device id, metric
      # name and count from a "<path>/<metric> <count>" style line.
      # Both the "mds" and "mdt" directory names are accepted so the same
      # pattern works on the MDS and OSS configs.
      match => [ "message", "(mds|mdt|obdfilter)/%{WORD:fsname}-(?<ltype>(MDT|OST))%{BASE16NUM:ldevid}%{GREEDYDATA}/%{WORD:metric} %{NUMBER:count}"]
    }
  }

  if [type] == "lustre-client-stats" or [type] == "lustre-server-stats" {
    # Example of the data produced by the helper script:
    #{'rename': '6703220', 'sync': '7016939', 'llog_init': '30', 'mknod': '30071', 'connect': '2266', 'reconnect': '780', 'close': '10260863572', 'open': '28550349796', 'disconnect': '2109', 'create': '3186', 'quotactl': '4609', 'mkdir': '24263000', 'source': '/proc/fs/lustre/mds/scratch-MDT0000/stats', 'getattr': '4903414744', 'rmdir': '17575276', 'destroy': '3147', 'snapshot_time': '1396145902.593900', 'getxattr': '5825862', 'link': '4331053', 'unlink': '208715050', 'process_config': '2', 'setattr': '284966835', 'statfs': '222', 'notify': '163'}
    json {
      source => "message"
      # these two fields are not wanted downstream
      remove_field => [ "command", "snapshot_time" ]
    }

    if [type] == "lustre-client-stats" {
      # The stats file path ("source") encodes filesystem, target type,
      # device id and the client the export belongs to.
      grok {
        match => [ "source", "(mds|mdt|obdfilter)/%{WORD:fsname}-(?<ltype>(MDT|OST))%{BASE16NUM:ldevid}/exports/%{IP:client}"]
      }
      # Clients are identified by ip@nid, e.g. 10.255.255.1@o2ib; only the
      # IP part is captured above. Graphite splits metric names on dots,
      # which makes grouping by client hard, so replace the dots with
      # dashes. A lookup to a host name would be nicer.
      mutate {
        gsub => [
          "client", "\.", "-"
        ]
      }
    } else if [type] == "lustre-server-stats" {
      # Server stats only need filesystem, target type and device id.
      grok {
        match => [ "source", "(mds|mdt|obdfilter)/%{WORD:fsname}-(?<ltype>(MDT|OST))%{BASE16NUM:ldevid}"]
      }
    }
  }
}
output {
  # uncomment to dump every event (of any type) while debugging
  #stdout { codec => rubydebug }
  if [type] == "lustre-client-stats" {
    #stdout { codec => rubydebug }
    graphite {
      host => "GRAPHITE.SERVER.EDU"
      # Metric names look like:
      #   lustre.scratch.OST.00a1.<client>.read_bytes
      # The list is flat: metric name, then the event field holding its value.
      # Only sync, read/write bytes and ping are sent per client; the
      # remaining per-client metrics are left disabled below.
      metrics => [
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.sync", "%{sync}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.write_bytes", "%{write_bytes}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.read_bytes", "%{read_bytes}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.get_info", "%{get_info}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.set_info_async", "%{set_info_async}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.process_config", "%{process_config}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.connect", "%{connect}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.reconnect", "%{reconnect}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.disconnect", "%{disconnect}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.statfs", "%{statfs}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.create", "%{create}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.destroy", "%{destroy}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.setattr", "%{setattr}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.punch", "%{punch}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.preprw", "%{preprw}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.commitrw", "%{commitrw}",
        #"lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.quotactl", "%{quotactl}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.%{client}.ping", "%{ping}"
      ]
    }
  }

  if [type] == "lustre-server-stats" {
    #stdout { codec => rubydebug }
    graphite {
      host => "GRAPHITE.SERVER.EDU"
      # Metric names look like:
      #   lustre.scratch.OST.00a1.read_bytes
      metrics => [
        "lustre.%{fsname}.%{ltype}.%{ldevid}.sync", "%{sync}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.write_bytes", "%{write_bytes}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.read_bytes", "%{read_bytes}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.get_info", "%{get_info}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.set_info_async", "%{set_info_async}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.process_config", "%{process_config}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.connect", "%{connect}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.reconnect", "%{reconnect}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.disconnect", "%{disconnect}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.statfs", "%{statfs}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.create", "%{create}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.destroy", "%{destroy}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.setattr", "%{setattr}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.punch", "%{punch}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.preprw", "%{preprw}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.commitrw", "%{commitrw}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.quotactl", "%{quotactl}",
        "lustre.%{fsname}.%{ltype}.%{ldevid}.ping", "%{ping}"
      ]
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment