Logstash custom patterns for Lustre
# Custom grok patterns used by the Lustre grok filter.
#
# Object name: two to four dash-separated words (e.g. "lustre-OST0000").
LUSTRE_OBJECT %{WORD}(-%{WORD}){1,3}
# LNet endpoint written as <ip>@<network> (e.g. "10.0.0.1@tcp").
LUSTRE_LNET %{IP}@%{WORD}
# Source location "<file>.c:<line>" (".h" is accepted as well). The dot is
# escaped so that only a literal "." is accepted before the extension.
LUSTRE_SOURCECODE (%{USERNAME}\.[ch]:%{INT})
# Return code in any of the forms "rc = -5", "rc -5" or "rc 0/-1".
# The "=" and the blank after it are independently optional, so the forms
# without "=" need only a single space. The pair form is tried first so a
# pair is never half-matched as a single code; error_code is captured only
# for the single-value form.
LUSTRE_ERRCODE rc (=)? ?(%{INT}/%{INT}|%{INT:error_code})
# Prefix form 1: "<facility>: [<word>-<word>: ]<object>:" -- captures lustre_object.
LUSTRE_LOGPREFIX1 (Lustre|LustreError|LNetError): (%{WORD}-%{WORD}: )?%{LUSTRE_OBJECT:lustre_object}:
# Prefix form 2: "<facility>: <word>:<word>:(<file>:<line>:<function>())"
# -- captures lustre_source and lustre_function.
LUSTRE_LOGPREFIX2 (Lustre|LustreError|LNet|LNetError):%{SPACE}%{WORD}:%{WORD}:\(%{LUSTRE_SOURCECODE:lustre_source}:%{USERNAME:lustre_function}\(\)\)
# Prefix form 3: bare "<facility>:" with nothing captured (fallback).
LUSTRE_LOGPREFIX3 (Lustre|LustreError|LNet|LNetError):
# Any of the three prefix forms, most specific first.
LUSTRE_LOGPREFIX (%{LUSTRE_LOGPREFIX1}|%{LUSTRE_LOGPREFIX2}|%{LUSTRE_LOGPREFIX3})
Logstash grok pattern matching for Lustre
# Parse Lustre / LNet / LDISKFS kernel messages and tag every event that
# matches with "lustre".
#
# Patterns are tried in the order listed and grok stops at the first one
# that matches, so specific patterns must come before the generic
# "%{LUSTRE_LOGPREFIX} %{GREEDYDATA}" catch-all. The "Lustre: Mounted ..."
# pattern is therefore listed ahead of the catch-all; placed after it, it
# could never be reached because the catch-all already matches any line
# starting with "Lustre: ".
grok {
  patterns_dir => "/etc/logstash/patterns"
  match => [ "message", "%{LUSTRE_LOGPREFIX} not available for connect from %{LUSTRE_LNET:lustre_host} \(no target\)",
             "message", "%{LUSTRE_LOGPREFIX} This client was evicted by %{LUSTRE_OBJECT}; in progress operations using this service will fail\.",
             "message", "%{LUSTRE_LOGPREFIX} %{LUSTRE_OBJECT:lustre_object}: %{USERNAME} failed for resource %{DATA:lustre_resource}: %{LUSTRE_ERRCODE}",
             "message", "%{LUSTRE_LOGPREFIX} %{LUSTRE_OBJECT:lustre_object}: error destroying precreated id %{DATA}:%{DATA}: %{LUSTRE_ERRCODE}",
             "message", "%{LUSTRE_LOGPREFIX} Bulk IO write error with %{UUID:lustre_uuid} \(at %{LUSTRE_LNET:lustre_host}\), client will retry: %{LUSTRE_ERRCODE}",
             "message", "%{LUSTRE_LOGPREFIX} @@@ %{GREEDYDATA}: %{LUSTRE_ERRCODE} %{GREEDYDATA} %{WORD}->%{UUID:lustre_uuid}@%{LUSTRE_LNET:lustre_host}:%{GREEDYDATA}",
             "message", "%{LUSTRE_LOGPREFIX} Communicating with %{LUSTRE_LNET:lustre_host}, %{GREEDYDATA}",
             "message", "%{LUSTRE_LOGPREFIX} Client %{UUID:lustre_uuid} \(at %{LUSTRE_LNET:lustre_host}\) reconnecting",
             "message", "%{LUSTRE_LOGPREFIX} Client %{UUID:lustre_uuid} \(at %{LUSTRE_LNET:lustre_host}\) refused reconnection, %{GREEDYDATA}",
             "message", "%{LUSTRE_LOGPREFIX} Connection restored to %{LUSTRE_OBJECT} \(at %{LUSTRE_LNET:lustre_host}\)",
             "message", "%{LUSTRE_LOGPREFIX} binary \[%{DATA:lustre_resource}\] changed while waiting for the page fault lock",
             "message", "Lustre: Mounted %{LUSTRE_OBJECT:lustre_object}",
             "message", "%{LUSTRE_LOGPREFIX} %{GREEDYDATA}",
             "message", "LDISKFS-fs \(%{USERNAME:device}\): %{GREEDYDATA}" ]
  add_tag => [ "lustre" ]
}
# Add extra tags to lustre messages based on type.
# Everything below applies only to events the grok filter tagged "lustre".
if "lustre" in [tags] {
  # Replace the generic "lustre" tag with a more specific one when the
  # message starts with an LNet / LNetError / LustreError prefix. The three
  # prefixes are mutually exclusive, so at most one branch runs; plain
  # "Lustre:" messages keep the "lustre" tag.
  if [message] =~ /^LNet:/ {
    mutate {
      add_tag => [ "lnet" ]
      remove_tag => [ "lustre" ]
    }
  } else if [message] =~ /^LNetError:/ {
    mutate {
      add_tag => [ "lnet_error" ]
      remove_tag => [ "lustre" ]
    }
  } else if [message] =~ /^LustreError:/ {
    mutate {
      add_tag => [ "lustre_error" ]
      remove_tag => [ "lustre" ]
    }
  }

  # Set up a timer to count the time between a lustre connection going
  # down and coming back.
  #
  # The correlation key lustre_conn_id is only added when grok captured
  # lustre_object. Without that guard the unresolved reference would be kept
  # as the literal text "%{lustre_object}", giving every such event on a
  # host the same key and letting unrelated lost/restored events be paired.
  # NOTE(review): events without lustre_conn_id are expected to be ignored
  # by the elapsed filter -- confirm against the plugin documentation.

  # This is the start event
  if [message] =~ /was lost;/ {
    mutate {
      add_tag => [ "lustre_conn_lost" ]
    }
    if [lustre_object] {
      mutate {
        add_field => [ "lustre_conn_id", "%{syslog_hostname}:%{lustre_object}" ]
      }
    }
  }

  # This is the end event
  if [message] =~ /Connection restored/ {
    mutate {
      add_tag => [ "lustre_conn_restored" ]
    }
    if [lustre_object] {
      mutate {
        add_field => [ "lustre_conn_id", "%{syslog_hostname}:%{lustre_object}" ]
      }
    }
  }

  # This counts the elapsed time between start and end events that share
  # the same lustre_conn_id. A start event with no matching end event
  # expires after the timeout (43200; 12 hours if the unit is seconds --
  # confirm against the plugin documentation).
  elapsed {
    start_tag => "lustre_conn_lost"
    end_tag => "lustre_conn_restored"
    unique_id_field => "lustre_conn_id"
    timeout => 43200
  }
}