Skip to content

Instantly share code, notes, and snippets.

@yogi87
Created October 6, 2015 20:54
Show Gist options
  • Save yogi87/e35ba1c5284c49bffff3 to your computer and use it in GitHub Desktop.
Save yogi87/e35ba1c5284c49bffff3 to your computer and use it in GitHub Desktop.
// anomaly detection based on Median Absolute Deviation estimates of
// standard deviation (robust to outliers and non-normal data)
//
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/stdj.juttle' as stdj;
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/math.juttle' as math;
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/ts.juttle' as ts;
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/mav.juttle' as mav;
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/seasonal.juttle' as seasonal;
import 'https://gist.githubusercontent.com/welch/3f7b696beab6ba1b55bb/raw/forecast.juttle' as forecast;
// Daily normalization of a single series.
//
// Pipes the field named by `in` through seasonal.squash_daily, then through
// mav.normalize over a 3-day window, writing the result to the field named
// by `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
// The scratch field _normalize_squashed is removed before points leave
// this sub.
export sub normalize_daily(in, out) {
    seasonal.squash_daily -out '_normalize_squashed' -in in
    | mav.normalize -in '_normalize_squashed' -out out -over :3d:
    | remove _normalize_squashed
}
// Grouped variant of daily normalization.
//
// Pipes the field named by `in` through seasonal.squash_daily_by, then
// through mav.normalize_by over a 3-day window, forwarding `by` to both
// stages and writing the result to the field named by `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
//   by  - grouping field, passed through as -by
// The scratch field _normalize_squashed is removed before points leave
// this sub.
export sub normalize_daily_by(in, out, by) {
    seasonal.squash_daily_by -by by -out '_normalize_squashed' -in in
    | mav.normalize_by -by by -in '_normalize_squashed' -out out -over :3d:
    | remove _normalize_squashed
}
// Daily normalization of the forecast error of a single series.
//
// Stages: seasonal.squash_daily -> forecast.forecast_err -> mav.normalize
// (3-day window). The final value is written to the field named by `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized forecast error
// Scratch fields _normalize_squashed and _normalize_err are removed before
// points leave this sub.
export sub normalize_forecast_daily(in, out) {
    seasonal.squash_daily -out '_normalize_squashed' -in in
    | forecast.forecast_err -out '_normalize_err' -in '_normalize_squashed'
    | mav.normalize -in '_normalize_err' -out out -over :3d:
    | remove _normalize_squashed, _normalize_err
}
// Grouped variant of daily forecast-error normalization.
//
// Stages: seasonal.squash_daily_by -> forecast.forecast_err_by ->
// mav.normalize_by (3-day window), with `by` forwarded to every stage.
//   in  - name of the input field
//   out - name of the field that receives the normalized forecast error
//   by  - grouping field, passed through as -by
// Scratch fields _normalize_squashed and _normalize_err are removed before
// points leave this sub.
export sub normalize_forecast_daily_by(in, out, by) {
    seasonal.squash_daily_by -by by -out '_normalize_squashed' -in in
    | forecast.forecast_err_by -by by -out '_normalize_err' -in '_normalize_squashed'
    | mav.normalize_by -by by -in '_normalize_err' -out out -over :3d:
    | remove _normalize_squashed, _normalize_err
}
// Flag outliers in a daily-normalized series.
//
// Runs normalize_daily from `in` into `out`, then overwrites `out` with the
// absolute normalized value when the point is later than `after` AND that
// magnitude exceeds `sigma`; every other point gets 0.
//   in    - name of the input field
//   out   - name of the field that receives the outlier magnitude (or 0)
//   after - points with time <= after are never flagged
//   sigma - magnitude threshold
export sub outlier_daily(in, out, after, sigma) {
    normalize_daily -out out -in in
    | put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0
}
// Grouped variant: flag outliers in daily-normalized series.
//
// Runs normalize_daily_by from `in` into `out` (grouped by `by`), then
// overwrites `out` with the absolute normalized value when the point is
// later than `after` AND that magnitude exceeds `sigma`; otherwise 0.
//   in    - name of the input field
//   out   - name of the field that receives the outlier magnitude (or 0)
//   after - points with time <= after are never flagged
//   sigma - magnitude threshold
//   by    - grouping field, passed through as -by
export sub outlier_daily_by(in, out, after, sigma, by) {
    normalize_daily_by -by by -out out -in in
    | put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0
}
// Flag outliers in the daily-normalized forecast error of a series.
//
// Runs normalize_forecast_daily from `in` into `out`, then overwrites `out`
// with the absolute normalized value when the point is later than `after`
// AND that magnitude exceeds `sigma`; every other point gets 0.
//   in    - name of the input field
//   out   - name of the field that receives the outlier magnitude (or 0)
//   after - points with time <= after are never flagged
//   sigma - magnitude threshold
export sub outlier_forecast_daily(in, out, after, sigma) {
    normalize_forecast_daily -out out -in in
    | put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0
}
// Grouped variant: flag outliers in daily-normalized forecast error.
//
// Runs normalize_forecast_daily_by from `in` into `out` (grouped by `by`),
// then overwrites `out` with the absolute normalized value when the point
// is later than `after` AND that magnitude exceeds `sigma`; otherwise 0.
//   in    - name of the input field
//   out   - name of the field that receives the outlier magnitude (or 0)
//   after - points with time <= after are never flagged
//   sigma - magnitude threshold
//   by    - grouping field, passed through as -by
export sub outlier_forecast_daily_by(in, out, after, sigma, by) {
    normalize_forecast_daily_by -by by -out out -in in
    | put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0
}
// Weekly normalization of a single series.
//
// Pipes the field named by `in` through seasonal.squash_weekly, then through
// mav.normalize over a 3-week window, writing the result to the field named
// by `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
// The scratch field _normalize_squashed is removed before points leave
// this sub.
export sub normalize_weekly(in, out) {
    seasonal.squash_weekly -out '_normalize_squashed' -in in
    | mav.normalize -in '_normalize_squashed' -out out -over :3w:
    | remove _normalize_squashed
}
// Weekly normalization of the forecast error of a single series.
//
// Stages: seasonal.squash_weekly -> forecast.forecast_err -> mav.normalize
// (3-week window). The final value is written to the field named by `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized forecast error
// Scratch fields _normalize_squashed and _normalize_err are removed before
// points leave this sub.
export sub normalize_forecast_weekly(in, out) {
    seasonal.squash_weekly -out '_normalize_squashed' -in in
    | forecast.forecast_err -out '_normalize_err' -in '_normalize_squashed'
    | mav.normalize -in '_normalize_err' -out out -over :3w:
    | remove _normalize_squashed, _normalize_err
}
// Grouped variant of weekly normalization.
//
// Pipes the field named by `in` through seasonal.squash_weekly_by, then
// through mav.normalize_by over a 3-week window, forwarding `by` to both
// stages and writing the result to the field named by `out`.
//   in  - name of the input field
//   out - name of the field that receives the normalized value
//   by  - grouping field, passed through as -by
// The scratch field _normalize_squashed is removed before points leave
// this sub.
export sub normalize_weekly_by(in, out, by) {
    seasonal.squash_weekly_by -by by -out '_normalize_squashed' -in in
    | mav.normalize_by -by by -in '_normalize_squashed' -out out -over :3w:
    | remove _normalize_squashed
}
// Flag outliers in a weekly-normalized series.
//
// Runs normalize_weekly from `in` into `out`, then overwrites `out` with the
// absolute normalized value when the point is later than `after` AND that
// magnitude exceeds `sigma`; every other point gets 0.
//   in    - name of the input field
//   out   - name of the field that receives the outlier magnitude (or 0)
//   after - points with time <= after are never flagged
//   sigma - magnitude threshold
export sub outlier_weekly(in, out, after, sigma) {
    normalize_weekly -out out -in in
    | put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0
}
// Grouped variant: flag outliers in weekly-normalized series.
//
// Runs normalize_weekly_by from `in` into `out` (grouped by `by`), then
// overwrites `out` with the absolute normalized value when the point is
// later than `after` AND that magnitude exceeds `sigma`; otherwise 0.
//   in    - name of the input field
//   out   - name of the field that receives the outlier magnitude (or 0)
//   after - points with time <= after are never flagged
//   sigma - magnitude threshold
//   by    - grouping field, passed through as -by
export sub outlier_weekly_by(in, out, after, sigma, by) {
    normalize_weekly_by -by by -out out -in in
    | put *out = (time > after && Math.abs(*out) > sigma) ? Math.abs(*out) : 0
}
//
// convert the output points of, e.g., outlier_daily into an event
// stream to be overlaid on a timechart having the specified title. a
// nonzero input value indicates an event condition, and the maximum
// nonzero input value/time in each interval will be selected for reporting.
// successive nonzero intervals will be suppressed as belonging to the same
// initial event.
//
// Emit two event streams onto the chart identified by `title`, then merge
// the branches:
//   1. a single "End of training window" marker, and
//   2. one "Outlier" event per run of consecutive nonzero intervals.
//
//   in    - name of the field holding the outlier magnitude (nonzero = outlier)
//   every - batch interval over which one value is selected
//   after - time that ends the training window
//   title - passed to `@events -on` in both branches
export sub to_events(in, every, after, title) {
(
// mark the end of training window
// a point counts as training data while it is strictly earlier than `after`
put _training_data = (time < after)
// true only on the first non-training point.
// NOTE(review): stdj.previous presumably returns the prior point's value,
// using its second argument when there is no prior point -- confirm in stdj.juttle
| put _end_training = (!_training_data && stdj.previous(_training_data, true))
| filter _end_training == true
// drop the scratch flags and the data field named by `in` from the marker point
| remove _training_data, _end_training, in
| put text = "End of training window"
| @events -on title;
// mark non-contiguous outlier events
batch -every every | percentile -p 1.0 in | unbatch // select max over each interval
| put _outlier_event = (*in > 0)
// keep an outlier interval only when the preceding interval was not one, so a
// run of consecutive outlier intervals yields a single event
| put _deduped_event = _outlier_event && !stdj.previous(_outlier_event, false)
| filter _deduped_event == true
| remove _outlier_event, _deduped_event
// label is set from `in` itself (the field name), not from the dereferenced *in value
| put text = "Outlier ("+math.roundStr(*in, 1)+"-sigma)", type = "fa-exclamation-triangle", label=in
| @events -on title;
merge;
)
}
// Grouped variant of to_events. Emits two event streams onto the chart
// identified by `title`, then merges the branches:
//   1. a single "End of training window" marker, and
//   2. one "Outlier" event per run of consecutive nonzero intervals,
//      tracked separately for each value of the `by` field.
//
//   in    - name of the field holding the outlier magnitude (nonzero = outlier)
//   every - batch interval over which one value is selected
//   after - time that ends the training window
//   by    - name of the grouping field
//   title - passed to `@events -on` in both branches
export sub to_events_by(in, every, after, by, title) {
(
// mark the end of training window
// a point counts as training data while it is strictly earlier than `after`
put _training_data = (time < after)
// true only on the first non-training point; note this branch is NOT grouped by `by`.
// NOTE(review): stdj.previous presumably returns the prior point's value,
// using its second argument when there is no prior point -- confirm in stdj.juttle
| put _end_training = (!_training_data && stdj.previous(_training_data, true))
| filter _end_training == true
// drop the scratch flags and the data field named by `in` from the marker point
| remove _training_data, _end_training, in
| put text = "End of training window"
| @events -on title;
// mark non-contiguous outlier events
batch -every every | percentile -p 1.0 in by by | unbatch // select max over each interval
| put _outlier_event = (*in > 0)
// per group: keep an outlier interval only when that group's preceding
// interval was not one, so a run of outlier intervals yields a single event
| put _deduped_event = _outlier_event && !stdj.previous(_outlier_event, false) by by
| filter _deduped_event == true
| remove _outlier_event, _deduped_event
// label carries the point's value of the grouping field (*by dereferences it)
| put text = "Outlier ("+math.roundStr(*in, 1)+"-sigma)", type = "fa-exclamation-triangle", label=*by
| @events -on title;
merge;
)
}
// Chart daily outliers for a single series and overlay outlier events.
//
// Computes outlier magnitudes with outlier_daily into the 'sigma' field,
// discards points at or before `after`, hands the stream to
// ts.split_dual_chart (with 'sigma' as the secondary field), and finally
// sends outlier events to the same chart title via to_events.
//   in    - name of the input field
//   every - event batching interval, forwarded to to_events
//   after - time that ends the training window
//   sigma - magnitude threshold; also embedded in the chart title
//   title - prefix of the chart title
export sub outlier_chart_daily(in, every, after, sigma, title) {
    const sigma_field = 'sigma';
    const chart_title = title + " " + Number.toString(sigma) + '-sigma outliers';
    // score every point, keeping only magnitudes beyond the threshold
    outlier_daily -in in -out sigma_field -after after -sigma sigma
    | filter time > after
    | ts.split_dual_chart -secondary sigma_field -title chart_title
    // overlay the outlier events on the chart drawn above
    | to_events -in sigma_field -title chart_title -every every -after after
    | stdj.end
}
// Chart daily outliers for grouped series and overlay outlier events.
//
// Computes per-group outlier magnitudes with outlier_daily_by, discards
// points at or before `after`, charts the result with ts.chart_by, and sends
// outlier events to that chart via to_events_by.
//   in    - name of the input field; it is also used as the output field,
//           so the original values are overwritten by outlier magnitudes
//   every - event batching interval, forwarded to to_events_by
//   after - time that ends the training window
//   sigma - magnitude threshold; also embedded in the chart title
//   by    - grouping field
//   title - prefix of the chart title
export sub outlier_chart_daily_by(in, every, after, sigma, by, title) {
    const sig_title = title + " " + Number.toString(sigma)+'-sigma outliers';
    // normalize each input series, and flag outliers n-sigmas from the mean.
    outlier_daily_by -in in -out in -after after -sigma sigma -by by
    | filter time > after
    | ts.chart_by -by by -title sig_title // split makes this difficult to combine with the originals
    // generate an event stream from the outliers just computed, and overlay it on the timechart.
    // the events must target sig_title, the title the chart above was drawn with;
    // the bare `title` does not name any chart created in this sub.
    | to_events_by -in in -every every -after after -by by -title sig_title
    | stdj.end
}
// Chart daily forecast-error outliers for a single series and overlay
// outlier events.
//
// Computes outlier magnitudes with outlier_forecast_daily into the 'sigma'
// field, discards points at or before `after`, hands the stream to
// ts.split_dual_chart (with 'sigma' as the secondary field), and finally
// sends outlier events to the same chart title via to_events.
//   in    - name of the input field
//   every - event batching interval, forwarded to to_events
//   after - time that ends the training window
//   sigma - magnitude threshold; also embedded in the chart title
//   title - prefix of the chart title
export sub outlier_chart_forecast_daily(in, every, after, sigma, title) {
    const sigma_field = 'sigma';
    const chart_title = title + " " + Number.toString(sigma) + '-sigma outliers';
    // score every point, keeping only magnitudes beyond the threshold
    outlier_forecast_daily -in in -out sigma_field -after after -sigma sigma
    | filter time > after
    | ts.split_dual_chart -secondary sigma_field -title chart_title
    // overlay the outlier events on the chart drawn above
    | to_events -in sigma_field -title chart_title -every every -after after
    | stdj.end
}
// Chart daily forecast-error outliers for grouped series and overlay
// outlier events.
//
// Computes per-group outlier magnitudes with outlier_forecast_daily_by into
// the 'sigma' field, discards points at or before `after`, hands the stream
// to ts.split_dual_chart_by (with 'sigma' as the secondary field), and sends
// outlier events to the same chart title via to_events_by.
//   in    - name of the input field
//   every - event batching interval, forwarded to to_events_by
//   after - time that ends the training window
//   sigma - magnitude threshold; also embedded in the chart title
//   title - prefix of the chart title
//   by    - grouping field
// Note the parameter order here is (title, by), unlike the other *_by chart
// subs in this file, which declare (by, title).
export sub outlier_chart_forecast_daily_by(in, every, after, sigma, title, by) {
    const sigma_field = 'sigma';
    const chart_title = title + " " + Number.toString(sigma) + '-sigma outliers';
    // score every point per group, keeping only magnitudes beyond the threshold
    outlier_forecast_daily_by -by by -in in -out sigma_field -after after -sigma sigma
    | filter time > after
    | ts.split_dual_chart_by -by by -secondary sigma_field -title chart_title
    // overlay the outlier events on the chart drawn above
    | to_events_by -by by -in sigma_field -title chart_title -every every -after after
    | stdj.end
}
// Chart weekly outliers for a single series and overlay outlier events.
//
// Computes outlier magnitudes with outlier_weekly into the 'sigma' field,
// discards points at or before `after`, hands the stream to
// ts.split_dual_chart (with 'sigma' as the secondary field), and finally
// sends outlier events to the same chart title via to_events.
//   in    - name of the input field
//   every - event batching interval, forwarded to to_events
//   after - time that ends the training window
//   sigma - magnitude threshold; also embedded in the chart title
//   title - prefix of the chart title
export sub outlier_chart_weekly(in, every, after, sigma, title) {
    const sigma_field = 'sigma';
    const chart_title = title + " " + Number.toString(sigma) + '-sigma outliers';
    // score every point, keeping only magnitudes beyond the threshold
    outlier_weekly -in in -out sigma_field -after after -sigma sigma
    | filter time > after
    | ts.split_dual_chart -secondary sigma_field -title chart_title
    // overlay the outlier events on the chart drawn above
    | to_events -in sigma_field -title chart_title -every every -after after
    | stdj.end
}
// Chart weekly outliers for grouped series and overlay outlier events.
// (This is the weekly counterpart of outlier_chart_daily_by; the name lacks
// the outlier_ prefix but is kept for existing callers.)
//
// Computes per-group outlier magnitudes with outlier_weekly_by, discards
// points at or before `after`, charts the result with ts.chart_by, and sends
// outlier events to that chart via to_events_by.
//   in    - name of the input field; it is also used as the output field,
//           so the original values are overwritten by outlier magnitudes
//   every - event batching interval, forwarded to to_events_by
//   after - time that ends the training window
//   sigma - magnitude threshold; also embedded in the chart title
//   by    - grouping field
//   title - prefix of the chart title
export sub charts_weekly_by(in, every, after, sigma, by, title) {
    const sig_title = title + " " + Number.toString(sigma)+'-sigma outliers';
    // normalize each input series, and flag outliers n-sigmas from the mean.
    outlier_weekly_by -in in -out in -after after -sigma sigma -by by
    | filter time > after
    | ts.chart_by -by by -title sig_title // split makes this difficult to combine with the originals
    // generate an event stream from the outliers just computed, and overlay it on the timechart.
    // the events must target sig_title, the title the chart above was drawn with;
    // the bare `title` does not name any chart created in this sub.
    | to_events_by -in in -every every -after after -by by -title sig_title
    | stdj.end
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment