-
-
Save Mistobaan/337222ac3acbfc00bdac to your computer and use it in GitHub Desktop.
# from https://cloud.google.com/solutions/machine-learning-with-financial-time-series-data
def tf_confusion_metrics(model, actual_classes, session, feed_dict):
    """Evaluate binary confusion-matrix metrics, print a report and return them.

    Predicted and actual class indices are obtained with ``tf.argmax(..., 1)``.
    Class index 1 is treated as the positive class and index 0 as the negative
    class; a sample whose predicted or actual index is neither 0 nor 1 is not
    counted in any of the four confusion-matrix cells.

    Args:
        model: Tensor of per-class model outputs.
            NOTE(review): presumably shaped (samples, classes) -- confirm.
        actual_classes: Tensor of true labels, reduced with the same argmax.
            NOTE(review): presumably one-hot, same shape as ``model`` -- confirm.
        session: Object whose ``run(fetches, feed_dict)`` evaluates the count ops.
        feed_dict: Feed dictionary forwarded unchanged to ``session.run``.

    Returns:
        dict with the float entries ``tp``, ``tn``, ``fp``, ``fn``, ``tpr``,
        ``fpr``, ``accuracy``, ``precision``, ``recall`` and ``f1_score``.
        Any ratio whose denominator is zero is reported as 0.0.
    """
    predictions = tf.argmax(model, 1)
    actuals = tf.argmax(actual_classes, 1)

    positive_actuals = tf.equal(actuals, tf.ones_like(actuals))
    negative_actuals = tf.equal(actuals, tf.zeros_like(actuals))
    positive_predictions = tf.equal(predictions, tf.ones_like(predictions))
    negative_predictions = tf.equal(predictions, tf.zeros_like(predictions))

    def count_both(actual_mask, predicted_mask):
        # Number of samples for which both boolean masks are true.
        return tf.reduce_sum(
            tf.cast(tf.logical_and(actual_mask, predicted_mask), "float")
        )

    def safe_ratio(numerator, denominator):
        # An empty denominator (e.g. no positive samples) yields 0.0 rather
        # than raising ZeroDivisionError.
        if denominator == 0.0:
            return 0.0
        return numerator / denominator

    tp_op = count_both(positive_actuals, positive_predictions)
    tn_op = count_both(negative_actuals, negative_predictions)
    fp_op = count_both(negative_actuals, positive_predictions)
    fn_op = count_both(positive_actuals, negative_predictions)

    # Evaluate all four counts in a single session.run call.
    tp, tn, fp, fn = [
        float(count)
        for count in session.run([tp_op, tn_op, fp_op, fn_op], feed_dict)
    ]

    tpr = safe_ratio(tp, tp + fn)
    # False positive rate is FP over all actual negatives (FP + TN); the
    # previous code divided by the actual positives (TP + FN).
    fpr = safe_ratio(fp, fp + tn)
    accuracy = safe_ratio(tp + tn, tp + fp + fn + tn)
    recall = tpr
    precision = safe_ratio(tp, tp + fp)
    f1_score = safe_ratio(2 * (precision * recall), precision + recall)

    report = (
        ('Precision = ', precision),
        ('Recall = ', recall),
        ('F1 Score = ', f1_score),
        ('Accuracy = ', accuracy),
    )
    for label, value in report:
        # A single parenthesised argument is valid both as a Python 2 print
        # statement and as a Python 3 print call; the explicit space mirrors
        # the separator the original two-item print statement emitted.
        print('%s %s' % (label, value))

    return {
        'tp': tp,
        'tn': tn,
        'fp': fp,
        'fn': fn,
        'tpr': tpr,
        'fpr': fpr,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1_score,
    }
Correct me if I'm wrong, but building on the reply by @carlthome above, it looks like you could further simplify some of your performance measures with things like:
accuracy = tf.reduce_mean(tf.cast(tf.equal(actual, predicted), tf.float32))
error = tf.reduce_mean(tf.cast(tf.not_equal(actual, predicted), tf.float32))
The `accuracy` operation above gives you the number of cases where `actual == predicted` (equivalent to `tp + tn`), divided by the total number of samples (equivalent to `tp + fp + fn + tn`). The `error` measure gives you the complement of this, i.e. `(fp + fn) / total_samples`, so `error = 1 - accuracy`.
I am not sure which method ends up being faster, but if all you need is `accuracy` or `error` (as defined here), this saves you having to find all of `tp`, `tn`, `fp`, and `fn`.
I think for multi-label problems there is no real true negative, so in the reply from @carlthome the calculation of `tn` is unnecessary and actually wrong.
You're not supposed to use the sigmoid for a one-hot encoding because you get numerical instability. You need to use softmax EVERY TIME for one-hot, or else you will get erroneous results from the floating-point rounding errors.
The `fpr` is wrong. It should be `fpr = float(fp)/(float(fp) + float(tn))`; see
https://en.wikipedia.org/wiki/False_positive_rate
Nice gist!
I noticed that TensorFlow now has a `tf.count_nonzero` function, so how about doing something like this instead for multi-label classification (e.g. when using sigmoid activations and the true labels are binary tensors)? Assuming `logits` is a model's final output, with tensor shape (samples, classes), and `labels` is a corresponding tensor of zeros and ones, where one denotes class membership.