Last active
January 9, 2018 02:24
-
-
Save debasishg/9fe9037d9a94595b35e96529fcdd4e9a to your computer and use it in GitHub Desktop.
Sample run of a neural-network-based solution to anomaly detection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python 3.6.0 |Anaconda 4.3.0 (x86_64)| (default, Dec 23 2016, 13:19:00) | |
[GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)] on darwin | |
Type "help", "copyright", "credits" or "license" for more information. | |
>>> import pandas | |
>>> import numpy | |
>>> data_dir = "/Users/debasishghosh/temp/ingest-intrusion-data/ingest-intrusiondata-tmp" | |
>>> train_data = data_dir + "/new_train_data.csv" | |
>>> train_labels = data_dir + "/train_labels.csv" | |
>>> test_data = data_dir + "/new_test_data.csv" | |
>>> test_labels = data_dir + "/test_labels.csv" | |
>>> | |
## Import the data from CSV files. | |
## The original dataset (https://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html) was preprocessed as follows: | |
## 1. the text values in columns 2, 3 and 4 were converted to numerical values | |
## 2. the attack types were grouped into the summary categories described at https://kdd.ics.uci.edu/databases/kddcup99/task.html | |
>>> train_X = pandas.read_csv(train_data, header=None) | |
>>> train_y = pandas.read_csv(train_labels, header=None) | |
>>> test_X = pandas.read_csv(test_data, header=None) | |
>>> test_y = pandas.read_csv(test_labels, header=None) | |
>>> | |
>>> train_X_arr = numpy.array(train_X) | |
>>> train_y_arr = numpy.array(train_y).ravel() | |
>>> test_X_arr = numpy.array(test_X) | |
>>> test_y_arr = numpy.array(test_y).ravel() | |
>>> | |
## convert the integer class labels to one-hot (categorical) vectors | |
## (the call below only displays the encoding; the result is not stored) | |
>>> from keras.utils.np_utils import to_categorical | |
Using TensorFlow backend. | |
>>> | |
>>> to_categorical(train_y_arr) | |
array([[ 1., 0., 0., 0., 0.], | |
[ 1., 0., 0., 0., 0.], | |
[ 1., 0., 0., 0., 0.], | |
..., | |
[ 1., 0., 0., 0., 0.], | |
[ 1., 0., 0., 0., 0.], | |
[ 1., 0., 0., 0., 0.]]) | |
>>> | |
>>> from keras.models import Sequential | |
>>> from keras.layers import Dense | |
>>> | |
>>> model = Sequential() | |
## hidden layer 1: 64 neurons and relu for non-linearity | |
>>> model.add(Dense(64, activation='relu', input_dim=train_X_arr.shape[1])) | |
## hidden layer 2: 64 neurons and relu for non-linearity | |
>>> model.add(Dense(64, activation='relu')) | |
## output layer: fully connected, 5 units with softmax activation | |
>>> model.add(Dense(5, activation='softmax')) | |
>>> | |
## hold out the first 10,000 rows as a validation set; train on the remainder | |
>>> val_X_arr = train_X_arr[:10000] | |
>>> partial_train_X_arr = train_X_arr[10000:] | |
>>> train_X_arr.shape | |
(4898431, 41) | |
>>> train_y_arr.shape | |
(4898431,) | |
>>> cat_train_y_arr = to_categorical(train_y_arr) | |
>>> cat_train_y_arr.shape | |
(4898431, 5) | |
>>> val_y_arr = cat_train_y_arr[:10000] | |
>>> partial_train_y_arr = cat_train_y_arr[10000:] | |
>>> | |
>>> partial_train_y_arr.shape | |
(4888431, 5) | |
>>> | |
## compile model for cross entropy loss and rmsprop optimization | |
>>> model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=[ 'accuracy' ]) | |
## train and validate | |
>>> model.fit(partial_train_X_arr, partial_train_y_arr, nb_epoch=5, batch_size=512, | |
... validation_data=(val_X_arr, val_y_arr)) | |
Train on 4888431 samples, validate on 10000 samples | |
Epoch 1/5 | |
4888431/4888431 [==============================] - 34s - loss: 0.1736 - acc: 0.9883 - val_loss: 0.0045 - val_acc: 0.9996 | |
Epoch 2/5 | |
4888431/4888431 [==============================] - 34s - loss: 0.0447 - acc: 0.9969 - val_loss: 0.0037 - val_acc: 0.9996 | |
Epoch 3/5 | |
4888431/4888431 [==============================] - 34s - loss: 0.0995 - acc: 0.9937 - val_loss: 0.0036 - val_acc: 0.9997 | |
Epoch 4/5 | |
4888431/4888431 [==============================] - 34s - loss: 0.0951 - acc: 0.9940 - val_loss: 0.0040 - val_acc: 0.9996 | |
Epoch 5/5 | |
4888431/4888431 [==============================] - 34s - loss: 0.0384 - acc: 0.9975 - val_loss: 0.0033 - val_acc: 0.9997 | |
<keras.callbacks.History object at 0x10e0c5908> | |
>>> | |
## evaluate on the test set; returns [loss, accuracy] | |
>>> results = model.evaluate(test_X_arr, to_categorical(test_y_arr)) | |
310592/311029 [============================>.] - ETA: 0s>>> | |
>>> | |
>>> results | |
[1.3010279860144072, 0.91896575560631588] | |
>>> | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment