Skip to content

Instantly share code, notes, and snippets.

@krishvishal
Created March 5, 2021 11:48
Show Gist options
  • Save krishvishal/d3f00bb51094df274bd69b3387d8511e to your computer and use it in GitHub Desktop.
Save krishvishal/d3f00bb51094df274bd69b3387d8511e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import tensorflow as tf\n",
"from tensorflow.keras import layers\n",
"from tensorflow.keras.models import Model\n",
"from tensorflow.keras import Input\n",
"from tensorflow.keras.layers import LSTM\n",
"from tensorflow.keras.utils import to_categorical"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Import data:\n",
"# 1. read the text file line by line;\n",
"# 2. store the parsed values in a DataFrame.\n",
"\n",
"def read_data(path):\n",
"    \"\"\"Parse a whitespace-delimited numeric text file into a DataFrame.\n",
"\n",
"    Each non-blank line of the file is split on whitespace and converted\n",
"    to floats, giving one row per line. The table is then transposed, so\n",
"    every line of the file ends up as a column of the result.\n",
"\n",
"    Args:\n",
"        path: location of the text file to read.\n",
"\n",
"    Returns:\n",
"        pandas.DataFrame: the transposed table of parsed values.\n",
"\n",
"    Raises:\n",
"        ValueError: if a token cannot be converted to float.\n",
"    \"\"\"\n",
"    rows = []\n",
"    with open(path, 'r') as f:\n",
"        # Iterating the file object streams it line by line until EOF.\n",
"        for line in f:\n",
"            tokens = line.split()\n",
"            if not tokens:\n",
"                # A blank line would otherwise become an all-NaN row.\n",
"                continue\n",
"            rows.append([float(tok) for tok in tokens])\n",
"    return pd.DataFrame(rows).T\n",
"\n",
"# Prepare the data for training:\n",
"# 1. sample_size=100: each sample holds the 100 most recent updates\n",
"# 2. feature_num=40: 40 features per timestamp\n",
"# 3. target_num=5: relative changes for the next 1, 2, 3, 5 and 10 events (5 in total)\n",
"def get_model_data(data, sample_size=100, feature_num=40, target_num=5):\n",
"    \"\"\"Slice a table into fixed-length windows with one label row each.\n",
"\n",
"    Args:\n",
"        data: table exposing ``.values`` (rows are consecutive events).\n",
"        sample_size: number of consecutive rows in each window.\n",
"        feature_num: leading columns used as features.\n",
"        target_num: trailing columns used as labels.\n",
"\n",
"    Returns:\n",
"        (X, Y): X has shape (n, sample_size, feature_num, 1) and Y has\n",
"        shape (n, target_num), where n = number of rows - sample_size.\n",
"        Y holds the label columns of each window's last row, minus 1.\n",
"    \"\"\"\n",
"    values = data.values\n",
"    # NOTE(review): this yields rows - sample_size windows, so the window\n",
"    # ending on the final row is never emitted - confirm this is intended.\n",
"    n_windows = values.shape[0] - sample_size\n",
"    X = np.zeros((n_windows, sample_size, feature_num))\n",
"    Y = np.zeros((n_windows, target_num))\n",
"    for start in range(n_windows):\n",
"        stop = start + sample_size\n",
"        # the first feature_num columns of the window are the features\n",
"        X[start] = values[start:stop, :feature_num]\n",
"        # the last target_num columns of the window's final row are the labels\n",
"        Y[start] = values[stop - 1, -target_num:]\n",
"    # append a trailing axis of length 1: a single channel\n",
"    X = X[..., np.newaxis]\n",
"\n",
"    # \"Benchmark dataset for mid-price forecasting of limit order book data with machine learning\"\n",
"    # labels 1: equal to or greater than 0.002\n",
"    # labels 2: -0.00199 to 0.00199\n",
"    # labels 3: smaller or equal to -0.002\n",
"    # subtracting 1 relabels them as 0,1,2\n",
"    Y -= 1\n",
"    return X, Y\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Training split: parse the raw text file, then cut it into windows and labels.\n",
"data_path = r'E:\\JupyterFile\\BenchmarkDatasets\\BenchmarkDatasets\\NoAuction\\1.NoAuction_Zscore\\NoAuction_Zscore_Training\\Train_Dst_NoAuction_ZScore_CF_9.txt'\n",
"data = read_data(data_path)\n",
"train_X, train_Y = get_model_data(data)\n",
"# The labels come back as floats; store them as integer class ids.\n",
"train_Y = train_Y.astype(int)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(362300, 100, 40, 1)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_X.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"model_2\"\n",
"__________________________________________________________________________________________________\n",
"Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
"input_3 (InputLayer) [(None, 100, 40, 1)] 0 \n",
"__________________________________________________________________________________________________\n",
"conv2d_28 (Conv2D) (None, 100, 20, 16) 48 input_3[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_28 (LeakyReLU) (None, 100, 20, 16) 0 conv2d_28[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_29 (Conv2D) (None, 100, 20, 16) 1040 leaky_re_lu_28[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_29 (LeakyReLU) (None, 100, 20, 16) 0 conv2d_29[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_30 (Conv2D) (None, 100, 20, 16) 1040 leaky_re_lu_29[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_30 (LeakyReLU) (None, 100, 20, 16) 0 conv2d_30[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_31 (Conv2D) (None, 100, 10, 16) 528 leaky_re_lu_30[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_31 (LeakyReLU) (None, 100, 10, 16) 0 conv2d_31[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_32 (Conv2D) (None, 100, 10, 16) 1040 leaky_re_lu_31[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_32 (LeakyReLU) (None, 100, 10, 16) 0 conv2d_32[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_33 (Conv2D) (None, 100, 10, 16) 1040 leaky_re_lu_32[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_33 (LeakyReLU) (None, 100, 10, 16) 0 conv2d_33[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_34 (Conv2D) (None, 100, 1, 16) 2576 leaky_re_lu_33[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_34 (LeakyReLU) (None, 100, 1, 16) 0 conv2d_34[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_35 (Conv2D) (None, 100, 1, 16) 1040 leaky_re_lu_34[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_35 (LeakyReLU) (None, 100, 1, 16) 0 conv2d_35[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_36 (Conv2D) (None, 100, 1, 16) 1040 leaky_re_lu_35[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_36 (LeakyReLU) (None, 100, 1, 16) 0 conv2d_36[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_37 (Conv2D) (None, 100, 1, 32) 544 leaky_re_lu_36[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_39 (Conv2D) (None, 100, 1, 32) 544 leaky_re_lu_36[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_37 (LeakyReLU) (None, 100, 1, 32) 0 conv2d_37[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_39 (LeakyReLU) (None, 100, 1, 32) 0 conv2d_39[0][0] \n",
"__________________________________________________________________________________________________\n",
"max_pooling2d_2 (MaxPooling2D) (None, 100, 1, 16) 0 leaky_re_lu_36[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_38 (Conv2D) (None, 100, 1, 32) 3104 leaky_re_lu_37[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_40 (Conv2D) (None, 100, 1, 32) 5152 leaky_re_lu_39[0][0] \n",
"__________________________________________________________________________________________________\n",
"conv2d_41 (Conv2D) (None, 100, 1, 32) 544 max_pooling2d_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_38 (LeakyReLU) (None, 100, 1, 32) 0 conv2d_38[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_40 (LeakyReLU) (None, 100, 1, 32) 0 conv2d_40[0][0] \n",
"__________________________________________________________________________________________________\n",
"leaky_re_lu_41 (LeakyReLU) (None, 100, 1, 32) 0 conv2d_41[0][0] \n",
"__________________________________________________________________________________________________\n",
"concatenate_2 (Concatenate) (None, 100, 1, 96) 0 leaky_re_lu_38[0][0] \n",
" leaky_re_lu_40[0][0] \n",
" leaky_re_lu_41[0][0] \n",
"__________________________________________________________________________________________________\n",
"reshape_2 (Reshape) (None, 100, 96) 0 concatenate_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"lstm_2 (LSTM) (None, 64) 41216 reshape_2[0][0] \n",
"__________________________________________________________________________________________________\n",
"dense_2 (Dense) (None, 3) 195 lstm_2[0][0] \n",
"==================================================================================================\n",
"Total params: 60,691\n",
"Trainable params: 60,691\n",
"Non-trainable params: 0\n",
"__________________________________________________________________________________________________\n"
]
}
],
"source": [
"def _conv_lrelu(x, filters, kernel, **conv_kwargs):\n",
"    \"\"\"Apply a Conv2D layer followed by LeakyReLU(alpha=0.01).\"\"\"\n",
"    x = layers.Conv2D(filters, kernel, **conv_kwargs)(x)\n",
"    return layers.LeakyReLU(alpha=0.01)(x)\n",
"\n",
"\n",
"# one input sample is a (100, 40) window with a single channel\n",
"input_tensor = Input(shape=(100,40,1))\n",
"\n",
"# stage 1: a (1,2) filter with stride (1,2) halves the feature axis (40 -> 20)\n",
"layer_x = _conv_lrelu(input_tensor, 16, (1,2), strides=(1,2))\n",
"layer_x = _conv_lrelu(layer_x, 16, (4,1), padding='same')\n",
"layer_x = _conv_lrelu(layer_x, 16, (4,1), padding='same')\n",
"\n",
"# stage 2: the same reduction again (20 -> 10)\n",
"layer_x = _conv_lrelu(layer_x, 16, (1,2), strides=(1,2))\n",
"layer_x = _conv_lrelu(layer_x, 16, (4,1), padding='same')\n",
"layer_x = _conv_lrelu(layer_x, 16, (4,1), padding='same')\n",
"\n",
"# stage 3: a (1,10) filter collapses the remaining feature axis (10 -> 1)\n",
"layer_x = _conv_lrelu(layer_x, 16, (1,10))\n",
"layer_x = _conv_lrelu(layer_x, 16, (4,1), padding='same')\n",
"layer_x = _conv_lrelu(layer_x, 16, (4,1), padding='same')\n",
"\n",
"# Inception Module: three parallel towers fed by the same tensor\n",
"tower_1 = _conv_lrelu(layer_x, 32, (1,1), padding='same')\n",
"tower_1 = _conv_lrelu(tower_1, 32, (3,1), padding='same')\n",
"\n",
"tower_2 = _conv_lrelu(layer_x, 32, (1,1), padding='same')\n",
"tower_2 = _conv_lrelu(tower_2, 32, (5,1), padding='same')\n",
"\n",
"tower_3 = layers.MaxPooling2D((3,1), padding='same', strides=(1,1))(layer_x)\n",
"tower_3 = _conv_lrelu(tower_3, 32, (1,1), padding='same')\n",
"\n",
"# concatenate features of tower_1, tower_2, tower_3 along the channel axis (3 x 32 = 96)\n",
"layer_x = layers.concatenate([tower_1, tower_2, tower_3], axis=-1)\n",
"\n",
"# drop the length-1 feature axis so the LSTM receives (time steps, channels)\n",
"layer_x = layers.Reshape((100,96))(layer_x)\n",
"\n",
"# 64 LSTM units\n",
"layer_x = LSTM(64)(layer_x)\n",
"# the output layer is a 3-way softmax\n",
"output = layers.Dense(3, activation='softmax')(layer_x)\n",
"model = Model(input_tensor, output)\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"W0819 21:54:52.619683 8128 deprecation.py:323] From E:\\Anaconda3\\lib\\site-packages\\tensorflow\\python\\ops\\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use tf.where in 2.0, which has the same broadcast rule as np.where\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/100\n",
"362300/362300 [==============================] - 1837s 5ms/sample - loss: 0.9045 - acc: 0.6388\n",
"Epoch 2/100\n",
"362300/362300 [==============================] - 1828s 5ms/sample - loss: 0.8130 - acc: 0.6598\n",
"Epoch 3/100\n",
"270112/362300 [=====================>........] - ETA: 7:46 - loss: 0.7450 - acc: 0.6844"
]
}
],
"source": [
"# Learning rate and epsilon are the same as in the DeepLOB paper.\n",
"# `learning_rate` replaces the deprecated `lr` keyword.\n",
"opt = tf.keras.optimizers.Adam(learning_rate=0.01, epsilon=1)\n",
"# y is the label for the next event's mid-price move (k=1), one-hot encoded.\n",
"# num_classes is pinned to 3 so the width always matches the Dense(3) output,\n",
"# even if a class happens to be missing from the data.\n",
"y = to_categorical(train_Y[:,0], num_classes=3)\n",
"model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])\n",
"model.fit(train_X, y, epochs=100, batch_size=32)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Load the test split first: the cell below needs test_data to exist.\n",
"test_path = r'E:\\JupyterFile\\BenchmarkDatasets\\BenchmarkDatasets\\NoAuction\\1.NoAuction_Zscore\\NoAuction_Zscore_Testing\\Test_Dst_NoAuction_ZScore_CF_9.txt'\n",
"test_data = read_data(test_path)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Save the parsed test table as CSV.\n",
"test_data.to_csv('FI2010_test.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build the test windows and labels the same way as for training.\n",
"test_data = read_data(test_path)\n",
"test_X, test_Y = get_model_data(test_data)\n",
"test_Y = test_Y.astype(int)\n",
"# One-hot encode the first label column; 3 classes to match the softmax output.\n",
"test_y = to_categorical(test_Y[:,0], num_classes=3)\n",
"\n",
"# Evaluate against the one-hot targets (test_y). The raw test_Y matrix has\n",
"# 5 integer label columns and does not match the model's 3-way output.\n",
"model.evaluate(test_X, test_y)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment