yabyzq · June 3, 2018 22:24
diff --git a/Forecast methods b/Forecast methods
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
 import os
 import time
 from numpy import newaxis
 import math
 from sklearn.metrics import mean_squared_error
 import statsmodels.api as sm
 from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

 def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    
 #loading data
 df = pd.read_csv('C:/Users/Administrator/Desktop/LSTM/simple.csv')
 df.index = pd.DatetimeIndex(pd.to_datetime(df.date,format='%Y-%m-%d'),freq='D')

 #split train and test
 train_size = round(0.75*len(df))
 train = df[1:train_size]
 test = df[train_size:]

 #http://www.seanabu.com/2016/03/22/time-series-seasonal-ARIMA-model-in-python/
 from statsmodels.tsa.stattools import adfuller
 def test_stationarity(timeseries):

    #Determing rolling statistics
    rolmean = timeseries.rolling(window = 12).mean()
    rolstd = timeseries.rolling(window = 12).std() 

    #Plot rolling statistics:
    #fig = plt.figure(figsize=(12, 8))
    orig = plt.plot(timeseries, color='blue',label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    std = plt.plot(rolstd, color='black', label = 'Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show()
    
    #Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print(dfoutput)

 test_stationarity(df.balance)

 # Method 1 - Baseline using last day value
 prediction = test.copy().drop(['balance'], axis=1)
 prediction['last_day'] = train['balance'][len(train)-1]

 x = 30
 prediction['last_x_day'] = train['balance'].rolling(x).mean().iloc[-1]

 plt.plot(train.index, train['balance'], label='Train')
 plt.plot(test.index,test['balance'], label='Test')
 plt.plot(prediction.index,prediction['last_day'], label='Last Day')
 plt.plot(prediction.index,prediction['last_x_day'], label='Last %d Days'%x)
 plt.legend(loc='best')
 plt.show()

 print("#Last Day\nCoefficent:" ,round(np.corrcoef(test.balance,prediction.last_day)[0,1],4),\
      " RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.last_day)),4),\
      " MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.last_day),4)) 

 print("#Last %d Days\nCoefficent:"%x ,round(np.corrcoef(test.balance,prediction.last_x_day)[0,1],4),\
      " RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.last_x_day)),4),\
      " MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.last_x_day),4)) 
      
 # Method 2 - capture Seasonality using Holt, ARIMA
 # analysing training data
 sm.tsa.seasonal_decompose(train.balance).plot()
 plt.show()

 #fit and predict
 fit1 = Holt(np.asarray(train['balance'])).fit(smoothing_level = 0.05,smoothing_slope = 0.05)
 prediction['holt_linear'] = fit1.forecast(len(test))
 fit2 = ExponentialSmoothing(np.asarray(train['balance']) ,seasonal_periods=7 ,trend='add', seasonal='add',).fit()
 prediction['holt_winter'] = fit2.forecast(len(test))
 fit3 = sm.tsa.statespace.SARIMAX(train.balance, order=(2, 1, 4),seasonal_order=(0,1,1,7)).fit(maxiter=500)
 prediction['arima'] = fit3.predict(start=test.index[0], end=test.index[-1], dynamic=True)

 plt.plot(train['balance'], label='Train')
 plt.plot(test['balance'], label='Test')
 plt.plot(prediction['holt_linear'], label='Holt Linear')
 plt.plot(prediction['holt_winter'], label='Holt Winter')
 plt.plot(prediction['arima'], label='ARIMA')
 plt.legend(loc='best')
 plt.show()

 print("#Holt Linear\nCoefficent:",round(np.corrcoef(test.balance,prediction.holt_linear)[0,1],4),\
      " RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.holt_linear)),4),\
      " MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.holt_linear),4)) 

 print("#Holt Winter\nCoefficent:",round(np.corrcoef(test.balance,prediction.holt_winter)[0,1],4),\
      " RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.holt_winter)),4),\
      " MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.holt_winter),4)) 

 print("#ARIMA\nCoefficent:",round(np.corrcoef(test.balance,prediction.arima)[0,1],4),\
      " RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.arima)),4),\
      " MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.arima),4)) 

 import Prophet from fbprophet
 m = Prophet()
 m.fit(pd.DataFrame({'ds':train.date, 'y':train.balance}))
 forecast = m.predict(m.make_future_dataframe(periods=len(test)))
 prediction['prophet'] = forecast['yhat']

 #http://pythondata.com/forecasting-time-series-data-with-prophet-part-1/
 # Method 3 - Facebook Prophet
 forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

 type(forecast.yhat.index)
 type(train.balance.index)
 forecast = forecast.set_index(pd.DatetimeIndex(forecast['ds'],freq = 'D'))

 plt.figure(figsize=(16,8))
 plt.plot( train['balance'], label='Train')
 plt.plot(test['balance'], label='Test')
 plt.plot(forecast.yhat[-35:-1], label='PROPHET')
 plt.legend(loc='best')
 plt.show()

 m = Prophet()
 m.fit(pd.DataFrame({'ds':train.date, 'y':train.balance}))
 forecast = m.predict(m.make_future_dataframe(periods=len(test)))
 forecast = forecast.set_index(pd.DatetimeIndex(forecast['ds'],freq = 'D'))
 prediction['prophet'] = forecast['yhat']

 plt.figure(figsize=(16,8))
 plt.plot(train['balance'], label='Train')
 plt.plot(test['balance'], label='Test')
 plt.plot(prediction['prophet'], label='Prophet')
 plt.legend(loc='best')
 plt.show()

 print("#Prophet\nCoefficent:",round(np.corrcoef(test.balance,prediction.prophet)[0,1],4),\
      " RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.prophet)),4),\
      " MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.prophet),4)) 
      
      
 from datetime import date, timedelta
 import pandas as pd
 import numpy as np
 from sklearn.metrics import mean_squared_error
 from keras.models import Sequential
 from keras.layers.core import Dense, Dropout, Activation
 from keras.layers import LSTM
 from keras import callbacks
 from keras.callbacks import ModelCheckpoint
 from keras import optimizers
 from sklearn.preprocessing import MinMaxScaler
 import matplotlib.pyplot as plt
 %matplotlib inline

 #loading the data
 data = pd.read_csv("C:/Users/eye1/Desktop/sett_data.csv",parse_dates=["date"])
 scaler = MinMaxScaler(feature_range=(0, 1))
 data['balance'] = scaler.fit_transform(data['balance'].values.reshape(-1,1))
 data = data.set_index(pd.DatetimeIndex(data['date'],freq = 'D')).drop(['date'], axis=1)



 def get_timespan(input_data, date, minus, periods, freq='D'):
    return input_data.loc[pd.date_range(date - timedelta(days=minus), periods=periods, freq=freq)]

 train_start_date = pd.datetime(2017, 6, 10)

 def convert_timeseries(data, start_date, is_train=True):
    X = pd.DataFrame({"last_1": get_timespan(data, start_date, 1, 1).values.ravel(),
                  "last_3_mean": get_timespan(data, start_date, 3, 3).mean().values,#last 3
                  "last_5_mean": get_timespan(data, start_date, 5, 5).mean().values,#last 5
                  "last_7_mean": get_timespan(data, start_date, 7, 7).mean().values,#last 5
                  "last_14_mean": get_timespan(data, start_date, 14, 14).mean().values})
    for i in range(7):
        X['last_4_dow{}_mean'.format(i)] = get_timespan(data, start_date, 28-i, 4, freq='7D').mean().values #get every week last 7
    if is_train:
        y = data.loc[pd.date_range(start_date, periods=1)].values
        return X, y
    return X
    
 #choose cut off for train and test
 cut_off = pd.datetime(2018, 2, 1)

 #set up training data
 X_train, y_train = [], []
 for i in range(pd.Timedelta(cut_off - train_start_date).days):
    delta = timedelta(days=i)
    X_tmp, y_tmp = convert_timeseries(data, train_start_date + delta)
    X_train.append(X_tmp)
    y_train.append(y_tmp)
    
 X_train = pd.concat(X_train, axis=0)
 y_train = np.concatenate(y_train, axis=0)

 X_train = X_train.as_matrix()
 X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))


 X_test, y_test = [], []
 for i in range(pd.Timedelta(data.index[-2] - cut_off).days):
    delta = timedelta(days=i)
    X_tmp, y_tmp = convert_timeseries(data, cut_off + delta)
    X_test.append(X_tmp)
    y_test.append(y_tmp)
    
 X_test = pd.concat(X_test, axis=0)
 y_test = np.concatenate(y_test, axis=0)

 X_test = X_test.as_matrix()
 X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

 #model
 model = Sequential()
 model.add(LSTM(32, input_shape=(X_train.shape[1],X_train.shape[2])))
 model.add(Dropout(.1))
 model.add(Dense(8))
 model.add(Dropout(.1))
 model.add(Dense(1))
 #model.compile(loss = 'mse', optimizer='adam', metrics=['mse'])
 model.compile(loss='mse',optimizer=optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True))

 from numpy import newaxis
 def predict_sequence_full(model, data, window_size):
    #Shift the window by 1 new prediction each time, re-run predictions on new window
    curr_frame = data
    predicted = []
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[i:i+1]))
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
    return predicted
    
    
    model.fit(X_train, y_train, batch_size = 512, epochs = 500, verbose=2)
 #plt.plot(model.predict(X_test))
 plt.plot(np.concatenate(predict_sequence_full(model, X_test, window_size = 1)).ravel())
 plt.plot(y_test)
 plt.show()

 def get_timespan(input_data, date, minus, periods, freq='D'):
    return input_data.loc[pd.date_range(date - timedelta(days=minus), periods=periods, freq=freq)]

 def convert_timeseries(input_data, start_date, is_train=True):
    X = pd.DataFrame({"last_1": get_timespan(input_data, start_date, 1, 1).mean().values,
                  "last_2_mean": get_timespan(input_data, start_date, 2, 2).mean().values,#last 2
                  "last_3_mean": get_timespan(input_data, start_date, 3, 3).mean().values,#last 3
                  "last_4_mean": get_timespan(input_data, start_date, 4, 4).mean().values,#last 4 
                  "last_5_mean": get_timespan(input_data, start_date, 5, 5).mean().values,#last 5
                  "last_6_mean": get_timespan(input_data, start_date, 6, 6).mean().values,#last 6
                  "last_7_mean": get_timespan(input_data, start_date, 7, 7).mean().values,#last 7
                  "last_14_mean": get_timespan(input_data, start_date, 14, 14).mean().values})
    for i in range(7):
        X['last_4_dow{}_mean'.format(i)] = get_timespan(input_data, start_date, 28-i, 4, freq='7D').mean().values #get every week last 7
    if is_train:
        y = input_data.loc[pd.date_range(start_date, periods=1)].mean().values
        return X, y
    return X

 def predict_sequence_full(model, data, window_size):
    #Shift the window by 1 new prediction each time, re-run predictions on new window
    curr_frame = data
    predicted = []
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[i:i+1]))
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
    return predicted
    
    
  #set up training data
 X_train, y_train = [], []
 for i in range(pd.Timedelta(cut_off - train_start_date).days):
    delta = timedelta(days=i)
    X_tmp, y_tmp = convert_timeseries(df, train_start_date + delta)
    X_train.append(X_tmp)
    y_train.append(y_tmp)
    
 X_train = pd.concat(X_train, axis=0)
 y_train = np.concatenate(y_train, axis=0)

 X_train = X_train.as_matrix()
 X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))

 #set up testing data
 X_test, y_test = [], []
 for i in range(pd.Timedelta(df.index[-1] - cut_off).days):
    delta = timedelta(days=i)
    X_tmp, y_tmp = convert_timeseries(df, cut_off + delta)
    X_test.append(X_tmp)
    y_test.append(y_tmp)
    
 X_test = pd.concat(X_test, axis=0)
 y_test = np.concatenate(y_test, axis=0)

 X_test = X_test.as_matrix()
 X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1])) 


 #model
 model = Sequential()
 model.add(LSTM(32, input_shape=(X_train.shape[1],X_train.shape[2])))
 model.add(Dropout(.1))
 model.add(Dense(8))
 model.add(Dropout(.1))
 model.add(Dense(1))
 model.compile(loss='mse',optimizer=optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True))#optimizer='adam', metrics=['mse']
 model.fit(X_train, y_train, batch_size = 512, epochs = 1000, verbose=2)


 prediction['lstm'] = np.concatenate(predict_sequence_full(model, X_test, window_size = 1)).ravel()
 plt.figure(figsize=(12,4))
 plt.plot(train['balance'], label='Train')
 plt.plot(test['balance'], label='Test')
 plt.plot(prediction['lstm'], label='LSTM')
 plt.legend(loc='best')
 plt.show()

 print("#LSTM\nCoefficent:",round(np.corrcoef(test.balance,prediction.lstm)[0,1],4),\
      " RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.lstm)),4),\
      " MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.lstm),4))
	import pandas as pd
	import matplotlib.pyplot as plt
	import numpy as np
	import os
	import time
	from numpy import newaxis
	import math
	from sklearn.metrics import mean_squared_error
	import statsmodels.api as sm
	from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

	def mean_absolute_percentage_error(y_true, y_pred):
	y_true, y_pred = np.array(y_true), np.array(y_pred)
	return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

	#loading data
	df = pd.read_csv('C:/Users/Administrator/Desktop/LSTM/simple.csv')
	df.index = pd.DatetimeIndex(pd.to_datetime(df.date,format='%Y-%m-%d'),freq='D')

	#split train and test
	train_size = round(0.75*len(df))
	train = df[1:train_size]
	test = df[train_size:]

	#http://www.seanabu.com/2016/03/22/time-series-seasonal-ARIMA-model-in-python/
	from statsmodels.tsa.stattools import adfuller
	def test_stationarity(timeseries):

	#Determing rolling statistics
	rolmean = timeseries.rolling(window = 12).mean()
	rolstd = timeseries.rolling(window = 12).std()

	#Plot rolling statistics:
	#fig = plt.figure(figsize=(12, 8))
	orig = plt.plot(timeseries, color='blue',label='Original')
	mean = plt.plot(rolmean, color='red', label='Rolling Mean')
	std = plt.plot(rolstd, color='black', label = 'Rolling Std')
	plt.legend(loc='best')
	plt.title('Rolling Mean & Standard Deviation')
	plt.show()

	#Perform Dickey-Fuller test:
	print('Results of Dickey-Fuller Test:')
	dftest = adfuller(timeseries, autolag='AIC')
	dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
	for key,value in dftest[4].items():
	dfoutput['Critical Value (%s)'%key] = value
	print(dfoutput)

	test_stationarity(df.balance)

	# Method 1 - Baseline using last day value
	prediction = test.copy().drop(['balance'], axis=1)
	prediction['last_day'] = train['balance'][len(train)-1]

	x = 30
	prediction['last_x_day'] = train['balance'].rolling(x).mean().iloc[-1]

	plt.plot(train.index, train['balance'], label='Train')
	plt.plot(test.index,test['balance'], label='Test')
	plt.plot(prediction.index,prediction['last_day'], label='Last Day')
	plt.plot(prediction.index,prediction['last_x_day'], label='Last %d Days'%x)
	plt.legend(loc='best')
	plt.show()

	print("#Last Day\nCoefficent:" ,round(np.corrcoef(test.balance,prediction.last_day)[0,1],4),\
	" RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.last_day)),4),\
	" MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.last_day),4))

	print("#Last %d Days\nCoefficent:"%x ,round(np.corrcoef(test.balance,prediction.last_x_day)[0,1],4),\
	" RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.last_x_day)),4),\
	" MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.last_x_day),4))

	# Method 2 - capture Seasonality using Holt, ARIMA
	# analysing training data
	sm.tsa.seasonal_decompose(train.balance).plot()
	plt.show()

	#fit and predict
	fit1 = Holt(np.asarray(train['balance'])).fit(smoothing_level = 0.05,smoothing_slope = 0.05)
	prediction['holt_linear'] = fit1.forecast(len(test))
	fit2 = ExponentialSmoothing(np.asarray(train['balance']) ,seasonal_periods=7 ,trend='add', seasonal='add',).fit()
	prediction['holt_winter'] = fit2.forecast(len(test))
	fit3 = sm.tsa.statespace.SARIMAX(train.balance, order=(2, 1, 4),seasonal_order=(0,1,1,7)).fit(maxiter=500)
	prediction['arima'] = fit3.predict(start=test.index[0], end=test.index[-1], dynamic=True)

	plt.plot(train['balance'], label='Train')
	plt.plot(test['balance'], label='Test')
	plt.plot(prediction['holt_linear'], label='Holt Linear')
	plt.plot(prediction['holt_winter'], label='Holt Winter')
	plt.plot(prediction['arima'], label='ARIMA')
	plt.legend(loc='best')
	plt.show()

	print("#Holt Linear\nCoefficent:",round(np.corrcoef(test.balance,prediction.holt_linear)[0,1],4),\
	" RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.holt_linear)),4),\
	" MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.holt_linear),4))

	print("#Holt Winter\nCoefficent:",round(np.corrcoef(test.balance,prediction.holt_winter)[0,1],4),\
	" RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.holt_winter)),4),\
	" MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.holt_winter),4))

	print("#ARIMA\nCoefficent:",round(np.corrcoef(test.balance,prediction.arima)[0,1],4),\
	" RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.arima)),4),\
	" MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.arima),4))

	import Prophet from fbprophet
	m = Prophet()
	m.fit(pd.DataFrame({'ds':train.date, 'y':train.balance}))
	forecast = m.predict(m.make_future_dataframe(periods=len(test)))
	prediction['prophet'] = forecast['yhat']

	#http://pythondata.com/forecasting-time-series-data-with-prophet-part-1/
	# Method 3 - Facebook Prophet
	forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

	type(forecast.yhat.index)
	type(train.balance.index)
	forecast = forecast.set_index(pd.DatetimeIndex(forecast['ds'],freq = 'D'))

	plt.figure(figsize=(16,8))
	plt.plot( train['balance'], label='Train')
	plt.plot(test['balance'], label='Test')
	plt.plot(forecast.yhat[-35:-1], label='PROPHET')
	plt.legend(loc='best')
	plt.show()

	m = Prophet()
	m.fit(pd.DataFrame({'ds':train.date, 'y':train.balance}))
	forecast = m.predict(m.make_future_dataframe(periods=len(test)))
	forecast = forecast.set_index(pd.DatetimeIndex(forecast['ds'],freq = 'D'))
	prediction['prophet'] = forecast['yhat']

	plt.figure(figsize=(16,8))
	plt.plot(train['balance'], label='Train')
	plt.plot(test['balance'], label='Test')
	plt.plot(prediction['prophet'], label='Prophet')
	plt.legend(loc='best')
	plt.show()

	print("#Prophet\nCoefficent:",round(np.corrcoef(test.balance,prediction.prophet)[0,1],4),\
	" RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.prophet)),4),\
	" MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.prophet),4))


	from datetime import date, timedelta
	import pandas as pd
	import numpy as np
	from sklearn.metrics import mean_squared_error
	from keras.models import Sequential
	from keras.layers.core import Dense, Dropout, Activation
	from keras.layers import LSTM
	from keras import callbacks
	from keras.callbacks import ModelCheckpoint
	from keras import optimizers
	from sklearn.preprocessing import MinMaxScaler
	import matplotlib.pyplot as plt
	%matplotlib inline

	#loading the data
	data = pd.read_csv("C:/Users/eye1/Desktop/sett_data.csv",parse_dates=["date"])
	scaler = MinMaxScaler(feature_range=(0, 1))
	data['balance'] = scaler.fit_transform(data['balance'].values.reshape(-1,1))
	data = data.set_index(pd.DatetimeIndex(data['date'],freq = 'D')).drop(['date'], axis=1)



	def get_timespan(input_data, date, minus, periods, freq='D'):
	return input_data.loc[pd.date_range(date - timedelta(days=minus), periods=periods, freq=freq)]

	train_start_date = pd.datetime(2017, 6, 10)

	def convert_timeseries(data, start_date, is_train=True):
	X = pd.DataFrame({"last_1": get_timespan(data, start_date, 1, 1).values.ravel(),
	"last_3_mean": get_timespan(data, start_date, 3, 3).mean().values,#last 3
	"last_5_mean": get_timespan(data, start_date, 5, 5).mean().values,#last 5
	"last_7_mean": get_timespan(data, start_date, 7, 7).mean().values,#last 5
	"last_14_mean": get_timespan(data, start_date, 14, 14).mean().values})
	for i in range(7):
	X['last_4_dow{}_mean'.format(i)] = get_timespan(data, start_date, 28-i, 4, freq='7D').mean().values #get every week last 7
	if is_train:
	y = data.loc[pd.date_range(start_date, periods=1)].values
	return X, y
	return X

	#choose cut off for train and test
	cut_off = pd.datetime(2018, 2, 1)

	#set up training data
	X_train, y_train = [], []
	for i in range(pd.Timedelta(cut_off - train_start_date).days):
	delta = timedelta(days=i)
	X_tmp, y_tmp = convert_timeseries(data, train_start_date + delta)
	X_train.append(X_tmp)
	y_train.append(y_tmp)

	X_train = pd.concat(X_train, axis=0)
	y_train = np.concatenate(y_train, axis=0)

	X_train = X_train.as_matrix()
	X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))


	X_test, y_test = [], []
	for i in range(pd.Timedelta(data.index[-2] - cut_off).days):
	delta = timedelta(days=i)
	X_tmp, y_tmp = convert_timeseries(data, cut_off + delta)
	X_test.append(X_tmp)
	y_test.append(y_tmp)

	X_test = pd.concat(X_test, axis=0)
	y_test = np.concatenate(y_test, axis=0)

	X_test = X_test.as_matrix()
	X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

	#model
	model = Sequential()
	model.add(LSTM(32, input_shape=(X_train.shape[1],X_train.shape[2])))
	model.add(Dropout(.1))
	model.add(Dense(8))
	model.add(Dropout(.1))
	model.add(Dense(1))
	#model.compile(loss = 'mse', optimizer='adam', metrics=['mse'])
	model.compile(loss='mse',optimizer=optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True))

	from numpy import newaxis
	def predict_sequence_full(model, data, window_size):
	#Shift the window by 1 new prediction each time, re-run predictions on new window
	curr_frame = data
	predicted = []
	for i in range(len(data)):
	predicted.append(model.predict(curr_frame[i:i+1]))
	curr_frame = curr_frame[1:]
	curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
	return predicted


	model.fit(X_train, y_train, batch_size = 512, epochs = 500, verbose=2)
	#plt.plot(model.predict(X_test))
	plt.plot(np.concatenate(predict_sequence_full(model, X_test, window_size = 1)).ravel())
	plt.plot(y_test)
	plt.show()

	def get_timespan(input_data, date, minus, periods, freq='D'):
	return input_data.loc[pd.date_range(date - timedelta(days=minus), periods=periods, freq=freq)]

	def convert_timeseries(input_data, start_date, is_train=True):
	X = pd.DataFrame({"last_1": get_timespan(input_data, start_date, 1, 1).mean().values,
	"last_2_mean": get_timespan(input_data, start_date, 2, 2).mean().values,#last 2
	"last_3_mean": get_timespan(input_data, start_date, 3, 3).mean().values,#last 3
	"last_4_mean": get_timespan(input_data, start_date, 4, 4).mean().values,#last 4
	"last_5_mean": get_timespan(input_data, start_date, 5, 5).mean().values,#last 5
	"last_6_mean": get_timespan(input_data, start_date, 6, 6).mean().values,#last 6
	"last_7_mean": get_timespan(input_data, start_date, 7, 7).mean().values,#last 7
	"last_14_mean": get_timespan(input_data, start_date, 14, 14).mean().values})
	for i in range(7):
	X['last_4_dow{}_mean'.format(i)] = get_timespan(input_data, start_date, 28-i, 4, freq='7D').mean().values #get every week last 7
	if is_train:
	y = input_data.loc[pd.date_range(start_date, periods=1)].mean().values
	return X, y
	return X

	def predict_sequence_full(model, data, window_size):
	#Shift the window by 1 new prediction each time, re-run predictions on new window
	curr_frame = data
	predicted = []
	for i in range(len(data)):
	predicted.append(model.predict(curr_frame[i:i+1]))
	curr_frame = curr_frame[1:]
	curr_frame = np.insert(curr_frame, [window_size-1], predicted[-1], axis=0)
	return predicted


	#set up training data
	X_train, y_train = [], []
	for i in range(pd.Timedelta(cut_off - train_start_date).days):
	delta = timedelta(days=i)
	X_tmp, y_tmp = convert_timeseries(df, train_start_date + delta)
	X_train.append(X_tmp)
	y_train.append(y_tmp)

	X_train = pd.concat(X_train, axis=0)
	y_train = np.concatenate(y_train, axis=0)

	X_train = X_train.as_matrix()
	X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))

	#set up testing data
	X_test, y_test = [], []
	for i in range(pd.Timedelta(df.index[-1] - cut_off).days):
	delta = timedelta(days=i)
	X_tmp, y_tmp = convert_timeseries(df, cut_off + delta)
	X_test.append(X_tmp)
	y_test.append(y_tmp)

	X_test = pd.concat(X_test, axis=0)
	y_test = np.concatenate(y_test, axis=0)

	X_test = X_test.as_matrix()
	X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))


	#model
	model = Sequential()
	model.add(LSTM(32, input_shape=(X_train.shape[1],X_train.shape[2])))
	model.add(Dropout(.1))
	model.add(Dense(8))
	model.add(Dropout(.1))
	model.add(Dense(1))
	model.compile(loss='mse',optimizer=optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True))#optimizer='adam', metrics=['mse']
	model.fit(X_train, y_train, batch_size = 512, epochs = 1000, verbose=2)


	prediction['lstm'] = np.concatenate(predict_sequence_full(model, X_test, window_size = 1)).ravel()
	plt.figure(figsize=(12,4))
	plt.plot(train['balance'], label='Train')
	plt.plot(test['balance'], label='Test')
	plt.plot(prediction['lstm'], label='LSTM')
	plt.legend(loc='best')
	plt.show()

	print("#LSTM\nCoefficent:",round(np.corrcoef(test.balance,prediction.lstm)[0,1],4),\
	" RMSE:", round(math.sqrt(mean_squared_error(test.balance, prediction.lstm)),4),\
	" MAPE:", round(mean_absolute_percentage_error(test.balance, prediction.lstm),4))