LSTM train — training a two-layer LSTM in Torch to predict sin(x) one step ahead
require 'nn'
require 'torch'
require 'rnn'
require 'gnuplot'
require 'optim'

--debugger = require('fb.debugger')

no_param   = 1   -- dimensionality of each sequence element; for x -> sin(x) this is 1
seq_length = 50  -- length of each training sequence (number of time steps fed to nn.Sequencer())
batch_size = 10  -- number of sequences per mini-batch
max_epochs = 500 -- maximum number of training epochs
-- nn.Sequencer applies the wrapped module to every time step of the input sequence
lstm = nn.Sequencer(
   nn.Sequential()
      :add(nn.FastLSTM(no_param, 100))
      :add(nn.FastLSTM(100, 100))
      :add(nn.Linear(100, no_param))
      :add(nn.Tanh())
)
-- SequencerCriterion applies the MSE criterion at every time step and sums the losses
criterion = nn.SequencerCriterion(nn.MSECriterion())
print(lstm)
print(criterion)
-- training data: x[i] = sin(rad(i)) for i = 1..1001 (0.0175 .. 17.47 rad);
-- the target is the input shifted one step ahead (one-step-ahead prediction)
data = torch.Tensor(1001):range(1, 1001):apply(math.rad):apply(math.sin)
input  = data[{{1, 1000}}]
target = data[{{2, 1001}}]
-- put the network in training mode and get flattened parameter/gradient tensors
lstm:training()
params, gradParams = lstm:getParameters()
feval = function(params_new)
   -- copy in the new parameters if optim hands us a different tensor
   if params ~= params_new then
      params:copy(params_new)
   end
   -- reset gradients (gradients are always accumulated, to accommodate batch methods)
   gradParams:zero()
   -- evaluate the loss and its derivative on the current mini-batch
   local prediction = lstm:forward(inputs)
   local loss_x = criterion:forward(prediction, targets)
   local dl_doutput = criterion:backward(prediction, targets)
   lstm:backward(inputs, dl_doutput)
   -- MSECriterion already averages over the batch at each step, so dividing
   -- by seq_length gives the average per-step loss
   return loss_x / seq_length, gradParams
end
-- note: optim.adam ignores 'momentum'; its beta1/beta2 default to 0.9/0.999
optim_config = {
   learningRate = 1e-2,
   --learningRateDecay = 1e-4,
   --weightDecay = 0,
}
for epoch = 1, max_epochs do
   errsum = 0
   -- shuffle the sequence start offsets for better generalization
   shuffle = torch.randperm(input:size(1) - seq_length)
   -- halve the learning rate every 15 epochs; mutate the existing table so
   -- Adam's internal state (stored in optim_config) is preserved
   if epoch % 15 == 0 then
      optim_config.learningRate = optim_config.learningRate * 0.5
   end
   for i = 1, 5 do -- 5 mini-batches per epoch
      inputs  = torch.Tensor(seq_length, batch_size, no_param)
      targets = torch.Tensor(seq_length, batch_size, no_param)
      -- fill the batch with sequences of length seq_length at shuffled offsets;
      -- step through 'shuffle' per batch so each batch sees different windows
      for k = 1, batch_size do
         local s = shuffle[(i - 1) * batch_size + k]
         inputs[{{}, {k}}]  = input[{{s, s + seq_length - 1}}]
         targets[{{}, {k}}] = target[{{s, s + seq_length - 1}}]
      end
      _, fs = optim.adam(feval, params, optim_config)
      errsum = errsum + fs[1]
   end
   print("Epoch: " .. epoch .. ", error: " .. errsum)
end
torch.save('rnn_sine.t7', lstm)
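
To check the trained network, one can reload the saved file and let the model free-run on its own predictions. The sketch below is not part of the original gist; it assumes the rnn package's Sequencer:remember('eval') mode (which keeps the hidden state across forward calls during evaluation), and the 100-step free-run length is arbitrary.

-- evaluation sketch (assumed, not from the original script)
require 'nn'
require 'rnn'
require 'torch'

local net = torch.load('rnn_sine.t7')
net:evaluate()
net:remember('eval')  -- keep hidden state across forward calls while evaluating

-- seed the hidden state with a window of the true sine wave
local seed = torch.Tensor(50, 1, 1)
for t = 1, 50 do seed[t][1][1] = math.sin(math.rad(t)) end
local out = net:forward(seed)

-- free-run: feed each prediction back in as the next input
local last = out[{{50}}]:clone()  -- 1 x 1 x 1, last predicted step
for t = 1, 100 do
   last = net:forward(last):clone()  -- clone: Sequencer reuses its output buffer
   print(last[1][1][1])
end

If training converged, the printed values should continue tracing the sine wave beyond the seed window.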