LSTM train — training a two-layer LSTM in Torch to predict sin(x) one step ahead
require 'nn'
require 'torch'
require 'rnn'
require 'gnuplot'
require 'optim'

--debugger = require('fb.debugger')

no_param   = 1   -- dimensionality of each sequence element; for x -> sin(x) this is 1
seq_length = 50  -- length of each training sequence (number of time steps fed to nn.Sequencer())
batch_size = 10  -- number of sequences per mini-batch
max_epochs = 500 -- maximum number of training epochs
-- nn.Sequencer applies the wrapped module to every time step of the input sequence
lstm = nn.Sequencer(
   nn.Sequential()
      :add(nn.FastLSTM(no_param, 100))
      :add(nn.FastLSTM(100, 100))
      :add(nn.Linear(100, no_param))
      :add(nn.Tanh())
)
-- SequencerCriterion applies the MSE criterion at every time step and sums the losses
criterion = nn.SequencerCriterion(nn.MSECriterion())
print(lstm)
print(criterion)
-- training data: x[i] = sin(rad(i)) for i = 1..1001 (0.0175 .. 17.47 rad);
-- the target is the input shifted one step ahead (one-step-ahead prediction)
data = torch.Tensor(1001):range(1, 1001):apply(math.rad):apply(math.sin)
input  = data[{{1, 1000}}]
target = data[{{2, 1001}}]
-- put the network in training mode and get flattened parameter/gradient tensors
lstm:training()
params, gradParams = lstm:getParameters()
feval = function(params_new)
   -- copy in the new parameters if optim hands us a different tensor
   if params ~= params_new then
      params:copy(params_new)
   end
   -- reset gradients (gradients are always accumulated, to accommodate batch methods)
   gradParams:zero()
   -- evaluate the loss and its derivative on the current mini-batch
   local prediction = lstm:forward(inputs)
   local loss_x = criterion:forward(prediction, targets)
   local dl_doutput = criterion:backward(prediction, targets)
   lstm:backward(inputs, dl_doutput)
   -- MSECriterion already averages over the batch at each step, so dividing
   -- by seq_length gives the average per-step loss
   return loss_x / seq_length, gradParams
end
-- note: optim.adam ignores 'momentum'; its beta1/beta2 default to 0.9/0.999
optim_config = {
   learningRate = 1e-2,
   --learningRateDecay = 1e-4,
   --weightDecay = 0,
}
for epoch = 1, max_epochs do
   errsum = 0
   -- shuffle the sequence start offsets for better generalization
   shuffle = torch.randperm(input:size(1) - seq_length)
   -- halve the learning rate every 15 epochs; mutate the existing table so
   -- Adam's internal state (stored in optim_config) is preserved
   if epoch % 15 == 0 then
      optim_config.learningRate = optim_config.learningRate * 0.5
   end
   for i = 1, 5 do -- 5 mini-batches per epoch
      inputs  = torch.Tensor(seq_length, batch_size, no_param)
      targets = torch.Tensor(seq_length, batch_size, no_param)
      -- fill the batch with sequences of length seq_length at shuffled offsets;
      -- step through 'shuffle' per batch so each batch sees different windows
      for k = 1, batch_size do
         local s = shuffle[(i - 1) * batch_size + k]
         inputs[{{}, {k}}]  = input[{{s, s + seq_length - 1}}]
         targets[{{}, {k}}] = target[{{s, s + seq_length - 1}}]
      end
      _, fs = optim.adam(feval, params, optim_config)
      errsum = errsum + fs[1]
   end
   print("Epoch: " .. epoch .. ", error: " .. errsum)
end
torch.save('rnn_sine.t7', lstm)
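
To check the trained network, one can reload the saved file and let the model free-run on its own predictions. The sketch below is not part of the original gist; it assumes the rnn package's Sequencer:remember('eval') mode (which keeps the hidden state across forward calls during evaluation), and the 100-step free-run length is arbitrary.

-- evaluation sketch (assumed, not from the original script)
require 'nn'
require 'rnn'
require 'torch'

local net = torch.load('rnn_sine.t7')
net:evaluate()
net:remember('eval')  -- keep hidden state across forward calls while evaluating

-- seed the hidden state with a window of the true sine wave
local seed = torch.Tensor(50, 1, 1)
for t = 1, 50 do seed[t][1][1] = math.sin(math.rad(t)) end
local out = net:forward(seed)

-- free-run: feed each prediction back in as the next input
local last = out[{{50}}]:clone()  -- 1 x 1 x 1, last predicted step
for t = 1, 100 do
   last = net:forward(last):clone()  -- clone: Sequencer reuses its output buffer
   print(last[1][1][1])
end

If training converged, the printed values should continue tracing the sine wave beyond the seed window.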