RafaelCosman · June 20, 2016 02:15
diff --git a/Rafael's library of TF helper functions.py b/Rafael's library of TF helper functions.py
 ### From https://gist.github.com/RafaelCosman/9efc8181d37cdfdaaca70907bcd97372 ###

 import tensorflow as tf

 import numpy as np
 np.set_printoptions(precision=3)

 sess = tf.InteractiveSession()
    
 # use the following to import MNIST:
 # from tensorflow.examples.tutorials.mnist import input_data
 # mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)


 import matplotlib
 matplotlib.use("Pdf")
 import matplotlib.pyplot as plt
 %matplotlib inline
 # use the following to display an array inline in Jupyter:
 # data = np.random.random([100, 100])
 # plt.imshow(data, interpolation='nearest')

 def fc(input_tensor, in_size, out_size, collection=None, non_linearity=tf.nn.sigmoid):
    """Build one fully connected layer: non_linearity(matmul(input_tensor, W) + b).

    The weight "W" and bias "b" are obtained through ``tf.get_variable``, so
    their full names depend on the variable scope active at call time.

    Args:
        input_tensor: left operand of ``tf.matmul``; presumably shaped
            (batch, in_size) -- TODO confirm against callers.
        in_size: number of rows of the weight matrix W.
        out_size: number of columns of W and length of the bias b.
        collection: optional extra graph collection that both variables are
            added to, on top of the variables and trainable-variables ones.
        non_linearity: callable applied to the affine output.

    Returns:
        The result of ``non_linearity`` applied to ``tf.matmul(input_tensor, W) + b``.
    """
    var_collections = [tf.GraphKeys.VARIABLES, tf.GraphKeys.TRAINABLE_VARIABLES]
    if collection is not None:
        var_collections += [collection]

    # Weights start from a truncated normal (mean 0.0, stddev 0.1).
    weight_init = tf.truncated_normal(shape=[in_size, out_size], mean=0.0, stddev=0.1)
    W = tf.get_variable(name="W", initializer=weight_init, collections=var_collections)

    # Biases start at the constant 0.1.
    bias_init = tf.constant(value=0.1, shape=[out_size])
    b = tf.get_variable(name="b", initializer=bias_init, collections=var_collections)

    pre_activation = tf.matmul(input_tensor, W) + b
    return non_linearity(pre_activation)

 # fc test
 # fc_test_template = tf.make_template("fc_test_template", fc, in_size=2, out_size=2)
 # op = fc_test_template([[0., 1.]])
 # tf.initialize_all_variables().run()
 # print op.eval()

 def fc_stack(input_tensor, list_of_sizes, collection=None):
    """Chain ``fc`` layers whose widths are consecutive entries of list_of_sizes.

    Layer i is built inside variable scope "layer<i>" and maps
    list_of_sizes[i] inputs to list_of_sizes[i+1] outputs. Every layer uses
    ``tf.nn.sigmoid`` except the last one, which uses ``tf.identity``.

    Args:
        input_tensor: input handed to the first layer.
        list_of_sizes: layer widths, input width first.
        collection: optional extra graph collection forwarded to ``fc``.

    Returns:
        The output of the final layer, or ``input_tensor`` itself when
        list_of_sizes has fewer than two entries (no layer is built).
    """
    final_layer = len(list_of_sizes) - 2
    size_pairs = zip(list_of_sizes[:-1], list_of_sizes[1:])

    output = input_tensor
    for depth, (fan_in, fan_out) in enumerate(size_pairs):
        # Only the last layer is left linear.
        activation = tf.identity if depth == final_layer else tf.nn.sigmoid
        with tf.variable_scope("layer" + str(depth)):
            output = fc(
                output,
                in_size=fan_in,
                out_size=fan_out,
                collection=collection,
                non_linearity=activation,
            )
    return output

 # fc_stack test
 # fc_stack_test_template = tf.make_template("fc_stack_test_template", fc_stack, list_of_sizes=[2, 3, 2])
 # op = fc_stack_test_template([[0., 1.]])
 # tf.initialize_all_variables().run()
 # print op.eval()

 def conv2d(x, W):
    """Return ``tf.nn.conv2d`` of x with filter W, strides [1, 1, 1, 1], 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

 def max_pool_2x2(x):
    """Return ``tf.nn.max_pool`` of x with ksize and strides both [1, 2, 2, 1], 'SAME' padding.

    NOTE(review): presumably x is laid out as (batch, height, width, channels),
    making this a 2x2 spatial pool -- confirm against callers.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

 def weight_variable(shape):
    """Return a new ``tf.Variable`` initialised from a truncated normal (stddev 0.1) of the given shape."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

 def bias_variable(shape):
    """Return a new ``tf.Variable`` of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

 def one_hot(index, name="one_hot"):
    """Return the evaluated one-hot encoding of index.

    The encoding depth is read from the global ``env.observation_space.n``.
    The op is evaluated immediately with ``.eval()``, so the result is a
    concrete value rather than a graph tensor.

    Args:
        index: indices passed to ``tf.one_hot``.
        name: name given to the created op.
    """
    encoding_op = tf.one_hot(
        indices=index,
        depth=env.observation_space.n,
        on_value=1,
        off_value=0,
        axis=None,
        name=name,
    )
    return encoding_op.eval()

 def getFeedDict(observation):
    """Build the feed dict that feeds a single observation into ``x``.

    ``x``, ``env`` and ``gym`` are globals expected to be defined elsewhere.
    A Discrete observation space gets the observation one-hot encoded via
    ``one_hot``; a Box observation space gets it passed through unchanged.
    In both cases the value is wrapped in a one-element list.

    Args:
        observation: the observation to feed.

    Returns:
        ``{x: [value]}``, or ``None`` (after printing "ERR") when the
        observation space is neither Discrete nor Box.
    """
    space = env.observation_space
    # isinstance instead of an exact type comparison, so subclasses of the
    # gym space classes are handled like their base class.
    if isinstance(space, gym.spaces.discrete.Discrete):
        return {x: [one_hot(observation)]}
    if isinstance(space, gym.spaces.box.Box):
        return {x: [observation]}
    # Parenthesised form prints the same text on Python 2 and is valid Python 3.
    print("ERR")
    return None
        
 class InitializeNewVariables:
    """Context manager that initialises the variables created inside its ``with`` body.

    On entry it snapshots ``tf.all_variables()``. On exit it runs, in the
    global ``sess``, an initializer for every variable that exists now but
    was not in the snapshot.
    """

    def __enter__(self):
        # Remember which variables already existed before the body ran.
        existing = tf.all_variables()
        self.temp = set(existing)

    def __exit__(self, type, value, traceback):
        # Nothing is returned, so an exception raised in the body still
        # propagates after the new variables have been initialised.
        created = set(tf.all_variables()).difference(self.temp)
        init_op = tf.initialize_variables(created)
        sess.run(init_op)

 def l2_dist(a, b):
    """Return ``tf.nn.l2_loss`` of the difference ``tf.sub(a, b)``.

    NOTE(review): per the TensorFlow docs l2_loss is sum(t ** 2) / 2, i.e.
    half the squared Euclidean distance rather than the distance itself.
    """
    difference = tf.sub(a, b)
    return tf.nn.l2_loss(difference)

 def batch_norm(x):
    """Normalise x with the mean and variance computed over axis 0.

    No offset or scale is applied, and 0.01 is used as the variance epsilon.
    """
    batch_mean, batch_variance = tf.nn.moments(x, axes=[0], keep_dims=True)
    return tf.nn.batch_normalization(
        x,
        batch_mean,
        batch_variance,
        offset=None,
        scale=None,
        variance_epsilon=0.01,
    )

 def l1_penalty():
    """Return the summed absolute values of every variable in the "variables" collection.

    The total starts from ``tf.constant([0.])``, so that constant is what
    comes back when the collection is empty.
    """
    total = tf.constant([0.])
    for var in tf.get_collection("variables"):
        magnitude = tf.reduce_sum(tf.abs(var))
        total = total + magnitude
    return total
 # print l1_penalty().eval()

 def l2_penalty():
    """Return the sum, over the "variables" collection, of sqrt(sum(variable ** 2)).

    Each variable contributes its un-squared L2 norm. The total starts from
    ``tf.constant([0.])``, which is returned as-is for an empty collection.
    """
    total = tf.constant([0.])
    for var in tf.get_collection("variables"):
        norm = tf.sqrt(tf.reduce_sum(tf.square(var)))
        total = total + norm
    return total
 # print l2_penalty().eval()

 ### OpenAI Gym specific stuff ###
 def bound_to_env_action_space(my_action):
    """Clamp my_action to the range [env.action_space.low, env.action_space.high].

    ``env`` is a global expected to be defined elsewhere.
    """
    lower = env.action_space.low
    upper = env.action_space.high
    return tf.minimum(tf.maximum(my_action, lower), upper)
    
 def bound_to_env_observation_space(my_observation):
    """Clamp my_observation to [env.observation_space.low, env.observation_space.high].

    ``env`` is a global expected to be defined elsewhere.
    """
    lower = env.observation_space.low
    upper = env.observation_space.high
    return tf.minimum(tf.maximum(my_observation, lower), upper)
	### From https://gist.github.com/RafaelCosman/9efc8181d37cdfdaaca70907bcd97372 ###

	import tensorflow as tf

	import numpy as np
	np.set_printoptions(precision=3)

	sess = tf.InteractiveSession()

	# use the following to import MNIST:
	# from tensorflow.examples.tutorials.mnist import input_data
	# mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)


	import matplotlib
	matplotlib.use("Pdf")
	import matplotlib.pyplot as plt
	%matplotlib inline
	# use the following to display an array inline in Jupyter:
	# data = np.random.random([100, 100])
	# plt.imshow(data, interpolation='nearest')

	def fc(input_tensor, in_size, out_size, collection=None, non_linearity=tf.nn.sigmoid):
	    """Build one fully connected layer: non_linearity(matmul(input_tensor, W) + b).

	    The weight "W" and bias "b" are obtained through ``tf.get_variable``, so
	    their full names depend on the variable scope active at call time.

	    Args:
	        input_tensor: left operand of ``tf.matmul``; presumably shaped
	            (batch, in_size) -- TODO confirm against callers.
	        in_size: number of rows of the weight matrix W.
	        out_size: number of columns of W and length of the bias b.
	        collection: optional extra graph collection that both variables are
	            added to, on top of the variables and trainable-variables ones.
	        non_linearity: callable applied to the affine output.

	    Returns:
	        The result of ``non_linearity`` applied to ``tf.matmul(input_tensor, W) + b``.
	    """
	    var_collections = [tf.GraphKeys.VARIABLES, tf.GraphKeys.TRAINABLE_VARIABLES]
	    if collection is not None:
	        var_collections.append(collection)

	    # Weights start from a truncated normal (mean 0.0, stddev 0.1).
	    weight_init = tf.truncated_normal(shape=[in_size, out_size], mean=0.0, stddev=0.1)
	    W = tf.get_variable(name="W", initializer=weight_init, collections=var_collections)

	    # Biases start at the constant 0.1.
	    bias_init = tf.constant(value=0.1, shape=[out_size])
	    b = tf.get_variable(name="b", initializer=bias_init, collections=var_collections)

	    return non_linearity(tf.matmul(input_tensor, W) + b)

	# fc test
	# fc_test_template = tf.make_template("fc_test_template", fc, in_size=2, out_size=2)
	# op = fc_test_template([[0., 1.]])
	# tf.initialize_all_variables().run()
	# print op.eval()

	def fc_stack(input_tensor, list_of_sizes, collection=None):
	    """Chain ``fc`` layers whose widths are consecutive entries of list_of_sizes.

	    Layer i is built inside variable scope "layer<i>" and maps
	    list_of_sizes[i] inputs to list_of_sizes[i+1] outputs. Every layer uses
	    ``tf.nn.sigmoid`` except the last one, which uses ``tf.identity``.

	    Args:
	        input_tensor: input handed to the first layer.
	        list_of_sizes: layer widths, input width first.
	        collection: optional extra graph collection forwarded to ``fc``.

	    Returns:
	        The output of the final layer, or ``input_tensor`` itself when
	        list_of_sizes has fewer than two entries (no layer is built).
	    """
	    final_layer = len(list_of_sizes) - 2
	    size_pairs = zip(list_of_sizes[:-1], list_of_sizes[1:])

	    output = input_tensor
	    for depth, (fan_in, fan_out) in enumerate(size_pairs):
	        # Only the last layer is left linear.
	        activation = tf.identity if depth == final_layer else tf.nn.sigmoid
	        with tf.variable_scope("layer" + str(depth)):
	            output = fc(
	                output,
	                in_size=fan_in,
	                out_size=fan_out,
	                collection=collection,
	                non_linearity=activation,
	            )
	    return output

	# fc_stack test
	# fc_stack_test_template = tf.make_template("fc_stack_test_template", fc_stack, list_of_sizes=[2, 3, 2])
	# op = fc_stack_test_template([[0., 1.]])
	# tf.initialize_all_variables().run()
	# print op.eval()

	def conv2d(x, W):
	    """Return ``tf.nn.conv2d`` of x with filter W, strides [1, 1, 1, 1], 'SAME' padding."""
	    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

	def max_pool_2x2(x):
	    """Return ``tf.nn.max_pool`` of x with ksize and strides both [1, 2, 2, 1], 'SAME' padding.

	    NOTE(review): presumably x is laid out as (batch, height, width, channels),
	    making this a 2x2 spatial pool -- confirm against callers.
	    """
	    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

	def weight_variable(shape):
	    """Return a new ``tf.Variable`` initialised from a truncated normal (stddev 0.1) of the given shape."""
	    initial = tf.truncated_normal(shape, stddev=0.1)
	    return tf.Variable(initial)

	def bias_variable(shape):
	    """Return a new ``tf.Variable`` of the given shape with every entry set to 0.1."""
	    initial = tf.constant(0.1, shape=shape)
	    return tf.Variable(initial)

	def one_hot(index, name="one_hot"):
	    """Return the evaluated one-hot encoding of index.

	    The encoding depth is read from the global ``env.observation_space.n``.
	    The op is evaluated immediately with ``.eval()``, so the result is a
	    concrete value rather than a graph tensor.

	    Args:
	        index: indices passed to ``tf.one_hot``.
	        name: name given to the created op.
	    """
	    encoding_op = tf.one_hot(
	        indices=index,
	        depth=env.observation_space.n,
	        on_value=1,
	        off_value=0,
	        axis=None,
	        name=name,
	    )
	    return encoding_op.eval()

	def getFeedDict(observation):
	    """Build the feed dict that feeds a single observation into ``x``.

	    ``x``, ``env`` and ``gym`` are globals expected to be defined elsewhere.
	    A Discrete observation space gets the observation one-hot encoded via
	    ``one_hot``; a Box observation space gets it passed through unchanged.
	    In both cases the value is wrapped in a one-element list.

	    Args:
	        observation: the observation to feed.

	    Returns:
	        ``{x: [value]}``, or ``None`` (after printing "ERR") when the
	        observation space is neither Discrete nor Box.
	    """
	    space = env.observation_space
	    # isinstance instead of an exact type comparison, so subclasses of the
	    # gym space classes are handled like their base class.
	    if isinstance(space, gym.spaces.discrete.Discrete):
	        return {x: [one_hot(observation)]}
	    if isinstance(space, gym.spaces.box.Box):
	        return {x: [observation]}
	    # Parenthesised form prints the same text on Python 2 and is valid Python 3.
	    print("ERR")
	    return None

	class InitializeNewVariables:
	    """Context manager that initialises the variables created inside its ``with`` body.

	    On entry it snapshots ``tf.all_variables()``. On exit it runs, in the
	    global ``sess``, an initializer for every variable that exists now but
	    was not in the snapshot.
	    """

	    def __enter__(self):
	        # Remember which variables already existed before the body ran.
	        self.temp = set(tf.all_variables())

	    def __exit__(self, type, value, traceback):
	        # Nothing is returned, so an exception raised in the body still
	        # propagates after the new variables have been initialised.
	        created = set(tf.all_variables()) - self.temp
	        sess.run(tf.initialize_variables(created))

	def l2_dist(a, b):
	    """Return ``tf.nn.l2_loss`` of the difference ``tf.sub(a, b)``.

	    NOTE(review): per the TensorFlow docs l2_loss is sum(t ** 2) / 2, i.e.
	    half the squared Euclidean distance rather than the distance itself.
	    """
	    return tf.nn.l2_loss(tf.sub(a, b))

	def batch_norm(x):
	    """Normalise x with the mean and variance computed over axis 0.

	    No offset or scale is applied, and 0.01 is used as the variance epsilon.
	    """
	    mean, variance = tf.nn.moments(x, axes=[0], keep_dims=True)
	    return tf.nn.batch_normalization(
	        x, mean, variance, offset=None, scale=None, variance_epsilon=0.01
	    )

	def l1_penalty():
	    """Return the summed absolute values of every variable in the "variables" collection.

	    The total starts from ``tf.constant([0.])``, so that constant is what
	    comes back when the collection is empty.
	    """
	    penalty = tf.constant([0.])
	    for variable in tf.get_collection("variables"):
	        penalty += tf.reduce_sum(tf.abs(variable))
	    return penalty
	# print l1_penalty().eval()

	def l2_penalty():
	    """Return the sum, over the "variables" collection, of sqrt(sum(variable ** 2)).

	    Each variable contributes its un-squared L2 norm. The total starts from
	    ``tf.constant([0.])``, which is returned as-is for an empty collection.
	    """
	    penalty = tf.constant([0.])
	    for variable in tf.get_collection("variables"):
	        penalty += tf.sqrt(tf.reduce_sum(tf.square(variable)))
	    return penalty
	# print l2_penalty().eval()

	### OpenAI Gym specific stuff ###
	def bound_to_env_action_space(my_action):
	    """Clamp my_action to the range [env.action_space.low, env.action_space.high].

	    ``env`` is a global expected to be defined elsewhere.
	    """
	    my_action = tf.maximum(my_action, env.action_space.low)
	    my_action = tf.minimum(my_action, env.action_space.high)
	    return my_action

	def bound_to_env_observation_space(my_observation):
	    """Clamp my_observation to [env.observation_space.low, env.observation_space.high].

	    ``env`` is a global expected to be defined elsewhere.
	    """
	    my_observation = tf.maximum(my_observation, env.observation_space.low)
	    my_observation = tf.minimum(my_observation, env.observation_space.high)
	    return my_observation