Last active
June 20, 2016 02:15
-
-
Save RafaelCosman/9efc8181d37cdfdaaca70907bcd97372 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### From https://gist.github.com/RafaelCosman/9efc8181d37cdfdaaca70907bcd97372 ### | |
import tensorflow as tf | |
import numpy as np | |
np.set_printoptions(precision=3) | |
sess = tf.InteractiveSession() | |
# use the following to import MNIST: | |
# from tensorflow.examples.tutorials.mnist import input_data | |
# mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) | |
import matplotlib | |
matplotlib.use("Pdf") | |
import matplotlib.pyplot as plt | |
%matplotlib inline | |
# use the following to display an array inline in Jupyter: | |
# data = np.random.random([100, 100]) | |
# plt.imshow(data, interpolation='nearest') | |
def fc(input_tensor, in_size, out_size, collection=None, non_linearity=tf.nn.sigmoid):
    """Build one fully connected layer: ``non_linearity(input_tensor @ W + b)``.

    Two variables are obtained through ``tf.get_variable`` under the names
    "W" and "b":

    * ``W`` has shape ``[in_size, out_size]`` and is initialised from a
      truncated normal distribution (mean 0.0, stddev 0.1).
    * ``b`` has shape ``[out_size]`` and is initialised to the constant 0.1.

    Args:
        input_tensor: Left operand of the matrix multiplication with ``W``.
        in_size: Number of rows of ``W``.
        out_size: Number of columns of ``W`` and length of ``b``.
        collection: Optional extra graph collection the two variables are
            added to, on top of the VARIABLES and TRAINABLE_VARIABLES
            collections.
        non_linearity: Callable applied to the affine output.

    Returns:
        The result of ``non_linearity`` applied to ``input_tensor @ W + b``.
    """
    target_collections = [tf.GraphKeys.VARIABLES, tf.GraphKeys.TRAINABLE_VARIABLES]
    if collection is not None:
        target_collections = target_collections + [collection]

    # Initialiser ops are created immediately before the variable that uses
    # them, which keeps the order in which ops are added to the graph stable.
    weight_init = tf.truncated_normal(shape=[in_size, out_size], mean=0.0, stddev=0.1)
    W = tf.get_variable(name="W", initializer=weight_init, collections=target_collections)

    bias_init = tf.constant(value=0.1, shape=[out_size])
    b = tf.get_variable(name="b", initializer=bias_init, collections=target_collections)

    pre_activation = tf.matmul(input_tensor, W) + b
    return non_linearity(pre_activation)
# fc test | |
# fc_test_template = tf.make_template("fc_test_template", fc, in_size=2, out_size=2) | |
# op = fc_test_template([[0., 1.]]) | |
# tf.initialize_all_variables().run() | |
# print op.eval() | |
def fc_stack(input_tensor, list_of_sizes, collection=None):
    """Chain fully connected layers whose widths are given by ``list_of_sizes``.

    Consecutive entries of ``list_of_sizes`` are used as the ``in_size`` and
    ``out_size`` of one ``fc`` layer, so ``n`` sizes produce ``n - 1`` layers.
    Layer ``i`` is built inside the variable scope ``"layer<i>"``. Every
    layer uses a sigmoid except the last one, which uses ``tf.identity`` so
    the stack's output is not squashed.

    Args:
        input_tensor: Input fed to the first layer.
        list_of_sizes: Sliceable sequence (list or tuple) of layer widths,
            starting with the input width.
        collection: Optional extra graph collection forwarded to every
            ``fc`` call.

    Returns:
        The output of the last layer. When ``list_of_sizes`` has fewer than
        two entries no layer is built and ``input_tensor`` is returned
        unchanged.
    """
    last_layer_index = len(list_of_sizes) - 2
    result = input_tensor

    # Pair every size with its successor instead of indexing by position;
    # this also avoids the Python-2-only ``xrange``.
    size_pairs = zip(list_of_sizes[:-1], list_of_sizes[1:])
    for layer_index, (in_size, out_size) in enumerate(size_pairs):
        with tf.variable_scope("layer" + str(layer_index)):
            if layer_index == last_layer_index:
                non_linearity = tf.identity
            else:
                non_linearity = tf.nn.sigmoid
            result = fc(
                result,
                in_size=in_size,
                out_size=out_size,
                collection=collection,
                non_linearity=non_linearity,
            )
    return result
# fc_stack test | |
# fc_stack_test_template = tf.make_template("fc_stack_test_template", fc_stack, list_of_sizes=[2, 3, 2]) | |
# op = fc_stack_test_template([[0., 1.]]) | |
# tf.initialize_all_variables().run() | |
# print op.eval() | |
def conv2d(x, W):
    """Apply ``tf.nn.conv2d`` to ``x`` with filter ``W``, stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Apply ``tf.nn.max_pool`` to ``x`` with a 2x2 window, stride 2 and 'SAME' padding."""
    # The same [1, 2, 2, 1] layout is used for both the window and the stride.
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
def weight_variable(shape):
    """Return a new ``tf.Variable`` of ``shape`` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a new ``tf.Variable`` of ``shape`` with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def one_hot(index, name="one_hot", depth=None):
    """Return the one-hot encoding of ``index`` as an int32 NumPy array.

    The encoding is computed directly with NumPy. Building a ``tf.one_hot``
    op and evaluating it on every call would add a new op to the graph each
    time (the graph grows for as long as the function keeps being called)
    and would require a default session.

    Args:
        index: Integer index, or array-like of integer indices.
        name: Unused; kept so existing callers that pass it keep working.
        depth: Length of the one-hot axis. Defaults to
            ``env.observation_space.n`` (``env`` is a module-level name that
            must be defined before the call when ``depth`` is omitted).

    Returns:
        An int32 array of shape ``np.shape(index) + (depth,)`` holding 1 at
        the indexed position and 0 elsewhere. Indices outside
        ``[0, depth)`` yield an all-zero row, as ``tf.one_hot`` does.
    """
    if depth is None:
        depth = env.observation_space.n
    indices = np.asarray(index)
    # Broadcasting each index against 0..depth-1 marks the matching slot and
    # leaves out-of-range indices all-zero.
    matches = indices[..., np.newaxis] == np.arange(depth)
    return matches.astype(np.int32)
def getFeedDict(observation):
    """Wrap a single observation in a feed dict keyed by ``x``.

    ``x``, ``env`` and ``gym`` are module-level names that are not defined
    in this part of the file; ``x`` is presumably the input placeholder of
    the model (confirm against the calling notebook).

    Args:
        observation: One observation from ``env``.

    Returns:
        ``{x: [one_hot(observation)]}`` for a discrete observation space,
        ``{x: [observation]}`` for a box observation space. For any other
        space "ERR" is printed and ``None`` is returned.
    """
    observation_space = env.observation_space
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of the two supported space types.
    if isinstance(observation_space, gym.spaces.discrete.Discrete):
        return {x: [one_hot(observation)]}
    if isinstance(observation_space, gym.spaces.box.Box):
        return {x: [observation]}
    # Parenthesised so the line is valid on Python 2 and Python 3 alike.
    print("ERR")
    return None
class InitializeNewVariables:
    """Context manager that initialises only the variables created in its body.

    On entry the set of variables reported by ``tf.all_variables()`` is
    stored in ``self.temp``. On exit every variable that is not in that set
    is initialised through the module-level session ``sess``. ``__exit__``
    does not inspect its arguments, so this also happens when the body
    raised, and the exception is not suppressed.
    """

    def __enter__(self):
        # Snapshot of the variables that exist before the ``with`` body runs.
        already_present = tf.all_variables()
        self.temp = set(already_present)

    def __exit__(self, type, value, traceback):
        created_in_body = set(tf.all_variables()).difference(self.temp)
        init_op = tf.initialize_variables(created_in_body)
        sess.run(init_op)
def l2_dist(a, b):
    """Return ``tf.nn.l2_loss`` of the element-wise difference ``a - b``.

    NOTE: ``tf.nn.l2_loss`` is documented as ``sum(t ** 2) / 2``, so the
    result is half the squared Euclidean distance, not the distance itself.
    """
    difference = tf.sub(a, b)
    return tf.nn.l2_loss(difference)
def batch_norm(x):
    """Normalise ``x`` with the mean and variance computed along axis 0.

    No offset or scale is applied; 0.01 is passed as ``variance_epsilon``.
    """
    moments = tf.nn.moments(x, axes=[0], keep_dims=True)
    batch_mean, batch_variance = moments
    return tf.nn.batch_normalization(
        x,
        batch_mean,
        batch_variance,
        offset=None,
        scale=None,
        variance_epsilon=0.01,
    )
def l1_penalty():
    """Return the summed absolute values of everything in the "variables" collection.

    The result is a tensor of shape ``[1]``; it stays ``[0.]`` when the
    collection is empty.
    """
    per_variable_l1 = (
        tf.reduce_sum(tf.abs(variable))
        for variable in tf.get_collection("variables")
    )
    # Starting from a [0.] constant and adding one term at a time, left to
    # right, is exactly what the built-in sum does with a start value.
    return sum(per_variable_l1, tf.constant([0.]))
# print l1_penalty().eval() | |
def l2_penalty():
    """Return the sum of the L2 norms of everything in the "variables" collection.

    Each variable contributes ``sqrt(sum(variable ** 2))`` (the norm itself,
    not its square). The result is a tensor of shape ``[1]``; it stays
    ``[0.]`` when the collection is empty.
    """
    per_variable_norm = (
        tf.sqrt(tf.reduce_sum(tf.square(variable)))
        for variable in tf.get_collection("variables")
    )
    # Starting from a [0.] constant and adding one term at a time, left to
    # right, is exactly what the built-in sum does with a start value.
    return sum(per_variable_norm, tf.constant([0.]))
# print l2_penalty().eval() | |
### OpenAI Gym specific stuff ### | |
def bound_to_env_action_space(my_action):
    """Clamp ``my_action`` element-wise to ``env.action_space``'s ``low``/``high``.

    The lower bound is applied first (``tf.maximum``), then the upper bound
    (``tf.minimum``). ``env`` is a module-level name defined outside this
    part of the file.
    """
    raised_to_low = tf.maximum(my_action, env.action_space.low)
    return tf.minimum(raised_to_low, env.action_space.high)
def bound_to_env_observation_space(my_observation):
    """Clamp ``my_observation`` element-wise to ``env.observation_space``'s ``low``/``high``.

    The lower bound is applied first (``tf.maximum``), then the upper bound
    (``tf.minimum``). ``env`` is a module-level name defined outside this
    part of the file.
    """
    raised_to_low = tf.maximum(my_observation, env.observation_space.low)
    return tf.minimum(raised_to_low, env.observation_space.high)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment