# 6.1 Example: Learning XOR - GBC Book - Chapter 6 - pp. 166 to 171
# Some parts are inspired by the blog post
# "Solving XOR with a Neural Network in TensorFlow" by Stephen OMAN
# https://github.com/StephenOman/TensorFlowExamples/blob/master/xor%20nn/xor_nn.py
# Activations: ReLU (hidden) + sigmoid (binary classification output); MSE loss function

import tensorflow as tf
import time
import numpy as np

# The next step is to set up placeholders to hold the input data.
# TensorFlow will automatically fill them with the data when we run the network.
# In our XOR problem, we have 4 training examples, each with 2 features: X => [4,2]
# There are also 4 expected outputs, each with just 1 value (either a 0 or a 1): Y => [4,1]
X = tf.placeholder(tf.float32, shape=[4,2], name='X')
Y = tf.placeholder(tf.float32, shape=[4,1], name='Y')

# Set up the parameters for the network. These are called Variables in TensorFlow.
# Variables will be modified by TensorFlow during the training steps.

# Weights: random values drawn from a truncated normal distribution (mean = 0, std = 1)
W = tf.Variable(tf.truncated_normal([2,2]), name="W")  # hidden-layer weights, shape [2,2]
w = tf.Variable(tf.truncated_normal([2,1]), name="w")  # output-layer weights, shape [2,1]

# Biases, initialized to zeros. Note that they are given one row per training example
# here; with broadcasting, shapes [2] and [1] would be the more conventional choice.
c = tf.Variable(tf.zeros([4,2]), name="c")  # hidden-layer biases, shape [4,2]
b = tf.Variable(tf.zeros([4,1]), name="b")  # output-layer biases, shape [4,1]

# Set up the hidden layer with a ReLU activation function: h = relu(X*W + c)
with tf.name_scope("hidden_layer") as scope:
    h = tf.nn.relu(tf.add(tf.matmul(X, W), c))

# Set the output with a sigmoid function: y_estimated = sigmoid(h*w + b)
with tf.name_scope("output") as scope:
    y_estimated = tf.sigmoid(tf.add(tf.matmul(h, w), b))

# Cost function: mean squared error.
# tf.reduce_mean computes the mean of elements across dimensions of a tensor.
with tf.name_scope("loss") as scope:
    loss = tf.reduce_mean(tf.squared_difference(y_estimated, Y))

# For better results with a binary classifier, use cross entropy with a sigmoid
# (see the sketch just below).
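# Note that tf.nn.sigmoid_cross_entropy_with_logits expects the raw, pre-sigmoid
# logits (h*w + b), not y_estimated, which has already been squashed by the sigmoid.
# A minimal sketch of that variant, kept commented out like the other alternative
# losses in this script (the extra `logits` tensor is introduced here only for
# illustration and is not used elsewhere):
# with tf.name_scope("output") as scope:
#     logits = tf.add(tf.matmul(h, w), b)
#     y_estimated = tf.sigmoid(logits)
# with tf.name_scope("loss") as scope:
#     loss = tf.reduce_mean(
#         tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))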
# A naive direct implementation of the loss function:
# n_instances = X.get_shape().as_list()[0]
# loss = tf.reduce_sum(tf.pow(y_estimated - Y, 2)) / n_instances

# In case of problems with the gradient (exploding or vanishing gradient), perform gradient clipping:
# n_instances = X.get_shape().as_list()[0]
# loss = tf.reduce_sum(tf.pow(tf.clip_by_value(y_estimated, 1e-10, 1.0) - Y, 2)) / n_instances

# TensorFlow ships with several different training algorithms.
# We are going to use the gradient descent algorithm:
# 0.01 is the learning rate, and the goal is to minimize the loss.
with tf.name_scope("train") as scope:
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

# Training data: the four XOR input pairs and their expected outputs
INPUT_XOR = [[0,0],[0,1],[1,0],[1,1]]
OUTPUT_XOR = [[0],[1],[1],[0]]

# Initializes the global variables in the graph
init = tf.global_variables_initializer()

# A Session object encapsulates the environment in which Operation objects are
# executed and Tensor objects are evaluated.
sess = tf.Session()

# Write a graph log, which can be visualized through TensorBoard
# (python -m tensorboard.main --logdir=./logs/xor_logs) and a browser
# using the URL localhost:6006.
writer = tf.summary.FileWriter("./logs/xor_logs", sess.graph)

# TensorFlow runs a model inside a session, which it uses to maintain the state of
# the variables as they are passed through the network we have set up.
# So the first step in that session is to initialise all the Variables from above.
# This step allocates values to the various Variables in accordance with how we set them up.
sess.run(init)

t_start = time.perf_counter()  # time.clock() was removed in Python 3.8

for epoch in range(100001):
    # Each time the training step is executed, the values in the dictionary feed_dict
    # are loaded into the placeholders that we set up at the beginning.
    # As the XOR problem is relatively simple, each epoch contains the entire training set INPUT_XOR.
    sess.run(train_step, feed_dict={X: INPUT_XOR, Y: OUTPUT_XOR})
    if epoch % 10000 == 0:
        print("_"*80)
        print('Epoch: ', epoch)
        print('  y_estimated:')
        for element in sess.run(y_estimated, feed_dict={X: INPUT_XOR, Y: OUTPUT_XOR}):
            print('    ', element)
        print('  W:')
        for element in sess.run(W):
            print('    ', element)
        print('  c:')
        for element in sess.run(c):
            print('    ', element)
        print('  w:')
        for element in sess.run(w):
            print('    ', element)
        print('  b:')
        for element in sess.run(b):
            print('    ', element)
        print('  loss:', sess.run(loss, feed_dict={X: INPUT_XOR, Y: OUTPUT_XOR}))

t_end = time.perf_counter()
print("_"*80)
print('Elapsed time ', t_end - t_start)
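# A quick closing check, a minimal sketch beyond the original example: threshold the
# sigmoid outputs at 0.5 (the usual convention for a sigmoid output) to get hard 0/1
# predictions, print them next to their inputs, then flush the TensorBoard log and
# release the session's resources.
print('Predictions after training:')
final_outputs = sess.run(y_estimated, feed_dict={X: INPUT_XOR, Y: OUTPUT_XOR})
for inputs, output in zip(INPUT_XOR, final_outputs):
    print('  ', inputs, '=>', 1 if output[0] > 0.5 else 0)

writer.close()
sess.close()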