QBoard » Artificial Intelligence & ML » AI and ML - Tensorflow » How to properly apply stochastic gradient descent to simple neural network?

How to properly apply stochastic gradient descent to simple neural network?

  •  

    I'm trying to write a neural network from scratch to better understand what is going on underneath the Keras API. And now I have a problem with applying gradients to my loss. I believe there is some problem with the architecture and my understanding of TF. Basically the line computing grads returns Nones, and hence the code raises:

    ValueError: No gradients provided for any variable: ['Variable:0', 'Variable:0', 'Variable:0', 'Variable:0', 'Variable:0', 'Variable:0'].
    

    Here is my code. It's very straightforward, but I want to get it working simply before wrapping it into a model class.

    # ---- network dimensions --------------------------------------------------
    input_shape = x_train.shape[1]   # number of input features
    n_hidden_1 = 32                  # units in first hidden layer
    n_hidden_2 = 8                   # units in second hidden layer
    output_shape = 1                 # single logit (binary classification)
    epochs = 1

    # Trainable parameters: weights and biases for two hidden layers + output.
    W_1 = tf.Variable(tf.random.normal([input_shape, n_hidden_1]))
    W_2 = tf.Variable(tf.random.normal([n_hidden_1, n_hidden_2]))
    W_output = tf.Variable(tf.random.normal([n_hidden_2, output_shape]))

    B_1 = tf.Variable(tf.random.normal([n_hidden_1]))
    B_2 = tf.Variable(tf.random.normal([n_hidden_2]))
    B_output = tf.Variable(tf.random.normal([output_shape]))

    var_list = [W_1, W_2, W_output, B_1, B_2, B_output]

    opt = tf.keras.optimizers.SGD(learning_rate=0.1)

    for epoch in range(epochs):

        input_tensor = tf.convert_to_tensor(x_train, dtype=tf.float32)
        size = input_tensor.shape[0]

        labels = tf.convert_to_tensor(y_train, dtype=tf.float32)
        labels = tf.reshape(labels, (size, 1))

        # BUG FIX: the forward pass must run INSIDE the GradientTape context.
        # The original built the network before entering the tape, so the tape
        # recorded no operations and tape.gradient() returned None for every
        # variable ("No gradients provided for any variable").
        with tf.GradientTape() as tape:
            # layer 1
            layer_1 = tf.nn.relu(tf.add(tf.matmul(input_tensor, W_1), B_1))
            # layer 2
            layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, W_2), B_2))
            # output layer — raw logits; the loss applies the sigmoid itself
            output = tf.add(tf.matmul(layer_2, W_output), B_output)

            _loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels, output))

        grads = tape.gradient(_loss, var_list)
        # BUG FIX: the original called the undefined name `optimizer`;
        # the SGD optimizer created above is bound to `opt`.
        # (The original also duplicated the whole output/tape/apply section
        # verbatim; the duplicate has been removed.)
        opt.apply_gradients(zip(grads, var_list))
      December 2, 2020 8:47 PM IST
    0
  • What is the objective of Gradient Descent?

    ![A parabolic function with two dimensions (x, y)](https://miro.medium.com/max/397/1*D-G0RuBASeMkZQRQZRHuwA.png)
    A parabolic function with two dimensions (x,y)

    Gradient Descent — the algorithm

     
     

    How to move down in steps?

    Stochastic Gradient Descent (SGD)

    Conclusion

      August 26, 2021 2:08 PM IST
    0
  • Well, my problem was that I didn't clearly understand how gradient calculation works in TF2. In TF1 it was fine to pass a tensor to minimize; in TF2 the forward computation must be executed inside a GradientTape context so the tape can record the operations to differentiate.

    Here is my code, which is still very simple but is working properly now.

    # Network hyperparameters for the working TF2 version.
    input_shape = x_train.shape[1]  # number of input features
    n_hidden_1 = 32  # units in first hidden layer
    n_hidden_2 = 8  # units in second hidden layer
    output_shape = 1  # single logit (binary classification)
    epochs = 100  # number of full-batch training iterations
    
    class model():
        """Minimal two-hidden-layer MLP built from raw TF2 variables.

        Note: the lowercase class name is kept for compatibility with the
        calling code below; conventional Python style would be `Model`.
        """

        def __init__(self, input_tensor, labels):
            # BUG FIX: the original took `input_tensor, lables` (typo) but
            # never stored them — `train()` and `loss()` silently read the
            # module-level globals instead. Store the data on the instance
            # so the class is self-contained. The call site passes these
            # positionally, so the rename is backward-compatible.
            self.input_tensor = input_tensor
            self.labels = labels

            # Trainable parameters: two hidden layers plus the output layer.
            self.W_1 = tf.Variable(tf.random.normal([input_shape, n_hidden_1]))
            self.W_2 = tf.Variable(tf.random.normal([n_hidden_1, n_hidden_2]))
            self.W_output = tf.Variable(tf.random.normal([n_hidden_2, output_shape]))
            self.B_1 = tf.Variable(tf.random.normal([n_hidden_1]))
            self.B_2 = tf.Variable(tf.random.normal([n_hidden_2]))
            self.B_output = tf.Variable(tf.random.normal([output_shape]))
            self.var_list = [self.W_1, self.W_2, self.W_output,
                             self.B_1, self.B_2, self.B_output]

        def train(self):
            """Forward pass; returns raw logits of shape (batch, output_shape)."""
            # layer 1
            layer_1 = tf.nn.relu(tf.add(tf.matmul(self.input_tensor, self.W_1), self.B_1))
            # layer 2
            layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, self.W_2), self.B_2))
            # output layer — logits only; the loss applies the sigmoid
            return tf.add(tf.matmul(layer_2, self.W_output), self.B_output)

        def loss(self, output):
            """Mean sigmoid cross-entropy between stored labels and logits."""
            return tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(self.labels, output))
    
    opt = tf.keras.optimizers.SGD(learning_rate=0.1)

    # Prepare the full-batch training data once, before the loop.
    input_tensor = tf.convert_to_tensor(x_train, dtype=tf.float32)
    size = input_tensor.shape[0]
    labels = tf.convert_to_tensor(y_train, dtype=tf.float32)
    labels = tf.reshape(labels, (size, 1))

    # BUG FIX: the original wrote `model = model(...)`, rebinding and shadowing
    # the class name — the class becomes inaccessible after instantiation.
    net = model(input_tensor, labels)

    for epoch in range(epochs):

        # Forward pass runs inside the tape so its ops are recorded.
        with tf.GradientTape() as tape:
            output = net.train()
            loss = net.loss(output)

        grads = tape.gradient(loss, net.var_list)
        opt.apply_gradients(zip(grads, net.var_list))
        print(loss)

     

      December 23, 2020 12:28 PM IST
    0