Im trying to write NN from scratch to better understand what is going on underneath Keras API. And now I have got problem with applying gradinets to my loss. I believe there is some problem with architecture and my understanding of TF. Basically line with grads returns Nones and hence code returns:
ValueError: No gradients provided for any variable: ['Variable:0', 'Variable:0', 'Variable:0', 'Variable:0', 'Variable:0', 'Variable:0'].
Here is my code. Its very straightforward but I want to do it simply before warping it into model class.
input_shape = x_train.shape[1]
n_hidden_1 = 32
n_hidden_2 = 8
output_shape = 1
epochs = 1
W_1 = tf.Variable(tf.random.normal([input_shape,n_hidden_1]))
W_2 = tf.Variable(tf.random.normal([n_hidden_1,n_hidden_2]))
W_output = tf.Variable(tf.random.normal([n_hidden_2,output_shape]))
B_1 = tf.Variable(tf.random.normal([n_hidden_1]))
B_2 = tf.Variable(tf.random.normal([n_hidden_2]))
B_output = tf.Variable(tf.random.normal([output_shape]))
var_list= [W_1, W_2, W_output, B_1, B_2, B_output]
opt = tf.keras.optimizers.SGD(learning_rate=0.1)
for epoch in range(epochs):
input_tensor = tf.convert_to_tensor(x_train, dtype=tf.float32)
size = input_tensor.shape[0]
labels = tf.convert_to_tensor(y_train, dtype=tf.float32)
labels = tf.reshape(labels, (size,1))
#1_layer
layer_1 = tf.matmul(input_tensor, W_1)
layer_1 = tf.add(layer_1, B_1)
layer_1 = tf.nn.relu(layer_1)
#2_layer
layer_2 = tf.matmul(layer_1, W_2)
layer_2 = tf.add(layer_2, B_2)
layer_2 = tf.nn.relu(layer_2)
#output layer
output = tf.matmul(layer_2, W_output)
output = tf.add(output, B_output)
with tf.GradientTape() as tape:
_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels, output))
grads = tape.gradient(_loss, var_list)
grads_and_vars = zip(grads, var_list)
optimizer.apply_gradients(grads_and_vars)
#output layer
output = tf.matmul(layer_2, W_output)
output = tf.add(output, B_output)
with tf.GradientTape() as tape:
_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels, output))
grads = tape.gradient(_loss, var_list)
grads_and_vars = zip(grads, var_list)
optimizer.apply_gradients(grads_and_vars)