I'm currently coding a basic neural network that is supposed to compute XOR using backpropagation. However, it instead outputs the average of its target outputs (XOR returns {0,1,1,0}, so the average is 0.5).
I followed both of the following articles [1][2] and can't find my error. Another person reportedly had the same problem, but never found an answer.
Anyway, here's my code:
network.c
/*
 * Prepare a network for training.
 *
 * Sets the bias slot of the input and hidden layers to 1.0, fills both weight
 * matrices with values returned by rnd_double(-1, 1), and resets the
 * "previous delta" matrices that train_network() reads for its momentum term.
 *
 * Weight layout used by every function here: ithw[input][hidden] and
 * htow[hidden][output]; the extra row at index num_inputs / num_hidden holds
 * the bias weights.
 *
 * NOTE(review): assumes inputs/hidden have num_inputs+1 / num_hidden+1 slots
 * and that the matrices are indexable as [row][col] -- confirm against
 * network_create().
 */
void initialise_network(Network *network)
{
    assert(network != NULL);

    /* The slot just past the real units acts as the constant bias unit. */
    network->inputs[network->num_inputs] = 1.0;
    network->hidden[network->num_hidden] = 1.0;

    for (int i = 0; i < network->num_inputs + 1; i++) {
        for (int j = 0; j < network->num_hidden; j++) {
            network->ithw[i][j] = rnd_double(-1, 1);
            /* rnd_double(0, 0) presumably yields 0.0 -- TODO confirm. */
            network->delta_hidden[i][j] = rnd_double(0, 0);
            printf("ithw[%d][%d]: %f\n", i, j, network->ithw[i][j]);
        }
    }

    for (int i = 0; i < network->num_hidden + 1; i++) {
        for (int j = 0; j < network->num_outputs; j++) {
            network->htow[i][j] = rnd_double(-1, 1);
            network->delta_output[i][j] = rnd_double(0, 0);
        }
    }
}

/*
 * Run one forward pass.
 *
 * Copies num_inputs values from `inputs` into the network, then computes the
 * hidden and output activations as sigmoid(weighted sum), logging every
 * intermediate value.
 *
 * inputs:  at least network->num_inputs values; only read.
 * network: updated in place (inputs, hidden, outputs).
 */
void pass_forward(double* inputs, Network *network)
{
    log_info("pass_forward() !");

    printf("Inputs: \n");
    /*
     * Copy element by element: only the first num_inputs slots are written,
     * so the bias value stored at inputs[num_inputs] stays untouched.
     */
    for (int i = 0; i < network->num_inputs; i++) {
        network->inputs[i] = inputs[i];
        printf("%f, ", network->inputs[i]);
    }

    for (int i = 0; i < network->num_hidden; i++) {
        double sum = 0.0;

        /* The "+ 1" includes the bias input. */
        for (int j = 0; j < network->num_inputs + 1; j++) {
            printf("\n inputs[%d]: %f", j, network->inputs[j]);
            sum += network->inputs[j] * network->ithw[j][i];
            printf("\nithw[%d][%d]: %f", j, i, network->ithw[j][i]);
            printf("\n sum[%d]: %f", j, sum);
        }

        printf("\n hidden[%d]: %f", i, sum);
        network->hidden[i] = sigmoid(sum);
        printf("\n sigmoid(hidden[%d]): %f", i, network->hidden[i]);
    }

    for (int i = 0; i < network->num_outputs; i++) {
        double sum = 0.0;

        /* The "+ 1" includes the bias hidden unit. */
        for (int j = 0; j < network->num_hidden + 1; j++) {
            sum += network->hidden[j] * network->htow[j][i];
        }

        /* Store first, then log, so the log shows this pass's output
         * rather than the value left over from the previous pass. */
        network->outputs[i] = sigmoid(sum);
        printf("\n output[%d]: %f\n", i, network->outputs[i]);
    }
}
trainer_xor.c
/*
 * Perform one backpropagation step for the sample most recently fed through
 * pass_forward().
 *
 * Computes the output and hidden error terms, then updates both weight
 * matrices using network->learning_rate plus a momentum term based on the
 * previous update stored in network->delta_output / network->delta_hidden.
 *
 * target_output: expected value for each of the network->num_outputs outputs.
 * network:       weights and stored previous deltas are updated in place.
 *
 * If the scratch buffers cannot be allocated, an error is printed to stderr
 * and the network is left unchanged.
 */
void train_network(double *target_output, Network *network)
{
    const double momentum = 0.1;

    /*
     * Parenthesise the element count: "sizeof(double) * n + 1" would add a
     * single byte instead of one more double, and the hidden loop below
     * writes index num_hidden (the bias unit).
     */
    double *delta_hidden =
        malloc(sizeof *delta_hidden * (size_t)(network->num_hidden + 1));
    double *delta_output =
        malloc(sizeof *delta_output * (size_t)network->num_outputs);

    if (delta_hidden == NULL || delta_output == NULL) {
        fprintf(stderr, "train_network: out of memory\n");
        free(delta_hidden);
        free(delta_output);
        return;
    }

    printf("Inputs: %f, %f\n", network->inputs[0], network->inputs[1]);
    printf("Output: %f\n", network->outputs[0]);
    printf("Target Output: %f\n", target_output[0]);

    /* Output error term: out * (1 - out) * (target - out). */
    for (int i = 0; i < network->num_outputs; i++) {
        delta_output[i] = network->outputs[i] * (1.0 - network->outputs[i])
                        * (target_output[i] - network->outputs[i]);
        printf("delta_output: %f\n", delta_output[i]);
    }

    /*
     * Hidden error term, computed from the not-yet-updated htow weights.
     * The bias unit (hidden == 1.0) always gets a zero term.
     */
    for (int i = 0; i < network->num_hidden + 1; i++) {
        double error = 0.0;

        for (int j = 0; j < network->num_outputs; j++) {
            error += network->htow[i][j] * delta_output[j];
        }

        delta_hidden[i] = network->hidden[i] * (1.0 - network->hidden[i]) * error;
        printf("hidden[%d]: %f\n", i, network->hidden[i]);
        printf("delta_hidden[%d]: %f\n", i, delta_hidden[i]);
    }

    /* Hidden -> output weights. */
    for (int i = 0; i < network->num_outputs; i++) {
        for (int j = 0; j < network->num_hidden + 1; j++) {
            double delta = network->learning_rate * delta_output[i] * network->hidden[j];

            network->htow[j][i] += delta;
            network->htow[j][i] += momentum * network->delta_output[j][i];
            network->delta_output[j][i] = delta; /* remembered for the next step */
            printf("htow[%d][%d]: %f\n", j, i, network->htow[j][i]);
        }
    }

    /* Input -> hidden weights. */
    for (int i = 0; i < network->num_hidden; i++) {
        for (int j = 0; j < network->num_inputs + 1; j++) {
            double delta = network->learning_rate * delta_hidden[i] * network->inputs[j];

            network->ithw[j][i] += delta;
            network->ithw[j][i] += momentum * network->delta_hidden[j][i];
            network->delta_hidden[j][i] = delta; /* remembered for the next step */
            printf("ithw[%d][%d]: %f\n", j, i, network->ithw[j][i]);
        }
    }

    /* The scratch buffers are local to this step; release them every call. */
    free(delta_hidden);
    free(delta_output);

    /* Blocks until a character is available on stdin, pausing after each step. */
    getchar();
}

/*
 * Train the trainer's network: `training_times` sweeps over the training set,
 * each sample getting one forward pass followed by one training step.
 *
 * The requested count is also recorded in trainer->training_times.
 */
void do_training(int training_times, Trainer *trainer)
{
    trainer->training_times = training_times;

    for (int i = 0; i < training_times; i++) {
        for (int j = 0; j < trainer->train_set_size; j++) {
            pass_forward(trainer->train_set[j], trainer->network);
            /* NOTE(review): who owns the array returned by get_target_values()
             * is not visible here -- confirm it does not need freeing. */
            train_network(get_target_values(trainer->train_set[j], trainer->train_set_size), trainer->network);
        }
    }
}
main.c
/*
 * Entry point: build a 2-2-1 network (arguments passed to network_create),
 * initialise it, wrap it in a trainer and run the training sweeps.
 */
int main()
{
    enum { TRAINING_SWEEPS = 300 };

    initialize_utils();

    Network *net = network_create(2, 2, 1);
    initialise_network(net);

    Trainer *xor_trainer = trainer_create(net);
    do_training(TRAINING_SWEEPS, xor_trainer);

    return 0;
}
I train my network 300 times. The train_set is as follows:
[0][0] = 0 [0][1] = 0 [1][0] = 1 [1][1] = 0 [2][0] = 0 [2][1] = 1 [3][0] = 1 [3][1] = 1
For more information, here are my outputs at various points in training:
Gen 0:
=== Gen 0! === [INFO] (src/network.c:100) pass_forward() ! Inputs: 0.000000, 0.000000, inputs[0]: 0.000000 ithw[0][0]: 0.316492 sum[0]: 0.000000 inputs[1]: 0.000000 ithw[1][0]: -0.028962 sum[1]: 0.000000 inputs[2]: 1.000000 ithw[2][0]: -0.915344 sum[2]: -0.915344 hidden[0]: -0.915344 sigmoid(hidden[0]): 0.285908 inputs[0]: 0.000000 ithw[0][1]: 0.089068 sum[0]: 0.000000 inputs[1]: 0.000000 ithw[1][1]: 0.176854 sum[1]: 0.000000 inputs[2]: 1.000000 ithw[2][1]: 0.958716 sum[2]: 0.958716 hidden[1]: 0.958716 sigmoid(hidden[1]): 0.722865 output[0]: 0.000000 train_network()! Inputs: 0.000000, 0.000000 Output: 0.625586 Target Output: 0.000000 delta_output: -0.146530 hidden[0]: 0.285908 delta_hidden[0]: 0.002849 hidden[1]: 0.722865 delta_hidden[1]: 0.007222 hidden[2]: 1.000000 delta_hidden[2]: -0.000000 htow[0][0]: -0.107817 htow[1][0]: -0.277817 htow[2][0]: 0.674453 ithw[0][0]: 0.316492 ithw[1][0]: -0.028962 ithw[2][0]: -0.914489 ithw[0][1]: 0.089068 ithw[1][1]: 0.176854 ithw[2][1]: 0.960883
Gen 1:
=== Gen 1! === [INFO] (src/network.c:100) pass_forward() ! Inputs: 0.000000, 0.000000, inputs[0]: 0.000000 ithw[0][0]: 0.316628 sum[0]: 0.000000 inputs[1]: 0.000000 ithw[1][0]: -0.028659 sum[1]: 0.000000 inputs[2]: 1.000000 ithw[2][0]: -0.914866 sum[2]: -0.914866 hidden[0]: -0.914866 sigmoid(hidden[0]): 0.286005 inputs[0]: 0.000000 ithw[0][1]: 0.089247 sum[0]: 0.000000 inputs[1]: 0.000000 ithw[1][1]: 0.177256 sum[1]: 0.000000 inputs[2]: 1.000000 ithw[2][1]: 0.959846 sum[2]: 0.959846 hidden[1]: 0.959846 sigmoid(hidden[1]): 0.723091 output[0]: 0.625643 train_network() Inputs: 0.000000, 0.000000 Output: 0.613576 Target Output: 0.000000 delta_output: -0.145479 hidden[0]: 0.286005 delta_hidden[0]: 0.003118 hidden[1]: 0.723091 delta_hidden[1]: 0.007844 hidden[2]: 1.000000 delta_hidden[2]: -0.000000 htow[0][0]: -0.118963 htow[1][0]: -0.304226 htow[2][0]: 0.639053 ithw[0][0]: 0.316718 ithw[1][0]: -0.028568 ithw[2][0]: -0.913841 ithw[0][1]: 0.089431 ithw[1][1]: 0.177440 ithw[2][1]: 0.962383
Gen 10:
=== Gen 10! === [INFO] (src/network.c:100) pass_forward() ! Inputs: 0.000000, 0.000000, inputs[0]: 0.000000 ithw[0][0]: 0.317382 sum[0]: 0.000000 inputs[1]: 0.000000 ithw[1][0]: -0.025525 sum[1]: 0.000000 inputs[2]: 1.000000 ithw[2][0]: -0.911555 sum[2]: -0.911555 hidden[0]: -0.911555 sigmoid(hidden[0]): 0.286682 inputs[0]: 0.000000 ithw[0][1]: 0.089229 sum[0]: 0.000000 inputs[1]: 0.000000 ithw[1][1]: 0.180321 sum[1]: 0.000000 inputs[2]: 1.000000 ithw[2][1]: 0.967483 sum[2]: 0.967483 hidden[1]: 0.967483 sigmoid(hidden[1]): 0.724618 output[0]: 0.547804 Inputs: 0.000000, 0.000000 Output: 0.539370 Target Output: 0.000000 delta_output: -0.134006 hidden[0]: 0.286682 delta_hidden[0]: 0.004474 hidden[1]: 0.724618 delta_hidden[1]: 0.010913 hidden[2]: 1.000000 delta_hidden[2]: -0.000000 htow[0][0]: -0.176218 htow[1][0]: -0.440373 htow[2][0]: 0.456051 ithw[0][0]: 0.317521 ithw[1][0]: -0.025386 ithw[2][0]: -0.910074 ithw[0][1]: 0.089499 ithw[1][1]: 0.180592 ithw[2][1]: 0.971027
Gen 100:
=== Gen 100! === [INFO] (src/network.c:100) pass_forward() ! Inputs: 0.000000, 0.000000, inputs[0]: 0.000000 ithw[0][0]: 0.295665 sum[0]: 0.000000 inputs[1]: 0.000000 ithw[1][0]: -0.014208 sum[1]: 0.000000 inputs[2]: 1.000000 ithw[2][0]: -0.929113 sum[2]: -0.929113 hidden[0]: -0.929113 sigmoid(hidden[0]): 0.283105 inputs[0]: 0.000000 ithw[0][1]: 0.023758 sum[0]: 0.000000 inputs[1]: 0.000000 ithw[1][1]: 0.161541 sum[1]: 0.000000 inputs[2]: 1.000000 ithw[2][1]: 0.932629 sum[2]: 0.932629 hidden[1]: 0.932629 sigmoid(hidden[1]): 0.717608 output[0]: 0.512934 Inputs: 0.000000, 0.000000 Output: 0.505055 Target Output: 0.000000 delta_output: -0.126251 hidden[0]: 0.283105 delta_hidden[0]: 0.004697 hidden[1]: 0.717608 delta_hidden[1]: 0.011935 hidden[2]: 1.000000 delta_hidden[2]: -0.000000 htow[0][0]: -0.195365 htow[1][0]: -0.496565 htow[2][0]: 0.365162 ithw[0][0]: 0.295813 ithw[1][0]: -0.014059 ithw[2][0]: -0.927556 ithw[0][1]: 0.024074 ithw[1][1]: 0.161856 ithw[2][1]: 0.936526