Neural network training
In this part of the series on building the simplest neural network, we will learn how to train it.
We will train the neural network to find a pattern in a series of numbers. For simplicity, these will be the numbers 0 and 1.
To train a neural network, we need a set of example problems together with their answers, and all the examples must come from the same task.
The training data looks like this:
Condition | Answer
1 0       | 1
0 0       | 0
0 1       | 0
You will find this task in almost every article about writing neural networks from scratch.
As a result of training, the neural network finds the logical connection between the condition and the answer. The pattern here is simple: the answer is the first number of the condition.
The condition that the trained network will then have to solve looks like this:
Condition | Answer
1 1       | 1
Training a neural network comes down to reducing the loss: the smaller the loss, the more accurate the network's answers.
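The code in this article never defines the loss explicitly, but the err = -2 * (answer - out_res) term in the training code below is the derivative of the squared error, so the implied loss is the mean squared error. As a minimal sketch (mse_loss is a hypothetical helper, not part of the article's files):

import numpy as np

def mse_loss(answers, outputs):
    # Mean squared error: the average of (answer - output) squared.
    # Its derivative with respect to a single output is -2 * (answer - output),
    # which is exactly the err term used in the training code below.
    return ((answers - outputs) ** 2).mean()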
To avoid complicated formulas, I will not explain the theory of training in detail; instead, I will give you code that is more or less self-explanatory and describe the functions needed to train the network.
For training, we need the derivative of the sigmoid.
You can read up on what a derivative is on Wikipedia; the derivative of the sigmoid looks like this: sig'(x) = sig(x) * (1 - sig(x)).
The Python implementation looks like this:
def deriv_sig(x):
    return sig(x) * (1 - sig(x))
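For example, deriv_sig(0) = sig(0) * (1 - sig(0)) = 0.5 * 0.5 = 0.25, which is the steepest point of the sigmoid.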
Add this code to the Math.py file.
We also need two constants for training:
rate - learning rate
count_learn - number of training repetitions
The larger count_learn is, the better the network learns, but the longer training takes. The rate determines how strongly the weights are adjusted at each step. Let's set them like this:
rate = 0.1
count_learn = 10000
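With these values the network goes through the training set 10,000 times; since our set contains 3 examples, that amounts to 30,000 weight updates in total.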
The training loop runs from 0 to count_learn; on each pass it goes through every training example, runs it through the network, and adjusts the weights and biases. The learn method looks like this:
def learn(self, inputs, answers):
    rate = 0.1           # learning rate
    count_learn = 10000  # number of passes over the training set
    for o in range(count_learn):
        for inputt, answer in zip(inputs, answers):
            # Forward pass: two hidden neurons, then the output neuron
            sum_n1 = self.n[0].w[0] * inputt[0] + self.n[0].w[1] * inputt[1] + self.n[0].b
            n1 = sig(sum_n1)
            sum_n2 = self.n[1].w[0] * inputt[0] + self.n[1].w[1] * inputt[1] + self.n[1].b
            n2 = sig(sum_n2)
            sum_n3 = self.n[2].w[0] * n1 + self.n[2].w[1] * n2 + self.n[2].b
            n3 = sig(sum_n3)
            out_res = n3
            # Derivative of the squared error with respect to the output
            err = -2 * (answer - out_res)
            err_rate = rate * err
            deriv_sig_n1 = deriv_sig(sum_n1)
            deriv_sig_n2 = deriv_sig(sum_n2)
            deriv_sig_n3 = deriv_sig(sum_n3)
            # Backward pass: gradient descent on every weight and bias
            self.n[0].w[0] -= err_rate * self.n[2].w[0] * deriv_sig_n3 * inputt[0] * deriv_sig_n1
            self.n[0].w[1] -= err_rate * self.n[2].w[0] * deriv_sig_n3 * inputt[1] * deriv_sig_n1
            self.n[0].b -= err_rate * self.n[2].w[0] * deriv_sig_n3 * deriv_sig_n1
            self.n[1].w[0] -= err_rate * self.n[2].w[1] * deriv_sig_n3 * inputt[0] * deriv_sig_n2
            self.n[1].w[1] -= err_rate * self.n[2].w[1] * deriv_sig_n3 * inputt[1] * deriv_sig_n2
            self.n[1].b -= err_rate * self.n[2].w[1] * deriv_sig_n3 * deriv_sig_n2
            self.n[2].w[0] -= err_rate * n1 * deriv_sig_n3
            self.n[2].w[1] -= err_rate * n2 * deriv_sig_n3
            self.n[2].b -= err_rate * deriv_sig_n3
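Each update line is just the chain rule written out by hand. As a sketch, using the variable names from the code above, the very first update decomposes like this:

# d(loss)/d(w) = d(loss)/d(out) * d(out)/d(n1) * d(n1)/d(w)
#   d(loss)/d(out) -> err = -2 * (answer - out_res)
#   d(out)/d(n1)   -> self.n[2].w[0] * deriv_sig_n3
#   d(n1)/d(w)     -> inputt[0] * deriv_sig_n1
grad = err * self.n[2].w[0] * deriv_sig_n3 * inputt[0] * deriv_sig_n1
self.n[0].w[0] -= rate * grad  # equivalent to the first update line above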
Let's add this method to the NeuronNet class. The full NeuronNet.py now looks like this:
import numpy as np
from Neuron import *
from Math import *  # sig and deriv_sig are defined in Math.py

class NeuronNet:
    def __init__(self):
        # Three neurons: two hidden and one output, each with 2 inputs
        self.n = []
        for i in range(3):
            self.n.append(Neuron(2))

    def activate(self, inputs):
        # Forward pass: feed the inputs to both hidden neurons,
        # then feed their outputs to the output neuron
        return self.n[2].activate(np.array([self.n[0].activate(inputs), self.n[1].activate(inputs)]))

    def learn(self, inputs, answers):
        rate = 0.1           # learning rate
        count_learn = 10000  # number of passes over the training set
        for o in range(count_learn):
            for inputt, answer in zip(inputs, answers):
                # Forward pass
                sum_n1 = self.n[0].w[0] * inputt[0] + self.n[0].w[1] * inputt[1] + self.n[0].b
                n1 = sig(sum_n1)
                sum_n2 = self.n[1].w[0] * inputt[0] + self.n[1].w[1] * inputt[1] + self.n[1].b
                n2 = sig(sum_n2)
                sum_n3 = self.n[2].w[0] * n1 + self.n[2].w[1] * n2 + self.n[2].b
                n3 = sig(sum_n3)
                out_res = n3
                # Derivative of the squared error with respect to the output
                err = -2 * (answer - out_res)
                err_rate = rate * err
                deriv_sig_n1 = deriv_sig(sum_n1)
                deriv_sig_n2 = deriv_sig(sum_n2)
                deriv_sig_n3 = deriv_sig(sum_n3)
                # Backward pass: gradient descent on every weight and bias
                self.n[0].w[0] -= err_rate * self.n[2].w[0] * deriv_sig_n3 * inputt[0] * deriv_sig_n1
                self.n[0].w[1] -= err_rate * self.n[2].w[0] * deriv_sig_n3 * inputt[1] * deriv_sig_n1
                self.n[0].b -= err_rate * self.n[2].w[0] * deriv_sig_n3 * deriv_sig_n1
                self.n[1].w[0] -= err_rate * self.n[2].w[1] * deriv_sig_n3 * inputt[0] * deriv_sig_n2
                self.n[1].w[1] -= err_rate * self.n[2].w[1] * deriv_sig_n3 * inputt[1] * deriv_sig_n2
                self.n[1].b -= err_rate * self.n[2].w[1] * deriv_sig_n3 * deriv_sig_n2
                self.n[2].w[0] -= err_rate * n1 * deriv_sig_n3
                self.n[2].w[1] -= err_rate * n2 * deriv_sig_n3
                self.n[2].b -= err_rate * deriv_sig_n3
And the full Math.py:
import numpy as np

def sig(x):
    return 1 / (1 + np.exp(-x))

def deriv_sig(x):
    return sig(x) * (1 - sig(x))
All that is left is main.py. First, add the training data to it:
learn_inputs = np.array([[1, 0], [0, 0], [0, 1]])
learn_answers = np.array([1, 0, 0])
Then start the training:
net.learn(learn_inputs, learn_answers)
And give the network a new condition to solve:
x = np.array([1, 1])
if net.activate(x) < 0.5:
    print("0")
else:
    print("1")
The full main.py:
import numpy as np
from NeuronNet import *

net = NeuronNet()

# Training data: the answer is the first number of each condition
learn_inputs = np.array([[1, 0], [0, 0], [0, 1]])
learn_answers = np.array([1, 0, 0])

# Train the network
net.learn(learn_inputs, learn_answers)

# Test it on a condition it has not seen; the expected answer is 1
x = np.array([1, 1])
if net.activate(x) < 0.5:
    print("0")
else:
    print("1")
Run the program:
python main.py
If everything works, the network finds the pattern and correctly solves the conditions, for example:

Condition | Answer
1 1       | 1
0 1       | 0
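To check the pattern on every possible condition at once, you could add a small loop like this to main.py (a hypothetical addition):

# Run the network on all four possible conditions
for x in ([0, 0], [0, 1], [1, 0], [1, 1]):
    print(x, "->", 1 if net.activate(np.array(x)) >= 0.5 else 0)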
That's it: we have written and trained the simplest neural network, and it correctly finds the pattern in the numbers.