import numpy as np
import random
class Network(object):
    """A simple feedforward neural network trained with mini-batch
    stochastic gradient descent and backpropagation."""
    def __init__(self, sizes):
        """Arguments:
            sizes {list} -- number of neurons per layer, e.g. [2, 3, 1];
                biases and weights are drawn from a standard normal
                distribution, and the input layer has neither
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Arguments:
            a {ndarray} -- input column vector

        Returns:
            [ndarray] -- the network's output for input ``a``
        """
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a
    def SGD(self, training_data, epochs, mini_batch_size, learn_rate,
            test_data=None):
        """Train the network using mini-batch stochastic gradient descent.

        Arguments:
            training_data {list} -- list of (x, y) training pairs
            epochs {int} -- number of training epochs
            mini_batch_size {int} -- number of examples per mini-batch
            learn_rate {float} -- learning rate

        Keyword Arguments:
            test_data {list} -- test set; if given, the network is
                evaluated on it after each epoch (default: {None})
        """
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            # Shuffle, then slice the training data into mini-batches.
            random.shuffle(training_data)
            mini_batches = [training_data[k:k + mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, learn_rate)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))
    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step using the examples in
        ``mini_batch``.

        Arguments:
            mini_batch {list} -- list of (x, y) training pairs
            eta {float} -- learning rate
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w - (eta / len(mini_batch)) * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta / len(mini_batch)) * nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Backpropagation for a single training pair.

        Arguments:
            x {ndarray} -- input column vector
            y {ndarray} -- desired output column vector

        Returns:
            [tuple(list, list)] -- (nabla_b, nabla_w), the layer-by-layer
                gradients of the cost with respect to biases and weights
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Forward pass: record every weighted input z and activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # Backward pass: output-layer error, then propagate it back.
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Arguments:
            test_data {list} -- list of (x, label) test pairs

        Returns:
            [int] -- number of correctly predicted examples
        """
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for x, y in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Derivative of the quadratic cost with respect to the output
        activations.

        Arguments:
            output_activations {ndarray} -- predicted output
            y {ndarray} -- target output

        Returns:
            [ndarray] -- element-wise error, prediction minus target
        """
        return (output_activations - y)
def sigmoid(z):
    """The sigmoid activation function."""
    return 1.0 / (1.0 + np.exp(-z))
def sigmoid_prime(z):
    """Derivative of the sigmoid function.

    Arguments:
        z {ndarray} -- weighted input

    Returns:
        [ndarray] -- value of the sigmoid derivative at ``z``
    """
    return sigmoid(z) * (1 - sigmoid(z))
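# The sketch below is an addition, not part of the original listing: it
# sanity-checks backprop on one example by comparing its gradient with a
# centered finite-difference estimate of the quadratic cost
# C = 0.5 * ||feedforward(x) - y||**2, whose derivative with respect to
# the output activations is exactly cost_derivative above. The helper
# name and the eps value are illustrative choices.
def check_gradient(net, x, y, eps=1e-5):
    """Compare backprop's gradient for one weight with a numeric one."""
    _nabla_b, nabla_w = net.backprop(x, y)

    def cost():
        return 0.5 * np.sum((net.feedforward(x) - y) ** 2)

    # Perturb a single first-layer weight and re-evaluate the cost.
    w = net.weights[0]
    original = w[0, 0]
    w[0, 0] = original + eps
    cost_plus = cost()
    w[0, 0] = original - eps
    cost_minus = cost()
    w[0, 0] = original  # restore the weight
    numeric = (cost_plus - cost_minus) / (2 * eps)
    print("backprop: {0:.6g}  numeric: {1:.6g}".format(
        nabla_w[0][0, 0], numeric))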
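# Usage sketch (also an addition): trains the network on synthetic data
# so the file can be run end to end. The layer sizes, epoch count,
# mini-batch size, and learning rate are arbitrary illustrative values,
# not settings from the original text.
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    def make_example():
        # Toy task: is the sum of a 4-dimensional input positive?
        x = rng.standard_normal((4, 1))
        label = int(x.sum() > 0)
        y = np.zeros((2, 1))  # one-hot target, as backprop expects
        y[label] = 1.0
        return x, y, label

    training_data = []
    for _ in range(500):
        x, y, _label = make_example()
        training_data.append((x, y))

    # evaluate() compares argmax(feedforward(x)) with an integer label,
    # so test pairs carry the label itself, not a one-hot vector.
    test_data = []
    for _ in range(100):
        x, _y, label = make_example()
        test_data.append((x, label))

    net = Network([4, 8, 2])
    x0, y0 = training_data[0]
    check_gradient(net, x0, y0)
    net.SGD(training_data, epochs=5, mini_batch_size=10,
            learn_rate=3.0, test_data=test_data)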