$J(w) = \frac{1}{2} \sum_{i=1}^{n}\left(y^i - \phi(z^i)\right)^2$

$\text{注意：}w\text{ 为向量，}w_j\text{ 为向量 }w\text{ 中的某一分量}\\w := w + \Delta w\\\Delta w = -\eta\nabla J(w)\\\frac{\partial J}{\partial w_j} = \frac{\partial }{\partial w_j} \frac 12 \sum_i(y^i-\phi(z^i))^2 \\= \frac 12 \frac{\partial }{\partial w_j} \sum_i(y^i-\phi(z^i))^2 \\= \frac 12 \sum_i2(y^i-\phi(z^i)) \frac{\partial }{\partial w_j}(y^i-\phi(z^i))\\= \sum_i(y^i-\phi(z^i))\frac{\partial }{\partial w_j}\Big(y^i-\sum_k w_k x_k^i\Big)\\=\sum_i(y^i-\phi(z^i))(-x_j^i)\\=-\sum_i(y^i-\phi(z^i))x_j^i\\\text{所以 }\Delta w_j = -\eta\frac{\partial J}{\partial w_j}=\eta\sum_i(y^i-\phi(z^i))x_j^i$

class AdalineGD(object):
    """ADAptive LInear NEuron classifier trained with batch gradient descent.

    Parameters
    ----------
    eta : float
        Learning rate (between 0.0 and 1.0).
    n_iter : int
        Passes over the training dataset.

    Attributes
    ----------
    w_ : 1d-array
        Weights after fitting; ``w_[0]`` is the bias term.
    cost_ : list
        Halved sum of squared errors recorded in every epoch.
    """

    def __init__(self, eta=0.01, n_iter=10):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        """Fit training data.

        :param X: {array-like}, shape=[n_samples, n_features]
            Training vectors.
        :param y: array-like, shape=[n_samples]
            Target values.
        :return:
            self: object
        """
        # Accept plain (nested) lists as well as ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)

        self.w_ = np.zeros(1 + X.shape[1])  # one extra slot for the bias w_0
        self.cost_ = []
        for _ in range(self.n_iter):
            output = self.net_input(X)
            errors = y - output
            # Batch update: every sample contributes to a single step per epoch.
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            # The cost is computed from the errors measured before this
            # epoch's weight update.
            self.cost_.append((errors ** 2).sum() / 2.0)
        return self

    def net_input(self, X):
        """Calculate the net input ``X . w_[1:] + w_[0]``."""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, X):
        """Compute the linear activation (identical to the net input)."""
        return self.net_input(X)

    def predict(self, X):
        """Return the class label (1 or -1) after the unit step."""
        return np.where(self.activation(X) >= 0.0, 1, -1)

# Two side-by-side panels sharing the 'Epochs' x-axis label; only the axis
# labels are set here, no data series are drawn in this snippet.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
for _panel, _ylabel in zip(ax, ('log(Sum-squared-error)', 'Sum-squared-error')):
    _panel.set_xlabel('Epochs')
    _panel.set_ylabel(_ylabel)
plt.show()

$x_j' = \frac{x_j-\mu_j}{\sigma_j}$

# Standardize the first two feature columns: subtract the column mean and
# divide by the column standard deviation. X itself is left untouched.
X_std = np.copy(X)
for _col in (0, 1):
    _feature = X[:, _col]
    X_std[:, _col] = (_feature - _feature.mean()) / _feature.std()

# Train Adaline on the standardized features.
ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(X_std, y)

# plot_decision_region is defined outside this snippet; presumably it draws
# the classifier's decision surface over the two features -- TODO confirm.
plot_decision_region(X_std, y, classifier=ada)
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
# A single show() is enough; the duplicated second call was redundant.
plt.show()