SVM算法的核心是SMO算法的实现，首先对SMO算法过程进行实现，先对一些辅助函数进行定义：

``` 1 # 先定义一些辅助函数
2 # 选取第二变量函数
3 def select_J_rand(i, m):
4     j=i
5     while(j==i):
6         j = int(random.uniform(0, m))
7     return j
8
9 # 定义对α进行裁剪的函数
10 def clip_alpha(aj, H, L):
11     if aj > H:
12         aj=H
13     if L > aj:
14         aj = L
15     return aj```

```"""
Input：dataX, dataY, C(惩罚因子), toler（容忍度）, iter_num
Output: alpha、b
"""
def smo_simple(dataX, dataY, C, toler, iter_num):
dataMatrix = mat(dataX); labelMat = dataY.transpose()
# 初始化参数
b = 0; m, n = np.shape(dataMatrix)
alphas = mat(np.zeros((m, 1)))
iter = 0
# 当超过迭代次数停止
while iter < iter_num:
# 记录α1和α2变化次数
alphaPairsChanged = 0
for i in range(m):
# 计算f(xi),预测类别
fXi = float(np.multiply(alphas, labelMat).T * (dataMatrix * dataMatrix[i, :].T)) + b
# 计算误差
Ei = fXi - float(labelMat[i])
# 当不满足条件时，选取变量j，这里要判断α是否在[0,C]内，若超出范围则不再进行优化
if ((labelMat[i] * Ei < -toler) and (alphas[i] < C)) or ((labelMat[i]*Ei > toler) and alphas[i] > 0):
j = select_J_rand(i, m)
# 计算x2的预测值y2
fXj = float(np.multiply(alphas, labelMat).T * (dataMatrix * dataMatrix[j, :].T)) + b
Ej = fXj - float(labelMat[j])
alpha_I_old = alphas[i].copy()
alpha_J_old = alphas[j].copy()
if (labelMat[i] != labelMat[j]):
L = max(0, alphas[j] - alphas[i])
H = min(C, C + alphas[j] - alphas[i])
else:
L = max(0, alphas[i] + alphas[j] - C)
H = min(C, alphas[i] + alphas[j])
if L == H:
print("L == H")
continue
eta = 2.0 * dataMatrix[i, :] * dataMatrix[j, :].T - dataMatrix[i, :] * dataMatrix[i, :].T - dataMatrix[j, :] * dataMatrix[j, :].T
if eta > 0:
print("eta > 0")
continue
alphas[j] -= labelMat[j] * (Ej - Ei)/eta
alphas[j] = clip_alpha(alphas[j], H, L)
# 当α2不再变化
if (abs(alphas[j]-alpha_J_old) < 0.00001):
print("j not moving enough")
continue
# 更新α1
alphas[i] += labelMat[i] * labelMat[j] * (alpha_J_old - alphas[j])
# 计算b1和b2
b1 = b - Ei - labelMat[i] * (alphas[i] - alpha_I_old) * dataMatrix[i, :] * dataMatrix[i, :].T - labelMat[j] * (alphas[j] - alpha_J_old) * dataMatrix[i, :] * dataMatrix[j, :].T
b2 = b - Ej - labelMat[i] * (alphas[i] - alpha_I_old) * dataMatrix[i, :] * dataMatrix[j, :].T - labelMat[j] * (alphas[j] - alpha_J_old) * dataMatrix[j, :] * dataMatrix[j, :].T
if (alphas[i] > 0) and (alphas[i] < C):
b = b1
elif (alphas[j] > 0) and (alphas[j] < C):
b = b2
else:
b = (b1 + b2)/2
alphaPairsChanged += 1
if alphaPairsChanged == 0:
iter += 1
else:
iter = 0
print("iteration number: %d"%iter)
return b, alphas```

SMO算法具有一定的随机性，因此每次运行的结果不一定相同。上面就是一个简单的SMO算法的实现部分，对于小批量数据可以满足需求，但当数据量过于庞大时，上面的算法的效率将会很慢，这是因为在α的选择问题上，下面提供一种改进的SMO算法，改进的SMO算法会通过一个外循环选择第一个α的值，选择方法是在遍历所有样本（数据集）和非边界α中进行扫描，所谓非边界α是指那些不等于0或者C的α值，建立这些α值的列表，在列表中进行遍历，并在扫描时跳过不再改变的α进行遍历。下面是具体实现过程

``` 1 # 首先建立3个辅助函数用于对E进行缓存，以及1种用于存储数据的数据结构
2 # 存储变量的数据结构
3 class optStruct:
4     def __init__(self, dataX, dataY, C, toler):
5         self.X = dataX
6         self.Y = dataY
7         self.C = C
8         self.toler = toler
9         self.m = np.shape(dataX)[0]
10         self.alphas = np.mat(zeros((self.m, 1)))
11         self.b = 0
12         # cache第一列为有效性标志位，第二列为E值
13         self.eCache = np.mat(np.zeros((self.m, 2)))
14
15 # 计算E值并返回，由于频繁使用单独写成一个函数
16 def calcEk(oS, k):
17     fXk = float(np.multiply(oS.alphas, oS.labelMat).T * (oS.X * oS.X[k, :].T)) + oS.b
18     Ek = fXk - float(oS.labelMat[k])
19     return Ek
20
21 # 用于选择第二个α的值，保证每次优化采用最大的步长
22 def select_J(i, oS, Ei):
23     maxK = -1; maxDeltaE = 0; Ej = 0
24     oS.eCache[i] = [1, Ei]
25     validEcacheList = nonzero(oS.eCache[:, 0].A)[0]
26     if len(validEcacheList) > 1:
27         for k in validEcacheList:
28             if k == i:
29                 continue
30             Ek = calcEk(oS, k)
31             deltaE = abs(Ei - Ek)
32             # 选择变化最大的那个
33             if deltaE > maxDeltaE:
34                 maxK = k
35                 maxDeltaE = deltaE
36                 Ej = Ek
37         return maxK, Ej
38     else:
39         j = select_J_rand(i, oS.m)
40         Ej = calcEk(oS, j)
41     return j, Ej
42
43
44 def updateEk(oS, k):
45     Ek = calcEk(oS, k)
46     oS.eCache[k] = [1, Ek]```

``` 1 # 与simpleSMO一致，更新的alpha存入cache中
2 def innerL(i, oS):
3     Ei = calcEk(oS, i)
4     if ((oS.labelMat[i] * Ei < -oS.toler) and (oS.alphas[i] < oS.C)) or ((oS.labelMat[i] * Ei > oS.toler) and (oS.alphas[i] > 0)):
5         j, Ej = select_J(i, oS, Ei)
6         alpha_I_old = oS.alphas[i].copy()
7         alpha_J_old = oS.alphas[j].copy()
8         if oS.labelMat[i] != oS.labelMat[j]:
9             L = max(0, oS.alphas[j] - oS.alphas[i])
10             H = min(oS.C, oS.C+ oS.alphas[j] - oS.alphas[i])
11         else:
12             L = min(0, oS.alphas[i] + oS.alphas[j] - oS.C)
13             H = max(oS.C, oS.alphas[i] + oS.alphas[j])
14         if H == L:
15             return 0
16         eta = 2.0 * oS.X[i, :] * oS.X[j, :].T - oS.X[i, :] * oS.X[i, :].T - oS.X[j, :] * oS.X[j, :].T
17         if eta >= 0:
18             return 0
19         oS.alphas[j] -= oS.labelMat[j] * (Ei - Ej)/eta
20         oS.alphas[j] = clip_alpha(oS.alphas[j], H, L)
21         updateEk(oS, j)
22         if abs(oS.alphas[j] - alpha_J_old) < 0.00001:
23             return 0
24         oS.alphas[i] -= oS.labelMat[i] * oS.labelMat[j] * (oS.alphas[j] - alpha_J_old)
25         updateEk(oS, i)
26         b1 = oS.b - Ei - oS.labelMat[i] * (oS.alphas[i] - alpha_I_old) * oS.X[i, :] * oS.X[i, :].T - oS.labelMat[j] * (oS.alphas[j] - alpha_J_old) * oS.X[i, :] * oS.X[j, :].T
27         b2 = oS.b - Ej - oS.labelMat[i] * (oS.alphas[i] - alpha_I_old) * oS.X[i, :] * oS.X[j, :].T - oS.labelMat[j] * (oS.alphas[j] - alpha_J_old) * oS.X[j, :] * oS.X[j, :].T
28         if oS.alphas[i] > 0 and oS.alphas[i] < oS.C:
29             oS.b = b1
30         elif oS.alphas[j] > 0 and oS.alphas[j] < oS.C:
31             oS.b = b2
32         else:
33             os.b = (b1 + b2)/2
34         return 1
35     else:
36         return 0
37
38
39 def smoP(dataX, labelMat, C, toler, maxIter, kTup=('lin', 0)):
40     oS = optStruct(mat(dataX), mat(labelMat).transpose(), C, toler)
41     iter = 0
42     entireSet = True
43     alphaPairsChanged = 0
44     while (iter < maxIter) and alphaPairsChanged > 0 or entireSet:
45         alphaPairsChanged = 0
# 搜索第一个变量的值，采用两个方法交替进行的方式，利用entireSet变量控制
46         # 第一种遍历全体样本
47         if entireSet:
48             for i in range(oS.m):
49                 alphaPairsChanged += innerL(i, oS)
50                 iter += 1
51         # 第二种遍历非边界样本
52         else:
53             nonBoundIs = nonzero((oS.alphas.A > 0) * oS.alphas.A < C)[0]
54             for i in nonBoundIs:
55                 alphaPairsChanged += innerL(i, oS)
56             iter += 1
57         if entireSet:
58             entireSet = False
59         elif alphaPairsChanged == 0:
60             entireSet = True
61     return oS.alphas, oS.b```

```1 def calW(alphas, dataArr, classLabels):
2     X = mat(dataArr)
3     labelMat = mat(classLabels).transpose()
4     m, n = shape(X)
5     w = zeros((m, 1))
6     for i in range(m):
7         w += multiply(alphas[i] * labelMat[i], X[i, :])
8     return w```

```1 # 首先原先的数据结构中要计算核矩阵，将下述代码加入数据结构代码部分即可
2 self.K = mat(zeros(self.m, self.m))
3 for i in range(self.m):
4     self.K[:, i] = kernelTrans(self.X, self.X[i, :], self.kTup)```

``` 1 def kernelTrans(X, A, kTup):
2     m, n = shape(X)
3     K = mat(zeros((m, 1)))
4     if kTup['0'] == 'lin':
5         K = X * A.T
6     elif kTup[0] == 'rbf':
7         for j in range(m):
8             deltaRow = X[j, :] - A
9             K[j] = deltaRow * deltaRow.T
10         K = exp(K/(-1 * kTup[1] ** 2))
11     else:
12         raise NameError('没有定义核函数')
13     return K```

`　　然后需要在参数计算的函数中将对应的xi*xj替换为核函数即可：`

```1 # 首先是innerL中
2 eta = 2.0 * oS.K[i, j] - oS.K[i, i] - oS.K[j, j]
3 b1 = oS.b - Ei - oS.labelMat[i] * (oS.alphas[i] - alpha_I_old) * oS.K[i, i] - oS.labelMat[j] * (oS.alphas[j] - alpha_J_old) * oS.K[i, j]
4 b2 = oS.b - Ej - oS.labelMat[i] * (oS.alphas[i] - alpha_I_old) * oS.K[i, j] - oS.labelMat[j] * (oS.alphas[j] - alpha_J_old) * oS.K[j, j]
5
6 # 然后是calcEk
7 fXk = float(multiply(oS.alphas, oS.labelMat).T * oS.K[:, k] + oS.b)```

```1 def img2Vector(filename):
2     returnVec = zeros((1, 1024))
3     fr = open(filename)
4     for i in range(32):
6         for j in range(32):
7             returnVec[0, 32*i + j] = int(lineStr[j])
8     return returnVec```

``` 1 def loadImages(dir):
2     hwLabels = []
3     trainingFileList = os.listdir(dir)
4     m = len(trainingFileList)
5     for i in range(m):
6         fileNameStr = trainingFileList[i]
7         fileStr = fileNameStr.split('.')[0]
8         classNumStr = int(fileStr.split('_')[0])
# 只考虑二分类问题
9         if classNumStr == 9:
10             hwLabels.append(-1)
11         else:
12             hwLabels.append(1)
13         trainingMat[i, :] = img2Vector('%s/%s' % (dir, fileNameStr))
14     return trainingMat, hwLabels```

``` 1 def testDigits(kTup=('rbf', 10)):
# 获取数据
# 利用SMO算法求解出α和b
3     alphas, b = smoP(dataArr, labelArr, 200, 0.0001, 10000, kTup)
4     dataMat = mat(dataArr)
5     labelMat = mat(labelArr).transpose()
# 获取支持向量的索引
6     svInd = nonzero(alphas.A > 0)[0]
# 获取支持向量
7     svs = dataMat[svInd]
8     labelSv = labelMat[svInd]
9     m, n = shape(dataMat)
10     errorCount = 0
11     for i in range(m):
12         kernelEval = kernelTrans(svs, dataMat[i, :], kTup)
13         predict = kernelEval.T * multiply(labelSv, alphas[svInd]) + b
14         if sign(predict) != sign(labelArr[i]):
15             errorCount += 1
16     print('there are %d Support Vectors'%shape(svs)[0])
17     print('the error rate is %f' % (errorCount / (len(labelMat))))
19     test_dataMat = mat(test_dataArr)
20     test_labelMat = mat(test_labelArr).transpose()
21     m1, n1 = shape(test_dataMat)
22     test_errorCount = 0
23     for i in range(m1):
24         kernelEval = kernelTrans(svs, test_dataMat[i, :], kTup)
25         predict = kernelEval.T * multiply(labelSv, alphas[svInd]) + b
26         if sign(predict) != sign(test_labelArr[i]):
27             errorCount += 1
28     print('the error rate is %f' % (test_errorCount / (len(test_labelMat))))```

```1 from sklearn import svm

``` 1 model = svm.SVC(C=200, kernel='rbf', tol=1e-4, max_iter=-1, degree=3, gamma='auto_deprecated', coef0=0, shrinking=True,
2                 probability=False, cache_size=200, verbose=False, class_weight=None, decision_function_shape='ovr')
3 """
4 C: 惩罚因子，默认为1.0，C越大，相当于惩罚松弛变量，希望松弛变量接近0，即对误分类的惩罚增大，趋向于对训练集全分对的情况，这样对训练集测试时准确率很高，但泛化能力弱。C值小，对误分类的惩罚减小，允许容错，将他们当成噪声点，泛化能力较强。
5 tol: 容忍度阈值
6 max_iter: 迭代次数
7 kernel：核函数，包括:
8                     linear(线性核)：u*v
9                     poly(多项式核):(gamma * u * v + coef0)^degree
10                     rbf(高斯核): exp(-gamma|u-v|^2)
11                     sigmoid核: tanh(gamma*u*v + coef0)
12 degree: 多项式核中的维度
13 gamma： 核函数中的参数，默认为“auto”，选择1/n_features
14 coef： 多项式核和simoid核中的常数项，仅对这两个核函数有效
15 probability: 是否采用概率估计，默认为False
16 shrinking： 是否采用shrinking heuristic方法，默认为true
17 cache_size: 核函数的缓存大小，默认为200
18 verbose: 是否允许冗余输出
19 class_weight: 类别权重
20 decision_function_shape: 可以取'ovo'一对一和'ovr'一对其他
21 """```

```1 model.fit(mat(train_dataArr), mat(train_labelArr).transpose())
2 train_score = model.score(mat(train_dataArr), mat(train_labelArr).transpose())
3 print('训练集上的准确率为%s'%train_score)
4
5 test_dataArr, test_labelArr = loadImages('E:\资料\PythonML_Code\Charpter 5\data\\testDigits'.format(dir))
6 test_score = model.score(mat(test_dataArr), mat(test_labelArr).transpose())
7 print('测试集上的准确率为%f' % test_score)```