## 开始训练

### 前提依赖

`pip install scikit-learn`

### 训练数据

```
[[5.344187740028914], [30.91441332291272]]
[[4.690797837330457], [17.989132245249227]]
[[3.06514407164054], [32.67390058378043]]
[[0.29136844635404446], [-15.046942990405128]]
[[2.7042454045721764], [-4.198779971237319]]
[[9.15496044375243], [54.50659423843143]]
[[4.323588254945952], [76.06219903136115]]
```

```#!/usr/bin/python
# -*- coding: UTF-8 -*-
import sys
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn import metrics
def curce_data(x,y,y_pred):
x=x.tolist()
y=y.tolist()
y_pred=y_pred.tolist()
results=zip(x,y,y_pred)
results=["{},{},{}".format(s[0][0],s[1][0],s[2][0]) for s in results ]
return results
with open(path) as f :
lines=[eval(line.strip()) for line in lines]
X,y=zip(*lines)
X=np.array(X)
y=np.array(y)
return X,y
#这里得到的数据情况。
#原始数据
# [[5.344187740028914], [30.91441332291272]]
# [[4.690797837330457], [17.989132245249227]]
# [[3.06514407164054], [32.67390058378043]]
# 得到的X_train:
# [[5.344187740028914],[4.690797837330457],[3.06514407164054]]
#得到的y_train
#[[30.91441332291272],[17.989132245249227],[32.67390058378043]]
#一个对象，它代表的线性回归模型，它的成员变量，就已经有了w，b. 刚生成w和b的时候 是随机的
model = LinearRegression()
#一调用这个函数，就会不停地找合适的w和b 直到误差最小
model.fit(X_train, y_train)
#打印W
print (model.coef_)
#打印b
print (model.intercept_)
#模型已经训练完毕,用模型看下在训练集的表现
y_pred_train = model.predict(X_train)
#sklearn 求解训练集的mse
# y_train 在训练集上 真实的y值
# y_pred_train 通过模型预测出来的y值
#计算  (y_train-y_pred_train)^2/n
train_mse=metrics.mean_squared_error(y_train, y_pred_train)
print ("训练集MSE:", train_mse)
#看下在测试集上的效果
y_pred_test = model.predict(X_test)
test_mse=metrics.mean_squared_error(y_test, y_pred_test)
print ("测试集MSE:",test_mse)
train_curve=curce_data(X_train,y_train,y_pred_train)
test_curve=curce_data(X_test,y_test,y_pred_test)
print ("推广mse差", test_mse-train_mse)
'''
with open("train_curve.csv","w") as f :
f.writelines("
".join(train_curve))
with open("test_curve.csv","w") as f :
f.writelines("
".join(test_curve))
'''```

---

## 开始训练：多特征线性回归

```# -*- encoding:utf-8 -*-
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_predict
from numpy import shape
from sklearn import metrics
import numpy as np
def extend_feature(x):
result=[x[0],x[0]]
result.extend(x[1:])
return result
#return [x[0],x[0]]
with open(path) as f :
lines=[eval(line.strip()) for line in lines]
X,y=zip(*lines)
X=[extend_feature(x) for x in X]
X=np.array(X)
y=np.array(y)
return X,y
#train_data里面的数据
#[[3.69311, 0.0, 18.1, 0.0, 0.713, 6.376, 88.4, 2.5671, 24.0, 666.0, 20.2, 391.43, 14.65], 17.7]
#[[0.06211, 40.0, 1.25, 0.0, 0.429, 6.49, 44.4, 8.7921, 1.0, 335.0, 19.7, 396.9, 5.98], 22.9]
#这里就是得到了x的数组，和对应y的数组
#x[[3.69311, 0.0, 18.1, 0.0, 0.713, 6.376, 88.4, 2.5671, 24.0, 666.0, 20.2, 391.43, 14.65],[0.06211, 40.0, 1.25, 0.0, 0.429, 6.49, 44.4, 8.7921, 1.0, 335.0, 19.7, 396.9, 5.98]]
#y[17.7,22.9]

model = LinearRegression()
#对于得到的x,y分别的数组进行训练
model.fit(X_train, y_train)
#得到对应的w,b
print (model.coef_)#打印w
print (model.intercept_)#打印b
#根据得到的模型，输入x，得到y
y_pred = model.predict(X_train)
#根据预测的数值和真实的数值求mse，mse越小的，模型训练的也就越好。w,b也就越准确
print ("MSE:", metrics.mean_squared_error(y_train, y_pred))
y_pred = model.predict(X_test)
print ("MSE:", metrics.mean_squared_error(y_test, y_pred))```