a和b的值需要通过梯度下降算法进行优化，从而得到自变量与因变量之间拟合效果最好的回归直线。有了这条回归线，我们就可以对任意输入x预测对应的y值。以上就是构建典型的（即确定性的）线性回归算法的步骤。

### 加载和预处理数据

import pandas as pd
# NOTE(review): `auto_data` is not defined anywhere in the visible code;
# presumably it is a DataFrame loaded with pandas (e.g. via pd.read_csv)
# before this point. Confirm and add the loading step.
auto_data.head()  # preview the first rows of the dataset

import matplotlib.pyplot as plt
import seaborn as sns
# Pairwise correlation between the dataset's columns, rendered as an
# annotated heatmap so strongly related features are easy to spot.
corr_df = auto_data.corr()
sns.heatmap(data=corr_df, annot=True, cmap="YlGnBu")
plt.show()

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
# Single-feature regression: predict `mpg` from `weight`.
x = auto_data['weight']
y = auto_data['mpg']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=5
)

# Use one scaler per variable and fit each on the training split only.
# Re-fitting a single shared scaler on every array rescales the test split by
# its own min/max, which puts it on a different scale than the data the model
# is trained on; the test split must reuse the training-set parameters.
x_scaler = preprocessing.MinMaxScaler()
y_scaler = preprocessing.MinMaxScaler()

# MinMaxScaler expects 2-D input, hence the reshape to a single column.
x_train_minmax = x_scaler.fit_transform(x_train.values.reshape(-1, 1))
y_train_minmax = y_scaler.fit_transform(y_train.values.reshape(-1, 1))
x_test_minmax = x_scaler.transform(x_test.values.reshape(-1, 1))
y_test_minmax = y_scaler.transform(y_test.values.reshape(-1, 1))

# Backward-compatible alias for code that still refers to the old name.
min_max_scaler = y_scaler

### 基于TensorFlow的确定性线性回归

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import MeanSquaredError
# Deterministic baseline: a single Dense unit on a single input feature,
# trained with mean squared error.
model = Sequential()
model.add(Dense(units=1, input_shape=(1,)))
model.compile(
    optimizer=RMSprop(learning_rate=0.01),
    loss=MeanSquaredError(),
)

# verbose=False silences per-epoch logging; the returned object keeps the
# loss history that is plotted afterwards.
history = model.fit(
    x_train_minmax,
    y_train_minmax,
    epochs=100,
    verbose=False,
)

def plot_loss(history):
    """Plot the training loss stored in a fit-history object.

    Args:
        history: Object whose ``history['loss']`` entry holds the sequence of
            loss values to draw (here, the value returned by ``model.fit``).
    """
    plt.plot(history.history['loss'], label='loss')
    plt.xlabel('Epoch')
    plt.ylabel('Error')
    plt.legend()
    plt.grid(True)


plot_loss(history)
# Render the loss curve now, like the other plotting sections do, so it does
# not end up on the same figure as the next plot when run as a script.
plt.show()

### 基于TensorFlow Probability的概率线性回归

# Shell command, not Python: run it in a terminal (in a notebook cell, prefix it with "!" or "%").
pip install tensorflow_probability

#### 针对不确定性建立概率线性回归模型

import tensorflow_probability as tfp
import tensorflow as tf
# Short aliases for the TensorFlow Probability sub-modules used below.
tfd = tfp.distributions
tfpl = tfp.layers

# The Dense layer emits two values per input: the first becomes the mean of a
# Normal distribution, the second is passed through softplus (to keep it
# positive) and becomes its standard deviation.
aleatoric_layers = [
    Dense(units=1 + 1, input_shape=(1,)),
    tfpl.DistributionLambda(
        lambda t: tfd.Normal(
            loc=t[..., :1],
            scale=tf.math.softplus(t[..., 1:]),
        )
    ),
]
model = Sequential(aleatoric_layers)

#### 负对数似然作为成本函数

def negative_log_likelihood(y_true, y_pred):
    """Return the negative log-likelihood of ``y_true`` under ``y_pred``.

    Args:
        y_true: Observed target values.
        y_pred: Predicted distribution; any object exposing
            ``log_prob(value)``.

    Returns:
        ``-y_pred.log_prob(y_true)``, suitable as a loss to minimise.
    """
    log_likelihood = y_pred.log_prob(y_true)
    return -log_likelihood

#### 随机不确定性概率线性回归模型的训练与预测结果

# Train the probabilistic model by minimising the negative log-likelihood of
# the training targets under the predicted distribution.
model.compile(
    loss=negative_log_likelihood,
    optimizer=RMSprop(learning_rate=0.01),
)
history = model.fit(
    x_train_minmax,
    y_train_minmax,
    epochs=200,
    verbose=False,
)

# Calling the model yields a distribution object; draw one sample from it for
# every test input and compare the samples with the real test targets.
y_model = model(x_test_minmax)
y_sample = y_model.sample()

plt.scatter(x_test_minmax, y_test_minmax, label='test data', alpha=0.5)
plt.scatter(
    x_test_minmax,
    y_sample,
    label='model sample',
    color='green',
    alpha=0.5,
)
plt.xlabel('Weight')
plt.ylabel('MPG')
plt.legend()
plt.show()

# Mean of the predicted distribution and a band of two standard deviations
# on either side of it.
y_mean = y_model.mean()
y_sd = y_model.stddev()
y_mean_m2sd = y_mean - 2 * y_sd
y_mean_p2sd = y_mean + 2 * y_sd

plt.scatter(x_test_minmax, y_test_minmax, alpha=0.4, label='data')
# Raw strings: '\m', '\p' and '\s' are invalid escape sequences in ordinary
# string literals and trigger warnings on recent Python versions. The text
# passed to matplotlib is unchanged.
plt.plot(x_test_minmax, y_mean, color='red', alpha=0.8, label=r'model $\mu$')
plt.plot(x_test_minmax, y_mean_m2sd, color='green', alpha=0.8,
         label=r'model $\mu \pm 2 \sigma$')
plt.plot(x_test_minmax, y_mean_p2sd, color='green', alpha=0.8)
plt.xlabel('Weight')
plt.ylabel('MPG')
plt.legend()
plt.show()

#### 建立随机和认知不确定性的概率线性回归模型

def prior(kernel_size, bias_size, dtype=None):
    """Build the prior over the weights of a ``DenseVariational`` layer.

    The prior is a Normal with mean 0 and standard deviation 1 for each of
    the ``kernel_size + bias_size`` weights. It contains no variable layer,
    so its parameters are constants.

    Args:
        kernel_size: Number of kernel weights.
        bias_size: Number of bias weights.
        dtype: Accepted for signature compatibility; not used.

    Returns:
        A ``Sequential`` model whose output is the prior distribution.
    """
    n = kernel_size + bias_size
    return Sequential([
        # The lambda ignores its input: the distribution is always the same.
        tfpl.DistributionLambda(lambda t: tfd.Independent(
            tfd.Normal(loc=tf.zeros(n), scale=tf.ones(n))))
    ])

def posterior(kernel_size, bias_size, dtype=None):
    """Build the variational posterior over a ``DenseVariational`` layer's weights.

    A ``VariableLayer`` holds ``2 * n`` values (``n = kernel_size +
    bias_size``): the first ``n`` are used as the means and the last ``n``,
    passed through softplus to keep them positive, as the standard deviations
    of a Normal distribution over the weights.

    Args:
        kernel_size: Number of kernel weights.
        bias_size: Number of bias weights.
        dtype: Data type forwarded to the variable layer.

    Returns:
        A ``Sequential`` model whose output is the posterior distribution.
    """
    n = kernel_size + bias_size
    return Sequential([
        tfpl.VariableLayer(2 * n, dtype=dtype),
        tfpl.DistributionLambda(lambda t: tfd.Independent(
            tfd.Normal(loc=t[..., :n],
                       scale=tf.nn.softplus(t[..., n:]))))
    ])

# Probabilistic model with distributions over both the weights
# (DenseVariational) and the output (Normal with learned mean and scale).
model = Sequential([
    tfpl.DenseVariational(
        units=1 + 1,
        make_prior_fn=prior,
        make_posterior_fn=posterior,
        # Weight the KL term by the number of examples the model is actually
        # trained on (the training split), not the size of the full dataset.
        kl_weight=1 / x_train.shape[0],
    ),
    tfpl.DistributionLambda(
        lambda t: tfd.Normal(
            loc=t[..., :1],
            scale=tf.math.softplus(t[..., 1:]),
        )
    ),
])

#### 随机不确定性和认知不确定性概率线性回归模型的训练和预测结果

# Same loss and optimiser as before, trained for 500 epochs.
model.compile(
    loss=negative_log_likelihood,
    optimizer=RMSprop(learning_rate=0.01),
)
history = model.fit(
    x_train_minmax,
    y_train_minmax,
    epochs=500,
    verbose=False,
)

plt.scatter(x_test_minmax, y_test_minmax, marker='.', alpha=0.8, label='data')

# Evaluate the model ten times and overlay the results: the variational layer
# samples its weights on each call, so the spread between the curves shows
# the uncertainty in the model parameters.
for i in range(10):
    y_model = model(x_test_minmax)
    y_mean = y_model.mean()
    y_stddev = y_model.stddev()  # computed once per pass and reused below
    y_mean_m2sd = y_mean - 2 * y_stddev
    y_mean_p2sd = y_mean + 2 * y_stddev

    # Label only the first pass so the legend has one entry per curve type.
    # Raw strings avoid the invalid '\m', '\p', '\s' escape sequences.
    first_pass = i == 0
    plt.plot(x_test_minmax, y_mean, color='red', alpha=0.8,
             label=r'model $\mu$' if first_pass else None)
    plt.plot(x_test_minmax, y_mean_m2sd, color='green', alpha=0.8,
             label=r'model $\mu \pm 2 \sigma$' if first_pass else None)
    plt.plot(x_test_minmax, y_mean_p2sd, color='green', alpha=0.8)

plt.xlabel('Weight')
plt.ylabel('MPG')
plt.legend()
plt.show()