1.1 Z-Score标准化

```python
import random

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import torch.nn.functional as F
from mpl_toolkits.mplot3d import Axes3D
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset, random_split
from torch.utils.tensorboard import SummaryWriter
```

```# 回归类数据集创建函数
def tensorGenReg(num_examples=1000, w=[2, -1, 1], bias=True, delta=0.01, deg=1):
    """Create a regression dataset.

    :param num_examples: number of samples to generate
    :param w: coefficient vector; when bias is True its last entry is the intercept
    :param bias: whether the model includes an intercept
    :param delta: scale of the random perturbation added to the labels
    :param deg: degree of the polynomial relation between features and labels
    :return: generated feature tensor and label tensor
    """
    if bias:
        num_inputs = len(w) - 1                                                      # number of real features (last entry of w is the intercept)
        features_true = torch.randn(num_examples, num_inputs)                        # feature tensor without the all-ones column
        w_true = torch.tensor(w[:-1]).reshape(-1, 1).float()                         # coefficients of the variables
        b_true = torch.tensor(w[-1]).float()                                         # intercept
        if num_inputs == 1:                                                          # with a single feature, matrix multiplication cannot be used
            labels_true = torch.pow(features_true, deg) * w_true + b_true
        else:
            labels_true = torch.mm(torch.pow(features_true, deg), w_true) + b_true
        features = torch.cat((features_true, torch.ones(len(features_true), 1)), 1)  # append an all-ones column to the feature tensor
        labels = labels_true + torch.randn(size=labels_true.shape) * delta
    else:
        num_inputs = len(w)
        features = torch.randn(num_examples, num_inputs)
        w_true = torch.tensor(w).reshape(-1, 1).float()
        if num_inputs == 1:
            labels_true = torch.pow(features, deg) * w_true
        else:
            labels_true = torch.mm(torch.pow(features, deg), w_true)
        labels = labels_true + torch.randn(size=labels_true.shape) * delta
    return features, labels
# 常用数据处理类
# 适用于封装自定义数据集的类
class GenData(Dataset):
    """Dataset wrapper around a pair of feature and label tensors."""

    def __init__(self, features, labels):
        self.features = features   # feature tensor
        self.labels = labels       # label tensor
        self.lens = len(features)  # number of samples

    def __getitem__(self, index):
        # Return the (features, label) pair of one sample.
        return self.features[index, :], self.labels[index]

    def __len__(self):
        return self.lens
"""数据封装、切分和加载函数：

:param features：输入的特征
:param labels: 数据集标签张量
:param batch_size：数据加载时的每一个小批数据量
:param rate: 训练集数据占比
:return：加载好的训练集和测试集
"""
data = GenData(features, labels)
num_train = int(data.lens * 0.7)
num_test = data.lens - num_train
data_train, data_test = random_split(data, [num_train, num_test])
class Sigmoid_class3(nn.Module):
    """Three-hidden-layer sigmoid network with optional BatchNorm.

    BN_model=None disables BatchNorm; 'pre' normalises each linear output
    before the activation; 'post' normalises each activation output before
    the next linear layer.
    """

    def __init__(self, in_features=2, n_hidden1=4, n_hidden2=4, n_hidden3=4, out_features=1, BN_model=None):
        super(Sigmoid_class3, self).__init__()
        self.linear1 = nn.Linear(in_features, n_hidden1)
        self.normalize1 = nn.BatchNorm1d(n_hidden1)
        self.linear2 = nn.Linear(n_hidden1, n_hidden2)
        self.normalize2 = nn.BatchNorm1d(n_hidden2)
        self.linear3 = nn.Linear(n_hidden2, n_hidden3)
        self.normalize3 = nn.BatchNorm1d(n_hidden3)
        self.linear4 = nn.Linear(n_hidden3, out_features)
        self.BN_model = BN_model

    def forward(self, x):
        if self.BN_model is None:
            p1 = torch.sigmoid(self.linear1(x))
            p2 = torch.sigmoid(self.linear2(p1))
            p3 = torch.sigmoid(self.linear3(p2))
            out = self.linear4(p3)
        elif self.BN_model == 'pre':
            # BatchNorm applied before the activation function
            p1 = torch.sigmoid(self.normalize1(self.linear1(x)))
            p2 = torch.sigmoid(self.normalize2(self.linear2(p1)))
            p3 = torch.sigmoid(self.normalize3(self.linear3(p2)))
            out = self.linear4(p3)
        elif self.BN_model == 'post':
            # BatchNorm applied after the activation function
            p1 = torch.sigmoid(self.linear1(x))
            p2 = torch.sigmoid(self.linear2(self.normalize1(p1)))
            p3 = torch.sigmoid(self.linear3(self.normalize2(p2)))
            out = self.linear4(self.normalize3(p3))
        return out
"""mse计算函数

:param net: 模型
:return：根据输入的数据，输出其MSE计算结果
"""
X = data[:][0]                            # 还原数据的特征
y = data[:][1]                            # 还原数据的标签
yhat = net(X)
return F.mse_loss(yhat, y)
def fit(net, criterion, optimizer, batchdata, epochs=3, cla=False):
    """Train a model.

    :param net: model to train
    :param criterion: loss function
    :param optimizer: optimization algorithm
    :param batchdata: training data (iterable of (X, y) batches)
    :param epochs: number of passes over the data
    :param cla: whether this is a classification problem
    """
    for epoch in range(epochs):
        for X, y in batchdata:
            if cla:
                y = y.flatten().long()          # classification targets must be integer class indices
            yhat = net.forward(X)
            loss = criterion(yhat, y)
            # BUG FIX: clear stale gradients before backward; without this,
            # gradients accumulate across every step and epoch.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
def model_train_test(model,
                     train_data,
                     test_data,
                     num_epochs=20,
                     criterion=nn.MSELoss(),
                     optimizer=optim.SGD,
                     lr=0.03,
                     cla=False,
                     eva=mse_cal):
    """Train a model while tracking its train/test error per epoch.

    :param model: model to train
    :param train_data: training data loader
    :param test_data: test data loader
    :param num_epochs: number of training epochs
    :param criterion: loss function
    :param optimizer: optimizer class (instantiated internally)
    :param lr: learning rate
    :param cla: whether this is a classification model
    :param eva: evaluation function applied after every epoch
    :return: lists of train and test evaluation results, one entry per epoch
    """
    # containers for the per-epoch evaluation results
    train_l = []
    test_l = []
    # Create the optimizer once so its state persists across epochs
    # (the original rebuilt it every iteration).
    opt = optimizer(model.parameters(), lr=lr)
    for epoch in range(num_epochs):
        model.train()
        # BUG FIX: the original passed the loop counter as `epochs`, so epoch 0
        # trained nothing and total work grew quadratically; run one pass per epoch.
        fit(net=model,
            criterion=criterion,
            optimizer=opt,
            batchdata=train_data,
            epochs=1,
            cla=cla)
        model.eval()
        train_l.append(eva(train_data, model).detach())
        test_l.append(eva(test_data, model).detach())
    return train_l, test_l
"""观察各层参数取值和梯度的小提琴图绘图函数。

:param model：观察对象（模型）
:return: 对应att的小提琴图
"""
vp = []
for i, m in enumerate(model.modules()):
if isinstance(m, nn.Linear):
else:
vp_x = m.weight.detach().reshape(-1, 1).numpy()
vp_y = np.full_like(vp_x, i)
vp_a = np.concatenate((vp_x, vp_y), 1)
vp.append(vp_a)
vp_r = np.concatenate((vp), 0)
ax = sns.violinplot(y = vp_r[:, 0], x = vp_r[:, 1])
ax.set(xlabel='num_hidden', title=att)

class tanh_class2(nn.Module):
    """Two-hidden-layer tanh network with optional BatchNorm.

    BN_model=None disables BatchNorm; 'pre' normalises each linear output
    before the activation; 'post' normalises each activation output before
    the next linear layer.
    """

    def __init__(self, in_features=2, n_hidden1=4, n_hidden2=4, out_features=1, BN_model=None):
        super(tanh_class2, self).__init__()
        self.linear1 = nn.Linear(in_features, n_hidden1)
        self.normalize1 = nn.BatchNorm1d(n_hidden1)
        self.linear2 = nn.Linear(n_hidden1, n_hidden2)
        self.normalize2 = nn.BatchNorm1d(n_hidden2)
        self.linear3 = nn.Linear(n_hidden2, out_features)
        self.BN_model = BN_model

    def forward(self, x):
        if self.BN_model is None:
            p1 = torch.tanh(self.linear1(x))
            p2 = torch.tanh(self.linear2(p1))
            out = self.linear3(p2)
        elif self.BN_model == 'pre':
            # BatchNorm applied before the activation function
            p1 = torch.tanh(self.normalize1(self.linear1(x)))
            p2 = torch.tanh(self.normalize2(self.linear2(p1)))
            out = self.linear3(p2)
        elif self.BN_model == 'post':
            # BatchNorm applied after the activation function
            p1 = torch.tanh(self.linear1(x))
            p2 = torch.tanh(self.linear2(self.normalize1(p1)))
            out = self.linear3(self.normalize2(p2))
        return out

# classification dataset creation function
def tensorGenCla(num_examples=500, num_inputs=2, num_class=3, deg_dispersion=[4, 2], bias=False):
    """Create a classification dataset.

    :param num_examples: number of samples per class
    :param num_inputs: number of features
    :param num_class: total number of label classes
    :param deg_dispersion: two-element list controlling dispersion: the first entry is
        the reference spacing between class means, the second is the standard deviation
        of each class's features.
    :param bias: whether to append an intercept column for logistic-regression modelling
    :return: feature tensor (2-D float) and label tensor (2-D long)
    """
    cluster_l = torch.empty(num_examples, 1)                         # template shape for each class's label tensor
    mean_ = deg_dispersion[0]                                        # reference value for each class's feature mean
    std_ = deg_dispersion[1]                                         # standard deviation of each class's features
    lf = []                                                          # container for each class's feature tensor
    ll = []                                                          # container for each class's label tensor
    k = mean_ * (num_class - 1) / 2                                  # offset centring the class means around zero (note: minus k below, not plus)

    for i in range(num_class):
        data_temp = torch.normal(i * mean_ - k, std_, size=(num_examples, num_inputs))  # features of class i
        lf.append(data_temp)
        labels_temp = torch.full_like(cluster_l, i)                                     # labels of class i
        ll.append(labels_temp)

    features = torch.cat(lf).float()
    labels = torch.cat(ll).long()

    if bias:
        features = torch.cat((features, torch.ones(len(features), 1)), 1)               # append an all-ones intercept column
    return features, labels

def Z_ScoreNormalization(data):
    """Column-wise Z-Score standardisation: (x - mean) / std for each feature.

    :param data: 2-D tensor of samples by features
    :return: standardised tensor with (per torch's default, unbiased) unit std per column
    """
    stdDf = data.std(0)    # per-column standard deviation
    meanDf = data.mean(0)  # per-column mean
    normSet = (data - meanDf) / stdDf
    return normSet

2.1 Z-Score建模实验

```#设置随机种子
torch.manual_seed(420)
#创建最高项为2的多项式回归数据集
features,labels=tensorGenReg(w=[2,-1],bias=False,deg=2)
features_norm=Z_ScoreNormalization(features)
#进行数据集切分与加载

```#设置随机种子
torch.manual_seed(420)
#关键参数
lr=0.03
num_epochs=40
#实例化模型
sigmoid_model3=Sigmoid_class3()
sigmoid_model3_norm=Sigmoid_class3()
#进行Xavier初始化
for m in sigmoid_model3.modules():
if isinstance(m,nn.Linear):
nn.init.xavier_uniform_(m.weight)

for m in sigmoid_model3_norm.modules():
if isinstance(m,nn.Linear):
nn.init.xavier_uniform_(m.weight)

#sigmoid_model3模型训练
train_l,test_l=model_train_test(sigmoid_model3
,num_epochs=num_epochs
,criterion=nn.MSELoss()
,optimizer=optim.SGD
,lr=lr
, cla=False
,eva=mse_cal
)
#sigmoid_model3_norm模型训练
train_l_norm,test_l_norm=model_train_test(sigmoid_model3_norm
,num_epochs=num_epochs
,criterion=nn.MSELoss()
,optimizer=optim.SGD
,lr=lr
,cla=False
,eva=mse_cal
)
plt.plot(list(range(num_epochs)),train_l,label='train_mse')
plt.plot(list(range(num_epochs)),train_l_norm,label='train_norm_mse')
plt.legend(loc=1)```

三 Z-Score数据归一化的局限

Z-Score标准化并不是为深度学习算法量身设计的数据归一化方法，在实际神经网络建模过程中，Z-Score的使用仍然存在很多局限，具体来说主要有以下两点。

3.1 Zero-Centered特性消失

```#设置随机种子
torch.manual_seed(420)
#学习率
lr=0.03
#实例化模型
tanh_model2_norm1=tanh_class2()
tanh_model2_norm2=tanh_class2()
#进行Xavier初始化
for m in tanh_model2_norm1.modules():
if isinstance(m,nn.Linear):
nn.init.xavier_uniform_(m.weight)

for m in tanh_model2_norm2.modules():
if isinstance(m,nn.Linear):
nn.init.xavier_uniform_(m.weight)

#tanh_model2模型训练
train_l,test_l=model_train_test(tanh_model2_norm1
,num_epochs=5
,criterion=nn.MSELoss()
,optimizer=optim.SGD
,lr=lr
,cla=False
,eva=mse_cal
)
#tanh_model2_norm模型训练
train_l_norm,test_l_norm=model_train_test(tanh_model2_norm2
,num_epochs=40
,criterion=nn.MSELoss()
,optimizer=optim.SGD
,lr=lr
,cla=False
,eva=mse_cal
)

`weights_vp(tanh_model2_norm2,att='grad')`

四 输入数据调整保证梯度平稳

Batch Normalization已经是被验证的、行之有效的模型优化手段。

4.1 归一化方法与数据分布的相互独立

```# set the random seed
torch.manual_seed(420)
# create a two-class classification dataset with well-separated clusters
features,labels=tensorGenCla(num_class=2,deg_dispersion=[6,2])
# inspect its distribution
plt.scatter(features[:,0],features[:,1],c=labels)```

`features`

```# apply Z-Score standardisation to the features and display the result
f=Z_ScoreNormalization(features)
f```

```# compare the raw features (left) with their Z-Score-standardised version (right)
plt.subplot(121)
plt.scatter(features[:,0],features[:,1],c=labels)
plt.title('features distribution')
plt.subplot(122)
plt.scatter(f[:,0],f[:,1],c=labels)
plt.title('f distribution')```