### 一、欠拟合和过拟合简介

#### 2、什幺是过拟合？

1）加载带有噪音的二分类数据集（训练集和验证集）；

2）使用不同的神经网络来演示欠拟合和过拟合；

3）过拟合应对法：早停法、权重衰减、丢弃法

### 二、加载带有噪音的二分类数据集

src\overfit\index.html

```<!DOCTYPE html>
<html lang="en">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document</title>
<body>
<script src="index.js"></script>
</body>
</html>```

src\overfit\index.js

```import * as tfvis from '@tensorflow/tfjs-vis';
import {getData} from './data';
const data = getData(200, 3);
console.log(data);
tfvis.render.scatterplot(
{ name: '训练数据' },
{
values: [
data.filter(p => p.label === 1),
data.filter(p => p.label === 0)
]
}
)
}```

```/**
*
* @param {*} numSamples 生成的样本的数量
* @param {*} variance 方差，变异，不一样的地方。它是用来控制生成的数据的噪音的，variance调得越大，生成的数据的噪音就越大
*/
export function getData(numSamples, variance) {
let points = [];
function genGauss(cx, cy, label) {
for (let i = 0; i < numSamples / 2; i++) {
let x = normalRandom(cx, variance);
let y = normalRandom(cy, variance);
points.push({x, y, label});
}
}
genGauss(2, 2, 1);
genGauss(-2, -2, 0);
return points;
}
/**
* 生成一个正态分布，也叫高斯分布
* @param {*} mean
* @param {*} variance
*/
function normalRandom(mean = 0, variance = 1) {
let v1, v2, s;
do {
v1 = 2 * Math.random() - 1;
v2 = 2 * Math.random() - 1;
s = v1 * v1 + v2 * v2;
} while (s > 1);
let result = Math.sqrt(-2 * Math.log(s) / s) * v1;
return mean + Math.sqrt(variance) * result;
}```

### 三、使用简单神经网络演示欠拟合

```import * as tf from '@tensorflow/tfjs';
import * as tfvis from '@tensorflow/tfjs-vis';
// import {getData} from './data';
import {getData} from '../xor/data';
window.onload = async () => {
// const data = getData(200, 3);
const data = getData(200);
console.log(data);
tfvis.render.scatterplot(
{ name: '训练数据' },
{
values: [
data.filter(p => p.label === 1),
data.filter(p => p.label === 0)
]
}
);
const model = tf.sequential();
units: 1,
activation: 'sigmoid',
inputShape: [2]
}));
model.compile({
loss: tf.losses.logLoss,
});
const inputs = tf.tensor(data.map(p => [p.x, p.y]));
const labels = tf.tensor(data.map(p => p.label));
await model.fit(inputs, labels, {
validationSplit: 0.2, // 从数据集里面分出20%的数据作为验证集
epochs: 200,
callbacks: tfvis.show.fitCallbacks(
{ name: '训练效果' },
['loss', 'val_loss'], // 要看到训练集和验证集上的损失
{ callbacks: ['onEpochEnd']}
)
});
}```

### 四、使用复杂神经网络演示过拟合

src\overfit\index2.html

```<!DOCTYPE html>
<html lang="en">
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document</title>
<body>
<script src="index2.js"></script>
</body>
</html>```

src\overfit\index2.js

```import * as tf from '@tensorflow/tfjs';
import * as tfvis from '@tensorflow/tfjs-vis';
import {getData} from './data';
window.onload = async () => {
const data = getData(200, 1);
console.log(data);
tfvis.render.scatterplot(
{ name: '训练数据' },
{
values: [
data.filter(p => p.label === 1),
data.filter(p => p.label === 0)
]
}
);
const model = tf.sequential();
units: 10,
activation: 'tanh',
inputShape: [2]
}));
units: 1,
activation: 'sigmoid'
}));
model.compile({
loss: tf.losses.logLoss,
});
const inputs = tf.tensor(data.map(p => [p.x, p.y]));
const labels = tf.tensor(data.map(p => p.label));
await model.fit(inputs, labels, {
validationSplit: 0.2, // 从数据集里面分出20%的数据作为验证集
epochs: 200,
callbacks: tfvis.show.fitCallbacks(
{ name: '训练效果' },
['loss', 'val_loss'], // 要看到训练集和验证集上的损失
{ callbacks: ['onEpochEnd']}
)
});
}```

### 五、过拟合应对方法

#### 过拟合的应对方法有：早停法、权重衰减、丢弃法。

TensorFlow.js提供了使用权重衰减法的API，即设置L2正则化。即在最复杂的隐藏层上加一个kernelRegularizer属性，即把：

```model.add(tf.layers.dense({
units: 10,
activation: 'tanh',
inputShape: [2]
}));```

```model.add(tf.layers.dense({
units: 10,
activation: 'tanh',
inputShape: [2],
kernelRegularizer: tf.regularizers.l2({ l2: 0.2 }) // l2: 1是将L2正则化率设置为1，它是一个超参数
}));```

```// 用丢弃法避免过拟合
rate: 0.9 // 丢弃率设置为0.9，意思是会随机丢弃90%的神经元
}));```