一起读《动手学深度学习（PyTorch版）》- 多项式回归：欠拟合、过拟合

LitchiCheng · 发表于2024-10-27 15:15

一起读《动手学深度学习（PyTorch版）》- 多项式回归：欠拟合、过拟合 [复制链接]

邀请：@tagetage @littleshrimp @okhxyyo @freebsder 参与回复

本帖最后由 LitchiCheng 于 2024-10-27 15:15 编辑

多项式回归

将多项式的每一项由 x^i 缩放为 x^i / i!（也就是 x 的 i 次幂除以 i 的阶乘），这样可以避免 i 很大时 x^i 带来的特别大的数值，如下图可以对比

import matplotlib.pyplot as plt
import math

# Compare the raw power i**num with its factorial-scaled form i**num / num!
# over the integers 0..999.
num = 3
x = range(1000)
scale = math.factorial(num)  # constant for the whole sweep
y = [value ** num for value in x]
y1 = [value ** num / scale for value in x]
for curve in (y, y1):
    plt.plot(x, curve)
plt.show()

3阶多项式拟合

100个训练样本，100个验证样本

设定多项式的权重，3阶多项式，有4个权重值，经过400次训练

import torch
import torchvision
from torch.utils import data
from torchvision import transforms
import matplotlib.pyplot as plt
from torch import nn
import numpy as np
import math

def get_dataloader_workers():
    """Return the worker count passed to ``DataLoader(num_workers=...)``."""
    worker_count = 6
    return worker_count

def accurancy(y_hat, y):
    """Count the predictions in ``y_hat`` that match the targets ``y``.

    When ``y_hat`` has more than one dimension and more than one column it is
    treated as per-class scores and reduced with ``argmax`` along axis 1;
    otherwise its values are compared with ``y`` directly.

    Returns:
        The number of matching entries as a Python float (a count, not a
        ratio).
    """
    has_class_scores = y_hat.ndim > 1 and y_hat.shape[1] > 1
    predictions = y_hat.argmax(axis=1) if has_class_scores else y_hat
    # Element-wise boolean tensor: True wherever prediction and target agree.
    matches = predictions.type(y.dtype) == y
    # Cast the booleans to y's dtype so they can be summed into a count.
    return float(matches.type(y.dtype).sum())

class Accumulator:
    """Keep running float sums for a fixed number of quantities."""

    def __init__(self, n) -> None:
        # One float slot per tracked quantity, all starting at zero.
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each positional value (converted with ``float``) to its slot.

        Slots and values are paired with ``zip``, so the stored list ends up
        as long as the shorter of the two.
        """
        updated = []
        for running_total, increment in zip(self.data, args):
            updated.append(running_total + float(increment))
        self.data = updated

    def reset(self):
        """Set every slot back to zero, keeping the current slot count."""
        slot_count = len(self.data)
        self.data = [0.0] * slot_count

    def __getitem__(self, idx):
        """Return the running sum stored at ``idx``."""
        return self.data[idx]

def evaluate_accurancy(net, data_iter):
    """Return the fraction of matching predictions of ``net`` over ``data_iter``.

    ``data_iter`` must yield ``(X, y)`` pairs. An ``nn.Module`` is switched to
    evaluation mode first, and no gradients are recorded while predicting.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    # Slot 0: number of matching predictions, slot 1: number of targets seen.
    totals = Accumulator(2)
    with torch.no_grad():
        for inputs, targets in data_iter:
            n_matching = accurancy(net(inputs), targets)
            totals.add(n_matching, targets.numel())
    n_correct, n_seen = totals[0], totals[1]
    return n_correct / n_seen

def train_epoch_ch3(net, train_iter, loss, updater):
    """Train ``net`` for one pass over ``train_iter`` and report the metrics.

    Args:
        net: Callable model; an ``nn.Module`` is switched to training mode.
        train_iter: Iterable yielding ``(X, y)`` mini-batches.
        loss: Callable ``loss(y_hat, y)`` returning a tensor of losses; it is
            reduced here with ``mean()`` or ``sum()`` before ``backward()``.
        updater: Either a ``torch.optim.Optimizer`` or a callable taking the
            batch size (``X.shape[0]``) that applies the parameter update.

    Returns:
        A ``(mean_loss, accurancy_ratio)`` tuple, both divided by the number
        of target elements seen in the epoch. Both are NaN when
        ``train_iter`` yields no batches.
    """
    if isinstance(net, torch.nn.Module):
        print("is instance nn.Module")
        net.train()
    # Slots: summed loss, number of matching predictions, number of targets.
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # Custom updaters receive the batch size so they can normalise
            # the summed gradient themselves.
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accurancy(y_hat, y), y.numel())
    n_seen = metric[2]
    if n_seen == 0:
        # Nothing was iterated: there is no meaningful average to report.
        return float("nan"), float("nan")
    return metric[0] / n_seen, metric[1] / n_seen

def set_axes(axes, xlable, ylable, xlim, ylim, xscale, yscale, legend):
    """Apply labels, scales and limits to ``axes``.

    A legend and a grid are added only when ``legend`` is truthy.
    """
    axes.set_xlabel(xlable)
    axes.set_ylabel(ylable)
    axes.set_xscale(xscale)
    axes.set_yscale(yscale)
    axes.set_xlim(xlim)
    axes.set_ylim(ylim)
    if not legend:
        return
    axes.legend(legend)
    axes.grid()

class Animator:
    """Collect data points for several curves and redraw them on every update."""

    def __init__(self, xlable=None, ylable=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        legend = [] if legend is None else legend
        self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            # Wrap the single axes object so that self.axes[0] always works.
            self.axes = [self.axes]
        # Axes settings are re-applied after every redraw because add()
        # clears the axes first.
        self.config_axes = lambda: set_axes(
            self.axes[0], xlable, ylable, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Append one point per curve and redraw all curves.

        ``y`` may be a scalar or a sequence with one value per curve; a scalar
        ``x`` is reused for every curve. Pairs in which either value is
        ``None`` are skipped.
        """
        if not hasattr(y, "__len__"):
            y = [y]
        n_curves = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n_curves
        # Create the per-curve point lists lazily on first use.
        if not self.X:
            self.X = [[] for _ in range(n_curves)]
        if not self.Y:
            self.Y = [[] for _ in range(n_curves)]
        for idx, (x_val, y_val) in enumerate(zip(x, y)):
            if x_val is None or y_val is None:
                continue
            self.X[idx].append(x_val)
            self.Y[idx].append(y_val)
        canvas = self.axes[0]
        canvas.cla()
        for xs, ys, fmt in zip(self.X, self.Y, self.fmts):
            canvas.plot(xs, ys, fmt)
        self.config_axes()

def load_array(data_arrays, batch_size, is_train=True):  #@save
    """Build a ``DataLoader`` over the given tensors.

    Args:
        data_arrays: Sequence of tensors unpacked into ``TensorDataset``.
        batch_size: Number of samples per mini-batch.
        is_train: When true, the loader shuffles the data.

    Returns:
        A ``torch.utils.data.DataLoader`` over the wrapped dataset.
    """
    # This statement had been swallowed by the trailing comment on the def
    # line, which left `dataset` undefined.
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train,
                           num_workers=get_dataloader_workers())

# Synthetic polynomial data set: max_degree feature columns (x**0 .. x**19),
# of which only the first four have a non-zero true weight.
max_degree = 20
n_train, n_test = 100, 100
true_w = np.zeros(max_degree)
true_w[:4] = np.array([5, 1.2, -3.4, 5.6])

features = np.random.normal(size=(n_train + n_test, 1))
np.random.shuffle(features)
exponents = np.arange(max_degree).reshape(1, -1)
# Column i holds x**i / i!, where i! is computed as gamma(i + 1).
factorials = np.array([math.gamma(degree + 1) for degree in range(max_degree)])
poly_features = np.power(features, exponents) / factorials
labels = np.dot(poly_features, true_w)
# Add Gaussian noise with standard deviation 0.1 to every label.
labels += np.random.normal(scale=0.1, size=labels.shape)

# Convert all NumPy arrays to float32 tensors.
true_w, features, poly_features, labels = (
    torch.tensor(array, dtype=torch.float32)
    for array in (true_w, features, poly_features, labels)
)

# print(features[:2], poly_features[:2, :], labels[:2])

def evaluate_loss(net, data_iter, loss):
    """Return the loss of ``net`` averaged over every element in ``data_iter``.

    Targets are reshaped to the shape of the model output before ``loss`` is
    applied; the summed loss is divided by the total number of loss elements.
    """
    # Slot 0: summed loss, slot 1: number of loss elements.
    totals = Accumulator(2)
    for inputs, targets in data_iter:
        predictions = net(inputs)
        targets = targets.reshape(predictions.shape)
        batch_loss = loss(predictions, targets)
        totals.add(batch_loss.sum(), batch_loss.numel())
    loss_sum, n_elements = totals[0], totals[1]
    return loss_sum / n_elements

def train(train_features, test_features, train_labels, test_labels,
          num_epochs=400):
    """Fit a bias-free linear layer with SGD and plot train/test loss.

    The losses are added to the plot after the first epoch and then after
    every 20th epoch; the learned weights are printed at the end.
    """
    loss = nn.MSELoss(reduction='none')
    n_inputs = train_features.shape[-1]
    # No bias term; NOTE(review): the callers in this file pass polynomial
    # features whose first column is x**0, which presumably plays that role.
    net = nn.Sequential(nn.Linear(n_inputs, 1, bias=False))
    batch_size = min(10, train_labels.shape[0])
    train_iter = load_array(
        (train_features, train_labels.reshape(-1, 1)), batch_size)
    test_iter = load_array(
        (test_features, test_labels.reshape(-1, 1)), batch_size,
        is_train=False)
    trainer = torch.optim.SGD(net.parameters(), lr=0.01)
    animator = Animator(xlable='epoch', ylable='loss', yscale='log',
                        xlim=[1, num_epochs], ylim=[1e-3, 1e2],
                        legend=['train', 'test'])
    for epoch_no in range(1, num_epochs + 1):
        train_epoch_ch3(net, train_iter, loss, trainer)
        if epoch_no != 1 and epoch_no % 20 != 0:
            continue
        train_loss = evaluate_loss(net, train_iter, loss)
        test_loss = evaluate_loss(net, test_iter, loss)
        animator.add(epoch_no, (train_loss, test_loss))
    print('weight:', net[0].weight.data.numpy())

# Fit using only the first four polynomial feature columns (x**0 .. x**3),
# i.e. exactly the columns that have a non-zero true weight.
train(train_features=poly_features[:n_train, :4],
      test_features=poly_features[n_train:, :4],
      train_labels=labels[:n_train],
      test_labels=labels[n_train:])

plt.show()

可以看到权重值也就是多项式系数和最初设定的系数基本相同，loss也逐渐变小

线性函数，欠拟合

原因：数据是由只有前4个系数非零的多项式（即3阶多项式）生成的，当模型只有2个可训练系数（线性函数）时，不可能表达出4个系数的效果，再增加样本数量或者训练次数，都不能减少损失

# Fit using only the first two feature columns (x**0 and x**1): a linear model.
train(train_features=poly_features[:n_train, :2],
      test_features=poly_features[n_train:, :2],
      train_labels=labels[:n_train],
      test_labels=labels[n_train:])

高阶多项式，过拟合

3阶以上的系数未指定，本身应该是0值；但因为标签中添加了没有明显规律的随机噪声，高阶模型会用多余的系数去拟合这些噪声，所以训练集上的结果很好，但面对随机的噪声怎么可能预估得出来，在验证集上的损失反而更大

# Fit using every polynomial feature column, with a longer training run.
train(train_features=poly_features[:n_train, :],
      test_features=poly_features[n_train:, :],
      train_labels=labels[:n_train],
      test_labels=labels[n_train:],
      num_epochs=1000)

风尘流沙 · 发表于2024-10-29 15:23

谢谢楼主的分享，《动手学深度学习（PyTorch版）》，已下载第二版拜读了。

freebsder · 发表于2024-10-29 15:25

随机噪声可能需要估计噪声分布，白噪声还是高斯噪声，可以先处理一下。虽然现在深度学习网络很强大，但是必要的前处理还是可以有的。

LitchiCheng · 发表于2024-10-29 21:28

freebsder 发表于 2024-10-29 15:25 随机噪声可能需要估计噪声分布，白噪声还是高斯噪声，可以先处理一下。虽然现在深度学习网络很强大，但是必 ...

这个是真实噪声，消除就意味失真

一起读《动手学深度学习（PyTorch版）》- 多项式回归：欠拟合、过拟合 [复制链接]

最新回复

点评