1
0
Files
ai-school/exp2/modified/mnist.py

115 lines
4.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from pathlib import Path
import numpy
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import v2 as tvtrans
from torchvision import datasets
import torch.nn.functional as F
class CNN(torch.nn.Module):
    """Convolutional neural network for 28x28 digit images.

    Three 3x3 convolutions (the first two each followed by 2x2 max pooling)
    feed a two-layer fully connected head. ``forward`` returns per-class
    probabilities (softmax over ``dim=1``), not raw logits.

    Args:
        in_channels: Number of channels in the input images. Defaults to 1.
        num_classes: Number of output classes. Defaults to 10.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(in_channels, 32, kernel_size=(3, 3))
        self.pool1 = torch.nn.MaxPool2d(kernel_size=(2, 2))
        self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=(3, 3))
        self.pool2 = torch.nn.MaxPool2d(kernel_size=(2, 2))
        self.conv3 = torch.nn.Conv2d(64, 64, kernel_size=(3, 3))
        self.flatten = torch.nn.Flatten()
        # Spatial size for a 28x28 input:
        #   conv1 -> 26x26, pool1 -> 13x13,
        #   conv2 -> 11x11, pool2 -> 5x5,
        #   conv3 -> 3x3.
        # conv3 emits 64 channels, so the flattened vector has 64 * 3 * 3
        # entries. Other input sizes would need a different fc1 width.
        self.fc1 = torch.nn.Linear(64 * 3 * 3, 64)
        torch.nn.init.xavier_normal_(self.fc1.weight)
        torch.nn.init.zeros_(self.fc1.bias)
        self.fc2 = torch.nn.Linear(64, num_classes)
        torch.nn.init.xavier_normal_(self.fc2.weight)
        torch.nn.init.zeros_(self.fc2.bias)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Input batch. fc1 is sized for inputs of shape
                ``(N, in_channels, 28, 28)``.

        Returns:
            Tensor of shape ``(N, num_classes)`` whose rows are softmax
            probabilities.
        """
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = F.relu(self.conv3(x))
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        # NOTE(review): this returns probabilities, not logits. If the training
        # loop uses torch.nn.CrossEntropyLoss (which expects logits), softmax
        # would effectively be applied twice -- confirm against the caller.
        return F.softmax(x, dim=1)
class MnistDataset(Dataset):
    """Custom dataset that pairs MNIST image arrays with their labels.

    Images are kept as a NumPy array and passed through ``transform`` on every
    access; labels are converted to a tensor once, at construction time.
    """

    shape: int  # number of samples (a count, not an array shape)
    transform: tvtrans.Transform  # applied to each image in __getitem__
    images_data: numpy.ndarray
    labels_data: torch.Tensor

    def __init__(self, images: numpy.ndarray, labels: numpy.ndarray, transform: tvtrans.Transform):
        """Store the images, labels and per-image transform.

        Args:
            images: Image array; its first axis indexes samples.
            labels: Label array; its first axis indexes samples.
            transform: Callable applied to a single image on each access.

        Raises:
            ValueError: If ``images`` and ``labels`` differ in length along
                their first axis.
        """
        images_len: int = images.shape[0]
        labels_len: int = labels.shape[0]
        # Raise explicitly instead of using ``assert``: assertions are removed
        # when Python runs with -O, which would let mismatched data through.
        if images_len != labels_len:
            raise ValueError(
                "images and labels must have the same length, "
                f"got {images_len} images and {labels_len} labels"
            )
        self.shape = images_len
        self.images_data = images
        # torch.from_numpy shares memory with ``labels`` rather than copying.
        self.labels_data = torch.from_numpy(labels)
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(transformed image, label)`` for the sample at ``index``."""
        return self.transform(self.images_data[index]), self.labels_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.shape
class MnistDataSource:
    """Data reader that loads MNIST from an ``.npz`` archive into DataLoaders.

    After construction, ``train_loader`` and ``test_loader`` iterate over the
    training and test splits in batches of ``batch_size``.
    """

    train_loader: DataLoader
    test_loader: DataLoader

    def __init__(self, batch_size: int):
        """Read ``datasets/mnist.npz`` and build the two loaders.

        The archive is looked up in the ``datasets`` directory that sits next
        to this file's parent directory.

        Args:
            batch_size: Batch size used for both loaders.
        """
        dataset_path = Path(__file__).resolve().parent.parent / 'datasets' / 'mnist.npz'
        # numpy.load returns an NpzFile that keeps the archive open; the
        # context manager closes it. Indexing reads each array fully into
        # memory, so the arrays stay valid after the file is closed.
        with numpy.load(dataset_path) as dataset:
            # Per the original author's notes: all images are white digits on
            # a black background.
            # 60,000 training images (60000x28x28); labels are one-dimensional.
            train_images: numpy.ndarray = dataset['x_train']
            train_labels: numpy.ndarray = dataset['y_train']
            # 10,000 test images (10000x28x28); labels are one-dimensional.
            test_images: numpy.ndarray = dataset['x_test']
            test_labels: numpy.ndarray = dataset['y_test']
        # Define the per-image transform pipeline.
        trans = tvtrans.Compose([
            # Convert uint8 to float32 and rescale into the 0-1 range
            # (this pair replaces the older tvtrans.ToTensor()).
            tvtrans.ToImage(),
            tvtrans.ToDtype(torch.float32, scale=True),
            # (0.1307, 0.3081) are the global mean and standard deviation of
            # the MNIST training set; normalizing with them can help numerical
            # stability and convergence. Currently not applied:
            # tvtrans.Normalize((0.1307,), (0.3081,)),
        ])
        # Build the datasets.
        train_dataset = MnistDataset(train_images, train_labels, transform=trans)
        test_dataset = MnistDataset(test_images, test_labels, transform=trans)
        # Expose the loaders on the instance.
        # NOTE(review): the training loader is not shuffled -- confirm this is
        # intentional (e.g. for reproducibility).
        self.train_loader = DataLoader(dataset=train_dataset,
                                       batch_size=batch_size,
                                       shuffle=False)
        self.test_loader = DataLoader(dataset=test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False)