1
0

Fix fatal training issue in the exp2 PyTorch rewrite

This commit is contained in:
2025-11-30 22:01:56 +08:00
parent 48fcdfcc80
commit 43b807679f
13 changed files with 738 additions and 112 deletions

View File

@@ -1,2 +1,3 @@
# Ignore all saved model files
*.pth
*.pth
*.ckpt

View File

@@ -3,47 +3,9 @@ import numpy
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import v2 as tvtrans
from torchvision import datasets
import torch.nn.functional as F
class CNN(torch.nn.Module):
    """Convolutional neural network model for 28x28 single-channel images.

    The forward pass returns softmax probabilities over 10 classes.

    NOTE: torch.nn.CrossEntropyLoss expects raw logits, so these
    probabilities must not be fed to it directly.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(3, 3))
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 2))
        self.conv2 = nn.Conv2d(32, 64, kernel_size=(3, 3))
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2))
        self.conv3 = nn.Conv2d(64, 64, kernel_size=(3, 3))
        self.flatten = nn.Flatten()

        # Spatial size bookkeeping for a 28x28 input:
        #   conv1 -> 26x26, pool1 -> 13x13,
        #   conv2 -> 11x11, pool2 -> 5x5,
        #   conv3 -> 3x3 with 64 channels.
        self.fc1 = nn.Linear(64 * 3 * 3, 64)
        nn.init.xavier_normal_(self.fc1.weight)
        nn.init.zeros_(self.fc1.bias)

        self.fc2 = nn.Linear(64, 10)
        nn.init.xavier_normal_(self.fc2.weight)
        nn.init.zeros_(self.fc2.bias)

    def forward(self, x):
        """Return per-class probabilities of shape (batch, 10)."""
        features = self.pool1(F.relu(self.conv1(x)))
        features = self.pool2(F.relu(self.conv2(features)))
        features = self.flatten(F.relu(self.conv3(features)))
        hidden = F.relu(self.fc1(features))
        return F.softmax(self.fc2(hidden), dim=1)
class MnistDataset(Dataset):
"""用于加载Mnist的自定义数据集"""
"""用于加载Mnist数据的自定义数据集"""
shape: int
transform: tvtrans.Transform
@@ -101,9 +63,10 @@ class MnistDataSource:
])
# 创建数据集
train_dataset = MnistDataset(train_images, train_labels, transform=trans)
test_dataset = MnistDataset(test_images, test_labels, transform=trans)
train_dataset = MnistDataset(train_images, train_labels,
transform=trans)
test_dataset = MnistDataset(test_images, test_labels,
transform=trans)
# 赋值到自身
self.train_loader = DataLoader(dataset=train_dataset,

53
exp2/modified/model.py Normal file
View File

@@ -0,0 +1,53 @@
import torch
import torch.nn.functional as F
class Cnn(torch.nn.Module):
    """Convolutional neural network model for 28x28 single-channel images.

    Three 3x3 convolutions (32, 64 and 64 filters) with 2x2 max pooling
    after the first two, followed by two fully connected layers.  The
    forward pass returns raw logits for 10 classes (no softmax applied).
    """

    def __init__(self):
        super(Cnn, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=(3, 3))
        self.pool1 = torch.nn.MaxPool2d(kernel_size=(2, 2))
        self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=(3, 3))
        self.pool2 = torch.nn.MaxPool2d(kernel_size=(2, 2))
        self.conv3 = torch.nn.Conv2d(64, 64, kernel_size=(3, 3))
        self.flatten = torch.nn.Flatten()
        # A 28x28 input becomes 26x26 after the first convolution and
        # 13x13 after the first pooling; 11x11 after the second
        # convolution and 5x5 after the second pooling; 3x3 after the
        # third convolution.  The last convolution has 64 filters.
        self.fc1 = torch.nn.Linear(64 * 3 * 3, 64)
        self.fc2 = torch.nn.Linear(64, 10)
        # Initialise the model parameters.
        self._initialize_weights()

    def _initialize_weights(self):
        """Initialise the fully connected layers like Keras ``Dense``."""
        # YYC MARK:
        # Initialise both fully connected layers with the TensorFlow
        # default settings:
        # - kernel_initializer='glorot_uniform'
        # - bias_initializer='zeros'
        # Glorot *uniform* is xavier_uniform_ in PyTorch; xavier_normal_
        # would be Glorot normal and does not match the TensorFlow default.
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.zeros_(self.fc1.bias)
        torch.nn.init.xavier_uniform_(self.fc2.weight)
        torch.nn.init.zeros_(self.fc2.bias)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10) for input ``x``."""
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = F.relu(self.conv3(x))
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        # YYC MARK:
        # Never return F.softmax(x, dim=1) here!
        # This code was converted from TensorFlow, whose loss function
        # takes probabilities for the cross-entropy computation, whereas
        # PyTorch's cross-entropy expects logits, i.e. the values before
        # softmax.  So the raw model output is returned directly.
        return x

View File

@@ -1,18 +1,21 @@
from pathlib import Path
import sys
import torch
import numpy
import torch
import torch.nn.functional as F
from PIL import Image, ImageFile
import matplotlib.pyplot as plt
from mnist import CNN
from model import Cnn
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
import gpu_utils
import pytorch_gpu_utils
class PredictResult:
"""预测的结果"""
possibilities: torch.Tensor
"""预测结果是每个数字不同的概率是经过softmax后的数值"""
def __init__(self, possibilities: torch.Tensor):
self.possibilities = possibilities
@@ -29,47 +32,54 @@ class PredictResult:
class Predictor:
device: torch.device
cnn: CNN
model: Cnn
def __init__(self):
self.device = gpu_utils.get_gpu_device()
self.cnn = CNN().to(self.device)
self.device = pytorch_gpu_utils.get_gpu_device()
self.model = Cnn().to(self.device)
# 加载保存好的模型参数
file_path = Path(__file__).resolve().parent.parent / 'models' / 'cnn.pth'
self.cnn.load_state_dict(torch.load(file_path))
self.model.load_state_dict(torch.load(file_path))
def generic_predict(self, in_data: torch.Tensor) -> PredictResult:
"""
其它预测函数都要使用的预测后端。其它预测函数将数据处理成Tensor然后传递给此函数进行实际预测。
:param in_data: 传入的tensor该tensor的shape必须是28x28dtype为float32。
"""
# 上传tensor到GPU
in_data = in_data.to(self.device)
# 为了满足要求要在第一维度挤出2下
# 一次是灰度通道,一次是批次。
# 相当于batch size = 1的计算
in_data = in_data.unsqueeze(0).unsqueeze(0)
# 开始预测由于模型输出的是没有softmax的数值因此最后还需要softmax一下
with torch.no_grad():
out_data = self.model(in_data)
out_data = F.softmax(out_data, dim=-1)
return PredictResult(out_data)
def predict_sketchpad(self, image: list[list[bool]]) -> PredictResult:
input = torch.Tensor(image).float().to(self.device)
input = torch.Tensor(image).float()
assert(input.dim() == 2)
assert(input.size(0) == 28)
assert(input.size(1) == 28)
# 为了满足要求要在第一维度挤出2下
# 一次是灰度通道,一次是批次。
# 相当于batch size = 1的计算
input = input.unsqueeze(0).unsqueeze(0)
# 预测
with torch.no_grad():
output = self.cnn(input)
return PredictResult(output)
return self.generic_predict(input)
def predict_image(self, image: ImageFile.ImageFile) -> PredictResult:
# 确保图像为灰度图像然后转换为numpy数组。
# 注意这里的numpy数组是只读的所以要先拷贝一份
grayscale_image = image.convert('L')
numpy_data = numpy.reshape(grayscale_image, (28, 28), copy=True)
# 转换到Tensor设置dtype并传到GPU上
data = torch.from_numpy(numpy_data).float().to(self.device)
# 转换到Tensor设置dtype
data = torch.from_numpy(numpy_data).float()
# 归一化到255又因为图像输入是白底黑字需要做转换。
data.div_(255.0).sub_(1).mul_(-1)
# 同理,挤出维度并预测
input = data.unsqueeze(0).unsqueeze(0)
with torch.no_grad():
output = self.cnn(input)
return PredictResult(output)
return self.generic_predict(data)
def main():
predictor = Predictor()
@@ -91,5 +101,6 @@ def main():
if __name__ == "__main__":
pytorch_gpu_utils.print_gpu_availability()
main()

View File

@@ -2,11 +2,10 @@ from pathlib import Path
import sys
import typing
import tkinter as tk
from tkinter import messagebox
from predict import PredictResult, Predictor
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
import gpu_utils
import pytorch_gpu_utils
class SketchpadApp:
@@ -182,7 +181,8 @@ class SketchpadApp:
# endregion
if __name__ == "__main__":
gpu_utils.print_gpu_availability()
pytorch_gpu_utils.print_gpu_availability()
predictor = Predictor()
root = tk.Tk()

View File

@@ -7,10 +7,11 @@ import ignite.engine
import ignite.metrics
from ignite.engine import Engine, Events
from ignite.handlers.tqdm_logger import ProgressBar
from mnist import CNN, MnistDataSource
from dataset import MnistDataSource
from model import Cnn
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
import gpu_utils
import pytorch_gpu_utils
class Trainer:
@@ -19,12 +20,12 @@ class Trainer:
device: torch.device
data_source: MnistDataSource
model: CNN
model: Cnn
def __init__(self):
self.device = gpu_utils.get_gpu_device()
self.device = pytorch_gpu_utils.get_gpu_device()
self.data_source = MnistDataSource(Trainer.N_BATCH_SIZE)
self.model = CNN().to(self.device)
self.model = Cnn().to(self.device)
# 展示模型结构。批次为指定批次数量通道只有一个灰度通道大小28x28。
torchinfo.summary(self.model, (Trainer.N_BATCH_SIZE, 1, 28, 28))
@@ -101,7 +102,7 @@ def main():
# device: torch.device
# data_source: MnistDataSource
# model: CNN
# model: Cnn
# trainer: Engine
# train_evaluator: Engine
@@ -109,7 +110,7 @@ def main():
# def __init__(self):
# self.device = gpu_utils.get_gpu_device()
# self.model = CNN().to(self.device)
# self.model = Cnn().to(self.device)
# self.data_source = MnistDataSource(batch_size=N_BATCH_SIZE)
# # 展示模型结构。批次为指定批次数量通道只有一个灰度通道大小28x28。
# torchinfo.summary(self.model, (N_BATCH_SIZE, 1, 28, 28))
@@ -188,5 +189,5 @@ def main():
if __name__ == "__main__":
gpu_utils.print_gpu_availability()
pytorch_gpu_utils.print_gpu_availability()
main()

View File

@@ -5,13 +5,6 @@ import matplotlib.pyplot as plt
from train import CNN
'''
python 3.9
tensorflow 2.0.0b0
pillow(PIL) 4.3.0
'''
class Predict(object):
def __init__(self):
latest = tf.train.latest_checkpoint('./ckpt')

View File

@@ -1,11 +1,13 @@
import os
from pathlib import Path
import sys
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import keras
from keras import datasets, layers, models
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
import tensorflow_gpu_util
'''
python 3.9
tensorflow 2.0.0b0
'''
class CNN(object):
def __init__(self):
model = models.Sequential()
@@ -26,8 +28,7 @@ class CNN(object):
class DataSource(object):
def __init__(self):
# mnist数据集存储的位置如何不存在将自动下载
data_path = os.path.abspath(os.path.dirname(
__file__)) + '.'
data_path = Path(__file__).resolve().parent.parent / 'datasets' / 'mnist.npz'
(train_images, train_labels), (test_images,
test_labels) = datasets.mnist.load_data(path=data_path)
# 6万张训练图片1万张测试图片
@@ -43,19 +44,20 @@ class Train:
self.cnn = CNN()
self.data = DataSource()
def train(self):
check_path = './ckpt/cp-{epoch:04d}.ckpt'
check_path = Path(__file__).resolve().parent.parent / 'models' / 'cnn.ckpt'
# period 每隔5epoch保存一次
save_model_cb = tf.keras.callbacks.ModelCheckpoint(
check_path, save_weights_only=True, verbose=1, period=5)
save_model_cb = keras.callbacks.ModelCheckpoint(
str(check_path), save_weights_only=True, verbose=1, period=5)
self.cnn.model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
self.cnn.model.fit(self.data.train_images, self.data.train_labels,
epochs=5, callbacks=[save_model_cb])
epochs=5, batch_size=1000, callbacks=[save_model_cb])
test_loss, test_acc = self.cnn.model.evaluate(
self.data.test_images, self.data.test_labels)
print("准确率: %.4f共测试了%d张图片 " % (test_acc, len(self.data.test_labels)))
print("准确率: %.4f, 共测试了%d张图片 " % (test_acc, len(self.data.test_labels)))
if __name__ == "__main__":
app = Train()
app.train()
tensorflow_gpu_util.print_gpu_availability()
#app = Train()
#app.train()