1
0

finish exp2

This commit is contained in:
2025-11-24 21:02:44 +08:00
parent 936f852466
commit af890d899e
5 changed files with 352 additions and 56 deletions

1
.style.yapf Normal file
View File

@@ -0,0 +1 @@
column_limit=120

2
exp2/models/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
# Ignore all saved model files
*.pth

View File

@@ -0,0 +1,54 @@
from pathlib import Path
import sys
import torch
from train import CNN
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
import gpu_utils
class PredictResult:
    """Network output for a single image, with convenience accessors.

    ``possibilities`` is the tensor produced by the network for a batch that
    contains exactly one image, indexed as ``[batch, class]``. The original
    comments call the values probabilities; whether they are normalised
    depends on the network's final layer, which is not visible from here.
    """

    possibilities: torch.Tensor

    def __init__(self, possibilities: torch.Tensor):
        self.possibilities = possibilities

    def chosen_number(self) -> int:
        """Return the index of the class with the highest score."""
        # Dim 0 is the batch, dim 1 holds the per-class scores, so the winner
        # is the position of the maximum along dim 1.
        _, best_index = self.possibilities.max(1)
        return int(best_index.item())

    def number_possibilities(self) -> list[float]:
        """Return the score of every class for the (single) sample."""
        # tolist() yields plain Python floats and follows the tensor's own
        # width instead of assuming exactly ten classes.
        return self.possibilities[0].tolist()
class Predictor:
    """Load the saved CNN weights and classify single 28x28 drawings."""

    device: torch.device
    cnn: CNN

    def __init__(self):
        """Build the network on the selected device and restore its weights."""
        self.device = gpu_utils.get_gpu_device()
        self.cnn = CNN().to(self.device)
        # Load the saved model parameters.
        file_path = Path(__file__).resolve().parent.parent / 'models' / 'cnn.pth'
        # map_location remaps the stored tensors onto the current device, so a
        # checkpoint written on a GPU machine can still be loaded on CPU.
        state_dict = torch.load(file_path, map_location=self.device)
        self.cnn.load_state_dict(state_dict)
        # This object is only used for inference; put any train-only layers
        # (dropout, batch norm) into evaluation mode.
        self.cnn.eval()

    def predict(self, image: list[list[bool]]) -> PredictResult:
        """Run the network on one 28x28 image.

        Args:
            image: 28 rows of 28 cells; truthy cells are converted to 1.0 and
                falsy cells to 0.0.

        Returns:
            A PredictResult wrapping the network output for a batch of one.

        Raises:
            ValueError: if ``image`` is not a 28x28 grid.
        """
        pixels = torch.tensor(image, dtype=torch.float32, device=self.device)
        if pixels.shape != (28, 28):
            raise ValueError(f'expected a 28x28 image, got shape {tuple(pixels.shape)}')
        # Insert two leading dimensions: one for the single grey-scale channel
        # and one for the batch, i.e. a batch of size 1.
        batch = pixels.unsqueeze(0).unsqueeze(0)
        # Inference only: no gradients needed.
        with torch.no_grad():
            output = self.cnn(batch)
        return PredictResult(output)

190
exp2/modified/sketchpad.py Normal file
View File

@@ -0,0 +1,190 @@
from pathlib import Path
import sys
import typing
import tkinter as tk
from tkinter import messagebox
from predict import PredictResult, Predictor
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
import gpu_utils
class SketchpadApp:
    """Tkinter window for drawing a digit and showing the model's guess.

    The window stacks three areas: a black drawing grid of IMAGE_HW x IMAGE_HW
    cells (each PIXEL_HW screen pixels wide), a two-row table showing the
    guessed digit and one value per digit, and a row with the execute and
    reset buttons.
    """

    IMAGE_HW: typing.ClassVar[int] = 28
    PIXEL_HW: typing.ClassVar[int] = 15

    # Canvas state.
    canvas: tk.Canvas
    canvas_data: list[list[bool]]
    canvas_width: int
    canvas_height: int
    # Second table row: guessed digit first, then one label per digit.
    value_labels: list[tk.Label]
    # Backend used by the execute button.
    predictor: Predictor

    def __init__(self, root: tk.Tk, predictor: Predictor):
        """Build all widgets inside ``root`` and remember ``predictor``."""
        self.root = root
        self.root.title("看图说数")
        self._build_canvas(root)
        self._build_table(root)
        self._build_buttons(root)
        # Predictor used when the execute button is pressed.
        self.predictor = predictor

    # region: canvas

    def _build_canvas(self, root: tk.Tk) -> None:
        """Create the drawing canvas, its backing grid and mouse bindings."""
        canvas_frame = tk.Frame(root)
        canvas_frame.pack(pady=10)
        # The canvas is a square of canvas_pixel_count cells per side, each
        # canvas_pixel_size screen pixels wide.
        self.canvas_pixel_count = SketchpadApp.IMAGE_HW
        self.canvas_pixel_size = SketchpadApp.PIXEL_HW
        canvas_hw = self.canvas_pixel_count * self.canvas_pixel_size
        self.canvas_width = canvas_hw
        self.canvas_height = canvas_hw
        self.canvas = tk.Canvas(
            canvas_frame,
            width=self.canvas_width,
            height=self.canvas_height,
            bg='black',
        )
        self.canvas.pack()
        # False = untouched (black background), True = painted white.
        self.canvas_data = self._blank_canvas_data()
        # Paint on click as well as on drag with the left button held.
        self.canvas.bind("<B1-Motion>", self.paint)
        self.canvas.bind("<Button-1>", self.paint)
        self.draw_grid()

    def _blank_canvas_data(self) -> list[list[bool]]:
        """Return a fresh all-False grid of canvas_pixel_count rows and columns."""
        side = self.canvas_pixel_count
        return [[False] * side for _ in range(side)]

    def draw_grid(self):
        """Draw the light-gray grid lines separating the cells."""
        for i in range(self.canvas_pixel_count + 1):
            offset = i * self.canvas_pixel_size
            # Vertical line.
            self.canvas.create_line(offset, 0, offset, self.canvas_height, fill='lightgray')
            # Horizontal line.
            self.canvas.create_line(0, offset, self.canvas_width, offset, fill='lightgray')

    def paint(self, event):
        """Mark the cell under the mouse as painted and draw it in white."""
        # Translate screen coordinates into grid coordinates.
        col = event.x // self.canvas_pixel_size
        row = event.y // self.canvas_pixel_size
        side = self.canvas_pixel_count
        # Ignore positions outside the grid (dragging can leave the canvas).
        if not (0 <= col < side and 0 <= row < side):
            return
        # Nothing to do when the cell is already painted.
        if self.canvas_data[row][col]:
            return
        self.canvas_data[row][col] = True
        # Fill the cell with a white rectangle.
        x1 = col * self.canvas_pixel_size
        y1 = row * self.canvas_pixel_size
        x2 = x1 + self.canvas_pixel_size
        y2 = y1 + self.canvas_pixel_size
        self.canvas.create_rectangle(x1, y1, x2, y2, fill='white', outline='')

    # endregion

    # region: table

    def _build_table(self, root: tk.Tk) -> None:
        """Create the header row and the value row of the result table."""
        table_frame = tk.Frame(root)
        table_frame.pack(pady=10)
        header_words = ("猜测的数字", ) + tuple(f'{i}的概率' for i in range(10))
        # Header row.
        for col, header in enumerate(header_words):
            header_label = tk.Label(
                table_frame,
                text=header,
                relief="solid",
                borderwidth=1,
                width=12,
                height=2,
                bg="lightblue",
            )
            header_label.grid(row=0, column=col, sticky="nsew")
        # Value row; the text is filled in by clear_table() below.
        self.value_labels = []
        for col in range(len(header_words)):
            value_label = tk.Label(
                table_frame,
                relief="solid",
                borderwidth=1,
                width=12,
                height=2,
                bg="white",
            )
            value_label.grid(row=1, column=col, sticky="nsew")
            self.value_labels.append(value_label)
        # Highlight the first column (the guessed digit).
        self.value_labels[0].config(bg="lightyellow")
        # Start with the same placeholder text that reset() restores.
        self.clear_table()

    def show_in_table(self, result: PredictResult):
        """Display the guessed digit and the per-digit values of ``result``."""
        self.value_labels[0].config(text=str(result.chosen_number()))
        number_possibilities = result.number_possibilities()
        for label, value in zip(self.value_labels[1:], number_possibilities):
            label.config(text=f'{value:.4f}')

    def clear_table(self):
        """Reset every value cell to the placeholder text."""
        for label in self.value_labels:
            label.config(text='N/A')

    # endregion

    # region: buttons

    def _build_buttons(self, root: tk.Tk) -> None:
        """Create the execute and reset buttons side by side."""
        button_frame = tk.Frame(root)
        button_frame.pack(pady=10)
        execute_button = tk.Button(
            button_frame,
            text="执行",
            command=self.execute,
            bg='lightgreen',
            width=10,
        )
        execute_button.pack(side=tk.LEFT, padx=5)
        reset_button = tk.Button(
            button_frame,
            text="重置",
            command=self.reset,
            bg='lightcoral',
            width=10,
        )
        reset_button.pack(side=tk.LEFT, padx=5)

    def execute(self):
        """Execute button: send the drawn grid to the predictor and show the result."""
        prediction = self.predictor.predict(self.canvas_data)
        self.show_in_table(prediction)

    def reset(self):
        """Reset button: wipe the drawing and the result table."""
        self.canvas.delete("all")
        # Keep the grid boolean, matching its declared list[list[bool]] type.
        self.canvas_data = self._blank_canvas_data()
        self.draw_grid()
        self.clear_table()

    # endregion
if __name__ == "__main__":
    # Report GPU availability, restore the trained model, then open the
    # sketchpad window and hand control to the Tk event loop.
    gpu_utils.print_gpu_availability()
    digit_predictor = Predictor()
    main_window = tk.Tk()
    app = SketchpadApp(main_window, digit_predictor)
    main_window.mainloop()

View File

@@ -1,8 +1,10 @@
from pathlib import Path from pathlib import Path
import sys import sys
import typing
import numpy import numpy
import torch import torch
from torch.utils.data import DataLoader, Dataset from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import v2 as tvtrans
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import torch.nn.functional as F import torch.nn.functional as F
@@ -16,11 +18,10 @@ class CNN(torch.nn.Module):
def __init__(self): def __init__(self):
super(CNN, self).__init__() super(CNN, self).__init__()
# 使用Ceil模式设置MaxPooling因为tensorflow默认是这个模式。
self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=(3, 3)) self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=(3, 3))
self.pool1 = torch.nn.MaxPool2d(kernel_size=(2, 2), ceil_mode=True) self.pool1 = torch.nn.MaxPool2d(kernel_size=(2, 2))
self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=(3, 3)) self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=(3, 3))
self.pool2 = torch.nn.MaxPool2d(kernel_size=(2, 2), ceil_mode=True) self.pool2 = torch.nn.MaxPool2d(kernel_size=(2, 2))
self.conv3 = torch.nn.Conv2d(64, 64, kernel_size=(3, 3)) self.conv3 = torch.nn.Conv2d(64, 64, kernel_size=(3, 3))
self.flatten = torch.nn.Flatten() self.flatten = torch.nn.Flatten()
# 28x28过第一轮卷积后变为26x26过第一轮池化后变为13x13 # 28x28过第一轮卷积后变为26x26过第一轮池化后变为13x13
@@ -46,20 +47,22 @@ class MnistDataset(Dataset):
"""用于加载Mnist的自定义数据集""" """用于加载Mnist的自定义数据集"""
shape: int shape: int
x_data: torch.Tensor transform: tvtrans.Transform
y_data: torch.Tensor images_data: numpy.ndarray
labels_data: torch.Tensor
def __init__(self, images: numpy.ndarray, labels: numpy.ndarray, transform: tvtrans.Transform):
    """Store the raw images and labels together with the per-sample transform.

    Args:
        images: image array; its first axis indexes the samples.
        labels: label array; its first axis must have the same length.
        transform: applied to each image when it is fetched.

    Raises:
        ValueError: if the number of images and labels differ.
    """
    # numpy.ndarray.size is an int attribute (total element count), not a
    # torch-style size(dim) method, so the sample count comes from shape[0].
    images_len: int = images.shape[0]
    labels_len: int = labels.shape[0]
    if images_len != labels_len:
        raise ValueError(f'got {images_len} images but {labels_len} labels')
    self.shape = images_len
    # Images stay as a numpy array; the transform converts them per item.
    self.images_data = images
    self.labels_data = torch.from_numpy(labels)
    self.transform = transform
def __getitem__(self, index): def __getitem__(self, index):
return self.x_data[index], self.y_data[index] return self.transform(self.images_data[index]), self.labels_data[index]
def __len__(self): def __len__(self):
return self.shape return self.shape
@@ -72,66 +75,112 @@ class DataSource:
test_data: DataLoader test_data: DataLoader
def __init__(self, batch_size: int):
    """Load mnist.npz and expose it as train and test DataLoaders.

    Args:
        batch_size: batch size used by both loaders.
    """
    datasets_path = Path(__file__).resolve().parent.parent / 'datasets' / 'mnist.npz'
    # The object returned by numpy.load for an .npz archive keeps the file
    # open; read the four arrays inside a context manager so it is closed.
    with numpy.load(datasets_path) as datasets:
        # All images are white digits on a black background.
        # 60,000 training images (60000x28x28); labels are one-dimensional.
        train_images = datasets['x_train']
        train_labels = datasets['y_train']
        # 10,000 test images (10000x28x28); labels are one-dimensional.
        test_images = datasets['x_test']
        test_labels = datasets['y_test']

    # Per-sample conversion: turn the array into an image tensor, then
    # convert it to float32 scaled into the 0-1 range.
    trans = tvtrans.Compose([
        tvtrans.ToImage(),
        tvtrans.ToDtype(torch.float32, scale=True),
    ])

    train_dataset = MnistDataset(train_images, train_labels, transform=trans)
    test_dataset = MnistDataset(test_images, test_labels, transform=trans)

    # NOTE(review): the training loader is not shuffled; confirm this is
    # intentional rather than a leftover from debugging.
    self.train_data = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False)
    self.test_data = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
class Trainer:
    """Train the CNN on the MNIST data source, save it and measure accuracy."""

    N_EPOCH: typing.ClassVar[int] = 5
    N_BATCH_SIZE: typing.ClassVar[int] = 1000

    device: torch.device
    data_source: DataSource
    cnn: CNN

    def __init__(self):
        """Pick the device, build the data loaders and create the network."""
        self.device = gpu_utils.get_gpu_device()
        self.data_source = DataSource(Trainer.N_BATCH_SIZE)
        self.cnn = CNN().to(self.device)

    def train(self):
        """Run N_EPOCH epochs of Adam with cross-entropy loss over the training data."""
        optimizer = torch.optim.Adam(self.cnn.parameters())
        loss_func = torch.nn.CrossEntropyLoss()

        for epoch in range(Trainer.N_EPOCH):
            self.cnn.train()

            for batch_index, (batch_images, batch_labels) in enumerate(self.data_source.train_data):
                gpu_images = batch_images.to(self.device)
                gpu_labels = batch_labels.to(self.device)

                optimizer.zero_grad()
                prediction: torch.Tensor = self.cnn(gpu_images)
                loss: torch.Tensor = loss_func(prediction, gpu_labels)
                loss.backward()
                optimizer.step()

                # Only report every 100th batch to keep the log short.
                if batch_index % 100 == 0:
                    literal_loss = loss.item()
                    print(f'Epoch: {epoch+1}, Batch: {batch_index}, Loss: {literal_loss:.4f}')

    def save(self):
        """Write the network's state dict to ../models/cnn.pth, creating the directory."""
        file_dir_path = Path(__file__).resolve().parent.parent / 'models'
        file_dir_path.mkdir(parents=True, exist_ok=True)
        file_path = file_dir_path / 'cnn.pth'
        torch.save(self.cnn.state_dict(), file_path)
        print(f'模型已保存至:{file_path}')

    def test(self):
        """Evaluate on the test data and print the accuracy in percent."""
        self.cnn.eval()
        correct_sum = 0
        total_sum = 0

        with torch.no_grad():
            for batch_images, batch_labels in self.data_source.test_data:
                gpu_images = batch_images.to(self.device)
                gpu_labels = batch_labels.to(self.device)

                possibilities: torch.Tensor = self.cnn(gpu_images)
                # The output holds one score per digit for every sample; the
                # prediction is the index of the maximum along dim 1 (dim 0
                # is the batch).
                _, prediction = possibilities.max(1)
                # The number of labels is the number of samples in this batch.
                total_sum += gpu_labels.size(0)
                # .item() keeps the running count a plain Python int instead
                # of a 0-dim tensor living on the device.
                correct_sum += prediction.eq(gpu_labels).sum().item()

        test_acc = 100. * correct_sum / total_sum
        print(f"准确率: {test_acc:.4f}%,共测试了{total_sum}张图片")
def main():
    """Train the network, save its weights, then report test accuracy."""
    runner = Trainer()
    runner.train()
    runner.save()
    runner.test()
if __name__ == "__main__": if __name__ == "__main__":