From 7aa7ae3335c33eed2434e7a444e6e4708cf2104b Mon Sep 17 00:00:00 2001 From: yyc12345 Date: Sat, 6 Dec 2025 20:48:27 +0800 Subject: [PATCH] fix exp3 loss function error --- exp3/modified/dataset.py | 3 +-- exp3/modified/predict.py | 45 +++++++++++++++++++++++++++++----------- exp3/modified/train.py | 11 +++++++--- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/exp3/modified/dataset.py b/exp3/modified/dataset.py index f4ced6d..a9c3c9c 100644 --- a/exp3/modified/dataset.py +++ b/exp3/modified/dataset.py @@ -264,8 +264,7 @@ class PoetryDataLoader: # 这么做是为了让RNN从输入推到输出(下一个字符)。 # 此外,输出要做onehot编码 input = torch.tensor(numpy_batch[:, :-1], dtype=torch.long) - output = F.one_hot(torch.tensor(numpy_batch[:, 1:], dtype=torch.long), - num_classes=self.preprocessor.tokenizer.vocab_size).float() + output = torch.tensor(numpy_batch[:, 1:], dtype=torch.long) # 返回结果 return input, output diff --git a/exp3/modified/predict.py b/exp3/modified/predict.py index 19120ce..dfbafa2 100644 --- a/exp3/modified/predict.py +++ b/exp3/modified/predict.py @@ -11,7 +11,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) import gpu_utils -def generate_random_poetry(tokenizer: Tokenizer, model: Rnn, device: torch.device, s: str=''): +def generate_random_poetry(tokenizer: Tokenizer, model: Rnn, device: torch.device, s: str='') -> str: """ 随机生成一首诗 @@ -33,12 +33,12 @@ def generate_random_poetry(tokenizer: Tokenizer, model: Rnn, device: torch.devic # 由于后续预测概率时,需要批次维度,所以方括号里第一项写:保留批次维度。 # 然后因为只有最后一个字符是预测的,其他字符都是辅助推断的,所以方括号第二项-1表示取这个最后一个字符。 # 最后,它的概率分布中不包含[PAD][UNK][CLS]的概率分布,所以方括号第三项3:把这些东西删掉(这些编号是Tokenizer在编译时写死的,详细查看对应模块)。 - possibilities = F.softmax(output[:, -1, 3:]) + possibilities = F.softmax(output[:, -1, 3:], dim=-1) # 按照预测出的概率,随机选择一个词作为预测结果。 # 如果需要贪心,则用argmax替代。 target_index = torch.multinomial(possibilities, num_samples=1) # 记得把之前删除的维度加回来才是token id - target_id = target_index + 3 + target_id = target_index.item() + 3 # 把target_id加入序列 token_ids.append(target_id) @@ -49,7 +49,7 @@ def generate_random_poetry(tokenizer: Tokenizer, model: Rnn, device: torch.devic return tokenizer.decode(token_ids) -def generate_acrostic(tokenizer: Tokenizer, model: Rnn, device: torch.device, head: str): +def generate_acrostic(tokenizer: Tokenizer, model: Rnn, device: torch.device, head: str) -> str: """ 随机生成一首藏头诗 @@ -83,9 +83,9 @@ def generate_acrostic(tokenizer: Tokenizer, model: Rnn, device: torch.device, he # 与generate_random_poetry函数相同的方式,不断地生成诗句的下一个字。 input = torch.tensor(token_ids, dtype=torch.long).unsqueeze(0) output: torch.Tensor = model(input.to(device)) - possibilities = F.softmax(output[:, -1, 3:]) + possibilities = F.softmax(output[:, -1, 3:], dim=-1) target_index = torch.multinomial(possibilities, num_samples=1) - target_id = target_index + 3 + target_id = target_index.item() + 3 # 把target_id加入序列 token_ids.append(target_id) @@ -110,17 +110,38 @@ class Predictor: # 加载保存好的模型参数 self.model.load_state_dict(torch.load(settings.SAVED_MODEL_PATH)) + self.model.eval() - def generate_random_poetry(self): + def generate_random_poetry(self, s: str = ''): """随机生成一首诗""" with torch.no_grad(): - generate_random_poetry(self.data_loader.get_tokenizer(), + print(generate_random_poetry(self.data_loader.get_tokenizer(), self.model, - self.device) + self.device, + s)) - def generate_acrostic(self): + def generate_acrostic(self, s: str): """随机生成一首藏头诗""" with torch.no_grad(): - generate_acrostic(self.data_loader.get_tokenizer(), + print(generate_acrostic(self.data_loader.get_tokenizer(), self.model, - self.device) + self.device, + s)) + + +def main(): + predictor = Predictor() + + # 随机生成一首诗 + predictor.generate_random_poetry() + # 给出部分信息的情况下,随机生成剩余部分 + predictor.generate_random_poetry('床前明月光,') + # 生成藏头诗 + predictor.generate_acrostic('好好学习天天向上') + + +if __name__ == "__main__": + gpu_utils.print_gpu_availability() + main() + + diff --git a/exp3/modified/train.py b/exp3/modified/train.py index 2f006f2..f157e64 100644 --- a/exp3/modified/train.py +++ b/exp3/modified/train.py @@ -31,7 +31,7 @@ class Trainer: self.device = gpu_utils.get_gpu_device() self.data_loader = PoetryDataLoader(batch_size=settings.N_BATCH_SIZE) self.model = Rnn(self.data_loader.get_vocab_size()).to(self.device) - # 展示模型结构。批次为指定批次数量,通道只有一个灰度通道,大小28x28。 + # 展示模型结构。批次为指定批次数量,最大诗歌长度,同时输入一定是int32 torchinfo.summary(self.model, (settings.N_BATCH_SIZE, settings.POETRY_MAX_LEN), dtypes=[torch.int32,]) @@ -41,16 +41,21 @@ class Trainer: criterion = torch.nn.CrossEntropyLoss() # 创建训练器 self.trainer = ignite.engine.create_supervised_trainer( - self.model, optimizer, criterion, self.device) + self.model, optimizer, criterion, self.device, + # 由于PyTorch的交叉熵函数总是要求概率在dim=1,所以要调换一下维度才能传入。 + model_transform=lambda output: self.__adjust_for_loss(output)) # 将训练器关联到进度条 self.pbar = ProgressBar(persist=True) self.pbar.attach(self.trainer, output_transform=lambda loss: {"loss": loss}) # 每次epoch后,作诗一首看看结果 self.trainer.add_event_handler( Events.EPOCH_COMPLETED, - lambda: generate_random_poetry(self.data_loader.get_tokenizer(), self.model, ) + lambda: print(generate_random_poetry(self.data_loader.get_tokenizer(), self.model, self.device)) ) + def __adjust_for_loss(self, output: torch.Tensor) -> torch.Tensor: + return output.permute(0, 2, 1) + def train_model(self): # 训练模型 self.trainer.run(self.data_loader.loader, max_epochs=settings.N_EPOCH)