fix exp3 loss function error
This commit is contained in:
@@ -264,8 +264,7 @@ class PoetryDataLoader:
         # 这么做是为了让RNN从输入推到输出(下一个字符)。
         # 此外,输出要做onehot编码
         input = torch.tensor(numpy_batch[:, :-1], dtype=torch.long)
-        output = F.one_hot(torch.tensor(numpy_batch[:, 1:], dtype=torch.long),
-                           num_classes=self.preprocessor.tokenizer.vocab_size).float()
+        output = torch.tensor(numpy_batch[:, 1:], dtype=torch.long)
 
         # 返回结果
         return input, output
@@ -11,7 +11,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
 import gpu_utils
 
 
-def generate_random_poetry(tokenizer: Tokenizer, model: Rnn, device: torch.device, s: str=''):
+def generate_random_poetry(tokenizer: Tokenizer, model: Rnn, device: torch.device, s: str='') -> str:
     """
     随机生成一首诗
 
@@ -33,12 +33,12 @@ def generate_random_poetry(tokenizer: Tokenizer, model: Rnn, device: torch.devic
         # 由于后续预测概率时,需要批次维度,所以方括号里第一项写:保留批次维度。
         # 然后因为只有最后一个字符是预测的,其他字符都是辅助推断的,所以方括号第二项-1表示取这个最后一个字符。
         # 最后,它的概率分布中不包含[PAD][UNK][CLS]的概率分布,所以方括号第三项3:把这些东西删掉(这些编号是Tokenizer在编译时写死的,详细查看对应模块)。
-        possibilities = F.softmax(output[:, -1, 3:])
+        possibilities = F.softmax(output[:, -1, 3:], dim=-1)
         # 按照预测出的概率,随机选择一个词作为预测结果。
         # 如果需要贪心,则用argmax替代。
         target_index = torch.multinomial(possibilities, num_samples=1)
         # 记得把之前删除的维度加回来才是token id
-        target_id = target_index + 3
+        target_id = target_index.item() + 3
 
         # 把target_id加入序列
         token_ids.append(target_id)
@@ -49,7 +49,7 @@ def generate_random_poetry(tokenizer: Tokenizer, model: Rnn, device: torch.devic
     return tokenizer.decode(token_ids)
 
 
-def generate_acrostic(tokenizer: Tokenizer, model: Rnn, device: torch.device, head: str):
+def generate_acrostic(tokenizer: Tokenizer, model: Rnn, device: torch.device, head: str) -> str:
     """
     随机生成一首藏头诗
 
@@ -83,9 +83,9 @@ def generate_acrostic(tokenizer: Tokenizer, model: Rnn, device: torch.device, he
         # 与generate_random_poetry函数相同的方式,不断地生成诗句的下一个字。
         input = torch.tensor(token_ids, dtype=torch.long).unsqueeze(0)
         output: torch.Tensor = model(input.to(device))
-        possibilities = F.softmax(output[:, -1, 3:])
+        possibilities = F.softmax(output[:, -1, 3:], dim=-1)
         target_index = torch.multinomial(possibilities, num_samples=1)
-        target_id = target_index + 3
+        target_id = target_index.item() + 3
 
         # 把target_id加入序列
         token_ids.append(target_id)
@@ -110,17 +110,38 @@ class Predictor:
         # 加载保存好的模型参数
         self.model.load_state_dict(torch.load(settings.SAVED_MODEL_PATH))
         self.model.eval()
 
-    def generate_random_poetry(self):
+    def generate_random_poetry(self, s: str = ''):
         """随机生成一首诗"""
         with torch.no_grad():
-            generate_random_poetry(self.data_loader.get_tokenizer(),
-                                   self.model,
-                                   self.device)
+            print(generate_random_poetry(self.data_loader.get_tokenizer(),
+                                         self.model,
+                                         self.device,
+                                         s))
 
-    def generate_acrostic(self):
+    def generate_acrostic(self, s: str):
         """随机生成一首藏头诗"""
         with torch.no_grad():
-            generate_acrostic(self.data_loader.get_tokenizer(),
-                              self.model,
-                              self.device)
+            print(generate_acrostic(self.data_loader.get_tokenizer(),
+                                    self.model,
+                                    self.device,
+                                    s))
 
+
+def main():
+    predictor = Predictor()
+
+    # 随机生成一首诗
+    predictor.generate_random_poetry()
+    # 给出部分信息的情况下,随机生成剩余部分
+    predictor.generate_random_poetry('床前明月光,')
+    # 生成藏头诗
+    predictor.generate_acrostic('好好学习天天向上')
+
+
+if __name__ == "__main__":
+    gpu_utils.print_gpu_availability()
+    main()
@@ -31,7 +31,7 @@ class Trainer:
         self.device = gpu_utils.get_gpu_device()
         self.data_loader = PoetryDataLoader(batch_size=settings.N_BATCH_SIZE)
         self.model = Rnn(self.data_loader.get_vocab_size()).to(self.device)
-        # 展示模型结构。批次为指定批次数量,通道只有一个灰度通道,大小28x28。
+        # 展示模型结构。批次为指定批次数量,最大诗歌长度,同时输入一定是int32
         torchinfo.summary(self.model,
                           (settings.N_BATCH_SIZE, settings.POETRY_MAX_LEN),
                           dtypes=[torch.int32,])
@@ -41,16 +41,21 @@ class Trainer:
         criterion = torch.nn.CrossEntropyLoss()
         # 创建训练器
         self.trainer = ignite.engine.create_supervised_trainer(
-            self.model, optimizer, criterion, self.device)
+            self.model, optimizer, criterion, self.device,
+            # 由于PyTorch的交叉熵函数总是要求概率在dim=1,所以要调换一下维度才能传入。
+            model_transform=lambda output: self.__adjust_for_loss(output))
         # 将训练器关联到进度条
         self.pbar = ProgressBar(persist=True)
         self.pbar.attach(self.trainer, output_transform=lambda loss: {"loss": loss})
         # 每次epoch后,作诗一首看看结果
         self.trainer.add_event_handler(
             Events.EPOCH_COMPLETED,
-            lambda: generate_random_poetry(self.data_loader.get_tokenizer(), self.model, )
+            lambda: print(generate_random_poetry(self.data_loader.get_tokenizer(), self.model, self.device))
         )
 
+    def __adjust_for_loss(self, output: torch.Tensor) -> torch.Tensor:
+        return output.permute(0, 2, 1)
+
     def train_model(self):
         # 训练模型
         self.trainer.run(self.data_loader.loader, max_epochs=settings.N_EPOCH)
Reference in New Issue
Block a user