1
0

use ignite for exp2

This commit is contained in:
2025-12-02 23:07:27 +08:00
parent 43b807679f
commit 65c56e938c
15 changed files with 246 additions and 794 deletions

View File

@@ -87,7 +87,7 @@ class PoetryDataset:
tokenizer: Tokenizer
"""分词器"""
poetry: list[str]
"""古诗词数据集"""
"""古诗词数据集,每一项是一首诗"""
def __init__(self, force_reclean: bool = False):
# 加载古诗,然后统计词频构建分词器
@@ -121,7 +121,7 @@ class PoetryDataset:
line = line.strip()
# 有且只能有一个冒号用来分割标题
if line.count(':') != 1: continue
# 获取后半部分
# 获取后半部分(删除标题)
_, last_part = line.split(':')
# 长度不能超过最大长度
if len(last_part) > PoetryDataset.MAX_SEG_LEN - 2: