- from hanlp.datasets.srl.ontonotes5.chinese import ONTONOTES5_CONLL12_CHINESE_TEST, ONTONOTES5_CONLL12_CHINESE_DEV, \
- ONTONOTES5_CONLL12_CHINESE_TRAIN
- from hanlp.layers.embeddings.contextual_word_embedding import ContextualWordEmbedding
- from hanlp.layers.transformers.relative_transformer import RelativeTransformerEncoder
- from hanlp.utils.lang.zh.char_table import HANLP_CHAR_TABLE_JSON
- from hanlp.utils.log_util import cprint
- from tests import cdroot
- cdroot()  # NOTE(review): helper imported from `tests`; presumably switches the working directory to the project root so later relative paths resolve — confirm against its definition
- tasks = {
- 'tok': TaggingTokenization(
- CTB8_CWS_TRAIN,
- CTB8_CWS_DEV,
- CTB8_CWS_TEST,
- SortingSamplerBuilder(batch_size=32),
- max_seq_len=510,
- hard_constraint=True,
- char_level=True,
- tagging_scheme='BMES',
- lr=1e-3,
- transform=NormalizeCharacter(HANLP_CHAR_TABLE_JSON, 'token'),