diff --git a/machine/translation/huggingface/hugging_face_nmt_engine.py b/machine/translation/huggingface/hugging_face_nmt_engine.py index 9d43b28..4da476c 100644 --- a/machine/translation/huggingface/hugging_face_nmt_engine.py +++ b/machine/translation/huggingface/hugging_face_nmt_engine.py @@ -55,8 +55,8 @@ def __init__( self._tokenizer = AutoTokenizer.from_pretrained(self._model.name_or_path, use_fast=True) if isinstance(self._tokenizer, (NllbTokenizer, NllbTokenizerFast)): self._mpn = MosesPunctNormalizer() - self._mpn.substitutions = [ - (str(re.compile(r)), sub) + self._mpn.substitutions = [ # type: ignore + (re.compile(r), sub) for r, sub in self._mpn.substitutions if isinstance(r, str) and isinstance(sub, str) ] diff --git a/machine/translation/huggingface/hugging_face_nmt_model_trainer.py b/machine/translation/huggingface/hugging_face_nmt_model_trainer.py index f15ba4e..1192243 100644 --- a/machine/translation/huggingface/hugging_face_nmt_model_trainer.py +++ b/machine/translation/huggingface/hugging_face_nmt_model_trainer.py @@ -100,10 +100,8 @@ def __init__( self._add_unk_src_tokens = add_unk_src_tokens self._add_unk_tgt_tokens = add_unk_tgt_tokens self._mpn = MosesPunctNormalizer() - self._mpn.substitutions = [ - (str(re.compile(r)), sub) - for r, sub in self._mpn.substitutions - if isinstance(r, str) and isinstance(sub, str) + self._mpn.substitutions = [ # type: ignore + (re.compile(r), sub) for r, sub in self._mpn.substitutions if isinstance(r, str) and isinstance(sub, str) ] self._stats = TrainStats()