Skip to content

Commit

Permalink
Fix moses punctuation
Browse files (browse the repository at this point in the history)
  • Loading branch information
johnml1135 committed Nov 8, 2024
1 parent 730ea67 commit 744cc68
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 6 deletions.
4 changes: 2 additions & 2 deletions machine/translation/huggingface/hugging_face_nmt_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ def __init__(
self._tokenizer = AutoTokenizer.from_pretrained(self._model.name_or_path, use_fast=True)
if isinstance(self._tokenizer, (NllbTokenizer, NllbTokenizerFast)):
self._mpn = MosesPunctNormalizer()
- self._mpn.substitutions = [
- (str(re.compile(r)), sub)
+ self._mpn.substitutions = [ # type: ignore
+ (re.compile(r), sub)
for r, sub in self._mpn.substitutions
if isinstance(r, str) and isinstance(sub, str)
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,8 @@ def __init__(
self._add_unk_src_tokens = add_unk_src_tokens
self._add_unk_tgt_tokens = add_unk_tgt_tokens
self._mpn = MosesPunctNormalizer()
- self._mpn.substitutions = [
- (str(re.compile(r)), sub)
- for r, sub in self._mpn.substitutions
- if isinstance(r, str) and isinstance(sub, str)
+ self._mpn.substitutions = [ # type: ignore
+ (re.compile(r), sub) for r, sub in self._mpn.substitutions if isinstance(r, str) and isinstance(sub, str)
]
self._stats = TrainStats()

Expand Down

0 comments on commit 744cc68

Please sign in to comment.