Skip to content

Commit

Permalink
rm digit related regexes
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolaspanel committed Feb 26, 2019
1 parent 5090d1c commit 149e960
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 8 deletions.
8 changes: 0 additions & 8 deletions src/corporacreator/preprocessors/fr.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,15 @@

# Normalization rules for French text, as a list of [pattern, replacement] pairs.
# A pattern is either a literal string or a compiled regex. Regex replacements
# use backreferences (\1, \2, ...) to re-emit the captured leading
# whitespace/start-of-string and the trailing whitespace/punctuation/end-of-string.
# NOTE(review): the code that applies these rules is not visible here; presumably
# they are applied sequentially in list order (str.replace / re.sub) - confirm
# against the consumer before reordering entries.
FR_NORMALIZATIONS = [
# Literal replacement: spell out the regnal number.
['Jean-Paul II', 'Jean-Paul deux'],
# "12T" -> "12 tonnes" (digits immediately followed by a capital T).
[re.compile(r'(^|\s)(\d+)T(\s|\.|,|\?|!|$)'), r'\1\2 tonnes\3'],
# Standalone "/an" -> "par an".
[re.compile(r'(^|\s)/an(\s|\.|,|\?|!|$)'), r'\1par an\2'],
# Joins a digit group with a following "000" group separated by one whitespace.
[re.compile(r'(^|\s)(\d+)\s(0{3})(\s|\.|,|\?|!|$)'), r'\1\2\3\4'], # "123 000 …" => "123000 …"
# Standalone "km" -> " kilomètres " (the replacement adds a space on each side).
[re.compile(r'(^|\s)km(\s|\.|,|\?|!|$)'), r'\1 kilomètres \2'],
# Two-character token "0<digit>" -> "zéro <digit> ", e.g. "07" -> "zéro 7 ".
[re.compile(r'(^|\s)0(\d)(\s|\.|,|\?|!|$)'), r'\1zéro \2 \3'],
# Literal replacement of every "%" with " pourcent" (leading space included).
['%', ' pourcent'],
# Standalone "+" -> " plus ".
[re.compile(r'(^|\s)\+(\s|\.|,|\?|!|$)'), r'\1 plus \2'],
# "<digits> m2" / "<digits>m²" -> "<digits> mètre carré" (optional space before "m").
[re.compile(r'(\d+)\s?m(?:2|²)(\s|\.|,|\?|!|$)'), r'\1 mètre carré\2'],
# Standalone "m2" / "m²" -> "mètre carré".
[re.compile(r'(^|\s)m(?:2|²)(\s|\.|,|\?|!|$)'), r'\1mètre carré\2'],
# "/m2", "/ m²" -> " par mètre carré".
[re.compile(r'/\s?m(?:2|²)(\s|\.|,|\?|!|$)'), r' par mètre carré\1'],
# "<digits>,<2 digits> €" -> "<digits> euros <2 digits> ", e.g. "12,50 €" -> "12 euros 50 ".
[re.compile(r'(^|\s)(\d+),(\d{2})\s?€(\s|\.|,|\?|!|$)'), r'\1\2 euros \3 \4'],
# "€" followed by at least one character -> " euros" + the rest; an optional
# whitespace character before "€" is consumed.
[re.compile(r'\s?€(.+)'), r' euros\1'],
# "€" at the very end of the string -> " euros".
[re.compile(r'\s?€$'), r' euros'],
# "n°" / "N°" -> "numéro " / "Numéro " (case-insensitive; \2 keeps the original
# letter case, and one optional following whitespace character is dropped).
# NOTE(review): the first and third alternatives of the sign group render
# identically here; check whether they are meant to be distinct code points.
[re.compile(r'(^| )(n)(?:°|º|°)(\s)?', flags=re.IGNORECASE), r'\1\2uméro '],
# "10h30" -> "10 heure 30" (the minutes part may be empty).
[re.compile(r'(^|\s)(\d+)h(\d*)(\s|\.|,|$)'), r'\1\2 heure \3\4'],
# Same as above but tolerating one optional whitespace on either side of "h",
# e.g. "10 h 30" -> "10 heure 30".
[re.compile(r'(^|\s)(\d+)\s?h\s?(\d*)(\s|\.|,|$)'), r'\1\2 heure \3\4'],
# "10h" with no minutes -> "10 heure ".
# NOTE(review): the previous pattern appears to match everything this one and
# the "10h30" pattern match - verify before relying on all three.
[re.compile(r'(^|\s)(\d+)h(\s|\.|,|$)'), r'\1\2 heure \3'],
]


Expand Down
1 change: 1 addition & 0 deletions tests/test_preprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
('fr', '*', "bah 98%", "bah quatre-vingt-dix-huit pourcent"),
('fr', '*', "prix au m2", "prix au mètre carré"),
('fr', '*', "prix au m²", "prix au mètre carré"),
('fr', '*', "prix /m²", "prix par mètre carré"),
('fr', '*', "10 m²", "dix mètre carré"),
('fr', '*', "2éme page", "deuxième page"),
('fr', '*', "donc, ce sera 299 € + 99 €", "donc, ce sera deux cent quatre-vingt-dix-neuf euros plus quatre-vingt-dix-neuf euros"),
Expand Down

0 comments on commit 149e960

Please sign in to comment.