-
Notifications
You must be signed in to change notification settings - Fork 0
/
abbrevs_ms.py
executable file
·75 lines (67 loc) · 1.71 KB
/
abbrevs_ms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3
"""
Pandoc filter to recognise some abbreviations and prevent sentence
spacing after them in ``latex``, ``beamer`` and ``ms`` output."""
from pandocfilters import toJSONFilter, RawInline
import regex # allow unicode character properties
# Abbreviation stems, written as regex fragments WITHOUT the final period and
# grouped by language code.  PATTERN joins every group into one alternation
# and appends the closing ``\.`` itself, so an entry must not end in ``\.``
# (otherwise only a doubled period, e.g. "ggf..", could match).
ABBREVS = {
    "all": [
        r'\p{L}',  # any single letter
    ],
    "de": [
        r'[Ee]vtl',
        r'Nr',
        r'[Zz]\.Zt',
        r'[Gg]gf',  # was r'[Gg]gf\.', which required two periods
        # r'[UuOo]\.[AaäÄ]',
        # r'[Uu]\.?s\.?w',
        r'[Ss]',
        # r'[muMUsSiI]\.e',
        r'[Vv]gl',
        r'Kap',
        r'Fn',
        r'Anm',
        r'Hg',
        r'Hgg',
        r'Hrsg',
        r'Abb',
        r'\d+',  # a number followed by a period
        r'[Ii]nsb',
    ],
    "en": [
        r'[Pp]p?',
        r'[Nn]o',
        r'[Vv]ol',
        # r'[Ee]\.g',
        # r'[Ii]\.e',
        r'[Vv]iz',
        r'fig',
    ],
}
# A whole word that is one known abbreviation stem followed by a period,
# optionally preceded by a single opening quote (Pi) or opening bracket (Ps).
# Stems from every language group in ABBREVS are merged into one alternation.
PATTERN = regex.compile(''.join((
    r'^[\p{Pi}\p{Ps}]?(?:',
    r'|'.join(stem for stems in ABBREVS.values() for stem in stems),
    r')\.$',
)))

# A whole word made of two or more "letters + period" parts, e.g. "a.b.".
MULTI_PATTERN = regex.compile(r'^(?:\p{L}+\.){2,}$')
def abbrevs(key, value, fmt, _meta):
    """French-space guessed abbreviations.

    Filter action for ``toJSONFilter``.  A ``Str`` element whose text matches
    ``PATTERN`` (a known abbreviation) or ``MULTI_PATTERN`` (several
    "letters + period" parts in one word) is replaced by a raw inline that
    carries the same text plus format-specific spacing markup:

    * ``latex`` / ``beamer``: ``\\kern 0.16667ex`` after each inner part of a
      multi-part abbreviation, and ``\\ `` appended after the final period.
    * ``ms``: ``\\|`` after each inner part, and ``\\&`` appended after the
      final period.

    Args:
        key: Element type name; only ``'Str'`` is handled.
        value: Element contents (the word text for ``Str``).
        fmt: Output format name; anything other than ``latex``, ``beamer``
            or ``ms`` is left untouched.
        _meta: Document metadata (unused).

    Returns:
        A ``RawInline`` for ``fmt`` replacing the element, or ``None`` to
        leave the element unchanged.
    """
    if fmt in ("latex", "beamer"):
        inner_sep, trailer = r'\kern 0.16667ex', r'\ '
    elif fmt == "ms":
        inner_sep, trailer = r'\|', r'\&'
    else:
        return None  # change nothing

    if key != 'Str':
        return None  # change nothing

    multi = MULTI_PATTERN.match(value)
    if multi:
        # Insert the separator after every part except the last one.  A
        # callable replacement is used so the markup is inserted verbatim:
        # as a template string, ``\k`` in ``\kern`` is an unknown escape whose
        # handling differs between regex versions (an error in some of them).
        value = regex.sub(
            r'\p{L}+\.(?!$)',
            lambda part: part.group(0) + inner_sep,
            value,
        )

    if multi or PATTERN.match(value):
        return RawInline(fmt, value + trailer)
    return None  # change nothing
if __name__ == "__main__":
    # Script entry point: hand the `abbrevs` action to the pandocfilters
    # JSON filter driver.
    toJSONFilter(abbrevs)