-
Notifications
You must be signed in to change notification settings - Fork 0
/
add_work_subjects.py
123 lines (98 loc) · 3.88 KB
/
add_work_subjects.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""Add subject classifications to TEI <bibl> elements.
This script prompts the user to select one or more subject classifications for
each <bibl> element in a TEI XML file. The selected classifications are added
as <term> elements within the <bibl> elements.
Examples:
$ python add_work_subjects.py ../medieval-mss/works.xml
$ python add_work_subjects.py -h
"""
import argparse
import sys
from tei.elements import Bibl, Category, Namespace
from tei.xml import WorksFile
class CategorySelector(list[str]):
    """Interactive prompt for choosing subject categories for one work.

    Calling an instance prints the title of a <bibl> element together with
    a numbered menu of categories, reads a line of whitespace-separated
    numbers from standard input, and keeps asking until every entry is a
    number shown in the menu.

    The ``list[str]`` base class is retained for compatibility; none of the
    methods defined here store anything in it.

    Methods:
        __call__: Prompt the user for input and return the selected
            category IDs
        _print_categories: Print the available categories in rows of three
    """

    def __call__(
        self, bibl_title: str, categories: list[Category]
    ) -> list[str]:
        """Return category IDs from the user's selection.

        The prompt is repeated until the whole line is valid. An empty
        line is valid and yields an empty list.

        Args:
            bibl_title (str): The title of the <bibl> element
            categories (list[Category]): The available categories

        Returns:
            list[str]: The selected category IDs, in the order entered,
                with repeated selections collapsed to a single entry
        """
        descriptions = [
            category.category_description for category in categories
        ]
        highest = len(categories)

        while True:
            print(f"\n{bibl_title}\n")
            self._print_categories(descriptions)
            selection: str = input("\nEnter one or more category numbers: ")

            try:
                numbers = [int(token) for token in selection.split()]
            except ValueError:
                sys.stderr.write("Please enter one or more numbers.\n")
                continue

            # Check the range explicitly: 0 or a negative number would
            # otherwise become a negative list index and silently pick a
            # category from the end of the list.
            if any(not 1 <= number <= highest for number in numbers):
                sys.stderr.write("Please select from the numbers listed.\n")
                continue

            # dict.fromkeys drops repeated IDs while keeping first-seen
            # order, so the same category is never returned twice.
            return list(
                dict.fromkeys(categories[number - 1].id for number in numbers)
            )

    def _print_categories(self, category_descriptions: list[str]) -> None:
        """Print the available categories in rows of three.

        Each entry is numbered from 1. A final row with fewer than three
        entries is left without a trailing newline.

        Args:
            category_descriptions (list[str]): Category descriptions
        """
        for index, description in enumerate(category_descriptions, start=1):
            print(f"{index:>2}. {description:<25}", end="")
            # end the row after every third category
            if index % 3 == 0:
                print()
def main() -> int:
    """Prompt the user to select categories for each unclassified <bibl>.

    Parses the command line, opens the works file, and visits every <bibl>
    element that has an xml:id but no <term> child. For each one the user
    is asked for categories; any selection is added as terms and the file
    is written before moving on to the next element.

    Returns:
        int: The exit code (0 when every element was offered, 1 when the
            session was ended early with Ctrl-C or end-of-input)
    """
    parser = argparse.ArgumentParser(
        # __doc__ is None when Python runs with -OO, so fall back to "".
        description=(__doc__ or "").strip(),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        dest="works_file_path",
        nargs="?",
        default="../medieval-mss/works.xml",
        help="Path to the TEI XML file containing the <bibl> elements",
        type=str,
    )
    args: argparse.Namespace = parser.parse_args()

    works = WorksFile(args.works_file_path)
    select_categories = CategorySelector()

    # Iterate over <bibl> elements with an xml:id but no <term> child
    for bibl_element in works.tree.xpath(
        "//tei:bibl[@xml:id and not(tei:term)]", namespaces=Namespace.tei
    ):
        # Wrap the element in a Bibl object for manipulating it
        bibl: Bibl = Bibl(bibl_element)

        # Get the user's selection of categories; leaving the prompt with
        # Ctrl-C or end-of-input ends the session without a traceback
        try:
            selected_categories: list[str] = select_categories(
                bibl.title, works.categories
            )
        except (EOFError, KeyboardInterrupt):
            sys.stderr.write("\nAborted.\n")
            return 1

        # If there is no selection, skip to the next <bibl> element
        if not selected_categories:
            continue

        # Add <term> elements for the selected categories
        for category in selected_categories:
            bibl.add_term(category)

        # Update the XML file after each classified element so that work
        # already done is kept if the session stops later
        works.write()

    return 0
# Run the prompt loop only when executed as a script, and hand main()'s
# return value to the interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())