-
Notifications
You must be signed in to change notification settings - Fork 3
/
fix_uid_index.py
259 lines (241 loc) · 9 KB
/
fix_uid_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# Rebuild the UID index by clearing and reindexing.
# Also registers a new UID for items that have the same UID as another item.
# This can at least happen when you import a zexp twice, in different folders.
#
# One of them will keep its original UID, and you cannot influence which one this is.
# This might matter when there are resolveuid links in the site.
#
# For updates and more such scripts, see https://github.com/zestsoftware/plonescripts
#
# Run this with:
# bin/instance run scripts/fix_uid_index.py
#
# Note: this script has only been tested on Python 3.
# It uses %-style string formatting instead of f-strings, so it may also run on Python 2 (untested).
# Tested on Plone 5.2.
import argparse
import sys
import transaction
from plone import api
from plone.app.redirector.interfaces import IRedirectionStorage
from plone.uuid.handlers import addAttributeUUID
from plone.uuid.interfaces import ATTRIBUTE_NAME
from plone.uuid.interfaces import IUUID
from zope.component import getUtility
from zope.component.hooks import setSite
from zope.intid.interfaces import IIntIds
# Command line options understood by this script.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--dry-run",
    dest="dry_run",
    default=False,
    action="store_true",
    help="Dry run. No changes will be saved.",
)
parser.add_argument(
    "--site",
    dest="site",
    default="",
    help="Single site id to work on. Default is to work on all.",
)

# When started with 'bin/instance run', sys.argv looks something like:
#   ['.../parts/instance/bin/interpreter', '-c',
#    'scripts/fix_uid_index.py', '--dry-run', '--site=nl']
# The first three entries are not ours, so parsing starts at index 3.
options = parser.parse_args(args=sys.argv[3:])
if options.dry_run:
    print("Dry run selected, will not commit changes.")

# 'app' is the Zope root.
# Decide which Plone Sites to work on.
if not options.site:
    # No site id given: take every object in the root that is a Plone Site.
    plones = [
        candidate
        for candidate in app.objectValues()  # noqa
        if getattr(candidate, "portal_type", "") == "Plone Site"
    ]
else:
    # A site id was given: look up only that attribute on the root.
    plones = [getattr(app, options.site)]
def commit(note):
    """Print *note* and commit the current transaction with it attached.

    When the ``--dry-run`` option is active, the note is still printed,
    but a message is shown instead of committing.
    """
    print(note)
    if not options.dry_run:
        # Store the note on the transaction before committing it.
        transaction.get().note(note)
        transaction.commit()
        return
    print("Dry run selected, not committing.")
# Main loop: repair the catalog and the UID index of each selected Plone Site.
# Per site the steps are:
#   1. uncatalog paths that no longer resolve to the object at that exact path;
#   2. report statistics of the UID index and detect inconsistencies;
#   3. give a fresh UUID to objects whose doc id is missing from the index;
#   4. clear and reindex the UID index;
#   5. rebuild the redirection storage and commit (unless this is a dry run).
for site in plones:
    print("")
    print("Handling Plone Site %s." % site.id)
    setSite(site)
    catalog = api.portal.get_tool(name="portal_catalog")
    actual_catalog = catalog._catalog

    # Step 1: collect stale catalog paths first, and uncatalog them afterwards,
    # so the catalog is not modified while we iterate over its paths.
    uncatalog_paths = []
    for path in actual_catalog.uids.keys():
        try:
            obj = app.unrestrictedTraverse(path)
        except KeyError:
            print(
                "The catalog has an object at path %s but nothing exists there." % path
            )
            uncatalog_paths.append(path)
            continue
        # This might find an item by acquisition.
        # migration-law/migration-law/migration-law/research.htm
        # may actually be migration-law/research.htm
        actual_path = "/".join(obj.getPhysicalPath())
        if path == actual_path:
            continue
        print(
            "Object is indexed at %s but is actually at a different path, likely due to acquisition: %s" %
            (path, actual_path)
        )
        uncatalog_paths.append(path)
    for path in uncatalog_paths:
        print("Uncataloging object at %s" % path)
        actual_catalog.uncatalogObject(path)

    # Step 2: inspect the UID index.
    # Problems in the UID index could also mean some objects have no intid.
    intids = getUtility(IIntIds)
    fixed_intid = 0
    index = catalog.Indexes["UID"]
    # _index: UID -> doc id
    # _unindex: doc id -> UID
    _index_keys = index._index.keys()
    _index_values = index._index.values()
    _unindex_keys = index._unindex.keys()
    _unindex_values = index._unindex.values()
    # Build these sets once: they are used for the statistics below and for
    # the membership checks in the loop, which would otherwise have to search
    # the keys/values of the index again for every doc id.
    indexed_uids = set(_index_keys)
    indexed_docids = set(_index_values)
    print(
        "Number of _index uid keys: %d, unique: %d" %
        (len(_index_keys), len(indexed_uids))
    )
    print(
        "Number of _index doc id values: %d, unique: %d" %
        (len(_index_values), len(indexed_docids))
    )
    print(
        "Number of _unindex doc id keys: %d, unique: %d" %
        (len(_unindex_keys), len(set(_unindex_keys)))
    )
    print(
        "Number of _unindex uid values: %d, unique: %d" %
        (len(_unindex_values), len(set(_unindex_values)))
    )
    missing = 0
    seen_uids = set()
    # Gather a list of paths for which we will create a new uuid.
    recreate = []
    # The _index and _unindex could be inconsistent in various ways.
    # Not all inconsistencies may be possible.
    # It depends on what the exact problem is in our site.
    # So we may do too many or too few checks here. Let's see.
    for docid, uid in index._unindex.items():
        if uid not in indexed_uids:
            # Note: I have not seen this.
            path = catalog.getpath(docid)
            print(
                "UID %s is missing from _index keys. docid %s, path %s" %
                (uid, docid, path))
            missing += 1
        if docid not in indexed_docids:
            # Note: this seems the main problem.
            path = catalog.getpath(docid)
            print(
                "Doc id %s is missing from _index values. UID %s, path %s" %
                (docid, uid, path)
            )
            missing += 1
            recreate.append(path)
        if uid in seen_uids:
            # This probably only happens if docid is not in _index_values
            # (see previous condition), but let's check and report separately.
            print("UID %s is duplicate in the _unindex values:" % uid)
            for (key, value) in index._unindex.items():
                if value != uid:
                    continue
                path = catalog.getpath(key)
                print("- doc id %s path %s" % (key, path))
                try:
                    obj = app.unrestrictedTraverse(path)
                except KeyError:
                    print("Ignoring unreachable path when checking duplicate UID: %s" % path)
                    continue
                # Make sure every object sharing this UID has an intid.
                try:
                    intids.getId(obj)
                except KeyError:
                    intids.register(obj)
                    fixed_intid += 1
                    print("- Registered intid for object at path %s" % path)
        else:
            seen_uids.add(uid)

    if not (missing or recreate or fixed_intid or uncatalog_paths):
        # Nothing was changed for this site, so there is nothing to commit.
        print(
            "No UIDs are missing or need to be recreated, and no intids were added, "
            "and no paths were uncataloged."
        )
        continue

    # Step 3: give a new UUID to each object gathered above.
    if recreate:
        print(
            "We will recreate %d UIDs/UUIDs that are currently duplicate." % len(recreate)
        )
        print("You might need to manually fix some links.")
        print(
            "We have no way of knowing if a link should use resolveuid/old_uid or resolveuid/new_uid."
        )
        print(
            "Perhaps we could query the relation catalog to see which relations an item has."
        )
    for path in recreate:
        try:
            obj = app.unrestrictedTraverse(path)
        except KeyError:
            print("Ignoring unreachable path to recreate UID: %s" % path)
            continue
        # obj.UID() would return the UID of the parent in case
        # obj is a Discussion Item.
        old_uuid = IUUID(obj)
        # This might find an item by acquisition.
        # migration-law/migration-law/migration-law/research.htm
        # may actually be migration-law/research.htm
        actual_path = "/".join(obj.getPhysicalPath())
        if actual_path != path:
            print(
                "Wanted to recreate UID for path %s, but this leads to other path %s. Ignoring." %
                (path, actual_path)
            )
            continue
        # Note: currently this gives zero results,
        # because the index is inconsistent for this uid:
        # catalog.unrestrictedSearchResults(UID=old_uuid)
        # After this fix plus index clear+reindex, it works again.
        delattr(obj, ATTRIBUTE_NAME)
        # Call the event handler that adds a UUID:
        addAttributeUUID(obj, None)
        # Reindex the UID index for this object and update its metadata in the catalog.
        obj.reindexObject(idxs=["UID"])
        new_uuid = IUUID(obj)
        print(
            "Changed UID from %s to %s for %s" %
            (old_uuid, new_uuid, path)
        )

    # Step 4: even after the above fix, the clear and reindex is still needed.
    print("Clearing UID index")
    index.clear()
    print("Reindexing UID index")
    catalog._catalog.reindexIndex("UID", site.REQUEST)
    if len(index._index) != len(index._unindex):
        print(
            "ERROR for site %s: after all fixes and reindexing, "
            "the UID _index has %d entries "
            "and its reverse _unindex has %d" %
            (site.id, len(index._index), len(index._unindex))
        )
        print("ERROR: NOT COMMITTING ANYTHING.")
        # Throw away the pending changes of this site. Without this they
        # would stay in the current transaction and would be saved anyway
        # by the commit of the next site that is handled successfully.
        transaction.abort()
        continue

    # Step 5: on a hunch, let's rebuild the redirection storage.
    # Only takes a few seconds.
    storage = getUtility(IRedirectionStorage)
    storage._rebuild()
    print("Committing...")
    commit("Fixed inconsistencies in UID index for site %s." % site.id)