138
143
_to_escaped_map.clear()
141
class Serializer_v5(Serializer):
142
"""Version 5 serializer
146
class Serializer_v8(XMLSerializer):
147
"""This serialiser adds rich roots.
144
Packs objects into XML and vice versa.
149
Its revision format number matches its inventory number.
149
155
support_altered_by_hack = True
150
156
# This format supports the altered-by hack that reads file ids directly out
151
157
# of the versionedfile, without doing XML parsing.
153
159
supported_kinds = set(['file', 'directory', 'symlink'])
156
def write_inventory_to_string(self, inv):
157
"""Just call write_inventory with a StringIO and return the value"""
161
revision_format_num = None
163
def _check_revisions(self, inv):
164
"""Extension point for subclasses to check during serialisation.
166
:param inv: An inventory about to be serialised, to be checked.
167
:raises: AssertionError if an error has occurred.
169
if inv.revision_id is None:
170
raise AssertionError()
171
if inv.root.revision is None:
172
raise AssertionError()
174
def _check_cache_size(self, inv_size, entry_cache):
175
"""Check that the entry_cache is large enough.
177
We want the cache to be ~2x the size of an inventory. The reason is
178
because we use a FIFO cache, and how Inventory records are likely to
179
change. In general, you have a small number of records which change
180
often, and a lot of records which do not change at all. So when the
181
cache gets full, you actually flush out a lot of the records you are
182
interested in, which means you need to recreate all of those records.
183
An LRU Cache would be better, but the overhead negates the cache
186
One way to look at it, only the size of the cache > len(inv) is your
187
'working' set. And in general, it shouldn't be a problem to hold 2
188
inventories in memory anyway.
190
:param inv_size: The number of entries in an inventory.
192
if entry_cache is None:
194
# 1.5 times might also be reasonable.
195
recommended_min_cache_size = inv_size * 1.5
196
if entry_cache.cache_size() < recommended_min_cache_size:
197
recommended_cache_size = inv_size * 2
198
trace.mutter('Resizing the inventory entry cache from %d to %d',
199
entry_cache.cache_size(), recommended_cache_size)
200
entry_cache.resize(recommended_cache_size)
202
def write_inventory_to_lines(self, inv):
203
"""Return a list of lines with the encoded inventory."""
204
return self.write_inventory(inv, None)
206
def write_inventory_to_string(self, inv, working=False):
207
"""Just call write_inventory with a StringIO and return the value.
209
:param working: If True skip history data - text_sha1, text_size,
210
reference_revision, symlink_target.
158
212
sio = cStringIO.StringIO()
159
self.write_inventory(inv, sio)
213
self.write_inventory(inv, sio, working)
160
214
return sio.getvalue()
162
def write_inventory(self, inv, f):
216
def write_inventory(self, inv, f, working=False):
163
217
"""Write inventory to a file.
165
219
:param inv: the inventory to write.
166
:param f: the file to write.
220
:param f: the file to write. (May be None if the lines are the desired
222
:param working: If True skip history data - text_sha1, text_size,
223
reference_revision, symlink_target.
224
:return: The inventory as a list of lines.
168
226
_ensure_utf8_re()
227
self._check_revisions(inv)
170
229
append = output.append
171
230
self._append_inventory_root(append, inv)
174
233
root_path, root_ie = entries.next()
175
234
for path, ie in entries:
176
self._append_entry(append, ie)
235
if ie.parent_id != self.root_id:
236
parent_str = ' parent_id="'
237
parent_id = _encode_and_escape(ie.parent_id)
241
if ie.kind == 'file':
243
executable = ' executable="yes"'
247
append('<file%s file_id="%s name="%s%s%s revision="%s '
248
'text_sha1="%s" text_size="%d" />\n' % (
249
executable, _encode_and_escape(ie.file_id),
250
_encode_and_escape(ie.name), parent_str, parent_id,
251
_encode_and_escape(ie.revision), ie.text_sha1,
254
append('<file%s file_id="%s name="%s%s%s />\n' % (
255
executable, _encode_and_escape(ie.file_id),
256
_encode_and_escape(ie.name), parent_str, parent_id))
257
elif ie.kind == 'directory':
259
append('<directory file_id="%s name="%s%s%s revision="%s '
261
_encode_and_escape(ie.file_id),
262
_encode_and_escape(ie.name),
263
parent_str, parent_id,
264
_encode_and_escape(ie.revision)))
266
append('<directory file_id="%s name="%s%s%s />\n' % (
267
_encode_and_escape(ie.file_id),
268
_encode_and_escape(ie.name),
269
parent_str, parent_id))
270
elif ie.kind == 'symlink':
272
append('<symlink file_id="%s name="%s%s%s revision="%s '
273
'symlink_target="%s />\n' % (
274
_encode_and_escape(ie.file_id),
275
_encode_and_escape(ie.name),
276
parent_str, parent_id,
277
_encode_and_escape(ie.revision),
278
_encode_and_escape(ie.symlink_target)))
280
append('<symlink file_id="%s name="%s%s%s />\n' % (
281
_encode_and_escape(ie.file_id),
282
_encode_and_escape(ie.name),
283
parent_str, parent_id))
284
elif ie.kind == 'tree-reference':
285
if ie.kind not in self.supported_kinds:
286
raise errors.UnsupportedInventoryKind(ie.kind)
288
append('<tree-reference file_id="%s name="%s%s%s '
289
'revision="%s reference_revision="%s />\n' % (
290
_encode_and_escape(ie.file_id),
291
_encode_and_escape(ie.name),
292
parent_str, parent_id,
293
_encode_and_escape(ie.revision),
294
_encode_and_escape(ie.reference_revision)))
296
append('<tree-reference file_id="%s name="%s%s%s />\n' % (
297
_encode_and_escape(ie.file_id),
298
_encode_and_escape(ie.name),
299
parent_str, parent_id))
301
raise errors.UnsupportedInventoryKind(ie.kind)
177
302
append('</inventory>\n')
179
305
# Just to keep the cache from growing without bounds
180
306
# but we may actually not want to do clear the cache
183
310
def _append_inventory_root(self, append, inv):
184
311
"""Append the inventory root to output."""
186
if inv.root.file_id not in (None, ROOT_ID):
188
append(_encode_and_escape(inv.root.file_id))
189
append(' format="5"')
190
312
if inv.revision_id is not None:
191
append(' revision_id="')
192
append(_encode_and_escape(inv.revision_id))
195
def _append_entry(self, append, ie):
196
"""Convert InventoryEntry to XML element and append to output."""
197
# TODO: should just be a plain assertion
198
if ie.kind not in self.supported_kinds:
199
raise errors.UnsupportedInventoryKind(ie.kind)
204
append(' executable="yes"')
206
append(_encode_and_escape(ie.file_id))
208
append(_encode_and_escape(ie.name))
209
if self._parent_condition(ie):
210
assert isinstance(ie.parent_id, basestring)
211
append(' parent_id="')
212
append(_encode_and_escape(ie.parent_id))
213
if ie.revision is not None:
214
append(' revision="')
215
append(_encode_and_escape(ie.revision))
216
if ie.symlink_target is not None:
217
append(' symlink_target="')
218
append(_encode_and_escape(ie.symlink_target))
219
if ie.text_sha1 is not None:
220
append(' text_sha1="')
223
if ie.text_size is not None:
224
append(' text_size="%d"' % ie.text_size)
225
if getattr(ie, 'reference_revision', None) is not None:
226
append(' reference_revision="')
227
append(_encode_and_escape(ie.reference_revision))
231
def _parent_condition(self, ie):
232
return ie.parent_id != ROOT_ID
313
revid1 = ' revision_id="'
314
revid2 = _encode_and_escape(inv.revision_id)
318
append('<inventory format="%s"%s%s>\n' % (
319
self.format_num, revid1, revid2))
320
append('<directory file_id="%s name="%s revision="%s />\n' % (
321
_encode_and_escape(inv.root.file_id),
322
_encode_and_escape(inv.root.name),
323
_encode_and_escape(inv.root.revision)))
234
325
def _pack_revision(self, rev):
235
326
"""Revision object -> xml tree"""
271
364
def _pack_revision_properties(self, rev, under_element):
272
365
top_elt = SubElement(under_element, 'properties')
273
366
for prop_name, prop_value in sorted(rev.properties.items()):
274
assert isinstance(prop_name, basestring)
275
assert isinstance(prop_value, basestring)
276
367
prop_elt = SubElement(top_elt, 'property')
277
368
prop_elt.set('name', prop_name)
278
369
prop_elt.text = prop_value
279
370
prop_elt.tail = '\n'
280
371
top_elt.tail = '\n'
282
def _unpack_inventory(self, elt):
283
"""Construct from XML Element
285
assert elt.tag == 'inventory'
286
root_id = elt.get('file_id') or ROOT_ID
287
root_id = _get_utf8_or_ascii(root_id)
373
def _unpack_inventory(self, elt, revision_id=None, entry_cache=None):
374
"""Construct from XML Element"""
375
if elt.tag != 'inventory':
376
raise errors.UnexpectedInventoryFormat('Root tag is %r' % elt.tag)
289
377
format = elt.get('format')
290
if format is not None:
292
raise BzrError("invalid format version %r on inventory"
378
if format != self.format_num:
379
raise errors.UnexpectedInventoryFormat('Invalid format version %r'
294
381
revision_id = elt.get('revision_id')
295
382
if revision_id is not None:
296
383
revision_id = cache_utf8.encode(revision_id)
297
inv = Inventory(root_id, revision_id=revision_id)
384
inv = inventory.Inventory(root_id=None, revision_id=revision_id)
299
ie = self._unpack_entry(e)
300
if ie.parent_id is None:
301
ie.parent_id = root_id
386
ie = self._unpack_entry(e, entry_cache=entry_cache)
388
self._check_cache_size(len(inv), entry_cache)
305
def _unpack_entry(self, elt):
391
def _unpack_entry(self, elt, entry_cache=None):
393
file_id = elt_get('file_id')
394
revision = elt_get('revision')
395
# Check and see if we have already unpacked this exact entry
396
# Some timings for "repo.revision_trees(last_100_revs)"
398
# unmodified 4.1s 40.8s
400
# using fifo 2.83s 29.1s
404
# no_copy 2.00s 20.5s
405
# no_c,dict 1.95s 18.0s
406
# Note that a cache of 10k nodes is more than sufficient to hold all of
407
# the inventory for the last 100 revs for bzr, but not for mysql (20k
408
# is enough for mysql, which saves the same 2s as using a dict)
410
# Breakdown of mysql using time.clock()
411
# 4.1s 2 calls to element.get for file_id, revision_id
412
# 4.5s cache_hit lookup
413
# 7.1s InventoryFile.copy()
414
# 2.4s InventoryDirectory.copy()
415
# 0.4s decoding unique entries
416
# 1.6s decoding entries after FIFO fills up
417
# 0.8s Adding nodes to FIFO (including flushes)
418
# 0.1s cache miss lookups
420
# 4.1s 2 calls to element.get for file_id, revision_id
421
# 9.9s cache_hit lookup
422
# 10.8s InventoryEntry.copy()
423
# 0.3s cache miss lookus
424
# 1.2s decoding entries
425
# 1.0s adding nodes to LRU
426
if entry_cache is not None and revision is not None:
427
key = (file_id, revision)
429
# We copy it, because some operations may mutate it
430
cached_ie = entry_cache[key]
434
# Only copying directory entries drops us 2.85s => 2.35s
435
# if cached_ie.kind == 'directory':
436
# return cached_ie.copy()
438
return cached_ie.copy()
307
441
if not InventoryEntry.versionable_kind(kind):
308
442
raise AssertionError('unsupported entry kind %s' % kind)
310
444
get_cached = _get_utf8_or_ascii
312
parent_id = elt.get('parent_id')
446
file_id = get_cached(file_id)
447
if revision is not None:
448
revision = get_cached(revision)
449
parent_id = elt_get('parent_id')
313
450
if parent_id is not None:
314
451
parent_id = get_cached(parent_id)
315
file_id = get_cached(elt.get('file_id'))
317
453
if kind == 'directory':
318
454
ie = inventory.InventoryDirectory(file_id,
321
457
elif kind == 'file':
322
458
ie = inventory.InventoryFile(file_id,
325
ie.text_sha1 = elt.get('text_sha1')
326
if elt.get('executable') == 'yes':
461
ie.text_sha1 = elt_get('text_sha1')
462
if elt_get('executable') == 'yes':
327
463
ie.executable = True
328
v = elt.get('text_size')
464
v = elt_get('text_size')
329
465
ie.text_size = v and int(v)
330
466
elif kind == 'symlink':
331
467
ie = inventory.InventoryLink(file_id,
334
ie.symlink_target = elt.get('symlink_target')
470
ie.symlink_target = elt_get('symlink_target')
336
472
raise errors.UnsupportedInventoryKind(kind)
337
revision = elt.get('revision')
338
if revision is not None:
339
revision = get_cached(revision)
340
473
ie.revision = revision
474
if revision is not None and entry_cache is not None:
475
# We cache a copy() because callers like to mutate objects, and
476
# that would cause the item in cache to mutate as well.
477
# This has a small effect on many-inventory performance, because
478
# the majority fraction is spent in cache hits, not misses.
479
entry_cache[key] = ie.copy()
344
483
def _unpack_revision(self, elt):
345
484
"""XML Element -> Revision object"""
346
assert elt.tag == 'revision'
347
485
format = elt.get('format')
486
format_num = self.format_num
487
if self.revision_format_num is not None:
488
format_num = self.revision_format_num
348
489
if format is not None:
350
raise BzrError("invalid format version %r on inventory"
490
if format != format_num:
491
raise BzrError("invalid format version %r on revision"
352
493
get_cached = _get_utf8_or_ascii
353
494
rev = Revision(committer = elt.get('committer'),