Rev 3913: Add some direct tests for CHKInventory._entry_to_bytes in http://bazaar.launchpad.net/%7Ebzr/bzr/brisbane-core
John Arbash Meinel
john at arbash-meinel.com
Thu Mar 26 20:04:05 GMT 2009
At http://bazaar.launchpad.net/%7Ebzr/bzr/brisbane-core
------------------------------------------------------------
revno: 3913
revision-id: john at arbash-meinel.com-20090326195952-w0qea66iw597ipza
parent: john at arbash-meinel.com-20090326191304-w52buxewrxumpgvo
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: brisbane-core
timestamp: Thu 2009-03-26 14:59:52 -0500
message:
Add some direct tests for CHKInventory._entry_to_bytes
and _bytes_to_entry.
Also, add a new function _bytes_to_utf8name_key. I wanted to just add
_bytes_to_key, but it seems we have code that uses the name field to
check if this is a root key that should not be transmitted.
Anyway, by having this function, item_keys_introduced_by avoids a
bunch of .decode() calls, as well as not building up InventoryEntry
objects.
Also use this when gathering text_refs in GCPacker. Hopefully, we
could turn it on all the time, if it got cheap enough.
And it points us in the right direction for a StreamSource that
sends CHK pages.
-------------- next part --------------
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py 2009-03-25 12:18:09 +0000
+++ b/bzrlib/inventory.py 2009-03-26 19:59:52 +0000
@@ -1503,6 +1503,15 @@
else:
raise ValueError("unknown kind %r" % entry.kind)
+ @staticmethod
+ def _bytes_to_utf8name_key(bytes):
+ """Get the file_id, revision_id key out of bytes."""
+ # We don't normally care about name, except for times when we want
+ # to filter out empty names because of non rich-root...
+ sections = bytes.split('\n')
+ kind, file_id = sections[0].split(': ')
+ return (sections[2], file_id, sections[3])
+
def _bytes_to_entry(self, bytes):
"""Deserialise a serialised entry."""
sections = bytes.split('\n')
@@ -1521,7 +1530,7 @@
result = InventoryLink(sections[0][9:],
sections[2].decode('utf8'),
sections[1])
- result.symlink_target = sections[4]
+ result.symlink_target = sections[4].decode('utf8')
elif sections[0].startswith("tree: "):
result = TreeReference(sections[0][6:],
sections[2].decode('utf8'),
=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py 2009-03-24 19:36:34 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py 2009-03-26 19:59:52 +0000
@@ -242,9 +242,7 @@
remaining_keys = set(keys)
counter = [0]
if self._gather_text_refs:
- # Just to get _bytes_to_entry, so we don't care about the
- # search_key_name
- inv = inventory.CHKInventory(None)
+ bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
self._text_refs = set()
def _get_referenced_stream(root_keys, parse_leaf_nodes=False):
cur_keys = root_keys
@@ -271,8 +269,8 @@
# Store is None, because we know we have a LeafNode, and we
# just want its entries
for file_id, bytes in node.iteritems(None):
- entry = inv._bytes_to_entry(bytes)
- self._text_refs.add((entry.file_id, entry.revision))
+ name_utf8, file_id, revision_id = bytes_to_info(bytes)
+ self._text_refs.add((file_id, revision_id))
def next_stream():
stream = source_vf.get_record_stream(cur_keys,
'as-requested', True)
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2009-03-26 16:35:00 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2009-03-26 19:59:52 +0000
@@ -2468,23 +2468,23 @@
interesting_root_keys.add(inv.id_to_entry.key())
revision_ids = frozenset(revision_ids)
file_id_revisions = {}
+ bytes_to_info = CHKInventory._bytes_to_utf8name_key
for records, items in chk_map.iter_interesting_nodes(self.chk_bytes,
interesting_root_keys, uninteresting_root_keys,
pb=pb):
# This is cheating a bit to use the last grabbed 'inv', but it
# works
for name, bytes in items:
- # TODO: We should use something cheaper than _bytes_to_entry,
- # which has to .decode() the entry name, etc.
- # We only care about a couple of the fields in the bytes.
- entry = inv._bytes_to_entry(bytes)
- if entry.name == '' and not rich_root:
+ (name_utf8, file_id, revision_id) = bytes_to_info(bytes)
+ if not rich_root and name_utf8 == '':
continue
- if entry.revision in revision_ids:
+ if revision_id in revision_ids:
# Would we rather build this up into file_id => revision
# maps?
- s = file_id_revisions.setdefault(entry.file_id, set())
- s.add(entry.revision)
+ try:
+ file_id_revisions[file_id].add(revision_id)
+ except KeyError:
+ file_id_revisions[file_id] = set([revision_id])
for file_id, revisions in file_id_revisions.iteritems():
yield ('file', file_id, revisions)
=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py 2009-03-24 19:19:26 +0000
+++ b/bzrlib/tests/test_inv.py 2009-03-26 19:59:52 +0000
@@ -531,3 +531,108 @@
self.assertEqual(
{('', ''): 'TREE_ROOT', ('TREE_ROOT', 'file'): 'fileid'},
dict(chk_inv.parent_id_basename_to_file_id.iteritems()))
+
+ def test_file_entry_to_bytes(self):
+ inv = CHKInventory(None)
+ ie = inventory.InventoryFile('file-id', 'filename', 'parent-id')
+ ie.executable = True
+ ie.revision = 'file-rev-id'
+ ie.text_sha1 = 'abcdefgh'
+ ie.text_size = 100
+ bytes = inv._entry_to_bytes(ie)
+ self.assertEqual('file: file-id\nparent-id\nfilename\n'
+ 'file-rev-id\nabcdefgh\n100\nY', bytes)
+ ie2 = inv._bytes_to_entry(bytes)
+ self.assertEqual(ie, ie2)
+ self.assertIsInstance(ie2.name, unicode)
+ self.assertEqual(('filename', 'file-id', 'file-rev-id'),
+ inv._bytes_to_utf8name_key(bytes))
+
+ def test_file2_entry_to_bytes(self):
+ inv = CHKInventory(None)
+ # \u03a9 == 'omega'
+ ie = inventory.InventoryFile('file-id', u'\u03a9name', 'parent-id')
+ ie.executable = False
+ ie.revision = 'file-rev-id'
+ ie.text_sha1 = '123456'
+ ie.text_size = 25
+ bytes = inv._entry_to_bytes(ie)
+ self.assertEqual('file: file-id\nparent-id\n\xce\xa9name\n'
+ 'file-rev-id\n123456\n25\nN', bytes)
+ ie2 = inv._bytes_to_entry(bytes)
+ self.assertEqual(ie, ie2)
+ self.assertIsInstance(ie2.name, unicode)
+ self.assertEqual(('\xce\xa9name', 'file-id', 'file-rev-id'),
+ inv._bytes_to_utf8name_key(bytes))
+
+ def test_dir_entry_to_bytes(self):
+ inv = CHKInventory(None)
+ ie = inventory.InventoryDirectory('dir-id', 'dirname', 'parent-id')
+ ie.revision = 'dir-rev-id'
+ bytes = inv._entry_to_bytes(ie)
+ self.assertEqual('dir: dir-id\nparent-id\ndirname\ndir-rev-id', bytes)
+ ie2 = inv._bytes_to_entry(bytes)
+ self.assertEqual(ie, ie2)
+ self.assertIsInstance(ie2.name, unicode)
+ self.assertEqual(('dirname', 'dir-id', 'dir-rev-id'),
+ inv._bytes_to_utf8name_key(bytes))
+
+ def test_dir2_entry_to_bytes(self):
+ inv = CHKInventory(None)
+ ie = inventory.InventoryDirectory('dir-id', u'dir\u03a9name',
+ None)
+ ie.revision = 'dir-rev-id'
+ bytes = inv._entry_to_bytes(ie)
+ self.assertEqual('dir: dir-id\n\ndir\xce\xa9name\n'
+ 'dir-rev-id', bytes)
+ ie2 = inv._bytes_to_entry(bytes)
+ self.assertEqual(ie, ie2)
+ self.assertIsInstance(ie2.name, unicode)
+ self.assertIs(ie2.parent_id, None)
+ self.assertEqual(('dir\xce\xa9name', 'dir-id', 'dir-rev-id'),
+ inv._bytes_to_utf8name_key(bytes))
+
+ def test_symlink_entry_to_bytes(self):
+ inv = CHKInventory(None)
+ ie = inventory.InventoryLink('link-id', 'linkname', 'parent-id')
+ ie.revision = 'link-rev-id'
+ ie.symlink_target = u'target/path'
+ bytes = inv._entry_to_bytes(ie)
+ self.assertEqual('symlink: link-id\nparent-id\nlinkname\n'
+ 'link-rev-id\ntarget/path', bytes)
+ ie2 = inv._bytes_to_entry(bytes)
+ self.assertEqual(ie, ie2)
+ self.assertIsInstance(ie2.name, unicode)
+ self.assertIsInstance(ie2.symlink_target, unicode)
+ self.assertEqual(('linkname', 'link-id', 'link-rev-id'),
+ inv._bytes_to_utf8name_key(bytes))
+
+ def test_symlink2_entry_to_bytes(self):
+ inv = CHKInventory(None)
+ ie = inventory.InventoryLink('link-id', u'link\u03a9name', 'parent-id')
+ ie.revision = 'link-rev-id'
+ ie.symlink_target = u'target/\u03a9path'
+ bytes = inv._entry_to_bytes(ie)
+ self.assertEqual('symlink: link-id\nparent-id\nlink\xce\xa9name\n'
+ 'link-rev-id\ntarget/\xce\xa9path', bytes)
+ ie2 = inv._bytes_to_entry(bytes)
+ self.assertEqual(ie, ie2)
+ self.assertIsInstance(ie2.name, unicode)
+ self.assertIsInstance(ie2.symlink_target, unicode)
+ self.assertEqual(('link\xce\xa9name', 'link-id', 'link-rev-id'),
+ inv._bytes_to_utf8name_key(bytes))
+
+ def test_tree_reference_entry_to_bytes(self):
+ inv = CHKInventory(None)
+ ie = inventory.TreeReference('tree-root-id', u'tree\u03a9name',
+ 'parent-id')
+ ie.revision = 'tree-rev-id'
+ ie.reference_revision = 'ref-rev-id'
+ bytes = inv._entry_to_bytes(ie)
+ self.assertEqual('tree: tree-root-id\nparent-id\ntree\xce\xa9name\n'
+ 'tree-rev-id\nref-rev-id', bytes)
+ ie2 = inv._bytes_to_entry(bytes)
+ self.assertEqual(ie, ie2)
+ self.assertIsInstance(ie2.name, unicode)
+ self.assertEqual(('tree\xce\xa9name', 'tree-root-id', 'tree-rev-id'),
+ inv._bytes_to_utf8name_key(bytes))
More information about the bazaar-commits
mailing list