Rev 3913: Add some direct tests for CHKInventory._entry_to_bytes in http://bazaar.launchpad.net/%7Ebzr/bzr/brisbane-core

John Arbash Meinel john at arbash-meinel.com
Thu Mar 26 20:04:05 GMT 2009


At http://bazaar.launchpad.net/%7Ebzr/bzr/brisbane-core

------------------------------------------------------------
revno: 3913
revision-id: john at arbash-meinel.com-20090326195952-w0qea66iw597ipza
parent: john at arbash-meinel.com-20090326191304-w52buxewrxumpgvo
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: brisbane-core
timestamp: Thu 2009-03-26 14:59:52 -0500
message:
  Add some direct tests for CHKInventory._entry_to_bytes
  and _bytes_to_entry.
  Also, add a new function _bytes_to_utf8name_key. I wanted to just add
  _bytes_to_key, but it seems we have code that uses the name field to
  check if this is a root key that should not be transmitted.
  Anyway, by having this function, item_keys_introduced_by avoids a
  bunch of .decode() calls, as well as not building up InventoryEntry
  objects.
  Also use this when gathering text_refs in GCPacker. Hopefully, we
  could turn it on all the time, if it got cheap enough.
  And it points us in the right direction for a StreamSource that
  sends CHK pages.
-------------- next part --------------
=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2009-03-25 12:18:09 +0000
+++ b/bzrlib/inventory.py	2009-03-26 19:59:52 +0000
@@ -1503,6 +1503,15 @@
         else:
             raise ValueError("unknown kind %r" % entry.kind)
 
+    @staticmethod
+    def _bytes_to_utf8name_key(bytes):
+        """Return (utf8 name, file_id, revision_id) parsed from entry bytes."""
+        # The name is left utf8-encoded (no .decode()); it is only returned so
+        # callers can filter out empty names because of non rich-root...
+        sections = bytes.split('\n')
+        kind, file_id = sections[0].split(': ', 1)
+        return (sections[2], file_id, sections[3])
+
     def _bytes_to_entry(self, bytes):
         """Deserialise a serialised entry."""
         sections = bytes.split('\n')
@@ -1521,7 +1530,7 @@
             result = InventoryLink(sections[0][9:],
                 sections[2].decode('utf8'),
                 sections[1])
-            result.symlink_target = sections[4]
+            result.symlink_target = sections[4].decode('utf8')
         elif sections[0].startswith("tree: "):
             result = TreeReference(sections[0][6:],
                 sections[2].decode('utf8'),

=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py	2009-03-24 19:36:34 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py	2009-03-26 19:59:52 +0000
@@ -242,9 +242,7 @@
         remaining_keys = set(keys)
         counter = [0]
         if self._gather_text_refs:
-            # Just to get _bytes_to_entry, so we don't care about the
-            # search_key_name
-            inv = inventory.CHKInventory(None)
+            bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
             self._text_refs = set()
         def _get_referenced_stream(root_keys, parse_leaf_nodes=False):
             cur_keys = root_keys
@@ -271,8 +269,8 @@
                     # Store is None, because we know we have a LeafNode, and we
                     # just want its entries
                     for file_id, bytes in node.iteritems(None):
-                        entry = inv._bytes_to_entry(bytes)
-                        self._text_refs.add((entry.file_id, entry.revision))
+                        name_utf8, file_id, revision_id = bytes_to_info(bytes)
+                        self._text_refs.add((file_id, revision_id))
                 def next_stream():
                     stream = source_vf.get_record_stream(cur_keys,
                                                          'as-requested', True)

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2009-03-26 16:35:00 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2009-03-26 19:59:52 +0000
@@ -2468,23 +2468,23 @@
             interesting_root_keys.add(inv.id_to_entry.key())
         revision_ids = frozenset(revision_ids)
         file_id_revisions = {}
+        bytes_to_info = CHKInventory._bytes_to_utf8name_key
         for records, items in chk_map.iter_interesting_nodes(self.chk_bytes,
                     interesting_root_keys, uninteresting_root_keys,
                     pb=pb):
             # This is cheating a bit to use the last grabbed 'inv', but it
             # works
             for name, bytes in items:
-                # TODO: We should use something cheaper than _bytes_to_entry,
-                #       which has to .decode() the entry name, etc.
-                #       We only care about a couple of the fields in the bytes.
-                entry = inv._bytes_to_entry(bytes)
-                if entry.name == '' and not rich_root:
+                (name_utf8, file_id, revision_id) = bytes_to_info(bytes)
+                if not rich_root and name_utf8 == '':
                     continue
-                if entry.revision in revision_ids:
+                if revision_id in revision_ids:
                     # Would we rather build this up into file_id => revision
                     # maps?
-                    s = file_id_revisions.setdefault(entry.file_id, set())
-                    s.add(entry.revision)
+                    try:
+                        file_id_revisions[file_id].add(revision_id)
+                    except KeyError:
+                        file_id_revisions[file_id] = set([revision_id])
         for file_id, revisions in file_id_revisions.iteritems():
             yield ('file', file_id, revisions)
 

=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py	2009-03-24 19:19:26 +0000
+++ b/bzrlib/tests/test_inv.py	2009-03-26 19:59:52 +0000
@@ -531,3 +531,108 @@
         self.assertEqual(
             {('', ''): 'TREE_ROOT', ('TREE_ROOT', 'file'): 'fileid'},
             dict(chk_inv.parent_id_basename_to_file_id.iteritems()))
+
+    def test_file_entry_to_bytes(self):
+        inv = CHKInventory(None)
+        ie = inventory.InventoryFile('file-id', 'filename', 'parent-id')
+        ie.executable = True
+        ie.revision = 'file-rev-id'
+        ie.text_sha1 = 'abcdefgh'
+        ie.text_size = 100
+        bytes = inv._entry_to_bytes(ie)
+        self.assertEqual('file: file-id\nparent-id\nfilename\n'
+                         'file-rev-id\nabcdefgh\n100\nY', bytes)
+        ie2 = inv._bytes_to_entry(bytes)
+        self.assertEqual(ie, ie2)
+        self.assertIsInstance(ie2.name, unicode)
+        self.assertEqual(('filename', 'file-id', 'file-rev-id'),
+                         inv._bytes_to_utf8name_key(bytes))
+
+    def test_file2_entry_to_bytes(self):
+        inv = CHKInventory(None)
+        # u'\u03a9' is capital omega, which is '\xce\xa9' in utf8
+        ie = inventory.InventoryFile('file-id', u'\u03a9name', 'parent-id')
+        ie.executable = False
+        ie.revision = 'file-rev-id'
+        ie.text_sha1 = '123456'
+        ie.text_size = 25
+        bytes = inv._entry_to_bytes(ie)
+        self.assertEqual('file: file-id\nparent-id\n\xce\xa9name\n'
+                         'file-rev-id\n123456\n25\nN', bytes)
+        ie2 = inv._bytes_to_entry(bytes)
+        self.assertEqual(ie, ie2)
+        self.assertIsInstance(ie2.name, unicode)
+        self.assertEqual(('\xce\xa9name', 'file-id', 'file-rev-id'),
+                         inv._bytes_to_utf8name_key(bytes))
+
+    def test_dir_entry_to_bytes(self):
+        inv = CHKInventory(None)
+        ie = inventory.InventoryDirectory('dir-id', 'dirname', 'parent-id')
+        ie.revision = 'dir-rev-id'
+        bytes = inv._entry_to_bytes(ie)
+        self.assertEqual('dir: dir-id\nparent-id\ndirname\ndir-rev-id', bytes)
+        ie2 = inv._bytes_to_entry(bytes)
+        self.assertEqual(ie, ie2)
+        self.assertIsInstance(ie2.name, unicode)
+        self.assertEqual(('dirname', 'dir-id', 'dir-rev-id'),
+                         inv._bytes_to_utf8name_key(bytes))
+
+    def test_dir2_entry_to_bytes(self):
+        inv = CHKInventory(None)
+        ie = inventory.InventoryDirectory('dir-id', u'dir\u03a9name',
+                                          None)
+        ie.revision = 'dir-rev-id'
+        bytes = inv._entry_to_bytes(ie)
+        self.assertEqual('dir: dir-id\n\ndir\xce\xa9name\n'
+                         'dir-rev-id', bytes)
+        ie2 = inv._bytes_to_entry(bytes)
+        self.assertEqual(ie, ie2)
+        self.assertIsInstance(ie2.name, unicode)
+        self.assertIs(ie2.parent_id, None)
+        self.assertEqual(('dir\xce\xa9name', 'dir-id', 'dir-rev-id'),
+                         inv._bytes_to_utf8name_key(bytes))
+
+    def test_symlink_entry_to_bytes(self):
+        inv = CHKInventory(None)
+        ie = inventory.InventoryLink('link-id', 'linkname', 'parent-id')
+        ie.revision = 'link-rev-id'
+        ie.symlink_target = u'target/path'
+        bytes = inv._entry_to_bytes(ie)
+        self.assertEqual('symlink: link-id\nparent-id\nlinkname\n'
+                         'link-rev-id\ntarget/path', bytes)
+        ie2 = inv._bytes_to_entry(bytes)
+        self.assertEqual(ie, ie2)
+        self.assertIsInstance(ie2.name, unicode)
+        self.assertIsInstance(ie2.symlink_target, unicode)
+        self.assertEqual(('linkname', 'link-id', 'link-rev-id'),
+                         inv._bytes_to_utf8name_key(bytes))
+
+    def test_symlink2_entry_to_bytes(self):
+        inv = CHKInventory(None)
+        ie = inventory.InventoryLink('link-id', u'link\u03a9name', 'parent-id')
+        ie.revision = 'link-rev-id'
+        ie.symlink_target = u'target/\u03a9path'
+        bytes = inv._entry_to_bytes(ie)
+        self.assertEqual('symlink: link-id\nparent-id\nlink\xce\xa9name\n'
+                         'link-rev-id\ntarget/\xce\xa9path', bytes)
+        ie2 = inv._bytes_to_entry(bytes)
+        self.assertEqual(ie, ie2)
+        self.assertIsInstance(ie2.name, unicode)
+        self.assertIsInstance(ie2.symlink_target, unicode)
+        self.assertEqual(('link\xce\xa9name', 'link-id', 'link-rev-id'),
+                         inv._bytes_to_utf8name_key(bytes))
+
+    def test_tree_reference_entry_to_bytes(self):
+        inv = CHKInventory(None)
+        ie = inventory.TreeReference('tree-root-id', u'tree\u03a9name',
+                                     'parent-id')
+        ie.revision = 'tree-rev-id'
+        ie.reference_revision = 'ref-rev-id'
+        bytes = inv._entry_to_bytes(ie)
+        self.assertEqual('tree: tree-root-id\nparent-id\ntree\xce\xa9name\n'
+                         'tree-rev-id\nref-rev-id', bytes)
+        ie2 = inv._bytes_to_entry(bytes)
+        self.assertEqual(ie, ie2)
+        self.assertIsInstance(ie2.name, unicode)
+        self.assertEqual(('tree\xce\xa9name', 'tree-root-id', 'tree-rev-id'),
+                         inv._bytes_to_utf8name_key(bytes))



More information about the bazaar-commits mailing list