Rev 3173: JournalEntry -> inventory conversion. in

Robert Collins robertc at
Fri Jan 4 02:10:07 GMT 2008


revno: 3173
revision-id:robertc at
parent: robertc at
committer: Robert Collins <robertc at>
branch nick: inventory.journalled
timestamp: Fri 2008-01-04 13:10:02 +1100
  JournalEntry -> inventory conversion.
  bzrlib/ journalled_inventory-20080103020931-0ht5n40kwc0p7fy1-1
  bzrlib/tests/ test_journalled_inv.-20080103012121-ny2w9slze5jgty8i-1
  doc/developers/inventory.txt   inventory.txt-20080103013957-opkrhxy6lmywmx4i-1
=== modified file 'bzrlib/'
--- a/bzrlib/	2007-12-18 18:25:12 +0000
+++ b/bzrlib/	2008-01-04 02:10:02 +0000
@@ -1417,6 +1417,25 @@
     return factory(file_id, name, parent_id)
+def make_inv_delta(old, new):
+    """Make an inventory delta from two inventories."""
+    old_ids = set(old._byid.iterkeys())
+    new_ids = set(new._byid.iterkeys())
+    adds = new_ids - old_ids
+    deletes = old_ids - new_ids
+    common = old_ids.intersection(new_ids)
+    delta = []
+    for file_id in deletes:
+        delta.append((old.id2path(file_id), None, file_id, None))
+    for file_id in adds:
+        delta.append((None, new.id2path(file_id), file_id, new[file_id]))
+    for file_id in common:
+        if old[file_id] != new[file_id]:
+            delta.append((old.id2path(file_id), new.id2path(file_id),
+                file_id, new[file_id]))
+    return delta
 def ensure_normalized_name(name):
     """Normalize name.

=== modified file 'bzrlib/'
--- a/bzrlib/	2008-01-04 00:01:31 +0000
+++ b/bzrlib/	2008-01-04 02:10:02 +0000
@@ -26,7 +26,10 @@
 __all__ = ['EntryAccess', 'InventoryJournal']
 from bzrlib import errors
+from bzrlib.osutils import basename
+from bzrlib import inventory
 from bzrlib.revision import NULL_REVISION
+from bzrlib.tsort import topo_sort
 def _directory_content(entry):
@@ -56,7 +59,7 @@
     target = entry.symlink_target
     if target is None:
         raise errors.BzrError('Missing target for %s' % entry.file_id)
-    return "link\x00%s\x00" % target
+    return "link\x00%s\x00" % target.encode('utf8')
 def _reference_content(entry):
@@ -70,6 +73,42 @@
     return "tree\x00%s\x00" % tree_revision
+def _dir_to_entry(content, name, parent_id, file_id, last_modified,
+    _type=inventory.InventoryDirectory):
+    """Convert a dir content record to an InventoryDirectory."""
+    result = _type(file_id, name, parent_id)
+    result.revision = last_modified
+    return result
+def _file_to_entry(content, name, parent_id, file_id, last_modified,
+    _type=inventory.InventoryFile):
+    """Convert a file content record to an InventoryFile."""
+    result = _type(file_id, name, parent_id)
+    result.revision = last_modified
+    result.text_size = int(content[1])
+    result.text_sha1 = content[2]
+    return result
+def _link_to_entry(content, name, parent_id, file_id, last_modified,
+    _type=inventory.InventoryLink):
+    """Convert a link content record to an InventoryLink."""
+    result = _type(file_id, name, parent_id)
+    result.revision = last_modified
+    result.symlink_target = content[1].decode('utf8')
+    return result
+def _tree_to_entry(content, name, parent_id, file_id, last_modified,
+    _type=inventory.TreeReference):
+    """Convert a tree content record to a TreeReference."""
+    result = _type(file_id, name, parent_id)
+    result.revision = last_modified
+    result.reference_revision = content[1]
+    return result
 class _JournalEntry(object):
     """An individual entry in a journalled inventory."""
@@ -110,6 +149,61 @@
                 self.by_id[line[0]] = line[1]
         self.parent_revision = parent_entry.parent_revision
+    def to_inventory(self):
+        """Convert this JournalEntry to an inventory object.
+        :raises BzrError: If the parent is not NULL_REVISION then not enough
+            information will be present.
+        :return: An Inventory object.
+        """
+        if self.parent_revision != NULL_REVISION:
+            raise errors.BzrError("Journal not completely replayed.")
+        # build a topo order on parent ids.
+        parent_graph = [(None, [])]
+        for utf8_path, file_id, parent_id, last_modified, content in \
+            self.by_id.itervalues():
+            if content[0] == 'tree' and not self.tree_references:
+                raise errors.BzrError("tree reference (id %s) present in "
+                    "non-tree-reference supporting journal." % file_id)
+            if utf8_path == 'None':
+                # deleted id, nothing to see here.
+                continue
+            if utf8_path == '/' and not self.versioned_root:
+                # This is a double check as it's done during parsing, but it
+                # seems worthwhile.
+                if file_id != 'TREE_ROOT':
+                    raise errors.BzrError(
+                        'file_id %s is not TREE_ROOT for /' % file_id)
+                if last_modified is not None:
+                    raise errors.BzrError(
+                        'Version present for / in %s' % file_id)
+                last_modified = None
+            else:
+                if last_modified[-1] == ':':
+                    raise errors.BzrError(
+                        'special revision id in journal (id %s)' % file_id)
+            parent_graph.append((file_id, [parent_id]))
+        order = topo_sort(parent_graph)
+        if order[0] != None:
+            raise errors.BzrError(
+                'inconsistent file id parent links (%r)' % order[0])
+        result = inventory.Inventory(None)
+        entry_factory = {
+            'dir': _dir_to_entry,
+            'file': _file_to_entry,
+            'link': _link_to_entry,
+            'tree': _tree_to_entry,
+        }
+        for file_id in order[1:]:
+            utf8_path, _1, parent_id, last_modified, content = \
+                self.by_id[file_id]
+            kind = content[0]
+            path = utf8_path[1:].decode('utf8')
+            name = basename(path)
+            result.add(entry_factory[content[0]](
+                    content, name, parent_id, file_id, last_modified))
+        return result
 class EntryAccess(object):
     """Provide access to named bytesequences of the journal entries."""
@@ -152,6 +246,9 @@
         to_line = self._delta_item_to_line
         for delta_item in delta_to_new:
+            if lines[-1].__class__ != str:
+                raise errors.BzrError(
+                    'to_line generated non-str output %r' % lines[-1])
         lines[0] = "format: %s\n" % InventoryJournal.FORMAT_1
         lines[1] = "parent: %s\n" % old_inventory_name
@@ -166,7 +263,7 @@
             newpath_utf8 = 'None'
             parent_id = ''
             last_modified = NULL_REVISION
-            content = 'deleted'
+            content = 'deleted\x00\x00'
             # TODO: Test real-world utf8 cache hit rate. It may be a win.
             newpath_utf8 = '/' + newpath.encode('utf8')
@@ -212,12 +309,11 @@
             parent_id = parent_id or None
             by_id[file_id] = (newpath_utf8, file_id, parent_id,
                 last_modified, tuple(content.split('\x00')))
-            if newpath_utf8 == '/':
-                if self._versioned_root and last_modified == 'null:':
-                    raise errors.BzrError('unversioned root found: %r' % line)
-                elif not self._versioned_root and (last_modified != 'null:' or 
-                    file_id != 'TREE_ROOT'):
+            if newpath_utf8 == '/' and not self._versioned_root and (
+                last_modified != 'null:' or file_id != 'TREE_ROOT'):
                     raise errors.BzrError("Versioned root found: %r" % line)
+            elif last_modified[-1] == ':':
+                    raise errors.BzrError('special revisionid found: %r' % line)
             if not self._tree_references and content.startswith('tree\x00'):
                 raise errors.BzrError("Tree reference found: %r" % line)
         if len(by_id) + 3 != len(lines):

=== modified file 'bzrlib/tests/'
--- a/bzrlib/tests/	2008-01-04 00:01:31 +0000
+++ b/bzrlib/tests/	2008-01-04 02:10:02 +0000
@@ -39,7 +39,7 @@
 root_only_lines = """format: bzr journalled inventory v1 (bzr 1.1)
 parent: null:
 version: entry-version
-/\x00an-id\x00\x00a at e\\x00dir\x00\x00
+/\x00an-id\x00\x00a at e\xc3\\x00dir\x00\x00
 root_change_lines = """format: bzr journalled inventory v1 (bzr 1.1)
@@ -57,7 +57,7 @@
 reference_lines = """format: bzr journalled inventory v1 (bzr 1.1)
 parent: null:
 version: entry-version
-/\x00TREE_ROOT\x00\x00a at e\\x00dir\x00\x00
+/\x00TREE_ROOT\x00\x00a at e\xc3\\x00dir\x00\x00
@@ -71,28 +71,10 @@
 class TestSerializer(TestCase):
     """Test journalled inventory serialisation."""
-    def make_inv_delta(self, old, new):
-        """Make an inventory delta from two inventories."""
-        old_ids = set(old._byid.iterkeys())
-        new_ids = set(new._byid.iterkeys())
-        adds = new_ids - old_ids
-        deletes = old_ids - new_ids
-        common = old_ids.intersection(new_ids)
-        delta = []
-        for file_id in deletes:
-            delta.append((old.id2path(file_id), None, file_id, None))
-        for file_id in adds:
-            delta.append((None, new.id2path(file_id), file_id, new[file_id]))
-        for file_id in common:
-            if old[file_id] != new[file_id]:
-                delta.append((old.id2path(file_id), new.id2path(file_id),
-                    file_id, new[file_id]))
-        return delta
     def test_empty_delta_to_lines(self):
         old_inv = Inventory(None)
         new_inv = Inventory(None)
-        delta = self.make_inv_delta(old_inv, new_inv)
+        delta = inventory.make_inv_delta(old_inv, new_inv)
         journal = journalled_inventory.InventoryJournal(versioned_root=True,
@@ -102,9 +84,9 @@
         old_inv = Inventory(None)
         new_inv = Inventory(None)
         root = new_inv.make_entry('directory', '', None, 'an-id')
-        root.revision = 'a at e\'
+        root.revision = 'a at e\xc3\'
-        delta = self.make_inv_delta(old_inv, new_inv)
+        delta = inventory.make_inv_delta(old_inv, new_inv)
         journal = journalled_inventory.InventoryJournal(versioned_root=True,
@@ -115,7 +97,7 @@
         new_inv = Inventory(None)
         root = new_inv.make_entry('directory', '', None, 'TREE_ROOT')
-        delta = self.make_inv_delta(old_inv, new_inv)
+        delta = inventory.make_inv_delta(old_inv, new_inv)
         journal = journalled_inventory.InventoryJournal(versioned_root=False,
@@ -125,11 +107,11 @@
         old_inv = Inventory(None)
         new_inv = Inventory(None)
         root = new_inv.make_entry('directory', '', None, 'TREE_ROOT')
-        root.revision = 'a at e\'
+        root.revision = 'a at e\xc3\'
         non_root = new_inv.make_entry('directory', 'foo', root.file_id, 'id')
-        delta = self.make_inv_delta(old_inv, new_inv)
+        delta = inventory.make_inv_delta(old_inv, new_inv)
         journal = journalled_inventory.InventoryJournal(versioned_root=True,
@@ -140,7 +122,7 @@
         new_inv = Inventory(None)
         root = new_inv.make_entry('directory', '', None, 'TREE_ROOT')
-        delta = self.make_inv_delta(old_inv, new_inv)
+        delta = inventory.make_inv_delta(old_inv, new_inv)
         journal = journalled_inventory.InventoryJournal(versioned_root=True,
@@ -150,9 +132,9 @@
         old_inv = Inventory(None)
         new_inv = Inventory(None)
         root = new_inv.make_entry('directory', '', None, 'TREE_ROOT')
-        root.revision = 'a at e\'
+        root.revision = 'a at e\xc3\'
-        delta = self.make_inv_delta(old_inv, new_inv)
+        delta = inventory.make_inv_delta(old_inv, new_inv)
         journal = journalled_inventory.InventoryJournal(versioned_root=False,
@@ -163,7 +145,7 @@
         new_inv = Inventory(None)
         root = new_inv.make_entry('directory', '', None, 'my-rich-root-id')
-        delta = self.make_inv_delta(old_inv, new_inv)
+        delta = inventory.make_inv_delta(old_inv, new_inv)
         journal = journalled_inventory.InventoryJournal(versioned_root=False,
@@ -179,7 +161,7 @@
         non_root.revision = 'changed'
         non_root.kind = 'strangelove'
-        delta = self.make_inv_delta(old_inv, new_inv)
+        delta = inventory.make_inv_delta(old_inv, new_inv)
         journal = journalled_inventory.InventoryJournal(versioned_root=True,
         # we expect keyerror because there is little value wrapping this.
@@ -191,14 +173,14 @@
         old_inv = Inventory(None)
         new_inv = Inventory(None)
         root = new_inv.make_entry('directory', '', None, 'TREE_ROOT')
-        root.revision = 'a at e\'
+        root.revision = 'a at e\xc3\'
         non_root = new_inv.make_entry(
             'tree-reference', 'foo', root.file_id, 'id')
         non_root.revision = 'changed'
         non_root.reference_revision = 'subtree-version'
-        delta = self.make_inv_delta(old_inv, new_inv)
+        delta = inventory.make_inv_delta(old_inv, new_inv)
         journal = journalled_inventory.InventoryJournal(versioned_root=True,
         # we expect keyerror because there is little value wrapping this.
@@ -210,14 +192,14 @@
         old_inv = Inventory(None)
         new_inv = Inventory(None)
         root = new_inv.make_entry('directory', '', None, 'TREE_ROOT')
-        root.revision = 'a at e\'
+        root.revision = 'a at e\xc3\'
         non_root = new_inv.make_entry(
             'tree-reference', 'foo', root.file_id, 'id')
         non_root.revision = 'changed'
         non_root.reference_revision = 'subtree-version'
-        delta = self.make_inv_delta(old_inv, new_inv)
+        delta = inventory.make_inv_delta(old_inv, new_inv)
         journal = journalled_inventory.InventoryJournal(versioned_root=True,
@@ -271,8 +253,8 @@
 """format: bzr journalled inventory v1 (bzr 1.1)
 parent: null:
 version: null:
-/\x00an-id\x00\x00a at e\\x00dir\x00\x00
-/\x00an-id\x00\x00a at e\\x00dir\x00\x00
+/\x00an-id\x00\x00a at e\xc3\\x00dir\x00\x00
+/\x00an-id\x00\x00a at e\xc3\\x00dir\x00\x00
             journal.parse_text_bytes, double_root_lines)
@@ -282,20 +264,31 @@
         journal_entry = journal.parse_text_bytes(root_only_lines)
-            'an-id':('/', 'an-id', None, 'a at e\',
+            'an-id':('/', 'an-id', None, 'a at e\xc3\',
                      ('dir', '', '')),
         self.assertEqual(NULL_REVISION, journal_entry.parent_revision)
         self.assertEqual('entry-version', journal_entry.version)
+    def test_parse_special_revid_not_valid_last_mod(self):
+        journal = journalled_inventory.InventoryJournal(versioned_root=False,
+            tree_references=True)
+        root_only_lines = """format: bzr journalled inventory v1 (bzr 1.1)
+parent: null:
+version: null:
+        self.assertRaises(errors.BzrError,
+            journal.parse_text_bytes, root_only_lines)
     def test_parse_versioned_root_versioned_disabled(self):
         journal = journalled_inventory.InventoryJournal(versioned_root=False,
         root_only_lines = """format: bzr journalled inventory v1 (bzr 1.1)
 parent: null:
 version: null:
-/\x00TREE_ROOT\x00\x00a at e\\x00dir\x00\x00
+/\x00TREE_ROOT\x00\x00a at e\xc3\\x00dir\x00\x00
             journal.parse_text_bytes, root_only_lines)
@@ -335,7 +328,6 @@
         a_root = journal.parse_text_bytes(root_only_lines)
         null = journal.parse_text_bytes(empty_lines)
         root = journal.parse_text_bytes(root_only_lines)
-        print root.parent_revision
         self.assertJournalEntriesEqual(a_root, root)
@@ -364,12 +356,109 @@
         change_tree = journal.parse_text_bytes(change_tree_lines)
-            'TREE_ROOT': ('/', 'TREE_ROOT', None, 'a at e\',
+            'TREE_ROOT': ('/', 'TREE_ROOT', None, 'a at e\xc3\',
                       ('dir', '', '')),
             'id': ('/foo', 'id', 'TREE_ROOT', 'changed-twice',
                    ('tree', 'subtree-version2', '')),
             }, change_tree.by_id)
+    def test_to_inventory_non_null_parent(self):
+        journal_entry = journalled_inventory._JournalEntry(
+            'something', 'a-change', {}, True, True)
+        self.assertRaises(errors.BzrError, journal_entry.to_inventory)
+    def test_to_inventory_root_id_versioned_not_permitted(self):
+        entries = {
+            'TREE_ROOT': ('/', 'TREE_ROOT', None, 'a-version-id',
+                ('dir', '', '')),
+        }
+        journal_entry = journalled_inventory._JournalEntry(
+            'something', NULL_REVISION, entries, False, True)
+        self.assertRaises(errors.BzrError, journal_entry.to_inventory)
+    def test_to_inventory_root_id_unique_not_permitted(self):
+        entries = {
+            'TREE_ROOT': ('/', 'TREE_ROOT', None, 'a-version-id',
+                ('dir', '', '')),
+        }
+        journal_entry = journalled_inventory._JournalEntry(
+            'something', NULL_REVISION, entries, False, True)
+        self.assertRaises(errors.BzrError, journal_entry.to_inventory)
+    def test_to_inventory_root_id_not_versioned(self):
+        entries = {
+            'an-id': ('/', 'an-id', None, 'null:',
+                ('dir', '', '')),
+        }
+        journal_entry = journalled_inventory._JournalEntry(
+            'something', NULL_REVISION, entries, True, True)
+        self.assertRaises(errors.BzrError, journal_entry.to_inventory)
+    def test_to_inventory_has_tree_not_meant_to(self):
+        entries = {
+            'an-id': ('/', 'an-id', None, 'changed-in',
+                ('dir', '', '')),
+            'a-ref': ('/foo', 'ref-id', 'an-id', 'changed-in',
+                ('tree', 'ref-revision', '')),
+            # a file that followed the root move
+        }
+        journal_entry = journalled_inventory._JournalEntry(
+            'something', NULL_REVISION, entries, True, False)
+        self.assertRaises(errors.BzrError, journal_entry.to_inventory)
+    def test_to_inventory_torture(self):
+        # this journal is crafted to have all the following:
+        # - deletes
+        # - renamed roots
+        # - deep dirs
+        # - files moved after parent dir was renamed
+        entries = {
+            # current root:
+            'an-id': ('/', 'an-id', None, 'changed-in',
+                ('dir', '', '')),
+            # an old root:
+            'TREE_ROOT': ('/subdir-now', 'TREE_ROOT', 'an-id', 'moved-root',
+                ('dir', '', '')),
+            # a file that followed the root move
+            'moved-id': ('/underoldroot', 'moved-id', 'TREE_ROOT', 'old-rev',
+                ('file', '30', 'some-sha')),
+            # a deleted path 
+            'deleted-id': ('None', 'deleted-id', None, NULL_REVISION,
+                ('deleted', '', '')),
+            # a tree reference moved to the new root
+            'ref-id': ('/ref', 'ref-id', 'an-id', 'new-rev',
+                ('tree', 'tree-reference-id', '')),
+            # a symlink now in a deep dir
+            'link-id': ('/link', 'link-id', 'deep-id', 'new-rev',
+                ('link', 'target', '')),
+            # a deep dir
+            'deep-id': ('/dir', 'deep-id', 'TREE_ROOT', 'new-rev',
+                ('dir', '', '')),
+        }
+        journal_entry = journalled_inventory._JournalEntry(
+            'something', NULL_REVISION, entries, True, True)
+        inv = journal_entry.to_inventory()
+        # easiest way to check we got the right inventory is to serialise it
+        # and compare the serialised forms.
+        expected = """format: bzr journalled inventory v1 (bzr 1.1)
+parent: null:
+version: something
+        old_inv = Inventory(None)
+        delta = inventory.make_inv_delta(old_inv, inv)
+        journal = journalled_inventory.InventoryJournal(versioned_root=True,
+            tree_references=True)
+        lines = journal.delta_to_lines(NULL_REVISION, 'something', delta)
+        serialised = ''.join(lines)
+        self.assertIsInstance(serialised, str)
+        self.assertEqual(expected, serialised)
 class TestContent(TestCase):
@@ -409,6 +498,12 @@
+    def test_link_unicode_target(self):
+        entry = inventory.make_entry('symlink', 'a link', None)
+        entry.symlink_target = ' \xc3\xa5'.decode('utf8')
+        self.assertEqual('link\x00 \xc3\xa5\x00',
+            journalled_inventory._link_content(entry))
     def test_link_space_target(self):
         entry = inventory.make_entry('symlink', 'a link', None)
         entry.symlink_target = ' '
@@ -428,8 +523,8 @@
     def test_reference_revision(self):
         entry = inventory.make_entry('tree-reference', 'a tree', None)
-        entry.reference_revision = 'foo@\xe5b-lah'
-        self.assertEqual('tree\x00foo@\xe5b-lah\x00',
+        entry.reference_revision = 'foo@\xc3\xa5b-lah'
+        self.assertEqual('tree\x00foo@\xc3\xa5b-lah\x00',
     def test_reference_no_reference(self):

=== modified file 'doc/developers/inventory.txt'
--- a/doc/developers/inventory.txt	2008-01-04 00:02:21 +0000
+++ b/doc/developers/inventory.txt	2008-01-04 02:10:02 +0000
@@ -121,7 +121,7 @@
 Some explanation is in order. When NEWPATH is 'None' a delete has been
 recorded, and because this journalled inventory is not attempting to be a
-reversible journal, the only other valid fields is 'file-id'. PARENT_ID is ''
+reversible journal, the only other valid field is 'file-id'. PARENT_ID is ''
 when a delete has been recorded or when recording a new root entry. Content
 always has 2 NULL delimiters in it to allow easy parsing.

More information about the bazaar-commits mailing list