Rev 4509: Repeated path/id corruption detected. in http://people.ubuntu.com/~robertc/baz2.0/pending/apply-inventory-delta

Robert Collins robertc at robertcollins.net
Wed Jul 8 03:25:44 BST 2009


At http://people.ubuntu.com/~robertc/baz2.0/pending/apply-inventory-delta

------------------------------------------------------------
revno: 4509
revision-id: robertc at robertcollins.net-20090708022539-lse8fiw0zbmdhgu0
parent: robertc at robertcollins.net-20090707043425-3cs9g40rei1siu1q
committer: Robert Collins <robertc at robertcollins.net>
branch nick: apply-inventory-delta
timestamp: Wed 2009-07-08 12:25:39 +1000
message:
  Repeated path/id corruption detected.
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2009-06-22 14:32:48 +0000
+++ b/bzrlib/dirstate.py	2009-07-08 02:25:39 +0000
@@ -1400,6 +1400,9 @@
         # inventory entries to dirstate.
         root_only = ('', '')
         for old_path, new_path, file_id, inv_entry in delta:
+            if inv_entry is not None and file_id != inv_entry.file_id:
+                raise errors.InconsistentDelta(new_path, file_id,
+                    "mismatched entry file_id %r" % inv_entry)
             if old_path is None:
                 adds.append((None, encode(new_path), file_id,
                     inv_to_entry(inv_entry), True))
@@ -1453,12 +1456,20 @@
                 changes.append((encode(old_path), encode(new_path), file_id,
                     inv_to_entry(inv_entry)))
 
-        # Finish expunging deletes/first half of renames.
-        self._update_basis_apply_deletes(deletes)
-        # Reinstate second half of renames and new paths.
-        self._update_basis_apply_adds(adds)
-        # Apply in-situ changes.
-        self._update_basis_apply_changes(changes)
+        try:
+            # Finish expunging deletes/first half of renames.
+            self._update_basis_apply_deletes(deletes)
+            # Reinstate second half of renames and new paths.
+            self._update_basis_apply_adds(adds)
+            # Apply in-situ changes.
+            self._update_basis_apply_changes(changes)
+        except errors.BzrError:
+            # _get_entry raises BzrError when a request is inconsistent; we
+            # want such errors to be shown as InconsistentDelta - and that 
+            # fits the behaviour we trigger. Partof this is driven by dirstate
+            # only supporting deltas that turn the basis into a closer fit to
+            # the active tree.
+            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
 
         self._dirblock_state = DirState.IN_MEMORY_MODIFIED
         self._header_state = DirState.IN_MEMORY_MODIFIED

=== modified file 'bzrlib/inventory.py'
--- a/bzrlib/inventory.py	2009-07-07 04:34:25 +0000
+++ b/bzrlib/inventory.py	2009-07-08 02:25:39 +0000
@@ -1127,12 +1127,11 @@
         """
         # Check that the delta is legal. It would be nice if this could be
         # done within the loops below but it's safer to validate the delta
-        # before starting to mutate the inventory.
-        unique_file_ids = set([f for _, _, f, _ in delta])
-        if len(unique_file_ids) != len(delta):
-            raise errors.InconsistentDeltaDelta(delta,
-                "a file-id appears multiple times")
-        del unique_file_ids
+        # before starting to mutate the inventory, as there isn't a rollback
+        # facility.
+        list(_check_delta_unique_ids(_check_delta_unique_new_paths(
+            _check_delta_unique_old_paths(_check_delta_ids_match_entry(
+            delta)))))
 
         children = {}
         # Remove all affected items which were in the original inventory,
@@ -1269,9 +1268,10 @@
                                entry.parent_id)
 
             if entry.name in parent.children:
-                raise BzrError("%s is already versioned" %
-                        osutils.pathjoin(self.id2path(parent.file_id),
-                        entry.name).encode('utf-8'))
+                raise errors.InconsistentDelta(
+                    self.id2path(parent.children[entry.name].file_id),
+                    entry.file_id,
+                    "Path already versioned")
             parent.children[entry.name] = entry
         return self._add_child(entry)
 
@@ -1619,9 +1619,17 @@
             result.parent_id_basename_to_file_id = None
         result.root_id = self.root_id
         id_to_entry_delta = []
-        id_set = set()
+        # inventory_delta is only traversed once, so we just update the
+        # variable.
+        # Check for repeated file ids
+        inventory_delta = _check_delta_unique_ids(inventory_delta)
+        # Repeated old paths
+        inventory_delta = _check_delta_unique_old_paths(inventory_delta)
+        # Check for repeated new paths
+        inventory_delta = _check_delta_unique_new_paths(inventory_delta)
+        # Check for entries that don't match the fileid
+        inventory_delta = _check_delta_ids_match_entry(inventory_delta)
         for old_path, new_path, file_id, entry in inventory_delta:
-            id_set.add(file_id)
             # file id changes
             if new_path == '':
                 result.root_id = file_id
@@ -1663,9 +1671,6 @@
                     # If the two keys are the same, the value will be unchanged
                     # as its always the file id.
                     parent_id_basename_delta.append((old_key, new_key, new_value))
-        if len(id_set) != len(inventory_delta):
-            raise errors.InconsistentDeltaDelta(inventory_delta,
-                'repeated file id')
         result.id_to_entry.apply_delta(id_to_entry_delta)
         if parent_id_basename_delta:
             result.parent_id_basename_to_file_id.apply_delta(parent_id_basename_delta)
@@ -2077,3 +2082,64 @@
         _NAME_RE = re.compile(r'^[^/\\]+$')
 
     return bool(_NAME_RE.match(name))
+
+
+def _check_delta_unique_ids(delta):
+    """Decorate a delta and check that the file ids in it are unique.
+
+    :return: A generator over delta.
+    """
+    ids = set()
+    for item in delta:
+        length = len(ids) + 1
+        ids.add(item[2])
+        if len(ids) != length:
+            raise errors.InconsistentDelta(item[0] or item[1], item[2],
+                "repeated file_id")
+        yield item
+
+
+def _check_delta_unique_new_paths(delta):
+    """Decorate a delta and check that the new paths in it are unique.
+
+    :return: A generator over delta.
+    """
+    paths = set()
+    for item in delta:
+        length = len(paths) + 1
+        path = item[1]
+        if path is not None:
+            paths.add(path)
+            if len(paths) != length:
+                raise errors.InconsistentDelta(path, item[2], "repeated path")
+        yield item
+
+
+def _check_delta_unique_old_paths(delta):
+    """Decorate a delta and check that the old paths in it are unique.
+
+    :return: A generator over delta.
+    """
+    paths = set()
+    for item in delta:
+        length = len(paths) + 1
+        path = item[0]
+        if path is not None:
+            paths.add(path)
+            if len(paths) != length:
+                raise errors.InconsistentDelta(path, item[2], "repeated path")
+        yield item
+
+
+def _check_delta_ids_match_entry(delta):
+    """Decorate a delta and check that the ids in it match the entry.file_id.
+
+    :return: A generator over delta.
+    """
+    for item in delta:
+        entry = item[3]
+        if entry is not None:
+            if entry.file_id != item[2]:
+                raise errors.InconsistentDelta(item[0] or item[1], item[2],
+                    "mismatched id with %r" % entry)
+        yield item

=== modified file 'bzrlib/tests/test_inv.py'
--- a/bzrlib/tests/test_inv.py	2009-07-07 04:34:25 +0000
+++ b/bzrlib/tests/test_inv.py	2009-07-08 02:25:39 +0000
@@ -75,6 +75,10 @@
     """Apply delta to basis and return the result.
 
     This sets the parent and then calls update_basis_by_delta.
+    It also puts the basis in the repository under both 'basis' and 'result' to
+    allow safety checks made by the WT to succeed, and finally ensures that all
+    items in the delta with a new path are present in the WT before calling
+    update_basis_by_delta.
     
     :param basis: An inventory to be used as the basis.
     :param delta: The inventory delta to apply:
@@ -109,6 +113,30 @@
         # This reads basis from the repo and puts it into the tree's local
         # cache, if it has one.
         tree.set_parent_ids(['basis'])
+        inv = tree.inventory
+        paths = {}
+        parents = set()
+        for old, new, id, entry in delta:
+            if entry is None:
+                continue
+            paths[new] = (entry.file_id, entry.kind)
+            parents.add(osutils.dirname(new))
+        parents = osutils.minimum_path_selection(parents)
+        parents.discard('')
+        if parents:
+            # Put place holders in the tree to permit adding the other entries.
+            import pdb;pdb.set_trace()
+        if paths:
+            # Many deltas may cause this mini-apply to fail, but we want to see what
+            # the delta application code says, not the prep that we do to deal with 
+            # limitations of dirstate's update_basis code.
+            for path, (file_id, kind) in sorted(paths.items()):
+                try:
+                    tree.add([path], [file_id], [kind])
+                except (KeyboardInterrupt, SystemExit):
+                    raise
+                except:
+                    pass
     finally:
         tree.unlock()
     # Fresh lock, reads disk again.
@@ -218,6 +246,52 @@
         self.assertRaises(errors.InconsistentDelta, self.apply_delta, self,
             inv, delta)
 
+    def test_repeated_new_path(self):
+        inv = self.get_empty_inventory()
+        file1 = inventory.InventoryFile('id1', 'path', inv.root.file_id)
+        file1.revision = 'result'
+        file1.text_size = 0
+        file1.text_sha1 = ""
+        file2 = inventory.InventoryFile('id2', 'path', inv.root.file_id)
+        file2.revision = 'result'
+        file2.text_size = 0
+        file2.text_sha1 = ""
+        delta = [(None, 'path', 'id1', file1), (None, 'path', 'id2', file2)]
+        self.assertRaises(errors.InconsistentDelta, self.apply_delta, self,
+            inv, delta)
+
+    def test_repeated_old_path(self):
+        inv = self.get_empty_inventory()
+        file1 = inventory.InventoryFile('id1', 'path', inv.root.file_id)
+        file1.revision = 'result'
+        file1.text_size = 0
+        file1.text_sha1 = ""
+        # We can't *create* a source inventory with the same path, but
+        # a badly generated partial delta might claim the same source twice.
+        # This would be buggy in two ways: the path is repeated in the delta,
+        # And the path for one of the file ids doesn't match the source
+        # location. Alternatively, we could have a repeated fileid, but that
+        # is separately checked for.
+        file2 = inventory.InventoryFile('id2', 'path2', inv.root.file_id)
+        file2.revision = 'result'
+        file2.text_size = 0
+        file2.text_sha1 = ""
+        inv.add(file1)
+        inv.add(file2)
+        delta = [('path', None, 'id1', None), ('path', None, 'id2', None)]
+        self.assertRaises(errors.InconsistentDelta, self.apply_delta, self,
+            inv, delta)
+
+    def test_mismatched_id_entry_id(self):
+        inv = self.get_empty_inventory()
+        file1 = inventory.InventoryFile('id1', 'path', inv.root.file_id)
+        file1.revision = 'result'
+        file1.text_size = 0
+        file1.text_sha1 = ""
+        delta = [(None, 'path', 'id', file1)]
+        self.assertRaises(errors.InconsistentDelta, self.apply_delta, self,
+            inv, delta)
+
 
 class TestInventoryEntry(TestCase):
 




More information about the bazaar-commits mailing list