Rev 4117: (robertc) Handle inconsistent inventories in fetch more robustly. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Wed Mar 11 22:53:25 GMT 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4117
revision-id: pqm at pqm.ubuntu.com-20090311225320-7y2picz4muszgkma
parent: pqm at pqm.ubuntu.com-20090311220149-lp5k8bw0rqr2a7sx
parent: robertc at robertcollins.net-20090311064400-amcoxhotnw63d9vl
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2009-03-11 22:53:20 +0000
message:
  (robertc) Handle inconsistent inventories in fetch more robustly.
  	(Robert Collins)
modified:
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/interrepository_implementations/test_fetch.py test_fetch.py-20080425213627-j60cjh782ufm83ry-1
  bzrlib/tests/per_repository/test_fileid_involved.py test_file_involved.py-20051215205901-728a172d1014daaa
  bzrlib/tests/test_fetch.py     testfetch.py-20050825090644-f73e07e7dfb1765a
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
    ------------------------------------------------------------
    revno: 4098.4.3
    revision-id: robertc at robertcollins.net-20090311064400-amcoxhotnw63d9vl
    parent: robertc at robertcollins.net-20090311052614-v57k6mz9hqcxzgf6
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: fetch
    timestamp: Wed 2009-03-11 17:44:00 +1100
    message:
      Change fetch effort tests to reflect the new change to read the adjacent inventories to ensure accurate fetching.
    modified:
      bzrlib/tests/test_fetch.py     testfetch.py-20050825090644-f73e07e7dfb1765a
    ------------------------------------------------------------
    revno: 4098.4.2
    revision-id: robertc at robertcollins.net-20090311052614-v57k6mz9hqcxzgf6
    parent: robertc at robertcollins.net-20090310074723-jgctuly1ziw23r7e
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: fetch
    timestamp: Wed 2009-03-11 16:26:14 +1100
    message:
      Weaves were reporting way too many lines altered... fix so that the fetch changes actually emit the right revisions and tests pass.
    modified:
      bzrlib/tests/per_repository/test_fileid_involved.py test_file_involved.py-20051215205901-728a172d1014daaa
      bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
    ------------------------------------------------------------
    revno: 4098.4.1
    revision-id: robertc at robertcollins.net-20090310074723-jgctuly1ziw23r7e
    parent: pqm at pqm.ubuntu.com-20090309084556-9i2m12qlud2qcrtw
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: fetch
    timestamp: Tue 2009-03-10 18:47:23 +1100
    message:
      Handle inconsistent inventory data more gracefully at a small performance cost during fetch.
    modified:
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
      bzrlib/tests/interrepository_implementations/test_fetch.py test_fetch.py-20080425213627-j60cjh782ufm83ry-1
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-03-06 10:01:37 +0000
+++ b/bzrlib/repository.py	2009-03-10 07:47:23 +0000
@@ -1453,6 +1453,26 @@
                 result[key] = True
         return result
 
+    def _inventory_xml_lines_for_keys(self, keys):
+        """Get a line iterator of the sort needed for findind references.
+
+        Not relevant for non-xml inventory repositories.
+
+        Ghosts in revision_keys are ignored.
+
+        :param revision_keys: The revision keys for the inventories to inspect.
+        :return: An iterator over (inventory line, revid) for the fulltexts of
+            all of the xml inventories specified by revision_keys.
+        """
+        stream = self.inventories.get_record_stream(keys, 'unordered', True)
+        for record in stream:
+            if record.storage_kind != 'absent':
+                chunks = record.get_bytes_as('chunked')
+                revid = record.key[-1]
+                lines = osutils.chunks_to_lines(chunks)
+                for line in lines:
+                    yield line, revid
+
     def _find_file_ids_from_xml_inventory_lines(self, line_iterator,
         revision_ids):
         """Helper routine for fileids_altered_by_revision_ids.
@@ -1468,15 +1488,20 @@
         revision_ids. Each altered file-ids has the exact revision_ids that
         altered it listed explicitly.
         """
+        seen = set(self._find_text_key_references_from_xml_inventory_lines(
+                line_iterator).iterkeys())
+        # Note that revision_ids are revision keys.
+        parent_maps = self.revisions.get_parent_map(revision_ids)
+        parents = set()
+        map(parents.update, parent_maps.itervalues())
+        parents.difference_update(revision_ids)
+        parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(
+            self._inventory_xml_lines_for_keys(parents)))
+        new_keys = seen - parent_seen
         result = {}
         setdefault = result.setdefault
-        for key in \
-            self._find_text_key_references_from_xml_inventory_lines(
-                line_iterator).iterkeys():
-            # once data is all ensured-consistent; then this is
-            # if revision_id == version_id
-            if key[-1:] in revision_ids:
-                setdefault(key[0], set()).add(key[-1])
+        for key in new_keys:
+            setdefault(key[0], set()).add(key[-1])
         return result
 
     def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
@@ -3165,10 +3190,7 @@
                         # We don't copy the text for the root node unless the
                         # target supports_rich_root.
                         continue
-                    # TODO: Do we need:
-                    #       "if entry.revision == current_revision_id" ?
-                    if entry.revision == current_revision_id:
-                        text_keys.add((file_id, entry.revision))
+                    text_keys.add((file_id, entry.revision))
             revision = self.source.get_revision(current_revision_id)
             pending_deltas.append((basis_id, delta,
                 current_revision_id, revision.parent_ids))

=== modified file 'bzrlib/tests/interrepository_implementations/test_fetch.py'
--- a/bzrlib/tests/interrepository_implementations/test_fetch.py	2009-01-17 01:30:58 +0000
+++ b/bzrlib/tests/interrepository_implementations/test_fetch.py	2009-03-10 07:47:23 +0000
@@ -20,8 +20,10 @@
 import bzrlib
 from bzrlib import (
     errors,
+    inventory,
+    osutils,
     repository,
-    osutils,
+    versionedfile,
     )
 from bzrlib.errors import (
     NoSuchRevision,
@@ -73,6 +75,56 @@
         repo_b = self.make_to_repository('b')
         check_push_rev1(repo_b)
 
+    def test_fetch_inconsistent_last_changed_entries(self):
+        """If an inventory has odd data we should still get what it references.
+        
+        This test tests that we do fetch a file text created in a revision not
+        being fetched, but referenced from the revision we are fetching when the
+        adjacent revisions to the one being fetched do not reference that text.
+        """
+        tree = self.make_branch_and_tree('source')
+        revid = tree.commit('old')
+        to_repo = self.make_to_repository('to_repo')
+        to_repo.fetch(tree.branch.repository, revid)
+        # Make a broken revision and fetch it.
+        source = tree.branch.repository
+        source.lock_write()
+        self.addCleanup(source.unlock)
+        source.start_write_group()
+        try:
+            # We need two revisions: OLD and NEW. NEW will claim to need a file
+            # 'FOO' changed in 'OLD'. OLD will not have that file at all.
+            source.texts.insert_record_stream([
+                versionedfile.FulltextContentFactory(('foo', revid), (), None,
+                'contents')])
+            basis = source.revision_tree(revid)
+            parent_id = basis.path2id('')
+            entry = inventory.make_entry('file', 'foo-path', parent_id, 'foo')
+            entry.revision = revid
+            entry.text_size = len('contents')
+            entry.text_sha1 = osutils.sha_string('contents')
+            inv_sha1, _ = source.add_inventory_by_delta(revid, [
+                (None, 'foo-path', 'foo', entry)], 'new', [revid])
+            rev = Revision(timestamp=0,
+                           timezone=None,
+                           committer="Foo Bar <foo at example.com>",
+                           message="Message",
+                           inventory_sha1=inv_sha1,
+                           revision_id='new',
+                           parent_ids=[revid])
+            source.add_revision(rev.revision_id, rev)
+        except:
+            source.abort_write_group()
+            raise
+        else:
+            source.commit_write_group()
+        to_repo.fetch(source, 'new')
+        to_repo.lock_read()
+        self.addCleanup(to_repo.unlock)
+        self.assertEqual('contents',
+            to_repo.texts.get_record_stream([('foo', revid)],
+            'unordered', True).next().get_bytes_as('fulltext'))
+
     def test_fetch_missing_basis_text(self):
         """If fetching a delta, we should die if a basis is not present."""
         tree = self.make_branch_and_tree('tree')

=== modified file 'bzrlib/tests/per_repository/test_fileid_involved.py'
--- a/bzrlib/tests/per_repository/test_fileid_involved.py	2009-01-17 01:30:58 +0000
+++ b/bzrlib/tests/per_repository/test_fileid_involved.py	2009-03-11 05:26:14 +0000
@@ -141,6 +141,7 @@
             print set(self.branch.repository.get_ancestry(new)).difference(set(self.branch.repository.get_ancestry(old)))
         self.branch.lock_read()
         self.addCleanup(self.branch.unlock)
+        self.branch.repository.fileids_altered_by_revision_ids(["rev-J","rev-K"])
         self.assertEqual(
             {'b-file-id-2006-01-01-defg':set(['rev-J']),
              'c-funky<file-id>quiji%bo':set(['rev-K'])

=== modified file 'bzrlib/tests/test_fetch.py'
--- a/bzrlib/tests/test_fetch.py	2009-03-02 04:45:02 +0000
+++ b/bzrlib/tests/test_fetch.py	2009-03-11 06:44:00 +0000
@@ -330,21 +330,19 @@
 
 class TestKnitToPackFetch(TestCaseWithTransport):
 
-    def find_get_record_stream(self, calls):
-        """In a list of calls, find 'get_record_stream' calls.
+    def find_get_record_stream(self, calls, expected_count=1):
+        """In a list of calls, find the last 'get_record_stream'.
 
-        This also ensures that there is only one get_record_stream call.
+        :param expected_count: The number of calls we should exepect to find.
+            If a different number is found, an assertion is raised.
         """
         get_record_call = None
+        call_count = 0
         for call in calls:
             if call[0] == 'get_record_stream':
-                self.assertIs(None, get_record_call,
-                              "there should only be one call to"
-                              " get_record_stream")
+                call_count += 1
                 get_record_call = call
-        self.assertIsNot(None, get_record_call,
-                         "there should be exactly one call to "
-                         " get_record_stream")
+        self.assertEqual(expected_count, call_count)
         return get_record_call
 
     def test_fetch_with_deltas_no_delta_closure(self):
@@ -370,8 +368,8 @@
                           target._format._fetch_order, False),
                          self.find_get_record_stream(source.texts.calls))
         self.assertEqual(('get_record_stream', [('rev-one',)],
-                          target._format._fetch_order, False),
-                         self.find_get_record_stream(source.inventories.calls))
+          target._format._fetch_order, False),
+          self.find_get_record_stream(source.inventories.calls, 2))
         self.assertEqual(('get_record_stream', [('rev-one',)],
                           target._format._fetch_order, False),
                          self.find_get_record_stream(source.revisions.calls))
@@ -414,8 +412,8 @@
                           target._format._fetch_order, True),
                          self.find_get_record_stream(source.texts.calls))
         self.assertEqual(('get_record_stream', [('rev-one',)],
-                          target._format._fetch_order, True),
-                         self.find_get_record_stream(source.inventories.calls))
+            target._format._fetch_order, True),
+            self.find_get_record_stream(source.inventories.calls, 2))
         self.assertEqual(('get_record_stream', [('rev-one',)],
                           target._format._fetch_order, True),
                          self.find_get_record_stream(source.revisions.calls))

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2009-02-23 15:29:35 +0000
+++ b/bzrlib/weave.py	2009-03-11 05:26:14 +0000
@@ -575,10 +575,7 @@
             version_ids = self.versions()
         version_ids = set(version_ids)
         for lineno, inserted, deletes, line in self._walk_internal(version_ids):
-            # if inserted not in version_ids then it was inserted before the
-            # versions we care about, but because weaves cannot represent ghosts
-            # properly, we do not filter down to that
-            # if inserted not in version_ids: continue
+            if inserted not in version_ids: continue
             if line[-1] != '\n':
                 yield line + '\n', inserted
             else:




More information about the bazaar-commits mailing list