Rev 4438: Cherrypick the chk-map fixes for bug #390563 to the 1.16 branch. in http://bazaar.launchpad.net/~jameinel/bzr/1.16-bug-390563

John Arbash Meinel john at arbash-meinel.com
Thu Jun 25 16:08:53 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.16-bug-390563

------------------------------------------------------------
revno: 4438
revision-id: john at arbash-meinel.com-20090625150838-7y1yihh092et5klj
parent: pqm at pqm.ubuntu.com-20090618112922-6t54pt9kyuglz22m
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.16-bug-390563
timestamp: Thu 2009-06-25 10:08:38 -0500
message:
  Cherrypick the chk-map fixes for bug #390563 to the 1.16 branch.
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS	2009-06-18 10:29:00 +0000
+++ b/NEWS	2009-06-25 15:08:38 +0000
@@ -6,6 +6,24 @@
 .. contents:: List of Releases
    :depth: 1
 
+bzr 1.16.1
+##########
+
+Bug Fixes
+*********
+
+* We now properly request a more minimal set of file texts when fetching
+  multiple revisions. (Robert Collins, John Arbash Meinel, #390563)
+
+* ``chk_map.iter_interesting_nodes`` now properly uses the *intersection*
+  of referenced nodes rather than the *union* to determine what
+  uninteresting pages we still need to look at. Prior to this,
+  incrementally pushing to stacked branch would push the minimal data, but
+  fetching everything would request extra texts. There are some unhandled
+  cases wrt trees of different depths, but this fixes the common cases.
+  (Robert Collins, John Arbash Meinel, #390563)
+
+
 bzr 1.16 "It's yesterday in California" 2009-06-18
 ##################################################
 :Codename: yesterday-in-california

=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2009-05-13 21:59:57 +0000
+++ b/bzrlib/chk_map.py	2009-06-25 15:08:38 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -1292,6 +1292,7 @@
     chks_to_read = uninteresting_keys.union(interesting_keys)
     next_uninteresting = set()
     next_interesting = set()
+    next_interesting_intersection = None
     uninteresting_items = set()
     interesting_items = set()
     interesting_to_yield = []
@@ -1313,11 +1314,17 @@
         else:
             interesting_to_yield.append(record.key)
             if type(node) is InternalNode:
+                if next_interesting_intersection is None:
+                    next_interesting_intersection = set(node.refs())
+                else:
+                    next_interesting_intersection = \
+                        next_interesting_intersection.intersection(node.refs())
                 next_interesting.update(node.refs())
             else:
                 interesting_items.update(node.iteritems(None))
     return (next_uninteresting, uninteresting_items,
-            next_interesting, interesting_to_yield, interesting_items)
+            next_interesting, interesting_to_yield, interesting_items,
+            next_interesting_intersection)
 
 
 def _find_all_uninteresting(store, interesting_root_keys,
@@ -1338,16 +1345,21 @@
     # uninteresting set
     (uninteresting_keys, uninteresting_items,
      interesting_keys, interesting_to_yield,
-     interesting_items) = _find_children_info(store, interesting_root_keys,
+     interesting_items, interesting_intersection,
+     ) = _find_children_info(store, interesting_root_keys,
                                               uninteresting_root_keys,
                                               pb=pb)
     all_uninteresting_chks.update(uninteresting_keys)
     all_uninteresting_items.update(uninteresting_items)
     del uninteresting_items
-    # Note: Exact matches between interesting and uninteresting do not need
-    #       to be search further. Non-exact matches need to be searched in case
-    #       there is a future exact-match
-    uninteresting_keys.difference_update(interesting_keys)
+    # Do not examine in detail pages common to all interesting trees.
+    # Pages that are common to all interesting trees will have their
+    # older versions found via the uninteresting tree traversal. Some pages
+    # found via the interesting trees traversal will be uninteresting for
+    # other of the interesting trees, which is why we require the pages to be
+    # common for us to trim them.
+    if interesting_intersection is not None:
+        uninteresting_keys.difference_update(interesting_intersection)
 
     # Second, find the full set of uninteresting bits reachable by the
     # uninteresting roots

=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py	2009-05-13 21:59:57 +0000
+++ b/bzrlib/tests/test_chk_map.py	2009-06-25 15:08:38 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -1836,8 +1836,8 @@
                                     self).get_chk_bytes()
         return self._chk_bytes
 
-    def get_map_key(self, a_dict):
-        c_map = self._get_map(a_dict, maximum_size=10,
+    def get_map_key(self, a_dict, maximum_size=10):
+        c_map = self._get_map(a_dict, maximum_size=maximum_size,
                               chk_bytes=self.get_chk_bytes())
         return c_map.key()
 
@@ -2060,3 +2060,149 @@
              (aac_key, [(('aac',), 'target1')]),
              (bba_key, [(('bba',), 'target2')]),
             ], [target1, target2], [basis1, basis2])
+
+    def test_multiple_maps_overlapping_common_new(self):
+        # Test that when a node found through the interesting_keys iteration
+        # for *some roots* and also via the uninteresting keys iteration, that
+        # it is still scanned for uninteresting refs and items, because its
+        # not truely new. This requires 2 levels of InternalNodes to expose,
+        # because of the way the bootstrap in _find_children_info works.
+        # This suggests that the code is probably amenable to/benefit from
+        # consolidation.
+        # How does this test work?
+        # 1) We need a second level InternalNode present in a basis tree.
+        # 2) We need a left side new tree that uses that InternalNode
+        # 3) We need a right side new tree that does not use that InternalNode
+        #    at all but that has an unchanged *value* that was reachable inside
+        #    that InternalNode
+        basis = self.get_map_key({
+            # InternalNode, unchanged in left:
+            ('aaa',): 'left',
+            ('abb',): 'right',
+            # Forces an internalNode at 'a'
+            ('ccc',): 'common',
+            })
+        left = self.get_map_key({
+            # All of basis unchanged
+            ('aaa',): 'left',
+            ('abb',): 'right',
+            ('ccc',): 'common',
+            # And a new top level node so the root key is different
+            ('ddd',): 'change',
+            })
+        right = self.get_map_key({
+            # A value that is unchanged from basis and thus should be filtered
+            # out.
+            ('abb',): 'right'
+            })
+        basis_map = CHKMap(self.get_chk_bytes(), basis)
+        self.assertEqualDiff(
+            "'' InternalNode\n"
+            "  'a' InternalNode\n"
+            "    'aa' LeafNode\n"
+            "      ('aaa',) 'left'\n"
+            "    'ab' LeafNode\n"
+            "      ('abb',) 'right'\n"
+            "  'c' LeafNode\n"
+            "      ('ccc',) 'common'\n",
+            basis_map._dump_tree())
+        # Get left expected data
+        left_map = CHKMap(self.get_chk_bytes(), left)
+        self.assertEqualDiff(
+            "'' InternalNode\n"
+            "  'a' InternalNode\n"
+            "    'aa' LeafNode\n"
+            "      ('aaa',) 'left'\n"
+            "    'ab' LeafNode\n"
+            "      ('abb',) 'right'\n"
+            "  'c' LeafNode\n"
+            "      ('ccc',) 'common'\n"
+            "  'd' LeafNode\n"
+            "      ('ddd',) 'change'\n",
+            left_map._dump_tree())
+        # Keys from left side target
+        l_d_key = left_map._root_node._items['d'].key()
+        # Get right expected data
+        right_map = CHKMap(self.get_chk_bytes(), right)
+        self.assertEqualDiff(
+            "'' LeafNode\n"
+            "      ('abb',) 'right'\n",
+            right_map._dump_tree())
+        # Keys from the right side target - none, the root is enough.
+        # Test behaviour
+        self.expectFailure("we don't properly filter different depths",
+            self.assertIterInteresting,
+            [(left, []),
+             (right, []),
+             (l_d_key, [(('ddd',), 'change')]),
+            ], [left, right], [basis])
+        self.assertIterInteresting(
+            [(left, []),
+             (right, []),
+             (l_d_key, [(('ddd',), 'change')]),
+            ], [left, right], [basis])
+
+    def test_multiple_maps_similar(self):
+        # We want to have a depth=2 tree, with multiple entries in each leaf
+        # node
+        basis = self.get_map_key({
+            ('aaa',): 'unchanged',
+            ('abb',): 'will change left',
+            ('caa',): 'unchanged',
+            ('cbb',): 'will change right',
+            }, maximum_size=60)
+        left = self.get_map_key({
+            ('aaa',): 'unchanged',
+            ('abb',): 'changed left',
+            ('caa',): 'unchanged',
+            ('cbb',): 'will change right',
+            }, maximum_size=60)
+        right = self.get_map_key({
+            ('aaa',): 'unchanged',
+            ('abb',): 'will change left',
+            ('caa',): 'unchanged',
+            ('cbb',): 'changed right',
+            }, maximum_size=60)
+        basis_map = CHKMap(self.get_chk_bytes(), basis)
+        self.assertEqualDiff(
+            "'' InternalNode\n"
+            "  'a' LeafNode\n"
+            "      ('aaa',) 'unchanged'\n"
+            "      ('abb',) 'will change left'\n"
+            "  'c' LeafNode\n"
+            "      ('caa',) 'unchanged'\n"
+            "      ('cbb',) 'will change right'\n",
+            basis_map._dump_tree())
+        # Get left expected data
+        left_map = CHKMap(self.get_chk_bytes(), left)
+        self.assertEqualDiff(
+            "'' InternalNode\n"
+            "  'a' LeafNode\n"
+            "      ('aaa',) 'unchanged'\n"
+            "      ('abb',) 'changed left'\n"
+            "  'c' LeafNode\n"
+            "      ('caa',) 'unchanged'\n"
+            "      ('cbb',) 'will change right'\n",
+            left_map._dump_tree())
+        # Keys from left side target
+        l_a_key = left_map._root_node._items['a'].key()
+        l_c_key = left_map._root_node._items['c'].key()
+        # Get right expected data
+        right_map = CHKMap(self.get_chk_bytes(), right)
+        self.assertEqualDiff(
+            "'' InternalNode\n"
+            "  'a' LeafNode\n"
+            "      ('aaa',) 'unchanged'\n"
+            "      ('abb',) 'will change left'\n"
+            "  'c' LeafNode\n"
+            "      ('caa',) 'unchanged'\n"
+            "      ('cbb',) 'changed right'\n",
+            right_map._dump_tree())
+        r_a_key = right_map._root_node._items['a'].key()
+        r_c_key = right_map._root_node._items['c'].key()
+        self.assertIterInteresting(
+            [(left, []),
+             (right, []),
+             (l_a_key, [(('abb',), 'changed left')]),
+             (r_c_key, [(('cbb',), 'changed right')]),
+            ], [left, right], [basis])



More information about the bazaar-commits mailing list