Rev 4510: Major rework, simplify what is put into the queues. in http://bazaar.launchpad.net/~jameinel/bzr/1.17-chk-multilevel

John Arbash Meinel john at arbash-meinel.com
Wed Jul 1 20:01:49 BST 2009


At http://bazaar.launchpad.net/~jameinel/bzr/1.17-chk-multilevel

------------------------------------------------------------
revno: 4510
revision-id: john at arbash-meinel.com-20090701190139-opukkj3tlpm58wo8
parent: john at arbash-meinel.com-20090701183714-1kb1acs6np4d4a4h
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-chk-multilevel
timestamp: Wed 2009-07-01 14:01:39 -0500
message:
  Major rework, simplify what is put into the queues.
  Now we put only refs into the regular queue, and move items into
  their own queue.
  We also no longer need to prioritize the queue, so we use a simple
  'append' rather than anything fancier, and we no longer need to include
  the search key prefix.
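
To make the new queue shape concrete, here is a minimal sketch; only the
attribute names come from this patch, while the class itself is a
simplified, hypothetical stand-in:

    # Sketch of the queue discipline after this change. The attribute
    # names mirror the patch; the class is a hypothetical stand-in.
    class QueueSketch(object):

        def __init__(self):
            # CHK references still to be walked
            self._interesting_queue = []
            # (key, value) items found while reading the root nodes,
            # held until the uninteresting nodes have been walked
            self._interesting_item_queue = []

        def enqueue_ref(self, ref):
            # Ordering no longer matters, so a plain append replaces
            # the old heapq push of a (prefix, None, ref) tuple.
            self._interesting_queue.append(ref)

        def enqueue_item(self, item):
            # Items are queued as bare (key, value) tuples, with no
            # search-key prefix attached.
            self._interesting_item_queue.append(item)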
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2009-07-01 18:37:14 +0000
+++ b/bzrlib/chk_map.py	2009-07-01 19:01:39 +0000
@@ -1427,6 +1427,9 @@
         # The uninteresting and interesting nodes to be searched
         self._uninteresting_queue = []
         self._interesting_queue = []
+        # Holds the (key, value) items found when processing the root nodes,
+        # waiting for the uninteresting nodes to be walked
+        self._interesting_item_queue = []
         self._state = None
 
     def _read_nodes_from_store(self, keys):
@@ -1461,27 +1464,28 @@
         if not self._uninteresting_root_keys:
             # TODO: when there are no _uninteresting_root_keys we can shortcut
             # a lot of the code
-            self._interesting_queue = [('', None, key) for key
-                                       in self._interesting_root_keys]
+            self._interesting_queue = list(self._interesting_root_keys)
             return
         # Read the uninteresting nodes first; we would like to read them
         # simultaneously, but that requires buffering the interesting nodes
         # until all uninteresting ones have been read
         uninteresting_chks_to_enqueue = []
+        all_uninteresting_chks = self._all_uninteresting_chks
         for record, node, prefix_refs, items in \
-            self._read_nodes_from_store(self._uninteresting_root_keys):
+                self._read_nodes_from_store(self._uninteresting_root_keys):
             # Uninteresting node
-            new_refs = [p_k for p_k in prefix_refs
-                             if p_k[1] not in self._all_uninteresting_chks]
-            self._all_uninteresting_chks.update([k for _,k in new_refs])
+            prefix_refs = [p_r for p_r in prefix_refs
+                                if p_r[1] not in all_uninteresting_chks]
+            new_refs = [p_r[1] for p_r in prefix_refs]
+            all_uninteresting_chks.update(new_refs)
             self._all_uninteresting_items.update(items)
             # Queue up the uninteresting references
             # Don't actually put them in the 'to-read' queue until we have
             # finished checking the interesting references
-            uninteresting_chks_to_enqueue.extend(new_refs)
+            uninteresting_chks_to_enqueue.extend(prefix_refs)
         # filter out any root keys that are already known to be uninteresting
         interesting_keys = set(self._interesting_root_keys).difference(
-                                self._all_uninteresting_chks)
+                                all_uninteresting_chks)
         # These references are common to *all* interesting nodes, and thus we
         # know that we have perfect overlap with the uninteresting set,
         # without queueing up any of them
@@ -1490,35 +1494,30 @@
         # added a second time
         self._processed_interesting_refs.update(interesting_keys)
         for record, node, prefix_refs, items in \
-            self._read_nodes_from_store(interesting_keys):
+                self._read_nodes_from_store(interesting_keys):
             # At this level, we now know all the uninteresting references
             # So we can go ahead and filter, and queue up whatever is remaining
             for prefix, ref in prefix_refs:
-                if (ref in self._all_uninteresting_chks
+                if (ref in all_uninteresting_chks
                     or ref in self._processed_interesting_refs):
                     # Either in the uninteresting set, or added by another root
                     continue
                 self._processed_interesting_refs.add(ref)
                 interesting_prefixes.update(
                     [prefix[:i+1] for i in xrange(len(prefix))])
-                self._interesting_queue.append((prefix, None, ref))
-                # heapq.heappush(self._interesting_queue,
-                #                (prefix, None, ref))
+                self._interesting_queue.append(ref)
+            # TODO: We can potentially get multiple items here; however, the
+            #       current design allows for this, as callers will do the work
+            #       to make the results unique. We might profile whether we
+            #       gain anything by ensuring unique return values for items
             for item in items:
                 if item in self._all_uninteresting_items:
                     continue
-                key, value = item
-                # We can't yield 'items' yet, because we haven't dug deep
-                # enough on the uninteresting set
+                self._interesting_item_queue.append(item)
                 # Key is a real key; we need the search key
-                search_prefix = self._search_key_func(key)
-                self._interesting_queue.append((search_prefix, key, value))
-                # heapq.heappush(self._interesting_queue,
-                #                (search_prefix, key, value))
+                search_prefix = self._search_key_func(item[0])
                 interesting_prefixes.update(
                     [search_prefix[:i+1] for i in xrange(len(search_prefix))])
-                interesting_prefixes.add(search_prefix)
-                interesting_prefixes.add(search_prefix[0])
             yield record
         # At this point, we have read all the uninteresting and interesting
         # items, so we can queue up the uninteresting stuff, knowing that we've
@@ -1532,38 +1531,23 @@
             if not_interesting:
                 # This prefix is not part of the remaining 'interesting set'
                 continue
-            self._uninteresting_queue.append((prefix, ref))
+            self._uninteresting_queue.append(ref)
 
     def _flush_interesting_queue(self):
         # No need to maintain the heap invariant anymore; just pull things
         # out and process them
-        interesting = self._interesting_queue
+        refs = set(self._interesting_queue)
         self._interesting_queue = []
         # First pass, flush all interesting items and convert to using direct refs
-        items = []
-        items_append = items.append
-        refs = set()
-        refs_add = refs.add
-        for prefix, key, value in interesting:
-            if key is not None:
-                item = (key, value)
-                if item not in self._all_uninteresting_items:
-                    items_append(item)
-            else:
-                refs_add(value)
-        if items:
-            yield None, items
-        refs = refs.difference(self._all_uninteresting_chks)
-
         all_uninteresting_chks = self._all_uninteresting_chks
         processed_interesting_refs = self._processed_interesting_refs
         all_uninteresting_items = self._all_uninteresting_items
+        interesting_items = [item for item in self._interesting_item_queue
+                                   if item not in all_uninteresting_items]
+        if interesting_items:
+            yield None, interesting_items
+        refs = refs.difference(all_uninteresting_chks)
         while refs:
-            # TODO: add a test for this
-            #       The idea is that we saw an item in the queue (say during
-            #       the root load), and since then we have seen that we
-            #       shouldn't walk it after all.
-            # refs = refs.difference(all_uninteresting_chks)
             next_refs = set()
             next_refs_update = next_refs.update
             # Inlining _read_nodes_from_store improves 'bzr branch bzr.dev'
@@ -1588,16 +1572,14 @@
         # For now, since we don't ever abort from loading uninteresting items,
         # just read the whole thing in at once, so that we get a single
         # request, instead of multiple
-        refs = [pr[1] for pr in self._uninteresting_queue]
+        refs = self._uninteresting_queue
         self._uninteresting_queue = []
         all_uninteresting_chks = self._all_uninteresting_chks
         for record, _, prefix_refs, items in self._read_nodes_from_store(refs):
             self._all_uninteresting_items.update(items)
-            prefix_refs = [pr for pr in prefix_refs
-                           if pr[1] not in all_uninteresting_chks]
-            self._uninteresting_queue.extend(prefix_refs)
-            all_uninteresting_chks.update([pr[1] for pr in prefix_refs])
-        # heapq.heapify(self._uninteresting_queue)
+            refs = [r for _,r in prefix_refs if r not in all_uninteresting_chks]
+            self._uninteresting_queue.extend(refs)
+            all_uninteresting_chks.update(refs)
 
     def _process_queues(self):
         # Finish processing all of the items in the queue, for simplicity, we

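As a reading aid, the flushing logic in _flush_interesting_queue above
reduces to the pattern below. This is a sketch only; the standalone
helper and its name are hypothetical, not part of the patch:

    # refs deduplicate by passing through a set; items are filtered
    # against the known-uninteresting items; refs already seen on the
    # uninteresting side are dropped before any store reads happen.
    def flush_sketch(ref_queue, item_queue, all_uninteresting_items,
                     all_uninteresting_chks):
        refs = set(ref_queue)              # duplicates collapse here
        interesting_items = [item for item in item_queue
                             if item not in all_uninteresting_items]
        if interesting_items:
            yield None, interesting_items  # one batch of bare items
        refs = refs.difference(all_uninteresting_chks)
        # ... the surviving refs would then be read from the store in
        # batches, as the while-refs loop in the patch does ...
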
=== modified file 'bzrlib/tests/test_chk_map.py'
--- a/bzrlib/tests/test_chk_map.py	2009-06-30 22:28:21 +0000
+++ b/bzrlib/tests/test_chk_map.py	2009-07-01 19:01:39 +0000
@@ -2153,9 +2153,9 @@
         self.assertEqual([key2], root_results)
         # We should have queued up only items that aren't in the uninteresting
         # set
-        search_key_aaa = search_key_func(('aaa',))
-        self.assertEqual([(search_key_aaa, ('aaa',), 'new aaa content')],
-                         iterator._interesting_queue)
+        self.assertEqual([(('aaa',), 'new aaa content')],
+                         iterator._interesting_item_queue)
+        self.assertEqual([], iterator._interesting_queue)
         # And there are no uninteresting references, so that queue should be
         # empty
         self.assertEqual([], iterator._uninteresting_queue)
@@ -2190,8 +2190,9 @@
         self.assertEqual([key2], root_results)
         # At this point, we should have queued up only the 'a' Leaf on both
         # sides; both 'c' and 'd' are known not to have changed on either side
-        self.assertEqual([('a', None, key2_a)], iterator._interesting_queue)
-        self.assertEqual([('a', key1_a)], iterator._uninteresting_queue)
+        self.assertEqual([key2_a], iterator._interesting_queue)
+        self.assertEqual([], iterator._interesting_item_queue)
+        self.assertEqual([key1_a], iterator._uninteresting_queue)
 
     def test__read_all_roots_multi_interesting_prepares_queues(self):
         c_map = self.make_one_deep_map(chk_map._search_key_plain)
@@ -2214,11 +2215,10 @@
         root_results = [record.key for record in iterator._read_all_roots()]
         self.assertEqual(sorted([key2, key3]), sorted(root_results))
         # We should have queued up key2_a and key3_c, but not key2_c or key3_a
-        self.assertEqual([('a', None, key2_a), ('c', None, key3_c)],
-                         iterator._interesting_queue)
+        self.assertEqual([key2_a, key3_c], iterator._interesting_queue)
+        self.assertEqual([], iterator._interesting_item_queue)
         # And we should have queued up both a and c for the uninteresting set
-        self.assertEqual([('a', key1_a), ('c', key1_c)],
-                         iterator._uninteresting_queue)
+        self.assertEqual([key1_a, key1_c], iterator._uninteresting_queue)
 
     def test__read_all_roots_different_depths(self):
         c_map = self.make_two_deep_map(chk_map._search_key_plain)
@@ -2239,19 +2239,17 @@
         self.assertEqual([key2], root_results)
         # Only the 'a' subset should be queued up, since 'c' and 'd' cannot be
         # present
-        self.assertEqual([('a', key1_a)], iterator._uninteresting_queue)
-        self.assertEqual([('aa', None, key2_aa), ('ad', None, key2_ad)],
-                         iterator._interesting_queue)
+        self.assertEqual([key1_a], iterator._uninteresting_queue)
+        self.assertEqual([key2_aa, key2_ad], iterator._interesting_queue)
+        self.assertEqual([], iterator._interesting_item_queue)
 
         iterator = self.get_iterator([key1], [key2], chk_map._search_key_plain)
         root_results = [record.key for record in iterator._read_all_roots()]
         self.assertEqual([key1], root_results)
 
-        self.assertEqual([('aa', key2_aa), ('ad', key2_ad)],
-                         iterator._uninteresting_queue)
-        self.assertEqual([('a', None, key1_a), ('c', None, key1_c),
-                          ('d', None, key1_d),
-                         ], iterator._interesting_queue)
+        self.assertEqual([key2_aa, key2_ad], iterator._uninteresting_queue)
+        self.assertEqual([key1_a, key1_c, key1_d], iterator._interesting_queue)
+        self.assertEqual([], iterator._interesting_item_queue)
 
     def test__read_all_roots_different_depths_16(self):
         c_map = self.make_two_deep_map(chk_map._search_key_16)
@@ -2274,21 +2272,20 @@
         root_results = [record.key for record in iterator._read_all_roots()]
         self.assertEqual([key2], root_results)
         # Only the subset of keys that may be present should be queued up.
-        self.assertEqual([('F', key1_F)], iterator._uninteresting_queue)
-        self.assertEqual([('F0', None, key2_F0), ('F3', None, key2_F3),
-                          ('F4', None, key2_F4), ('FD', None, key2_FD),
-                         ], sorted(iterator._interesting_queue))
+        self.assertEqual([key1_F], iterator._uninteresting_queue)
+        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
+                         sorted(iterator._interesting_queue))
+        self.assertEqual([], iterator._interesting_item_queue)
 
         iterator = self.get_iterator([key1], [key2], chk_map._search_key_16)
         root_results = [record.key for record in iterator._read_all_roots()]
         self.assertEqual([key1], root_results)
 
-        self.assertEqual([('F0', key2_F0), ('F3', key2_F3),
-                          ('F4', key2_F4), ('FD', key2_FD),
-                         ], sorted(iterator._uninteresting_queue))
-        self.assertEqual([('2', None, key1_2), ('4', None, key1_4),
-                          ('C', None, key1_C), ('F', None, key1_F),
-                         ], sorted(iterator._interesting_queue))
+        self.assertEqual(sorted([key2_F0, key2_F3, key2_F4, key2_FD]),
+                         sorted(iterator._uninteresting_queue))
+        self.assertEqual(sorted([key1_2, key1_4, key1_C, key1_F]),
+                         sorted(iterator._interesting_queue))
+        self.assertEqual([], iterator._interesting_item_queue)
 
     def test__read_all_roots_mixed_depth(self):
         c_map = self.make_one_deep_two_prefix_map(chk_map._search_key_plain)
@@ -2309,7 +2306,8 @@
         # 'ad' matches exactly 'a' on the other side, so it should be removed,
         # and neither side should have it queued for walking
         self.assertEqual([], iterator._uninteresting_queue)
-        self.assertEqual([('b', None, key2_b)], iterator._interesting_queue)
+        self.assertEqual([key2_b], iterator._interesting_queue)
+        self.assertEqual([], iterator._interesting_item_queue)
 
         iterator = self.get_iterator([key1], [key2], chk_map._search_key_plain)
         root_results = [record.key for record in iterator._read_all_roots()]
@@ -2320,9 +2318,10 @@
         #       than one interesting key, so for now we live with this.
         #       Consider revising, but only if benchmarking shows it to be
         #       a real-world issue
-        self.assertEqual([('a', key2_a)], iterator._uninteresting_queue)
+        self.assertEqual([key2_a], iterator._uninteresting_queue)
         # self.assertEqual([], iterator._uninteresting_queue)
-        self.assertEqual([('aa', None, key1_aa)], iterator._interesting_queue)
+        self.assertEqual([key1_aa], iterator._interesting_queue)
+        self.assertEqual([], iterator._interesting_item_queue)
 
     def test__read_all_roots_yields_extra_deep_records(self):
         # This is slightly controversial, as we will yield a chk page that we
@@ -2353,10 +2352,10 @@
         # However, even though we have yielded the root node to be fetched,
         # we should have enqueued all of the chk pages to be walked, so that we
         # can find the keys if they are present
-        self.assertEqual([('a', key1_a)], iterator._uninteresting_queue)
-        self.assertEqual([('acc', ('acc',), 'initial acc content'),
-                          ('ace', ('ace',), 'initial ace content'),
-                         ], iterator._interesting_queue)
+        self.assertEqual([key1_a], iterator._uninteresting_queue)
+        self.assertEqual([(('acc',), 'initial acc content'),
+                          (('ace',), 'initial ace content'),
+                         ], iterator._interesting_item_queue)
 
     def test__read_all_roots_multiple_targets(self):
         c_map = self.make_root_only_map()
@@ -2376,9 +2375,9 @@
         self.assertEqual([], iterator._uninteresting_queue)
         # the key 'd' is interesting from key2 and key3, but should only be
         # entered into the queue once
-        self.assertEqual(sorted([('c', None, key2_c), ('c', None, key3_c),
-                                 ('d', None, key2_d)]),
+        self.assertEqual(sorted([key2_c, key3_c, key2_d]),
                          sorted(iterator._interesting_queue))
+        self.assertEqual([], iterator._interesting_item_queue)
 
     def test__read_all_roots_no_uninteresting(self):
         # This is the 'initial branch' case. With nothing in the uninteresting
@@ -2390,7 +2389,8 @@
         root_results = [record.key for record in iterator._read_all_roots()]
         self.assertEqual([], root_results)
         self.assertEqual([], iterator._uninteresting_queue)
-        self.assertEqual([('', None, key1)], iterator._interesting_queue)
+        self.assertEqual([key1], iterator._interesting_queue)
+        self.assertEqual([], iterator._interesting_item_queue)
 
         c_map2 = self.make_one_deep_map()
         key2 = c_map2.key()
@@ -2399,8 +2399,9 @@
         root_results = [record.key for record in iterator._read_all_roots()]
         self.assertEqual([], root_results)
         self.assertEqual([], iterator._uninteresting_queue)
-        self.assertEqual(sorted([('', None, key1), ('', None, key2)]),
+        self.assertEqual(sorted([key1, key2]),
                          sorted(iterator._interesting_queue))
+        self.assertEqual([], iterator._interesting_item_queue)
 
     def test__read_all_roots_no_uninteresting_16(self):
         c_map = self.make_two_deep_map(chk_map._search_key_16)
@@ -2409,7 +2410,8 @@
         root_results = [record.key for record in iterator._read_all_roots()]
         self.assertEqual([], root_results)
         self.assertEqual([], iterator._uninteresting_queue)
-        self.assertEqual([('', None, key1)], iterator._interesting_queue)
+        self.assertEqual([key1], iterator._interesting_queue)
+        self.assertEqual([], iterator._interesting_item_queue)
 
         c_map2 = self.make_one_deep_map(chk_map._search_key_16)
         key2 = c_map2.key()
@@ -2418,8 +2420,9 @@
         root_results = [record.key for record in iterator._read_all_roots()]
         self.assertEqual([], root_results)
         self.assertEqual([], iterator._uninteresting_queue)
-        self.assertEqual(sorted([('', None, key1), ('', None, key2)]),
+        self.assertEqual(sorted([key1, key2]),
                          sorted(iterator._interesting_queue))
+        self.assertEqual([], iterator._interesting_item_queue)
 
     def test__read_all_roots_multiple_uninteresting(self):
         c_map = self.make_two_deep_map()
@@ -2438,9 +2441,9 @@
         self.assertEqual([key3], root_results)
         # the 'a' keys should not be queued up twice, since they are
         # identical
-        self.assertEqual([('a', key1_a)],
-                         iterator._uninteresting_queue)
-        self.assertEqual([('a', None, key3_a)], iterator._interesting_queue)
+        self.assertEqual([key1_a], iterator._uninteresting_queue)
+        self.assertEqual([key3_a], iterator._interesting_queue)
+        self.assertEqual([], iterator._interesting_item_queue)
 
     def test__process_next_uninteresting_batched_no_dupes(self):
         c_map = self.make_two_deep_map()
@@ -2463,16 +2466,15 @@
                                      chk_map._search_key_plain)
         root_results = [record.key for record in iterator._read_all_roots()]
         self.assertEqual([key3], root_results)
-        self.assertEqual(sorted([('a', key1_a), ('a', key2_a)]),
+        self.assertEqual(sorted([key1_a, key2_a]),
                          sorted(iterator._uninteresting_queue))
-        self.assertEqual([('a', None, key3_a)], iterator._interesting_queue)
+        self.assertEqual([key3_a], iterator._interesting_queue)
+        self.assertEqual([], iterator._interesting_item_queue)
         iterator._process_next_uninteresting()
         # All of the uninteresting records should be brought in and queued up,
         # but we should not have any duplicates
-        self.assertEqual(sorted([('aa', key1_aa), ('ab', key1_ab),
-                                 ('ac', key1_ac), ('ad', key1_ad),
-                                 ('aa', key2_aa),
-                                ]), sorted(iterator._uninteresting_queue))
+        self.assertEqual(sorted([key1_aa, key1_ab, key1_ac, key1_ad, key2_aa]),
+                         sorted(iterator._uninteresting_queue))
 
 
 class TestIterInterestingNodes(TestCaseWithExampleMaps):

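One detail worth pulling out of the chk_map.py changes: the prefix
bookkeeping expands every search prefix into all of its leading
substrings, which is what lets _read_all_roots skip uninteresting refs
whose prefix can no longer overlap anything interesting. A standalone
sketch (the helper name is invented for illustration):

    # 'F3a' contributes 'F', 'F3', and 'F3a' to interesting_prefixes.
    def expand_prefix(prefix):
        return [prefix[:i + 1] for i in xrange(len(prefix))]

    # expand_prefix('F3a') -> ['F', 'F3', 'F3a']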

