Rev 4513: Some small code cleanup passes in http://bazaar.launchpad.net/~jameinel/bzr/1.17-chk-multilevel

Wed Jul 1 21:12:29 BST 2009

At http://bazaar.launchpad.net/~jameinel/bzr/1.17-chk-multilevel

------------------------------------------------------------
revno: 4513
revision-id: john at arbash-meinel.com-20090701201216-dqjobvo0140j61bl
parent: john at arbash-meinel.com-20090701192557-c74fbemabmmqu1jj
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 1.17-chk-multilevel
timestamp: Wed 2009-07-01 15:12:16 -0500
message:
  Some small code cleanup passes
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'

--- a/bzrlib/chk_map.py	2009-07-01 19:25:57 +0000
+++ b/bzrlib/chk_map.py	2009-07-01 20:12:16 +0000
@@ -1411,15 +1411,14 @@
         self._interesting_root_keys = interesting_root_keys
         self._uninteresting_root_keys = uninteresting_root_keys
         self._pb = pb
-        # Note: It would be possible to be smarter about
-        #       all_uninteresting_chks. Instead of having one giant set, we
-        #       could have sets based on the possible prefixes that could reach
-        #       that key. However, there isn't much to be gained by splitting
-        #       it out, yet.
+        # All uninteresting chks that we have seen. By the time they are added
+        # here, they should be either fully ignored, or queued up for
+        # processing
         self._all_uninteresting_chks = set(self._uninteresting_root_keys)
+        # All items that we have seen from the uninteresting_root_keys
         self._all_uninteresting_items = set()
         # These are interesting items which were either read, or already in the
-        # interesting queue (so don't add these refs again)
+        # interesting queue (so we don't need to walk them again)
         self._processed_interesting_refs = set()
         self._search_key_func = search_key_func
 
@@ -1432,8 +1431,11 @@
         self._state = None
 
     def _read_nodes_from_store(self, keys):
-        # TODO: make use of _page_cache, or firmly decide not to because we
-        #       want to use 'record' objects.
+        # We chose not to use _page_cache, because we think in terms of records
+        # to be yielded. Also, we expect to touch each page only 1 time during
+        # this code. (We may want to evaluate saving the raw bytes into the
+        # page cache, which would allow a working tree update after the fetch
+        # to not have to read the bytes again.)
         stream = self._store.get_record_stream(keys, 'unordered', True)
         for record in stream:
             if self._pb is not None:
@@ -1453,24 +1455,9 @@
                 items = node._items.items()
             yield record, node, prefix_refs, items
 
-    def _read_all_roots(self):
-        """Read the root pages.
-
-        This is structured as a generator, so that the root records can be
-        yielded up to whoever needs them without any buffering.
-        """
-        # This is the bootstrap phase
-        if not self._uninteresting_root_keys:
-            # TODO: when there are no _uninteresting_root_keys we can shortcut
-            # a lot of the code
-            self._interesting_queue = list(self._interesting_root_keys)
-            return
-        # Read the uninteresting nodes first, we would like to read them
-        # simultaneously, but that requires buffering the interesting nodes
-        # until all uninteresting ones have been read
+    def _read_uninteresting_roots(self):
         uninteresting_chks_to_enqueue = []
         all_uninteresting_chks = self._all_uninteresting_chks
-        processed_interesting_refs = self._processed_interesting_refs
         for record, node, prefix_refs, items in \
                 self._read_nodes_from_store(self._uninteresting_root_keys):
             # Uninteresting node
@@ -1483,22 +1470,53 @@
             # Don't actually put them in the 'to-read' queue until we have
             # finished checking the interesting references
             uninteresting_chks_to_enqueue.extend(prefix_refs)
+        return uninteresting_chks_to_enqueue
+
+    def _enqueue_uninteresting(self, interesting_prefixes,
+                               uninteresting_chks_to_enqueue):
+        # At this point, we have read all the uninteresting and interesting
+        # items, so we can queue up the uninteresting stuff, knowing that we've
+        # handled the interesting ones
+        for prefix, ref in uninteresting_chks_to_enqueue:
+            not_interesting = True
+            for i in xrange(len(prefix), 0, -1):
+                if prefix[:i] in interesting_prefixes:
+                    not_interesting = False
+                    break
+            if not_interesting:
+                # This prefix is not part of the remaining 'interesting set'
+                continue
+            self._uninteresting_queue.append(ref)
+
+    def _read_all_roots(self):
+        """Read the root pages.
+
+        This is structured as a generator, so that the root records can be
+        yielded up to whoever needs them without any buffering.
+        """
+        # This is the bootstrap phase
+        if not self._uninteresting_root_keys:
+            # With no uninteresting_root_keys we can just shortcut and be ready
+            # for _flush_interesting_queue
+            self._interesting_queue = list(self._interesting_root_keys)
+            return
+        uninteresting_chks_to_enqueue = self._read_uninteresting_roots()
         # filter out any root keys that are already known to be uninteresting
         interesting_keys = set(self._interesting_root_keys).difference(
-                                all_uninteresting_chks)
-        # These references are common to *all* interesting nodes, and thus we
-        # know that we have perfect overlap with uninteresting, without queue
-        # up any of them
+                                self._all_uninteresting_chks)
+        # These are prefixes that are present in interesting_keys that we are
+        # thinking to yield
         interesting_prefixes = set()
         # We are about to yield all of these, so we don't want them getting
         # added a second time
+        processed_interesting_refs = self._processed_interesting_refs
         processed_interesting_refs.update(interesting_keys)
         for record, node, prefix_refs, items in \
                 self._read_nodes_from_store(interesting_keys):
             # At this level, we now know all the uninteresting references
             # So we filter and queue up whatever is remaining
             prefix_refs = [p_r for p_r in prefix_refs
-                           if p_r[1] not in all_uninteresting_chks
+                           if p_r[1] not in self._all_uninteresting_chks
                               and p_r[1] not in processed_interesting_refs]
             refs = [p_r[1] for p_r in prefix_refs]
             interesting_prefixes.update([p_r[0] for p_r in prefix_refs])
@@ -1521,20 +1539,8 @@
         for prefix in list(interesting_prefixes):
             interesting_prefixes.update([prefix[:i]
                                          for i in xrange(1, len(prefix))])
-
-        # At this point, we have read all the uninteresting and interesting
-        # items, so we can queue up the uninteresting stuff, knowing that we've
-        # handled the interesting ones
-        for prefix, ref in uninteresting_chks_to_enqueue:
-            not_interesting = True
-            for i in xrange(len(prefix), 0, -1):
-                if prefix[:i] in interesting_prefixes:
-                    not_interesting = False
-                    break
-            if not_interesting:
-                # This prefix is not part of the remaining 'interesting set'
-                continue
-            self._uninteresting_queue.append(ref)
+        self._enqueue_uninteresting(interesting_prefixes,
+                                    uninteresting_chks_to_enqueue)
 
     def _flush_interesting_queue(self):
         # No need to maintain the heap invariant anymore, just pull things out
@@ -1547,6 +1553,7 @@
         all_uninteresting_items = self._all_uninteresting_items
         interesting_items = [item for item in self._interesting_item_queue
                                    if item not in all_uninteresting_items]
+        self._interesting_item_queue = []
         if interesting_items:
             yield None, interesting_items
         refs = refs.difference(all_uninteresting_chks)
@@ -1555,26 +1562,19 @@
             next_refs_update = next_refs.update
             # Inlining _read_nodes_from_store improves 'bzr branch bzr.dev'
             # from 1m54s to 1m51s. Consider it.
-            for record, _, prefix_refs, items in \
-                    self._read_nodes_from_store(refs):
+            for record, _, p_refs, items in self._read_nodes_from_store(refs):
                 items = [item for item in items
                          if item not in all_uninteresting_items]
                 yield record, items
-                next_refs_update([p_r[1] for p_r in prefix_refs])
+                next_refs_update([p_r[1] for p_r in p_refs])
             next_refs = next_refs.difference(all_uninteresting_chks)
             next_refs = next_refs.difference(processed_interesting_refs)
             processed_interesting_refs.update(next_refs)
             refs = next_refs
 
     def _process_next_uninteresting(self):
-        # TODO: We really should be filtering uninteresting requests a bit more
-        #       Specifically, past the root level, we should be able to filter
-        #       out uninteresting nodes that are not referenced by interesting
-        #       items (we *do* currently filter out uninteresting nodes
-        #       referenced from the root.)
-        # For now, since we don't ever abort from loading uninteresting items,
-        # just read the whole thing in at once, so that we get a single
-        # request, instead of multiple
+        # Since we don't filter uninteresting any further than during
+        # _read_all_roots, process the whole queue in a single pass.
         refs = self._uninteresting_queue
         self._uninteresting_queue = []
         all_uninteresting_chks = self._all_uninteresting_chks
@@ -1585,13 +1585,16 @@
             all_uninteresting_chks.update(refs)
 
     def _process_queues(self):
-        # Finish processing all of the items in the queue, for simplicity, we
-        # just do things one node at a time
-        # TODO: We should be able to do a lot more 'in parallel'
         while self._uninteresting_queue:
             self._process_next_uninteresting()
         return self._flush_interesting_queue()
 
+    def process(self):
+        for record in self._read_all_roots():
+            yield record, []
+        for record, items in self._process_queues():
+            yield record, items
+
 
 def iter_interesting_nodes(store, interesting_root_keys,
                            uninteresting_root_keys, pb=None):
@@ -1611,10 +1614,7 @@
                                        uninteresting_root_keys,
                                        search_key_func=store._search_key_func,
                                        pb=pb)
-    for record in iterator._read_all_roots():
-        yield record, []
-    for record, items in iterator._process_queues():
-        yield record, items
+    return iterator.process()
 
 
 try: