Rev 3244: Reduce the number of cache misses by caching known heads answers in http://bzr.arbash-meinel.com/branches/bzr/1.3-dev/annotate_cleanup

Thu Mar 6 09:20:48 GMT 2008

At http://bzr.arbash-meinel.com/branches/bzr/1.3-dev/annotate_cleanup

------------------------------------------------------------
revno: 3244
revision-id:john at arbash-meinel.com-20080306092048-cgeay6ixt2sepk2w
parent: john at arbash-meinel.com-20080305235303-glnaa7hedza0xcz1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: annotate_cleanup
timestamp: Thu 2008-03-06 09:20:48 +0000
message:
  Reduce the number of cache misses by caching known heads answers
  (drops requests from 877 => ~600, though doesn't drop speed by a lot)
  Also switch to a class that assumes you won't try to mess with the returned values.
modified:
  bzrlib/annotate.py             annotate.py-20050922133147-7c60541d2614f022
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
-------------- next part --------------
=== modified file 'bzrlib/annotate.py'

--- a/bzrlib/annotate.py	2008-02-19 23:18:16 +0000
+++ b/bzrlib/annotate.py	2008-03-06 09:20:48 +0000
@@ -285,10 +285,16 @@
                         else:
                             heads = heads_provider.heads((left[0], right[0]))
                             if len(heads) == 1:
-                                lines_append((heads.pop(), left[1]))
+                                lines_append((iter(heads).next(), left[1]))
                             else:
                                 # Both claim different origins
                                 lines_append((new_revision_id, left[1]))
+                                # We know that new_revision_id is the head for
+                                # left and right, so cache it
+                                heads_provider.cache((new_revision_id, left[0]),
+                                                     (new_revision_id,))
+                                heads_provider.cache((new_revision_id, right[0]),
+                                                     (new_revision_id,))
                 last_jj = jj + nn
         last_i = i + n
         last_j = j + n

=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2008-02-03 22:55:08 +0000
+++ b/bzrlib/graph.py	2008-03-06 09:20:48 +0000
@@ -473,6 +473,38 @@
             return set(heads)
 
 
+class FrozenHeadsCache(object):
+    """Cache heads() calls, assuming the caller won't modify them."""
+
+    def __init__(self, graph):
+        self.graph = graph
+        self._heads = {}
+
+    def heads(self, keys):
+        """Return the heads of keys.
+
+        This matches the API of Graph.heads(), specifically the return value is
+        a set which can be mutated, and ordering of the input is not preserved
+        in the output.
+
+        :see also: Graph.heads.
+        :param keys: The keys to calculate heads for.
+        :return: A set containing the heads, which may be mutated without
+            affecting future lookups.
+        """
+        keys = frozenset(keys)
+        try:
+            return self._heads[keys]
+        except KeyError:
+            heads = frozenset(self.graph.heads(keys))
+            self._heads[keys] = heads
+            return heads
+
+    def cache(self, keys, heads):
+        """Store a known value."""
+        self._heads[frozenset(keys)] = frozenset(heads)
+
+
 class _BreadthFirstSearcher(object):
     """Parallel search breadth-first the ancestry of revisions.
 

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-03-05 23:53:03 +0000
+++ b/bzrlib/knit.py	2008-03-06 09:20:48 +0000
@@ -3085,7 +3085,7 @@
         parent_provider = _mod_graph.DictParentsProvider(
             self._revision_id_graph)
         graph_obj = _mod_graph.Graph(parent_provider)
-        head_cache = _mod_graph.HeadsCache(graph_obj)
+        head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
         self._heads_provider = head_cache
         return head_cache