Rev 417: Change get_changes to use a bulk approach to grabbing revid => revno. in http://bazaar.launchpad.net/~jameinel/loggerhead/history_db

John Arbash Meinel john at arbash-meinel.com
Mon May 3 21:34:53 BST 2010


At http://bazaar.launchpad.net/~jameinel/loggerhead/history_db

------------------------------------------------------------
revno: 417
revision-id: john at arbash-meinel.com-20100503203439-hmztxy1pa9nw6d7s
parent: john at arbash-meinel.com-20100503175259-tu52g55tgasz46lw
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Mon 2010-05-03 15:34:39 -0500
message:
  Change get_changes to use a bulk approach to grabbing revid => revno.
  
  Makes /files go from 9+s => 1.2s on first request, cached we are still ~0.5s.
-------------- next part --------------
=== modified file 'loggerhead/history.py'
--- a/loggerhead/history.py	2010-05-03 17:52:59 +0000
+++ b/loggerhead/history.py	2010-05-03 20:34:39 +0000
@@ -209,7 +209,7 @@
             self._lock.release()
 
 
-_revno_revid_cache = lru_cache.LRUCache(10000)
+_raw_revno_revid_cache = lru_cache.LRUCache(10000)
 _revno_revid_lock = threading.RLock()
 
 
@@ -233,6 +233,8 @@
     def get(self, key):
         """Return the data associated with `key`.
         Otherwise return None.
+
+        :param key: Can be a revno_str or a revid.
         """
         self._lock.acquire()
         try:
@@ -241,12 +243,15 @@
             self._lock.release()
         return cached
 
-    def set(self, key, data):
+    def set(self, revid, revno_str):
         """Store `data` under `key`.
         """
         self._lock.acquire()
         try:
-            self._cache[(self._branch_tip, key)] = data
+            # TODO: StaticTuples ? Probably only useful if we cache more than
+            #       10k of them. 100k/1M is probably useful.
+            self._cache[(self._branch_tip, revid)] = revno_str
+            self._cache[(self._branch_tip, revno_str)] = revid
         finally:
             self._lock.release()
 
@@ -288,7 +293,6 @@
         self._inventory_cache = {}
         # Map from (tip_revision, revision_id) => revno_str
         # and from (tip_revisino, revno_str) => revision_id
-        self._revno_revid_cache = RevInfoMemoryCache(whole_history_data_cache)
         self._querier = _get_querier(branch)
         if self._querier is None:
             assert cache_path is not None
@@ -303,10 +307,11 @@
         #       simpler...
         self._querier.ensure_branch_tip()
         self._branch_nick = self._branch.get_config().get_nickname()
+        self._show_merge_points = show_merge_points
         self.log = logging.getLogger('loggerhead.%s' % (self._branch_nick,))
 
         self.last_revid = branch.last_revision()
-        self._revno_revid_cache = RevnoRevidMemoryCache(_revno_revid_cache,
+        self._revno_revid_cache = RevnoRevidMemoryCache(_raw_revno_revid_cache,
             _revno_revid_lock, self._branch.last_revision())
 
     @property
@@ -322,31 +327,46 @@
         revno_str = self._revno_revid_cache.get(revid)
         if revno_str is not None:
             return revno_str
-        try:
-            revnos = self._querier.get_dotted_revno_range_multi([revid])
-            dotted_revno = revnos[revid]
-        except: # ???
-            import pdb; pdb.set_trace()
-            import sys
-            e = sys.exc_info()
-            return 'unknown'
+        revnos = self._querier.get_dotted_revno_range_multi([revid])
+        # TODO: Should probably handle KeyError?
+        dotted_revno = revnos[revid]
         revno_str = '.'.join(map(str, dotted_revno))
-        self._revno_revid_cache.set(revno_str, revid)
         self._revno_revid_cache.set(revid, revno_str)
         return revno_str
 
+    def get_revnos(self, revids):
+        """Get a map of revid => revno for all revisions."""
+        revno_map = {}
+        unknown = []
+        for revid in revids:
+            if revid is None:
+                revno_map[revid] = 'unknown'
+                continue
+            revno_str = self._revno_revid_cache.get(revid)
+            if revno_str is not None:
+                revno_map[revid] = revno_str
+                continue
+            unknown.append(revid)
+        if not unknown:
+            return revno_map
+        # querier returns dotted revno tuples
+        query_revno_map = self._querier.get_dotted_revno_range_multi(
+                            unknown)
+        for revid, dotted_revno in query_revno_map.iteritems():
+            revno_str = '.'.join(map(str, dotted_revno))
+            self._revno_revid_cache.set(revid, revno_str)
+            revno_map[revid] = revno_str
+        return revno_map
+
     def get_revid_for_revno(self, revno_str):
-        # TODO: Create a memory cache, doing bi-directional mapping, possibly
-        #       persisting between HTTP requests.
-        rev_id = self._revno_revid_cache.get(revno_str)
-        if rev_id is not None:
-            return rev_id
+        revid = self._revno_revid_cache.get(revno_str)
+        if revid is not None:
+            return revid
         dotted_revno = tuple(map(int, revno_str.split('.')))
         revnos = self._querier.get_revision_ids([dotted_revno])
         revnos = dict([('.'.join(map(str, drn)), ri)
                        for drn, ri in revnos.iteritems()])
         for revno_str, revid in revnos:
-            self._revno_revid_cache.set(revno_str, revid)
             self._revno_revid_cache.set(revid, revno_str)
         return revnos[revno_str]
 
@@ -385,8 +405,8 @@
             #       grabs the full history, and we now support stopping early.
             history = self._branch.repository.iter_reverse_revision_history(
                             start_revid)
-            for rev_id in history:
-                yield rev_id
+            for revid in history:
+                yield revid
             return
         revid_set = set(revid_list)
 
@@ -611,21 +631,30 @@
         if len(changes) == 0:
             return changes
 
-        # some data needs to be recalculated each time, because it may
-        # change as new revisions are added.
+        needed_revnos = set()
         for change in changes:
+            needed_revnos.add(change.revid)
+            needed_revnos.update([p_id for p_id in change.parents])
+        revno_map = self.get_revnos(needed_revnos)
+
+        def merge_points_callback(a_change, attr):
             merge_revids = self.simplify_merge_point_list(
-                               self.get_merge_point_list(change.revid))
-            change.merge_points = [
-                util.Container(revid=r,
-                revno=self.get_revno(r)) for r in merge_revids]
+                               self.get_merge_point_list(a_change.revid))
+            if not merge_revids:
+                return []
+            revno_map = self.get_revnos(merge_revids)
+            return [util.Container(revid=r, revno=revno_map[r])
+                    for r in merge_revids]
+        parity = 0
+        for change in changes:
+            if self._show_merge_points:
+                change._set_property('merge_points', merge_points_callback)
+            else:
+                change.merge_points = []
             if len(change.parents) > 0:
-                change.parents = [util.Container(revid=r,
-                    revno=self.get_revno(r)) for r in change.parents]
-            change.revno = self.get_revno(change.revid)
-
-        parity = 0
-        for change in changes:
+                change.parents = [util.Container(revid=r, revno=revno_map[r])
+                                  for r in change.parents]
+            change.revno = revno_map[change.revid]
             change.parity = parity
             parity ^= 1
 
@@ -635,7 +664,7 @@
         # FIXME: deprecated method in getting a null revision
         revid_list = filter(lambda revid: not bzrlib.revision.is_null(revid),
                             revid_list)
-        parent_map = self._branch.repository.get_graph().get_parent_map(
+        parent_map = self._branch.repository.get_parent_map(
                          revid_list)
         # We need to return the answer in the same order as the input,
         # less any ghosts.



More information about the bazaar-commits mailing list