Rev 418: Bring in the updated history-db changes. in http://bazaar.launchpad.net/~jameinel/loggerhead/integration
John Arbash Meinel
john at arbash-meinel.com
Mon May 3 21:43:10 BST 2010
At http://bazaar.launchpad.net/~jameinel/loggerhead/integration
------------------------------------------------------------
revno: 418 [merge]
revision-id: john at arbash-meinel.com-20100503204255-xqqrtvdt33k3v768
parent: john at arbash-meinel.com-20100430222947-dmvjm1w2ve317p7q
parent: john at arbash-meinel.com-20100503203439-hmztxy1pa9nw6d7s
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: integration
timestamp: Mon 2010-05-03 15:42:55 -0500
message:
Bring in the updated history-db changes.
modified:
loggerhead/history.py history.py-20061211064342-102iqirsciyvgtcf-5
-------------- next part --------------
=== modified file 'loggerhead/history.py'
--- a/loggerhead/history.py 2010-04-30 22:29:47 +0000
+++ b/loggerhead/history.py 2010-05-03 20:42:55 +0000
@@ -36,6 +36,7 @@
import textwrap
import threading
+from bzrlib import lru_cache
import bzrlib.branch
import bzrlib.delta
import bzrlib.errors
@@ -207,6 +208,53 @@
finally:
self._lock.release()
+
+_raw_revno_revid_cache = lru_cache.LRUCache(10000)
+_revno_revid_lock = threading.RLock()
+
+
+class RevnoRevidMemoryCache(object):
+ """A store that maps revnos to revids based on the branch it is in.
+ """
+
+ def __init__(self, cache, lock, branch_tip):
+ # Note: what we'd really like is something that knew how long it takes
+ # to produce a revno * how often it is accessed. Since some revnos
+ # take 100x longer to produce than others. Could we cheat and just loop
+ # on __getitem__ ?
+ # There are also other possible layouts. A per-branch cache, with an
+ # LRU around the whole thing, etc. I chose this for simplicity.
+ self._branch_tip = branch_tip
+ self._cache = cache
+ # lru_cache is not thread-safe, so we need to lock all accesses.
+ # It is even modified when doing a get() on it.
+ self._lock = lock
+
+ def get(self, key):
+ """Return the data associated with `key`.
+ Otherwise return None.
+
+ :param key: Can be a revno_str or a revid.
+ """
+ self._lock.acquire()
+ try:
+ cached = self._cache.get((self._branch_tip, key))
+ finally:
+ self._lock.release()
+ return cached
+
+ def set(self, revid, revno_str):
+ """Store the bidirectional revid <-> revno_str mapping.
+ """
+ self._lock.acquire()
+ try:
+ # TODO: StaticTuples ? Probably only useful if we cache more than
+ # 10k of them. 100k/1M is probably useful.
+ self._cache[(self._branch_tip, revid)] = revno_str
+ self._cache[(self._branch_tip, revno_str)] = revid
+ finally:
+ self._lock.release()
+
# Used to store locks that prevent multiple threads from building a
# revision graph for the same branch at the same time, because that can
# cause severe performance issues that are so bad that the system seems
@@ -245,6 +293,8 @@
self._branch = branch
self._branch_tags = None
self._inventory_cache = {}
+ # Map from (tip_revision, revision_id) => revno_str
+ # and from (tip_revision, revno_str) => revision_id
self._querier = _get_querier(branch)
if self._querier is None:
assert cache_path is not None
@@ -263,6 +313,8 @@
self.log = logging.getLogger('loggerhead.%s' % (self._branch_nick,))
self.last_revid = branch.last_revision()
+ self._revno_revid_cache = RevnoRevidMemoryCache(_raw_revno_revid_cache,
+ _revno_revid_lock, self._branch.last_revision())
@property
def has_revisions(self):
@@ -274,20 +326,51 @@
def get_revno(self, revid):
if revid is None:
return 'unknown'
- try:
- revnos = self._querier.get_dotted_revno_range_multi([revid])
- dotted_revno = revnos[revid]
- except: # ???
- import pdb; pdb.set_trace()
- import sys
- e = sys.exc_info()
- return 'unknown'
- return '.'.join(map(str, dotted_revno))
-
- def get_dotted_to_rev_id(self, dotted_revnos):
- # TODO: Create a memory cache, doing bi-directional mapping, possibly
- # persisting between HTTP requests.
- return self._querier.get_revision_ids(dotted_revnos)
+ revno_str = self._revno_revid_cache.get(revid)
+ if revno_str is not None:
+ return revno_str
+ revnos = self._querier.get_dotted_revno_range_multi([revid])
+ # TODO: Should probably handle KeyError?
+ dotted_revno = revnos[revid]
+ revno_str = '.'.join(map(str, dotted_revno))
+ self._revno_revid_cache.set(revid, revno_str)
+ return revno_str
+
+ def get_revnos(self, revids):
+ """Get a map of revid => revno for all revisions."""
+ revno_map = {}
+ unknown = []
+ for revid in revids:
+ if revid is None:
+ revno_map[revid] = 'unknown'
+ continue
+ revno_str = self._revno_revid_cache.get(revid)
+ if revno_str is not None:
+ revno_map[revid] = revno_str
+ continue
+ unknown.append(revid)
+ if not unknown:
+ return revno_map
+ # querier returns dotted revno tuples
+ query_revno_map = self._querier.get_dotted_revno_range_multi(
+ unknown)
+ for revid, dotted_revno in query_revno_map.iteritems():
+ revno_str = '.'.join(map(str, dotted_revno))
+ self._revno_revid_cache.set(revid, revno_str)
+ revno_map[revid] = revno_str
+ return revno_map
+
+ def get_revid_for_revno(self, revno_str):
+ revid = self._revno_revid_cache.get(revno_str)
+ if revid is not None:
+ return revid
+ dotted_revno = tuple(map(int, revno_str.split('.')))
+ revnos = self._querier.get_revision_ids([dotted_revno])
+ revnos = dict([('.'.join(map(str, drn)), ri)
+ for drn, ri in revnos.iteritems()])
+ for revno_str, revid in revnos:
+ self._revno_revid_cache.set(revid, revno_str)
+ return revnos[revno_str]
def _get_lh_parent(self, revid):
"""Get the left-hand parent of a given revision id."""
@@ -324,8 +407,8 @@
# grabs the full history, and we now support stopping early.
history = self._branch.repository.iter_reverse_revision_history(
start_revid)
- for rev_id in history:
- yield rev_id
+ for revid in history:
+ yield revid
return
revid_set = set(revid_list)
@@ -367,8 +450,7 @@
return self.last_revid
try:
if self.revno_re.match(revid):
- revno = tuple(map(int, revid.split('.')))
- val = self.get_dotted_to_rev_id([revno])[revno]
+ val = self.get_revid_for_revno([revid])[revid]
# XXX: Do this more cleanly
if val is None:
raise KeyError
@@ -584,26 +666,31 @@
if len(changes) == 0:
return changes
- # some data needs to be recalculated each time, because it may
- # change as new revisions are added.
+ needed_revnos = set()
+ for change in changes:
+ needed_revnos.add(change.revid)
+ needed_revnos.update([p_id for p_id in change.parents])
+ revno_map = self.get_revnos(needed_revnos)
+
def merge_points_callback(a_change, attr):
merge_revids = self.simplify_merge_point_list(
self.get_merge_point_list(a_change.revid))
- return [util.Container(revid=r, revno=self.get_revno(r))
+ if not merge_revids:
+ return []
+ revno_map = self.get_revnos(merge_revids)
+ return [util.Container(revid=r, revno=revno_map[r])
for r in merge_revids]
+ parity = 0
for change in changes:
if self._show_merge_points:
change._set_property('merge_points', merge_points_callback)
else:
change.merge_points = []
if len(change.parents) > 0:
- change.parents = [util.Container(revid=r,
- revno=self.get_revno(r)) for r in change.parents]
- change.revno = self.get_revno(change.revid)
-
- parity = 0
- for change in changes:
+ change.parents = [util.Container(revid=r, revno=revno_map[r])
+ for r in change.parents]
+ change.revno = revno_map[change.revid]
change.parity = parity
parity ^= 1
@@ -613,7 +700,7 @@
# FIXME: deprecated method in getting a null revision
revid_list = filter(lambda revid: not bzrlib.revision.is_null(revid),
revid_list)
- parent_map = self._branch.repository.get_graph().get_parent_map(
+ parent_map = self._branch.repository.get_parent_map(
revid_list)
# We need to return the answer in the same order as the input,
# less any ghosts.
More information about the bazaar-commits
mailing list