Rev 416: Create a dotted revno cache, just as its own entity. in http://bazaar.launchpad.net/~jameinel/loggerhead/history_db
John Arbash Meinel
john at arbash-meinel.com
Mon May 3 18:53:15 BST 2010
At http://bazaar.launchpad.net/~jameinel/loggerhead/history_db
------------------------------------------------------------
revno: 416
revision-id: john at arbash-meinel.com-20100503175259-tu52g55tgasz46lw
parent: john at arbash-meinel.com-20100430224137-sijt3o96cms5n1jv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Mon 2010-05-03 12:52:59 -0500
message:
Create a dotted revno cache, just as its own entity.
-------------- next part --------------
=== modified file 'loggerhead/history.py'
--- a/loggerhead/history.py 2010-04-30 22:41:37 +0000
+++ b/loggerhead/history.py 2010-05-03 17:52:59 +0000
@@ -36,6 +36,7 @@
import textwrap
import threading
+from bzrlib import lru_cache
import bzrlib.branch
import bzrlib.delta
import bzrlib.errors
@@ -207,6 +208,48 @@
finally:
self._lock.release()
+
+_revno_revid_cache = lru_cache.LRUCache(10000)
+_revno_revid_lock = threading.RLock()
+
+
+class RevnoRevidMemoryCache(object):
+ """A store that maps revnos to revids based on the branch it is in.
+ """
+
+ def __init__(self, cache, lock, branch_tip):
+ # Note: what we'd really like is something that knew how long it takes
+ # to produce a revno * how often it is accessed. Since some revnos
+ # take 100x longer to produce than others. Could we cheat and just loop
+ # on __getitem__ ?
+ # There are also other possible layouts. A per-branch cache, with an
+ # LRU around the whole thing, etc. I chose this for simplicity.
+ self._branch_tip = branch_tip
+ self._cache = cache
+ # lru_cache is not thread-safe, so we need to lock all accesses.
+ # It is even modified when doing a get() on it.
+ self._lock = lock
+
+ def get(self, key):
+ """Return the data associated with `key`.
+ Return None if `key` is not in the cache.
+ """
+ self._lock.acquire()
+ try:
+ cached = self._cache.get((self._branch_tip, key))
+ finally:
+ self._lock.release()
+ return cached
+
+ def set(self, key, data):
+ """Store `data` under `key`.
+ """
+ self._lock.acquire()
+ try:
+ self._cache[(self._branch_tip, key)] = data
+ finally:
+ self._lock.release()
+
# Used to store locks that prevent multiple threads from building a
# revision graph for the same branch at the same time, because that can
# cause severe performance issues that are so bad that the system seems
@@ -243,11 +286,9 @@
self._branch = branch
self._branch_tags = None
self._inventory_cache = {}
- # TODO: These could all be cached globally in a thread-safe LRUCache
- # which then used (tip_revision, revid) or (tip_revision, revno)
- # as the key.
- self._revno_revid_cache = {}
- self._revid_revno_cache = {}
+ # Map from (tip_revision, revision_id) => revno_str
+ # and from (tip_revision, revno_str) => revision_id
+ self._revno_revid_cache = RevInfoMemoryCache(whole_history_data_cache)
self._querier = _get_querier(branch)
if self._querier is None:
assert cache_path is not None
@@ -265,6 +306,8 @@
self.log = logging.getLogger('loggerhead.%s' % (self._branch_nick,))
self.last_revid = branch.last_revision()
+ self._revno_revid_cache = RevnoRevidMemoryCache(_revno_revid_cache,
+ _revno_revid_lock, self._branch.last_revision())
@property
def has_revisions(self):
@@ -276,8 +319,9 @@
def get_revno(self, revid):
if revid is None:
return 'unknown'
- if revid in self._revid_revno_cache:
- return self._revid_revno_cache[revid]
+ revno_str = self._revno_revid_cache.get(revid)
+ if revno_str is not None:
+ return revno_str
try:
revnos = self._querier.get_dotted_revno_range_multi([revid])
dotted_revno = revnos[revid]
@@ -287,21 +331,23 @@
e = sys.exc_info()
return 'unknown'
revno_str = '.'.join(map(str, dotted_revno))
- self._revno_revid_cache[revno_str] = revid
- self._revid_revno_cache[revid] = revno_str
+ self._revno_revid_cache.set(revno_str, revid)
+ self._revno_revid_cache.set(revid, revno_str)
return revno_str
def get_revid_for_revno(self, revno_str):
# TODO: Create a memory cache, doing bi-directional mapping, possibly
# persisting between HTTP requests.
- if revno_str in self._revno_revid_cache:
- return self._revno_revid_cache[revno_str]
+ rev_id = self._revno_revid_cache.get(revno_str)
+ if rev_id is not None:
+ return rev_id
dotted_revno = tuple(map(int, revno_str.split('.')))
revnos = self._querier.get_revision_ids([dotted_revno])
- revnos = dict([('.'.join(map(str, drn)), ri) for drn, ri in revnos])
- self._revno_revid_cache.update(revnos)
- self._revid_revno_cache.update(
- [(ri, rn) for rn, ri in revnos.iteritems()])
+ revnos = dict([('.'.join(map(str, drn)), ri)
+ for drn, ri in revnos.iteritems()])
+ for revno_str, revid in revnos:
+ self._revno_revid_cache.set(revno_str, revid)
+ self._revno_revid_cache.set(revid, revno_str)
return revnos[revno_str]
def _get_lh_parent(self, revid):
More information about the bazaar-commits mailing list