Rev 74: Caching the mainline info as Python objects, in http://bzr.arbash-meinel.com/plugins/history_db
John Arbash Meinel
john at arbash-meinel.com
Fri Apr 9 19:37:10 BST 2010
At http://bzr.arbash-meinel.com/plugins/history_db
------------------------------------------------------------
revno: 74
revision-id: john at arbash-meinel.com-20100409183653-wodbju2xhh3eq90s
parent: john at arbash-meinel.com-20100409182525-uuwulalrud5wsxc4
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: history_db
timestamp: Fri 2010-04-09 13:36:53 -0500
message:
Caching the mainline info as Python objects.
Increases memory usage slightly, but speeds things up significantly (2m21s).
-------------- next part --------------
=== modified file 'history_db.py'
--- a/history_db.py 2010-04-09 18:25:25 +0000
+++ b/history_db.py 2010-04-09 18:36:53 +0000
@@ -96,6 +96,7 @@
self._db_id_to_rev_id = {}
self._stats = defaultdict(lambda: 0)
# A cache of entries in the dotted_revno table
+ # TODO: This would probably be better as an LRU cache
self._dotted_revno_cache = {}
# Map child_id => [parent_db_ids]
self._db_parent_map = {}
@@ -137,17 +138,25 @@
# Not importing anything because the data is already present
return False
self._stats['total_nodes_inserted'] += len(nodes)
- tip_db_id = self._rev_id_to_db_id[tip_rev_id]
+ rev_to_db = self._rev_id_to_db_id
+ tip_db_id = rev_to_db[tip_rev_id]
revno_entries = []
+ to_cache_entry = []
+ st = static_tuple.StaticTuple
for node in nodes:
# TODO: Do we need to track the 'end_of_merge' and 'merge_depth'
# fields?
+ db_id = rev_to_db[node.key[0]]
revno_entries.append((tip_db_id,
- self._rev_id_to_db_id[node.key[0]],
+ db_id,
'.'.join(map(str, node.revno)),
node.end_of_merge,
node.merge_depth))
+ to_cache_entry.append(st(db_id, st(st.from_sequence(node.revno),
+ node.end_of_merge,
+ node.merge_depth)))
schema.create_dotted_revnos(self._cursor, revno_entries)
+ self._dotted_revno_cache[tip_db_id] = to_cache_entry
return True
def _update_parents(self, nodes):
@@ -554,8 +563,8 @@
# Information from the dotted_revno table for revisions that are in the
# already-imported mainline.
self._imported_dotted_revno = {}
- # Map from (dotted,revno,) => db_id
- self._dotted_to_db_id = {}
+ # What dotted revnos have been loaded
+ self._known_dotted = set()
# This is the gdfo of the current mainline revision search tip. This is
# the threshold such that
self._imported_gdfo = None
@@ -736,14 +745,23 @@
def _step_mainline(self):
"""Move the mainline pointer by one, updating the data."""
self._stats['step mainline'] += 1
- res = self._cursor.execute(
- "SELECT merged_revision, revno, end_of_merge, merge_depth"
- " FROM dotted_revno WHERE tip_revision = ?",
- [self._imported_mainline_id]).fetchall()
- stuple = static_tuple.StaticTuple.from_sequence
- st = static_tuple.StaticTuple
- dotted_info = [(r[0], st(stuple(map(int, r[1].split('.'))), r[2], r[3]))
- for r in res]
+ if self._imported_mainline_id in self._importer._dotted_revno_cache:
+ self._stats['step mainline cached'] += 1
+ dotted_info = self._importer._dotted_revno_cache[
+ self._imported_mainline_id]
+ else:
+ res = self._cursor.execute(
+ "SELECT merged_revision, revno, end_of_merge, merge_depth"
+ " FROM dotted_revno WHERE tip_revision = ?",
+ [self._imported_mainline_id]).fetchall()
+ stuple = static_tuple.StaticTuple.from_sequence
+ st = static_tuple.StaticTuple
+ dotted_info = [st(r[0], st(stuple(map(int, r[1].split('.'))),
+ r[2], r[3]))
+ for r in res]
+ self._stats['step mainline cache missed'] += 1
+ self._importer._dotted_revno_cache[self._imported_mainline_id] = \
+ dotted_info
self._stats['step mainline added'] += len(dotted_info)
self._update_info_from_dotted_revno(dotted_info)
# TODO: We could remove search tips that show up as newly merged
@@ -856,7 +874,7 @@
# TODO: We can move this iterator into a parameter, and have it
# continuously updated from _step_mainline()
self._imported_dotted_revno.update(dotted_info)
- self._dotted_to_db_id.update([(i[1][0], i[0]) for i in dotted_info])
+ self._known_dotted.update([i[1][0] for i in dotted_info])
for db_id, (revno, eom, depth) in dotted_info:
if len(revno) > 1: # dotted revno, make sure branch count is right
base_revno = revno[0]
@@ -967,7 +985,7 @@
"""
self._stats['step to latest'] += 1
while self._imported_mainline_id is not None:
- if (base_revno,) in self._dotted_to_db_id:
+ if (base_revno,) in self._known_dotted:
# We have walked far enough to load the original revision,
# which means we've loaded all children.
self._stats['step to latest found base'] += 1
@@ -978,7 +996,7 @@
root_of_branch_revno = (base_revno, branch_count, 1)
# Note: if branch_count == 0, that means we haven't seen any
# other branches for this revision.
- if root_of_branch_revno in self._dotted_to_db_id:
+ if root_of_branch_revno in self._known_dotted:
break
self._stats['step mainline to-latest'] += 1
if base_revno == 0:
More information about the bazaar-commits mailing list