Rev 3720: Switch back to using a single dict. in http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/lighter_log_file
John Arbash Meinel
john at arbash-meinel.com
Thu Sep 18 20:39:47 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/lighter_log_file
------------------------------------------------------------
revno: 3720
revision-id: john at arbash-meinel.com-20080918193947-0nbfx6f2yu3zvi3k
parent: john at arbash-meinel.com-20080918193505-d1jihbz38y6ooty3
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lighter_log_file
timestamp: Thu 2008-09-18 14:39:47 -0500
message:
Switch back to using a single dict.
The algorithm is simpler, and though I rember differently, the
numbers show that it performs identically.
-------------- next part --------------
=== modified file 'bzrlib/log.py'
--- a/bzrlib/log.py 2008-09-18 19:35:05 +0000
+++ b/bzrlib/log.py 2008-09-18 19:39:47 +0000
@@ -600,44 +600,36 @@
# ancestry_values contains a pointer from a revision_id to either a tuple,
# or a frozenset() of a given per-file ancestry.
ancestry_values = {_mod_revision.NULL_REVISION: frozenset()}
- # ancestry_pointers maps from a whole-tree revision_id into the
- # ancestry_values dictionary for the given revision. (So given a whole-tree
- # revision_id, what is the per-file ancestry for that revision.)
- ancestry_pointers = {}
_hits = [0, 0, 0, 0, 0, 0, 0]
for rev in sorted_rev_list:
parents = parent_map[rev]
- rev_pointer = None
rev_ancestry = None
if rev in modified_text_revisions:
# We know this will be a new node
rev_ancestry = [rev]
- rev_pointer = rev
for parent in parents:
- if parent not in ancestry_pointers:
+ if parent not in ancestry_values:
# parent is a Ghost, which won't be present in
# sorted_rev_list, but we may access it later, so create an
# empty node for it
- ancestry_pointers[parent] = _mod_revision.NULL_REVISION
- if rev_pointer is None:
+ ancestry_values[parent] = frozenset()
+ if rev_ancestry is None:
# We don't have anything worked out yet, so just copy this
# parent's ancestry_pointer
- rev_pointer = ancestry_pointers[parent]
+ rev_ancestry = ancestry_values[parent]
else:
# Check to see if this other parent introduces any new
# revisions. If it doesn't then we don't need to create a new
# set.
- parent_pointer = ancestry_pointers[parent]
- if parent_pointer == rev_pointer:
+ parent_ancestry = ancestry_values[parent]
+ if rev_ancestry is None:
+ rev_ancestry = ancestry_values[rev_pointer]
+ if parent_ancestry is rev_ancestry:
# They both point to the same ancestry value, so we know
# there is nothing new
_hits[5] += 1
continue
- if rev_ancestry is None:
- rev_ancestry = ancestry_values[rev_pointer]
- parent_ancestry = frozenset(ancestry_values[parent_pointer])
- # We had to compute the frozenset, so just cache it
- # ancestry_values[parent_pointer] = parent_ancestry
+ parent_ancestry = frozenset(parent_ancestry)
new_revisions = parent_ancestry.difference(rev_ancestry)
if new_revisions:
# We need something that we can modify. We can use a simple
@@ -647,20 +639,13 @@
_hits[6] += 1
rev_ancestry = list(rev_ancestry)
rev_ancestry.extend(new_revisions)
- # We will be creating a new value
- rev_pointer = rev
- if rev_pointer is None:
- rev_pointer = _mod_revision.NULL_REVISION
- elif rev_pointer == rev:
- # We are creating a new node update the value cache.
- assert rev_ancestry is not None
- # we use 'tuple()' here to save memory (at the cost of re-computing
- # the ancestry set on demand). Alternatively, we can use
- # frozenset() because we know that we will need to compute it for
- # 99% of the revisions.
- ancestry_values[rev] = frozenset(rev_ancestry)
- assert rev_pointer in ancestry_values
- ancestry_pointers[rev] = rev_pointer
+ if rev_ancestry is None:
+ rev_ancestry = frozenset()
+ elif isinstance(rev_ancestry, list):
+ # We can use "tuple()" here to save memory at the cost of CPU, or
+ # use frozenset() which saves CPU but consumes RAM.
+ rev_ancestry = frozenset(rev_ancestry)
+ ancestry_values[rev] = rev_ancestry
trev_ancestry = time.time()
@@ -670,26 +655,22 @@
if len(parents) > 1:
_hits[1] += 1
leftparent = parents[0]
- left_pointer = ancestry_pointers[leftparent]
- left_ancestry = None
+ left_ancestry = ancestry_values[leftparent]
for rightparent in parents[1:]:
- right_pointer = ancestry_pointers[rightparent]
- if right_pointer != left_pointer:
- _hits[3] += 1
- if left_ancestry is None:
- left_ancestry = ancestry_values[left_pointer]
- left_ancestry = frozenset(left_ancestry)
- right_ancestry = ancestry_values[right_pointer]
- if not left_ancestry.issuperset(right_ancestry):
- _hits[4] += 1
- return True
- else:
+ right_ancestry = ancestry_values[rightparent]
+ if left_ancestry is right_ancestry:
_hits[2] += 1
+ continue
+ _hits[3] += 1
+ left_ancestry = frozenset(left_ancestry)
+ if not left_ancestry.issuperset(right_ancestry):
+ _hits[4] += 1
+ return True
return False
trace.note('Found %s nodes, and %s unique values for %s view_revs'
' modified_text_revisions: %s',
- len(ancestry_pointers), len(ancestry_values),
+ 0, len(ancestry_values),
len(view_revs_iter), len(modified_text_revisions))
# filter from the view the revisions that did not change or merge
More information about the bazaar-commits
mailing list