Rev 6030: Small tweaks to search performance, though still at depth=100 the primary time in http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
John Arbash Meinel
john at arbash-meinel.com
Fri Aug 12 12:12:54 UTC 2011
At http://bazaar.launchpad.net/~jameinel/bzr/2.4-too-much-walking-388269
------------------------------------------------------------
revno: 6030
revision-id: john at arbash-meinel.com-20110812121235-25016ifvrftzt8qm
parent: john at arbash-meinel.com-20110812083925-lnz0pz1auiwgstcv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-too-much-walking-388269
timestamp: Fri 2011-08-12 14:12:35 +0200
message:
Small tweaks to search performance, though still at depth=100 the primary time
is spent on the server re-walking the history.
-------------- next part --------------
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2011-08-12 08:39:25 +0000
+++ b/bzrlib/graph.py 2011-08-12 12:12:35 +0000
@@ -61,7 +61,7 @@
def get_parent_map(self, keys):
"""See StackedParentsProvider.get_parent_map"""
ancestry = self.ancestry
- return dict((k, ancestry[k]) for k in keys if k in ancestry)
+ return dict([(k, ancestry[k]) for k in keys if k in ancestry])
class StackedParentsProvider(object):
@@ -1926,7 +1926,6 @@
# Any given parent is likely to have only a small handful
# of children, many will have only one. So we avoid mem overhead of
# a list, in exchange for extra copying of tuples
- # TODO: Use static_tuple
if p not in child_map:
child_map[p] = (child,)
else:
@@ -1935,27 +1934,35 @@
def _find_possible_heads(parent_map, tip_keys, depth):
- """Walk backwards through the parent_map, finding possible heads."""
+ """Walk backwards (towards children) through the parent_map.
+
+ This finds 'heads' that will hopefully succinctly describe our search
+ graph.
+ """
child_map = invert_parent_map(parent_map)
heads = set()
- current_roots = set(tip_keys)
+ current_roots = tip_keys
walked = set(current_roots)
- while current_roots and depth:
+ while current_roots and depth > 0:
depth -= 1
children = set()
+ children_update = children.update
for p in current_roots:
# Is it better to pre- or post- filter the children?
try:
- children.update(child_map[p])
+ children_update(child_map[p])
except KeyError:
heads.add(p)
- # TODO: Use the right set notation here since 'walked' grows large,
- # while 'children' should usually be small
- # Don't walk keys we've already walked
- children.difference_update(walked)
+ # If we've seen a key before, we don't want to walk it again. Note that
+ # 'children' stays relatively small while 'walked' grows large. So
+ # don't use 'difference_update' here which has to walk all of 'walked'.
+ # '.difference' is smart enough to walk only children and compare it to
+ # walked.
+ children = children.difference(walked)
walked.update(children)
current_roots = children
if current_roots:
+ # We walked to the end of depth, so these are the new tips.
heads.update(current_roots)
return heads
=== modified file 'bzrlib/vf_repository.py'
--- a/bzrlib/vf_repository.py 2011-08-12 08:39:25 +0000
+++ b/bzrlib/vf_repository.py 2011-08-12 12:12:35 +0000
@@ -16,6 +16,7 @@
"""Repository formats built around versioned files."""
+import sys
import time
from bzrlib.lazy_import import lazy_import
More information about the bazaar-commits
mailing list