[RFC][PATCH] Speed improvement on the Weave.join() function
Goffredo Baroncelli
kreijack at alice.it
Sat Dec 10 18:05:29 GMT 2005
Hi all,
the patch below contains an optimization for the Weave.join() function, so that it
no longer performs unnecessary content expansion when a revision to be merged is
already present. For example, when merging the last 30 revisions of the bzr.dev
main branch you no longer have to expand the text of all the ~2900 revisions present
in the inventory weave. The gain is shown in the example below, where an inventory
weave with 2094 revisions is joined with one holding 2902.
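
The idea, roughly, is that a version already present in self with the same sha1 and
the same parents does not need to be extracted and re-added at all. Here is a
simplified sketch of that logic (just an illustration on a toy model, not the real
Weave class; the SimpleWeave name and its dict-based storage are made up):

class SimpleWeave(object):
    def __init__(self):
        # name -> (sha1, list of parent names, list of text lines)
        self.versions = {}

    def add(self, name, parent_names, lines, sha1):
        self.versions[name] = (sha1, list(parent_names), list(lines))

    def join(self, other):
        merged = processed = 0
        for name, (sha1, parents, lines) in other.versions.items():
            processed += 1
            if name in self.versions:
                my_sha1, my_parents, my_lines = self.versions[name]
                # Same content and same parents: already merged, so skip
                # the expensive extraction and re-insertion.
                if sha1 == my_sha1 and parents == my_parents:
                    continue
            merged += 1
            self.add(name, parents, lines, sha1)
        return merged, processed

The patch further down does essentially this on the real Weave data structures,
where parents are stored as indexes and so have to be mapped back to names before
comparing.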
$ cat jointest.py
import sys
from bzrlib.weavefile import read_weave, write_weave
w1 = read_weave(open(sys.argv[1], 'rb'))
w2 = read_weave(open(sys.argv[2], 'rb'))
w1.join(w2)
write_weave(w1, sys.stdout)
$ # with the patch
$ time PYTHONPATH=../bzr-ghigo python jointest.py \
inventory.weave-2094revisions \
inventory.weave-2902revisions >inventory.weave-fast
real 5m33.283s
user 4m31.000s
sys 0m4.008s
$ # without the patch
$ time PYTHONPATH=../bzr.dev python jointest.py \
inventory.weave-2094revisions \
inventory.weave-2902revisions >inventory.weave-slow
real 11m36.515s
user 9m56.713s
sys 0m7.032s
$ md5sum inventory.weave-{fast,slow}
69d0568f442f3d6076473b47de81874c inventory.weave-fast
69d0568f442f3d6076473b47de81874c inventory.weave-slow
--
=== modified file 'bzrlib/weave.py'
--- bzrlib/weave.py
+++ bzrlib/weave.py
@@ -739,8 +739,9 @@
                              'killed-both'), \
                     state
 
-
+
     def join(self, other):
+        import sys, time
         """Integrate versions from other into this weave.
 
         The resulting weave contains all the history of both weaves;
@@ -756,18 +757,37 @@
         # will be ok
         # work through in index order to make sure we get all dependencies
         for other_idx, name in enumerate(other._names):
-            if self._check_version_consistent(other, other_idx, name):
-                continue
+            self._check_version_consistent(other, other_idx, name)
+
+        merged = 0
+        processed = 0
+        time0 = time.time()
         for other_idx, name in enumerate(other._names):
-            # TODO: If all the parents of the other version are already
+            # TODO: If all the parents of the other version are already
             # present then we can avoid some work by just taking the delta
             # and adjusting the offsets.
             new_parents = self._imported_parents(other, other_idx)
+            sha1 = other._sha1s[other_idx]
+
+            processed += 1
+
+            if name in self._names:
+                idx = self.lookup(name)
+                n1 = map(other.idx_to_name, other._parents[other_idx])
+                n2 = map(self.idx_to_name, self._parents[idx])
+                if sha1 == self._sha1s[idx] and n1 == n2:
+                    continue
+
+            merged += 1
             lines = other.get_lines(other_idx)
-            sha1 = other._sha1s[other_idx]
             self.add(name, new_parents, lines, sha1)
-
+        mutter("merged = %d, processed = %d, file_id=%s; deltat=%d" % (
+            merged, processed, self._weave_name, time.time() - time0))
+
+
+
+
 
     def _imported_parents(self, other, other_idx):
         """Return list of parents in self corresponding to indexes in other."""
         new_parents = []
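
One detail worth noting: in the skip test the parents are compared by name (via
idx_to_name) rather than by raw index, because the same version normally sits at a
different index in each weave. A toy illustration of why the index lists alone are
not comparable (made-up data, not bzrlib code):

# index -> name tables of two weaves holding the same revisions
other_names = ['rev-1', 'rev-2', 'rev-3']
self_names = ['rev-0', 'rev-1', 'rev-2', 'rev-3']

other_parents = [0, 1]     # parents of rev-3, as indexes into other_names
self_parents = [1, 2]      # parents of rev-3, as indexes into self_names

n1 = [other_names[i] for i in other_parents]    # ['rev-1', 'rev-2']
n2 = [self_names[i] for i in self_parents]      # ['rev-1', 'rev-2']
assert n1 == n2    # same parents, even though the index lists differ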
--
gpg key@ keyserver.linux.it: Goffredo Baroncelli (ghigo) <kreijack at inwind_DOT_it>
Key fingerprint = CE3C 7E01 6782 30A3 5B87 87C0 BB86 505C 6B2A CFF9