Rev 5675: Add flags for enabling --delta computation. in http://bazaar.launchpad.net/~jameinel/bzr/2.4-log-subdir

John Arbash Meinel john at arbash-meinel.com
Tue Feb 22 22:06:34 UTC 2011


At http://bazaar.launchpad.net/~jameinel/bzr/2.4-log-subdir

------------------------------------------------------------
revno: 5675
revision-id: john at arbash-meinel.com-20110222220628-vnvresjvzvt0i9mx
parent: john at arbash-meinel.com-20110222210114-1h9rkncrxpyt67p5
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.4-log-subdir
timestamp: Tue 2011-02-22 16:06:28 -0600
message:
  Add flags for enabling --delta computation.
  
  Something isn't 100% correct, because filtering after iter_changes gives a different change count (599) than the --filter run (610).
  The performance is pretty darn good, though.
  [--filter] Took 4.184s for 100 revisions (610 changes, 88005 file_ids) and path: bzrlib
  vs
  Took 0.702s for 100 revisions (599 changes, 88005 file_ids) and path: bzrlib
  vs
  [--filter] Took 3.268s for 100 revisions (0 changes, 88005 file_ids) and path: bzrlib
  and
  Took 0.288s for 100 revisions (0 changes, 88005 file_ids) and path: bzrlib
  Note also that iter_changes is faster in the chk form than in the pre-filtered form.
  Not huge, but 0.702-0.288 = 0.414, vs 4.184-3.268 = 0.916.
-------------- next part --------------
=== modified file 'time_delta_search.py'
--- a/time_delta_search.py	2011-02-22 21:01:14 +0000
+++ b/time_delta_search.py	2011-02-22 22:06:28 +0000
@@ -1,6 +1,6 @@
 import optparse
 import time
-from bzrlib import branch, initialize, trace, ui
+from bzrlib import branch, initialize, trace, ui, revisiontree
 
 p = optparse.OptionParser()
 p.add_option('--path', type='str', default='bzrlib', help='Set the search path')
@@ -12,6 +12,8 @@
              help='Show the count of file_ids per revision.')
 p.add_option('--no-cache', dest='cache', action='store_false', default=True,
              help='Disable caching based on pid_map chk')
+p.add_option('--delta', action='store_true', default=False,
+             help='Compute the delta for each revision.')
 opts, args = p.parse_args()
 
 with initialize():
@@ -23,17 +25,31 @@
     count = 0
     total_count = 0
     pb = ui.ui_factory.nested_progress_bar()
+    new_tree = None
+    change_count = 0
     for idx, rev_id in enumerate(
             b.repository.iter_reverse_revision_history(b.last_revision())):
         tree = b.repository.revision_tree(rev_id)
         if file_id is None:
             file_id = tree.path2id(opts.path)
         if opts.filter:
-            file_ids = tree.inventory.filter([file_id])._byid.keys()
+            filtered_inv = tree.inventory.filter([file_id])
+            file_ids = filtered_inv._byid.keys()
+            filtered_tree = revisiontree.RevisionTree(b.repository,
+                                                      filtered_inv, rev_id)
+            if opts.delta and new_tree is not None:
+                changes = list(new_tree.iter_changes(filtered_tree))
+                change_count += len(changes)
+            new_tree = filtered_tree
         else:
             file_ids = search._expand_file_ids(tree, [file_id])
             if not opts.cache:
                 search = b.repository.get_delta_searcher()
+            if opts.delta and new_tree is not None:
+                changes = new_tree.iter_changes(tree)
+                filtered_changes = [r for r in changes if r[0] in file_ids]
+                change_count += len(filtered_changes)
+            new_tree = tree
         if opts.show_count:
             print rev_id, len(file_ids)
         count += 1
@@ -43,6 +59,5 @@
             break
     tend = time.time()
 
-    trace.note('Found total %d file_ids' % (total_count,))
-    trace.note('Took %.3fs for %d revisions and path: %s'
-               % (tend - tstart, count, opts.path))
+    trace.note('Took %.3fs for %d revisions (%d changes, %d file_ids) and path: %s'
+               % (tend - tstart, count, change_count, total_count, opts.path))



More information about the bazaar-commits mailing list