Rev 44: Push more into the get_text function, and add another fixture. in http://bzr.arbash-meinel.com/plugins/index2

Wed Jul 16 16:18:15 BST 2008

At http://bzr.arbash-meinel.com/plugins/index2

------------------------------------------------------------
revno: 44
revision-id: john at arbash-meinel.com-20080716151805-h8kzp0e7199ujwbn
parent: john at arbash-meinel.com-20080716150353-alnnb5j6njppqmtv
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: index2
timestamp: Wed 2008-07-16 10:18:05 -0500
message:
  Push more into the get_text function, and add another fixture.
  
  text10 only extracts 10 texts instead of 2000. On my bzr.dev repository
  this shows the advantage of blooms (about 2x faster).
  The problem with extracting 2000 random texts from my bzr.dev is that doing so reads
  almost all of the leaf nodes: even though the blooms are actively filtering for certain
  texts, the leaves still get read because of the other texts.
-------------- next part --------------
=== modified file 'indexbench.py'

--- a/indexbench.py	2008-07-16 15:03:53 +0000
+++ b/indexbench.py	2008-07-16 15:18:05 +0000
@@ -52,6 +52,7 @@
         suffix = '.callgrind'
     else:
         suffix = '.txt'
+        stats.sort()
     fname = class_name + '.' + fixture_name + suffix
     stats.save(fname)
     return value
@@ -138,11 +139,17 @@
 # text extraction simulation (follow a compression graph) for text_keys
     if 'text' in fixtures:
         text_names = [name for name in names if name.endswith('.tix')]
-        text_indices = [index for name, index in iter_indices(text_names, target, factory)]
-        text_index = CombinedGraphIndex(text_indices)
-        reset_hit_counts()
-        drop_cache()
-        run(label, 'text', get_text, label, text_index, text_keys)
+        drop_cache()
+        run(label, 'text', get_text, label, text_names, target, factory, text_keys)
+
+# text extraction simulation (follow a compression graph) for a smaller set of
+# keys
+    if 'text10' in fixtures:
+        text_names = [name for name in names if name.endswith('.tix')]
+        drop_cache()
+        run(label, 'text10', get_text, label, text_names, target, factory,
+            text_keys[:10])
+
 # follow a revision graph
     if 'revision' in fixtures:
         test_ancestry = run(label, 'revision', revision_search, label,
@@ -191,7 +198,10 @@
     print "%s: %s in %0.3f,%s" % (label, fixture_label, finish - now, hits())
 
 
-def get_text(label, text_index, text_keys):
+def get_text(label, text_names, target, factory, text_keys):
+        text_indices = [index for name, index in iter_indices(text_names, target, factory)]
+        text_index = CombinedGraphIndex(text_indices)
+        reset_hit_counts()
         now = time.time()
         # VersionedFiles can do multi-key extraction, so we can use that.
         
@@ -275,7 +285,7 @@
             drop_cache=False,
             fixture=None, lspro=False, calltree=True, test_area=None):
         if not fixture:
-            fixture = ['all', 'shuffle', 'text', 'revision', 'miss', 'random_reload']
+            fixture = ['all', 'shuffle', 'text', 'text10', 'revision', 'miss', 'random_reload']
         global use_calltree
         use_calltree = calltree
         from bzrlib.branch import Branch