Rev 2426: Add the ability to cache the split output in http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate

John Arbash Meinel john at arbash-meinel.com
Tue Feb 27 00:41:16 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate

------------------------------------------------------------
revno: 2426
revision-id: john at arbash-meinel.com-20070227004109-o0rc0nwaqjzxaved
parent: john at arbash-meinel.com-20070227003412-6i0cl98a6f3l71ue
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Mon 2007-02-26 18:41:09 -0600
message:
  Add the ability to cache the split output
  rather than having to always split paths.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-02-27 00:34:12 +0000
+++ b/bzrlib/dirstate.py	2007-02-27 00:41:09 +0000
@@ -1965,7 +1965,7 @@
             raise errors.ObjectNotLocked(self)
 
 
-def bisect_split(a, x, lo=0, hi=None):
+def bisect_split(a, x, lo=0, hi=None, cache={}):
     """Return the index where to insert item x in list a, assuming a is sorted.
 
     The return value i is such that all e in a[:i] have e < x, and all e in
@@ -1980,10 +1980,21 @@
     """
     if hi is None:
         hi = len(a)
+    try:
+        x_split = cache[x]
+    except KeyError:
+        x_split = x.split('/')
+        cache[x] = x_split
     x_split = x.split('/')
     while lo < hi:
         mid = (lo+hi)//2
-        if a[mid].split('/') < x_split: lo = mid+1
+        cur = a[mid]
+        try:
+            cur_split = cache[cur]
+        except KeyError:
+            cur_split = cur.split('/')
+            cache[cur] = cur_split
+        if cur_split < x_split: lo = mid+1
         else: hi = mid
     return lo
 

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2007-02-27 00:34:12 +0000
+++ b/bzrlib/tests/test_dirstate.py	2007-02-27 00:41:09 +0000
@@ -1299,14 +1299,14 @@
     the path by directory sections, rather than using a raw comparison.
     """
 
-    def assertBisect(self, paths, split_paths, key, *args):
+    def assertBisect(self, paths, split_paths, key, *args, **kwargs):
         """Assert that bisect_split works like bisect_left on the split paths.
 
         :param paths: The plain list of paths. It should be split sorted
         :param split_paths: A list of split paths sorted identical to paths.
         :param key: The path we are bisecting for.
         """
-        bisect_split_idx = dirstate.bisect_split(paths, key, *args)
+        bisect_split_idx = dirstate.bisect_split(paths, key, *args, **kwargs)
         bisect_left_idx = bisect.bisect_left(split_paths, key.split('/'),
                                              *args)
         self.assertEqual(bisect_left_idx, bisect_split_idx,
@@ -1321,6 +1321,15 @@
         split_paths = [[''], ['a'], ['b'], ['c'], ['d']]
         for path in paths:
             self.assertBisect(paths, split_paths, path)
+        self.assertBisect(paths, split_paths, '_')
+        self.assertBisect(paths, split_paths, 'aa')
+        self.assertBisect(paths, split_paths, 'bb')
+        self.assertBisect(paths, split_paths, 'cc')
+        self.assertBisect(paths, split_paths, 'dd')
+        self.assertBisect(paths, split_paths, 'a/a')
+        self.assertBisect(paths, split_paths, 'b/b')
+        self.assertBisect(paths, split_paths, 'c/c')
+        self.assertBisect(paths, split_paths, 'd/d')
 
     def test_involved(self):
         """This is where bisect_left diverges slightly."""
@@ -1336,3 +1345,19 @@
         self.assertEqual(split_paths, sorted(split_paths))
         for path in paths:
             self.assertBisect(paths, split_paths, path)
+
+    def test_involved_cached(self):
+        """This is where bisect_left diverges slightly."""
+        paths = ['', 'a',
+                 'a/a', 'a/a/a', 'a/a/z', 'a/a-a', 'a/a-z',
+                 'a/z', 'a/z/a', 'a/z/z', 'a/z-a', 'a/z-z',
+                 'a-a', 'a-z',
+                 'z', 'z/a/a', 'z/a/z', 'z/a-a', 'z/a-z',
+                 'z/z', 'z/z/a', 'z/z/z', 'z/z-a', 'z/z-z',
+                 'z-a', 'z-z',
+                ]
+        cache = {}
+        split_paths = [p.split('/') for p in paths]
+        self.assertEqual(split_paths, sorted(split_paths))
+        for path in paths:
+            self.assertBisect(paths, split_paths, path, cache=cache)



More information about the bazaar-commits mailing list