Rev 2425: Add a helper function, which allows us to store keys as plain paths, in http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate

John Arbash Meinel john at arbash-meinel.com
Tue Feb 27 00:34:19 GMT 2007


At http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate

------------------------------------------------------------
revno: 2425
revision-id: john at arbash-meinel.com-20070227003412-6i0cl98a6f3l71ue
parent: john at arbash-meinel.com-20070226230506-8l3jxhu3ed4ruiek
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Mon 2007-02-26 18:34:12 -0600
message:
  Add a helper function, which allows us to store keys as plain paths,
  but bisect across them as split paths.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-02-26 22:08:14 +0000
+++ b/bzrlib/dirstate.py	2007-02-27 00:34:12 +0000
@@ -1964,6 +1964,30 @@
         if not self._lock_token:
             raise errors.ObjectNotLocked(self)
 
+
+def bisect_split(a, x, lo=0, hi=None):
+    """Return the index where to insert item x in list a, assuming a is sorted.
+
+    The return value i is such that all e in a[:i] have e < x, and all e in
+    a[i:] have e >= x.  So if x already appears in the list, i points just
+    before the leftmost x already there.
+
+    Optional args lo (default 0) and hi (default len(a)) bound the
+    slice of a to be searched.
+
+    The one trick is that we compare entries by spliting them into directory
+    path chunks, rather than by individual strings.
+    """
+    if hi is None:
+        hi = len(a)
+    x_split = x.split('/')
+    while lo < hi:
+        mid = (lo+hi)//2
+        if a[mid].split('/') < x_split: lo = mid+1
+        else: hi = mid
+    return lo
+
+
 def pack_stat(st, _encode=base64.encodestring, _pack=struct.pack):
     """Convert stat values into a packed representation."""
     # jam 20060614 it isn't really worth removing more entries if we
@@ -1976,4 +2000,3 @@
     return _encode(_pack('>llllll'
         , st.st_size, st.st_mtime, st.st_ctime
         , st.st_dev, st.st_ino, st.st_mode))[:-1]
-

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2007-02-25 14:45:50 +0000
+++ b/bzrlib/tests/test_dirstate.py	2007-02-27 00:34:12 +0000
@@ -16,6 +16,7 @@
 
 """Tests of the dirstate functionality being built for WorkingTreeFormat4."""
 
+import bisect
 import os
 
 from bzrlib import (
@@ -24,7 +25,7 @@
     osutils,
     )
 from bzrlib.memorytree import MemoryTree
-from bzrlib.tests import TestCaseWithTransport
+from bzrlib.tests import TestCase, TestCaseWithTransport
 
 
 # TODO:
@@ -1290,3 +1291,48 @@
                                               'b/d/e', 'b/g', 'h', 'h/e'],
                                    state, ['b'])
 
+
+class TestBisectSplit(TestCase):
+    """Test that bisect_split() returns the expected values.
+
+    bisect_split is intended to work like bisect.bisect_left() except it splits
+    the path by directory sections, rather than using a raw comparison.
+    """
+
+    def assertBisect(self, paths, split_paths, key, *args):
+        """Assert that bisect_split works like bisect_left on the split paths.
+
+        :param paths: The plain list of paths. It should be split sorted
+        :param split_paths: A list of split paths sorted identical to paths.
+        :param key: The path we are bisecting for.
+        """
+        bisect_split_idx = dirstate.bisect_split(paths, key, *args)
+        bisect_left_idx = bisect.bisect_left(split_paths, key.split('/'),
+                                             *args)
+        self.assertEqual(bisect_left_idx, bisect_split_idx,
+                         'bisect_split disagreed. %s != %s'
+                         ' for key %s'
+                         % (bisect_left_idx, bisect_split_idx, key)
+                         )
+
+    def test_simple(self):
+        """In the simple case it works just like bisect_left"""
+        paths = ['', 'a', 'b', 'c', 'd']
+        split_paths = [[''], ['a'], ['b'], ['c'], ['d']]
+        for path in paths:
+            self.assertBisect(paths, split_paths, path)
+
+    def test_involved(self):
+        """This is where bisect_left diverges slightly."""
+        paths = ['', 'a',
+                 'a/a', 'a/a/a', 'a/a/z', 'a/a-a', 'a/a-z',
+                 'a/z', 'a/z/a', 'a/z/z', 'a/z-a', 'a/z-z',
+                 'a-a', 'a-z',
+                 'z', 'z/a/a', 'z/a/z', 'z/a-a', 'z/a-z',
+                 'z/z', 'z/z/a', 'z/z/z', 'z/z-a', 'z/z-z',
+                 'z-a', 'z-z',
+                ]
+        split_paths = [p.split('/') for p in paths]
+        self.assertEqual(split_paths, sorted(split_paths))
+        for path in paths:
+            self.assertBisect(paths, split_paths, path)



More information about the bazaar-commits mailing list