Rev 2427: Rather than using split hunks, implement a bisect_dirblocks in http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate
John Arbash Meinel
john at arbash-meinel.com
Tue Feb 27 01:08:12 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate
------------------------------------------------------------
revno: 2427
revision-id: john at arbash-meinel.com-20070227010804-nb6bg1c5fvs2nsri
parent: john at arbash-meinel.com-20070227004109-o0rc0nwaqjzxaved
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Mon 2007-02-26 19:08:04 -0600
message:
Rather than using split hunks, implement a bisect_dirblocks
which knows that it is bisecting dirblocks and only compares the first
hunk, and does the comparison in split mode.
modified:
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
bzrlib/tests/test_dirstate.py test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2007-02-27 00:41:09 +0000
+++ b/bzrlib/dirstate.py 2007-02-27 01:08:04 +0000
@@ -201,11 +201,11 @@
from bzrlib import (
errors,
+ inventory,
lock,
+ osutils,
trace,
)
-import bzrlib.inventory
-from bzrlib import osutils
from bzrlib.osutils import (
pathjoin,
sha_file,
@@ -248,6 +248,8 @@
NULLSTAT = 'x' * 32
NULL_PARENT_DETAILS = ('a', '', 0, False, '')
+ HEADER_FORMAT_2 = '#bazaar dirstate flat format 2\n'
+
def __init__(self, path):
"""Create a DirState object.
@@ -302,7 +304,7 @@
# find the location in the block.
# check its not there
# add it.
- #------- copied from bzrlib.inventory.make_entry
+ #------- copied from inventory.make_entry
# --- normalized_filename wants a unicode basename only, so get one.
dirname, basename = osutils.split(path)
# we dont import normalized_filename directly because we want to be
@@ -945,7 +947,7 @@
"""
if key[0:2] == ('', ''):
return 0, True
- block_index = bisect.bisect_left(self._dirblocks, (key[0], []), 1)
+ block_index = bisect_dirblock(self._dirblocks, key[0], 1)
# _right returns one-past-where-key is so we have to subtract
# one to use it. we use _right here because there are two
# '' blocks - the root, and the contents of root
@@ -1232,7 +1234,7 @@
empty_tree_dirblocks = [('', []), ('', [])]
# a new root directory, with a NULLSTAT.
empty_tree_dirblocks[0][1].append(
- (('', '', bzrlib.inventory.ROOT_ID), [
+ (('', '', inventory.ROOT_ID), [
('d', '', 0, False, DirState.NULLSTAT),
]))
result.lock_write()
@@ -1298,7 +1300,7 @@
:param lines: A sequece of lines containing the parents list and the
path lines.
"""
- output_lines = ['#bazaar dirstate flat format 2\n']
+ output_lines = [DirState.HEADER_FORMAT_2]
lines.append('') # a final newline
inventory_text = '\0\n\0'.join(lines)
output_lines.append('adler32: %s\n' % (zlib.adler32(inventory_text),))
@@ -1469,7 +1471,7 @@
and their ids. Followed by a newline.
"""
header = self._state_file.readline()
- assert header == '#bazaar dirstate flat format 2\n', \
+ assert header == DirState.HEADER_FORMAT_2, \
'invalid header line: %r' % (header,)
adler_line = self._state_file.readline()
assert adler_line.startswith('adler32: '), 'missing adler32 checksum'
@@ -1794,7 +1796,7 @@
# Remove it, its meaningless.
block = self._find_block(current_old[0])
entry_index, present = self._find_entry_index(current_old[0], block[1])
- assert present
+ assert present, 'could not find entry for %s' % (current_old,)
block[1].pop(entry_index)
# if we have an id_index in use, remove this key from it for this id.
if self._id_index is not None:
@@ -1806,10 +1808,10 @@
for update_key in all_remaining_keys:
update_block_index, present = \
self._find_block_index_from_key(update_key)
- assert present
+ assert present, 'could not find block for %s' % (update_key,)
update_entry_index, present = \
self._find_entry_index(update_key, self._dirblocks[update_block_index][1])
- assert present
+ assert present, 'could not find entry for %s' % (update_key,)
update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1]
# it must not be absent at the moment
assert update_tree_details[0][0] != 'a' # absent
@@ -1859,9 +1861,14 @@
# the test for existing kinds is different: this can be
# factored out to a helper though.
other_block_index, present = self._find_block_index_from_key(other_key)
- assert present
- other_entry_index, present = self._find_entry_index(other_key, self._dirblocks[other_block_index][1])
- assert present
+ if not present:
+ import pdb; pdb.set_trace()
+ assert present, 'could not find block for %s' % (other_key,)
+ other_entry_index, present = self._find_entry_index(other_key,
+ self._dirblocks[other_block_index][1])
+ if not present:
+ import pdb; pdb.set_trace()
+ assert present, 'could not find entry for %s' % (other_key,)
assert path_utf8 is not None
self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
('r', path_utf8, 0, False, '')
@@ -1874,10 +1881,10 @@
# records.
update_block_index, present = \
self._find_block_index_from_key(other_key)
- assert present
+ assert present, 'could not find block for %s' % (other_key,)
update_entry_index, present = \
self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
- assert present
+ assert present, 'could not find entry for %s' % (other_key,)
update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
if update_details[0] in ('r', 'a'): # relocated, absent
# its a pointer or absent in lookup_index's tree, use
@@ -1965,36 +1972,33 @@
raise errors.ObjectNotLocked(self)
-def bisect_split(a, x, lo=0, hi=None, cache={}):
- """Return the index where to insert item x in list a, assuming a is sorted.
-
- The return value i is such that all e in a[:i] have e < x, and all e in
- a[i:] have e >= x. So if x already appears in the list, i points just
- before the leftmost x already there.
-
- Optional args lo (default 0) and hi (default len(a)) bound the
+def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
+ """Return the index where to insert dirname into the dirblocks.
+
+ The return value idx is such that all directories blocks in dirblock[:idx]
+ have names < dirname, and all blocks in dirblock[idx:] have names >=
+ dirname.
+
+ Optional args lo (default 0) and hi (default len(dirblocks)) bound the
slice of a to be searched.
-
- The one trick is that we compare entries by spliting them into directory
- path chunks, rather than by individual strings.
"""
if hi is None:
- hi = len(a)
+ hi = len(dirblocks)
try:
- x_split = cache[x]
+ dirname_split = cache[dirname]
except KeyError:
- x_split = x.split('/')
- cache[x] = x_split
- x_split = x.split('/')
+ dirname_split = dirname.split('/')
+ cache[dirname] = dirname_split
while lo < hi:
mid = (lo+hi)//2
- cur = a[mid]
+ # Grab the dirname for the current dirblock
+ cur = dirblocks[mid][0]
try:
cur_split = cache[cur]
except KeyError:
cur_split = cur.split('/')
cache[cur] = cur_split
- if cur_split < x_split: lo = mid+1
+ if cur_split < dirname_split: lo = mid+1
else: hi = mid
return lo
=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py 2007-02-27 00:41:09 +0000
+++ b/bzrlib/tests/test_dirstate.py 2007-02-27 01:08:04 +0000
@@ -1292,44 +1292,59 @@
state, ['b'])
-class TestBisectSplit(TestCase):
- """Test that bisect_split() returns the expected values.
+class TestBisectDirblock(TestCase):
+ """Test that bisect_dirblock() returns the expected values.
- bisect_split is intended to work like bisect.bisect_left() except it splits
- the path by directory sections, rather than using a raw comparison.
+ bisect_dirblock is intended to work like bisect.bisect_left() except it
+ knows it is working on dirblocks and that dirblocks are sorted by ('path',
+ 'to', 'foo') chunks rather than by raw 'path/to/foo'.
"""
- def assertBisect(self, paths, split_paths, key, *args, **kwargs):
+ def assertBisect(self, dirblocks, split_dirblocks, path, *args, **kwargs):
"""Assert that bisect_split works like bisect_left on the split paths.
- :param paths: The plain list of paths. It should be split sorted
- :param split_paths: A list of split paths sorted identical to paths.
- :param key: The path we are bisecting for.
+ :param dirblocks: A list of (path, [info]) pairs.
+ :param split_dirblocks: A list of ((split, path), [info]) pairs.
+ :param path: The path we are indexing.
+
+ All other arguments will be passed along.
"""
- bisect_split_idx = dirstate.bisect_split(paths, key, *args, **kwargs)
- bisect_left_idx = bisect.bisect_left(split_paths, key.split('/'),
+ bisect_split_idx = dirstate.bisect_dirblock(dirblocks, path,
+ *args, **kwargs)
+ split_dirblock = (path.split('/'), [])
+ bisect_left_idx = bisect.bisect_left(split_dirblocks, split_dirblock,
*args)
self.assertEqual(bisect_left_idx, bisect_split_idx,
'bisect_split disagreed. %s != %s'
' for key %s'
- % (bisect_left_idx, bisect_split_idx, key)
+ % (bisect_left_idx, bisect_split_idx, path)
)
+ def paths_to_dirblocks(self, paths):
+ """Convert a list of paths into dirblock form.
+
+ Also, ensure that the paths are in proper sorted order.
+ """
+ dirblocks = [(path, []) for path in paths]
+ split_dirblocks = [(path.split('/'), []) for path in paths]
+ self.assertEqual(sorted(split_dirblocks), split_dirblocks)
+ return dirblocks, split_dirblocks
+
def test_simple(self):
"""In the simple case it works just like bisect_left"""
paths = ['', 'a', 'b', 'c', 'd']
- split_paths = [[''], ['a'], ['b'], ['c'], ['d']]
+ dirblocks, split_dirblocks = self.paths_to_dirblocks(paths)
for path in paths:
- self.assertBisect(paths, split_paths, path)
- self.assertBisect(paths, split_paths, '_')
- self.assertBisect(paths, split_paths, 'aa')
- self.assertBisect(paths, split_paths, 'bb')
- self.assertBisect(paths, split_paths, 'cc')
- self.assertBisect(paths, split_paths, 'dd')
- self.assertBisect(paths, split_paths, 'a/a')
- self.assertBisect(paths, split_paths, 'b/b')
- self.assertBisect(paths, split_paths, 'c/c')
- self.assertBisect(paths, split_paths, 'd/d')
+ self.assertBisect(dirblocks, split_dirblocks, path)
+ self.assertBisect(dirblocks, split_dirblocks, '_')
+ self.assertBisect(dirblocks, split_dirblocks, 'aa')
+ self.assertBisect(dirblocks, split_dirblocks, 'bb')
+ self.assertBisect(dirblocks, split_dirblocks, 'cc')
+ self.assertBisect(dirblocks, split_dirblocks, 'dd')
+ self.assertBisect(dirblocks, split_dirblocks, 'a/a')
+ self.assertBisect(dirblocks, split_dirblocks, 'b/b')
+ self.assertBisect(dirblocks, split_dirblocks, 'c/c')
+ self.assertBisect(dirblocks, split_dirblocks, 'd/d')
def test_involved(self):
"""This is where bisect_left diverges slightly."""
@@ -1341,10 +1356,9 @@
'z/z', 'z/z/a', 'z/z/z', 'z/z-a', 'z/z-z',
'z-a', 'z-z',
]
- split_paths = [p.split('/') for p in paths]
- self.assertEqual(split_paths, sorted(split_paths))
+ dirblocks, split_dirblocks = self.paths_to_dirblocks(paths)
for path in paths:
- self.assertBisect(paths, split_paths, path)
+ self.assertBisect(dirblocks, split_dirblocks, path)
def test_involved_cached(self):
"""This is where bisect_left diverges slightly."""
@@ -1357,7 +1371,7 @@
'z-a', 'z-z',
]
cache = {}
- split_paths = [p.split('/') for p in paths]
- self.assertEqual(split_paths, sorted(split_paths))
+ dirblocks, split_dirblocks = self.paths_to_dirblocks(paths)
for path in paths:
- self.assertBisect(paths, split_paths, path, cache=cache)
+ self.assertBisect(dirblocks, split_dirblocks, path, cache=cache)
+
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2007-02-26 21:51:04 +0000
+++ b/bzrlib/workingtree_4.py 2007-02-27 01:08:04 +0000
@@ -294,7 +294,7 @@
for block in state._dirblocks[1:]: # skip the root
dirname = block[0]
try:
- parent_ie = parent_ies[block[0]]
+ parent_ie = parent_ies[dirname]
except KeyError:
# all the paths in this block are not versioned in this tree
continue
More information about the bazaar-commits
mailing list