Rev 2425: Switch over the internals of dirstate to use in http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate
John Arbash Meinel
john at arbash-meinel.com
Tue Feb 27 00:05:27 GMT 2007
At http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate
------------------------------------------------------------
revno: 2425
revision-id: john at arbash-meinel.com-20070227000515-xx6c9u095t29ntpp
parent: john at arbash-meinel.com-20070226230506-8l3jxhu3ed4ruiek
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Mon 2007-02-26 18:05:15 -0600
message:
Switch over the internals of dirstate to use
path.split('/') as the dirblock key, rather than using path.
We have 11 errors where WorkingTree is directly accessing the
dirblocks.
I'm considering doing it a different way.
modified:
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
bzrlib/tests/test_dirstate.py test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2007-02-26 22:08:14 +0000
+++ b/bzrlib/dirstate.py 2007-02-27 00:05:15 +0000
@@ -29,7 +29,7 @@
dirstate format = header line, full checksum, row count, parent details,
ghost_details, entries;
-header line = "#bazaar dirstate flat format 2", NL;
+header line = "#bazaar dirstate flat format 3", NL;
full checksum = "adler32: ", ["-"], WHOLE_NUMBER, NL;
row count = "num_entries: ", digit, NL;
parent_details = WHOLE NUMBER, {REVISION_ID}* NL;
@@ -201,10 +201,10 @@
from bzrlib import (
errors,
+ inventory,
lock,
trace,
)
-import bzrlib.inventory
from bzrlib import osutils
from bzrlib.osutils import (
pathjoin,
@@ -248,6 +248,8 @@
NULLSTAT = 'x' * 32
NULL_PARENT_DETAILS = ('a', '', 0, False, '')
+ HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'
+
def __init__(self, path):
"""Create a DirState object.
@@ -302,7 +304,7 @@
# find the location in the block.
# check its not there
# add it.
- #------- copied from bzrlib.inventory.make_entry
+ #------- copied from inventory.make_entry
# --- normalized_filename wants a unicode basename only, so get one.
dirname, basename = osutils.split(path)
# we dont import normalized_filename directly because we want to be
@@ -827,11 +829,12 @@
parent_block_index == -1 and dirname == ''):
assert dirname.endswith(
self._dirblocks[parent_block_index][1][parent_row_index][0][1])
- block_index, present = self._find_block_index_from_key((dirname, '', ''))
+ split_dirname = dirname.split('/')
+ block_index, present = self._find_block_index_from_split_dirname(split_dirname)
if not present:
## In future, when doing partial parsing, this should load and
# populate the entire block.
- self._dirblocks.insert(block_index, (dirname, []))
+ self._dirblocks.insert(block_index, (split_dirname, []))
return block_index
def _entries_to_current_state(self, new_entries):
@@ -848,17 +851,19 @@
"Missing root row %r" % new_entries[0][0]
# The two blocks here are deliberate: the root block and the
# contents-of-root block.
- self._dirblocks = [('', []), ('', [])]
+ self._dirblocks = [([''], []), ([''], [])]
current_block = self._dirblocks[0][1]
current_dirname = ''
root_key = ('', '')
+ current_dir_split = ['']
append_entry = current_block.append
for entry in new_entries:
if entry[0][0] != current_dirname:
# new block - different dirname
current_block = []
current_dirname = entry[0][0]
- self._dirblocks.append((current_dirname, current_block))
+ current_dir_split = current_dirname.split('/')
+ self._dirblocks.append((current_dir_split, current_block))
append_entry = current_block.append
# append the entry to the current block
append_entry(entry)
@@ -873,7 +878,7 @@
# The above loop leaves the "root block" entries mixed with the
# "contents-of-root block". But we don't want an if check on
# all entries, so instead we just fix it up here.
- assert self._dirblocks[1] == ('', [])
+ assert self._dirblocks[1] == ([''], [])
root_block = []
contents_of_root_block = []
for entry in self._dirblocks[0][1]:
@@ -881,8 +886,8 @@
root_block.append(entry)
else:
contents_of_root_block.append(entry)
- self._dirblocks[0] = ('', root_block)
- self._dirblocks[1] = ('', contents_of_root_block)
+ self._dirblocks[0] = ([''], root_block)
+ self._dirblocks[1] = ([''], contents_of_root_block)
def _entry_to_line(self, entry):
"""Serialize entry to a NULL delimited line ready for _get_output_lines.
@@ -945,15 +950,18 @@
"""
if key[0:2] == ('', ''):
return 0, True
- block_index = bisect.bisect_left(self._dirblocks, (key[0], []), 1)
- # _right returns one-past-where-key is so we have to subtract
- # one to use it. we use _right here because there are two
- # '' blocks - the root, and the contents of root
- # we always have a minimum of 2 in self._dirblocks: root and
- # root-contents, and for '', we get 2 back, so this is
- # simple and correct:
+ split_dirname = key[0].split('/')
+ return self._find_block_index_from_split_dirname(split_dirname)
+
+ def _find_block_index_from_split_dirname(self, split_dirname):
+ """Find the block for a given split dirname.
+
+ This will never return the root entry, it only returns the content
+ blocks.
+ """
+ block_index = bisect.bisect_left(self._dirblocks, (split_dirname, []), 1)
present = (block_index < len(self._dirblocks) and
- self._dirblocks[block_index][0] == key[0])
+ self._dirblocks[block_index][0] == split_dirname)
return block_index, present
def _find_entry_index(self, key, block):
@@ -1229,10 +1237,10 @@
# persist.
result = DirState(path)
# root dir and root dir contents with no children.
- empty_tree_dirblocks = [('', []), ('', [])]
+ empty_tree_dirblocks = [([''], []), ([''], [])]
# a new root directory, with a NULLSTAT.
empty_tree_dirblocks[0][1].append(
- (('', '', bzrlib.inventory.ROOT_ID), [
+ (('', '', inventory.ROOT_ID), [
('d', '', 0, False, DirState.NULLSTAT),
]))
result.lock_write()
@@ -1298,7 +1306,7 @@
:param lines: A sequece of lines containing the parents list and the
path lines.
"""
- output_lines = ['#bazaar dirstate flat format 2\n']
+ output_lines = [DirState.HEADER_FORMAT_3]
lines.append('') # a final newline
inventory_text = '\0\n\0'.join(lines)
output_lines.append('adler32: %s\n' % (zlib.adler32(inventory_text),))
@@ -1384,9 +1392,10 @@
next()
# The two blocks here are deliberate: the root block and the
# contents-of-root block.
- self._dirblocks = [('', []), ('', [])]
+ self._dirblocks = [([''], []), ([''], [])]
current_block = self._dirblocks[0][1]
current_dirname = ''
+ current_dir_split = ['']
append_entry = current_block.append
for count in xrange(self._num_entries):
dirname = next()
@@ -1396,7 +1405,8 @@
# new block - different dirname
current_block = []
current_dirname = dirname
- self._dirblocks.append((current_dirname, current_block))
+ current_dir_split = current_dirname.split('/')
+ self._dirblocks.append((current_dir_split, current_block))
append_entry = current_block.append
# we know current_dirname == dirname, so re-use it to avoid
# creating new strings
@@ -1469,7 +1479,7 @@
and their ids. Followed by a newline.
"""
header = self._state_file.readline()
- assert header == '#bazaar dirstate flat format 2\n', \
+ assert header == DirState.HEADER_FORMAT_3, \
'invalid header line: %r' % (header,)
adler_line = self._state_file.readline()
assert adler_line.startswith('adler32: '), 'missing adler32 checksum'
@@ -1794,7 +1804,7 @@
# Remove it, its meaningless.
block = self._find_block(current_old[0])
entry_index, present = self._find_entry_index(current_old[0], block[1])
- assert present
+ assert present, 'could not find entry for %s' % (current_old,)
block[1].pop(entry_index)
# if we have an id_index in use, remove this key from it for this id.
if self._id_index is not None:
@@ -1806,10 +1816,10 @@
for update_key in all_remaining_keys:
update_block_index, present = \
self._find_block_index_from_key(update_key)
- assert present
+ assert present, 'could not find block for %s' % (update_key,)
update_entry_index, present = \
self._find_entry_index(update_key, self._dirblocks[update_block_index][1])
- assert present
+ assert present, 'could not find entry for %s' % (update_key,)
update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1]
# it must not be absent at the moment
assert update_tree_details[0][0] != 'a' # absent
@@ -1859,9 +1869,14 @@
# the test for existing kinds is different: this can be
# factored out to a helper though.
other_block_index, present = self._find_block_index_from_key(other_key)
- assert present
- other_entry_index, present = self._find_entry_index(other_key, self._dirblocks[other_block_index][1])
- assert present
+ if not present:
+ import pdb; pdb.set_trace()
+ assert present, 'could not find block for %s' % (other_key,)
+ other_entry_index, present = self._find_entry_index(other_key,
+ self._dirblocks[other_block_index][1])
+ if not present:
+ import pdb; pdb.set_trace()
+ assert present, 'could not find entry for %s' % (other_key,)
assert path_utf8 is not None
self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
('r', path_utf8, 0, False, '')
@@ -1874,10 +1889,10 @@
# records.
update_block_index, present = \
self._find_block_index_from_key(other_key)
- assert present
+ assert present, 'could not find block for %s' % (other_key,)
update_entry_index, present = \
self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
- assert present
+ assert present, 'could not find entry for %s' % (other_key,)
update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
if update_details[0] in ('r', 'a'): # relocated, absent
# its a pointer or absent in lookup_index's tree, use
=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py 2007-02-25 14:45:50 +0000
+++ b/bzrlib/tests/test_dirstate.py 2007-02-27 00:05:15 +0000
@@ -69,8 +69,8 @@
('d', '', 0, False, packed_stat),
]
dirblocks = []
- dirblocks.append(('', [root_entry_direntry]))
- dirblocks.append(('', []))
+ dirblocks.append(([''], [root_entry_direntry]))
+ dirblocks.append(([''], []))
state = self.create_empty_dirstate()
try:
state._set_data([], dirblocks)
@@ -140,10 +140,10 @@
('f', null_sha, 40, False, packed_stat),
]
dirblocks = []
- dirblocks.append(('', [root_entry]))
- dirblocks.append(('', [a_entry, b_entry, c_entry, d_entry]))
- dirblocks.append(('a', [e_entry, f_entry]))
- dirblocks.append(('b', [g_entry, h_entry]))
+ dirblocks.append(([''], [root_entry]))
+ dirblocks.append(([''], [a_entry, b_entry, c_entry, d_entry]))
+ dirblocks.append((['a'], [e_entry, f_entry]))
+ dirblocks.append((['b'], [g_entry, h_entry]))
state = dirstate.DirState.initialize('dirstate')
try:
state._set_data([], dirblocks)
@@ -685,7 +685,7 @@
def test_get_line_with_2_rows(self):
state = self.create_dirstate_with_root_and_subdir()
try:
- self.assertEqual(['#bazaar dirstate flat format 2\n',
+ self.assertEqual(['#bazaar dirstate flat format 3\n',
'adler32: -1327947603\n',
'num_entries: 2\n',
'0\x00\n\x00'
@@ -785,7 +785,7 @@
state._get_block_entry_index(dirname, basename, tree_index))
if dir_present:
block = state._dirblocks[block_index]
- self.assertEqual(dirname, block[0])
+ self.assertEqual(dirname.split('/'), block[0])
if dir_present and file_present:
row = state._dirblocks[block_index][1][row_index]
self.assertEqual(dirname, row[0][0])
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2007-02-26 21:51:04 +0000
+++ b/bzrlib/workingtree_4.py 2007-02-27 00:05:15 +0000
@@ -292,9 +292,10 @@
# and should be profiled - RBC 20070216
parent_ies = {'' : inv.root}
for block in state._dirblocks[1:]: # skip the root
- dirname = block[0]
+ split_dirname = block[0]
+ dirname = osutils.pathjoin(*split_dirname)
try:
- parent_ie = parent_ies[block[0]]
+ parent_ie = parent_ies[dirname]
except KeyError:
# all the paths in this block are not versioned in this tree
continue
@@ -592,7 +593,8 @@
from_key, old_entry_details = old_entry
cur_details = old_entry_details[0]
# remove the old row
- to_key = ((to_block[0],) + from_key[1:3])
+ dirname = osutils.pathjoin(*to_block[0])
+ to_key = (dirname, from_key[1], from_key[2])
minikind = cur_details[0]
move_one(old_entry, from_path_utf8=from_rel_utf8,
minikind=minikind,
@@ -966,9 +968,8 @@
# first check: is the path one to remove - it or its children
delete_block = False
for path in paths_to_unversion:
- if (block[0].startswith(path) and
- (len(block[0]) == len(path) or
- block[0][len(path)] == '/')):
+ split_path = path.split('/')
+ if (block[0][:len(split_path)] == split_path):
# this entire block should be deleted - its the block for a
# path to unversion; or the child of one
delete_block = True
@@ -1181,7 +1182,8 @@
# and should be profiled - RBC 20070216
parent_ies = {'' : inv.root}
for block in self._dirstate._dirblocks[1:]: #skip root
- dirname = block[0]
+ split_dirname = block[0]
+ dirname = osutils.pathjoin(*split_dirname)
try:
parent_ie = parent_ies[dirname]
except KeyError:
@@ -1398,7 +1400,7 @@
# NB: show_status depends on being able to pass in non-versioned files
# and report them as unknown
# TODO: handle extra trees in the dirstate.
- if extra_trees:
+ if True or extra_trees:
for f in super(InterDirStateTree, self)._iter_changes(
include_unchanged, specific_files, pb, extra_trees,
require_versioned):
@@ -1718,8 +1720,10 @@
# convert the unicode relpaths in the dir index to uf8 for
# comparison with dirstate data.
# TODO: keep the utf8 version around for giving to the caller.
- current_dir_info = ((current_dir_info[0][0].encode('utf8'), current_dir_info[0][1]),
- [(line[0].encode('utf8'), line[1].encode('utf8')) + line[2:] for line in current_dir_info[1]])
+ current_dir_info = ((current_dir_info[0][0].encode('utf8').split('/'),
+ current_dir_info[0][1]),
+ [(line[0].encode('utf8'), line[1].encode('utf8')) + line[2:]
+ for line in current_dir_info[1]])
# walk until both the directory listing and the versioned metadata
# are exhausted. TODO: reevaluate this, perhaps we should stop when
# the versioned data runs out.
More information about the bazaar-commits
mailing list