Rev 2425: Switch over the internals of dirstate to use path.split('/') as the dirblock key, in http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate

Tue Feb 27 00:05:27 GMT 2007

At http://bzr.arbash-meinel.com/branches/bzr/experimental/dirstate

------------------------------------------------------------
revno: 2425
revision-id: john at arbash-meinel.com-20070227000515-xx6c9u095t29ntpp
parent: john at arbash-meinel.com-20070226230506-8l3jxhu3ed4ruiek
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Mon 2007-02-26 18:05:15 -0600
message:
  Switch over the internals of dirstate to use
  path.split('/') as the dirblock key, rather than using path.
  We have 11 errors where WorkingTree is directly accessing the
  dirblocks.
  I'm considering doing it a different way.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'

--- a/bzrlib/dirstate.py	2007-02-26 22:08:14 +0000
+++ b/bzrlib/dirstate.py	2007-02-27 00:05:15 +0000
@@ -29,7 +29,7 @@
 
 dirstate format = header line, full checksum, row count, parent details,
  ghost_details, entries;
-header line = "#bazaar dirstate flat format 2", NL;
+header line = "#bazaar dirstate flat format 3", NL;
 full checksum = "adler32: ", ["-"], WHOLE_NUMBER, NL;
 row count = "num_entries: ", digit, NL;
 parent_details = WHOLE NUMBER, {REVISION_ID}* NL;
@@ -201,10 +201,10 @@
 
 from bzrlib import (
     errors,
+    inventory,
     lock,
     trace,
     )
-import bzrlib.inventory
 from bzrlib import osutils
 from bzrlib.osutils import (
     pathjoin,
@@ -248,6 +248,8 @@
     NULLSTAT = 'x' * 32
     NULL_PARENT_DETAILS = ('a', '', 0, False, '')
 
+    HEADER_FORMAT_3 = '#bazaar dirstate flat format 3\n'
+
     def __init__(self, path):
         """Create a  DirState object.
 
@@ -302,7 +304,7 @@
         # find the location in the block.
         # check its not there
         # add it.
-        #------- copied from bzrlib.inventory.make_entry
+        #------- copied from inventory.make_entry
         # --- normalized_filename wants a unicode basename only, so get one.
         dirname, basename = osutils.split(path)
         # we dont import normalized_filename directly because we want to be
@@ -827,11 +829,12 @@
             parent_block_index == -1 and dirname == ''):
             assert dirname.endswith(
                 self._dirblocks[parent_block_index][1][parent_row_index][0][1])
-        block_index, present = self._find_block_index_from_key((dirname, '', ''))
+        split_dirname = dirname.split('/')
+        block_index, present = self._find_block_index_from_split_dirname(split_dirname)
         if not present:
             ## In future, when doing partial parsing, this should load and 
             # populate the entire block.
-            self._dirblocks.insert(block_index, (dirname, []))
+            self._dirblocks.insert(block_index, (split_dirname, []))
         return block_index
 
     def _entries_to_current_state(self, new_entries):
@@ -848,17 +851,19 @@
             "Missing root row %r" % new_entries[0][0]
         # The two blocks here are deliberate: the root block and the 
         # contents-of-root block.
-        self._dirblocks = [('', []), ('', [])]
+        self._dirblocks = [([''], []), ([''], [])]
         current_block = self._dirblocks[0][1]
         current_dirname = ''
         root_key = ('', '')
+        current_dir_split = ['']
         append_entry = current_block.append
         for entry in new_entries:
             if entry[0][0] != current_dirname:
                 # new block - different dirname
                 current_block = []
                 current_dirname = entry[0][0]
-                self._dirblocks.append((current_dirname, current_block))
+                current_dir_split = current_dirname.split('/')
+                self._dirblocks.append((current_dir_split, current_block))
                 append_entry = current_block.append
             # append the entry to the current block
             append_entry(entry)
@@ -873,7 +878,7 @@
         # The above loop leaves the "root block" entries mixed with the
         # "contents-of-root block". But we don't want an if check on
         # all entries, so instead we just fix it up here.
-        assert self._dirblocks[1] == ('', [])
+        assert self._dirblocks[1] == ([''], [])
         root_block = []
         contents_of_root_block = []
         for entry in self._dirblocks[0][1]:
@@ -881,8 +886,8 @@
                 root_block.append(entry)
             else:
                 contents_of_root_block.append(entry)
-        self._dirblocks[0] = ('', root_block)
-        self._dirblocks[1] = ('', contents_of_root_block)
+        self._dirblocks[0] = ([''], root_block)
+        self._dirblocks[1] = ([''], contents_of_root_block)
 
     def _entry_to_line(self, entry):
         """Serialize entry to a NULL delimited line ready for _get_output_lines.
@@ -945,15 +950,18 @@
         """
         if key[0:2] == ('', ''):
             return 0, True
-        block_index = bisect.bisect_left(self._dirblocks, (key[0], []), 1)
-        # _right returns one-past-where-key is so we have to subtract
-        # one to use it. we use _right here because there are two
-        # '' blocks - the root, and the contents of root
-        # we always have a minimum of 2 in self._dirblocks: root and
-        # root-contents, and for '', we get 2 back, so this is 
-        # simple and correct:
+        split_dirname = key[0].split('/')
+        return self._find_block_index_from_split_dirname(split_dirname)
+
+    def _find_block_index_from_split_dirname(self, split_dirname):
+        """Find the block for a given split dirname.
+
+        This will never return the root entry, it only returns the content
+        blocks.
+        """
+        block_index = bisect.bisect_left(self._dirblocks, (split_dirname, []), 1)
         present = (block_index < len(self._dirblocks) and
-            self._dirblocks[block_index][0] == key[0])
+            self._dirblocks[block_index][0] == split_dirname)
         return block_index, present
 
     def _find_entry_index(self, key, block):
@@ -1229,10 +1237,10 @@
         # persist.
         result = DirState(path)
         # root dir and root dir contents with no children.
-        empty_tree_dirblocks = [('', []), ('', [])]
+        empty_tree_dirblocks = [([''], []), ([''], [])]
         # a new root directory, with a NULLSTAT.
         empty_tree_dirblocks[0][1].append(
-            (('', '', bzrlib.inventory.ROOT_ID), [
+            (('', '', inventory.ROOT_ID), [
                 ('d', '', 0, False, DirState.NULLSTAT),
             ]))
         result.lock_write()
@@ -1298,7 +1306,7 @@
         :param lines: A sequece of lines containing the parents list and the
             path lines.
         """
-        output_lines = ['#bazaar dirstate flat format 2\n']
+        output_lines = [DirState.HEADER_FORMAT_3]
         lines.append('') # a final newline
         inventory_text = '\0\n\0'.join(lines)
         output_lines.append('adler32: %s\n' % (zlib.adler32(inventory_text),))
@@ -1384,9 +1392,10 @@
                     next()
                 # The two blocks here are deliberate: the root block and the
                 # contents-of-root block.
-                self._dirblocks = [('', []), ('', [])]
+                self._dirblocks = [([''], []), ([''], [])]
                 current_block = self._dirblocks[0][1]
                 current_dirname = ''
+                current_dir_split = ['']
                 append_entry = current_block.append
                 for count in xrange(self._num_entries):
                     dirname = next()
@@ -1396,7 +1405,8 @@
                         # new block - different dirname
                         current_block = []
                         current_dirname = dirname
-                        self._dirblocks.append((current_dirname, current_block))
+                        current_dir_split = current_dirname.split('/')
+                        self._dirblocks.append((current_dir_split, current_block))
                         append_entry = current_block.append
                     # we know current_dirname == dirname, so re-use it to avoid
                     # creating new strings
@@ -1469,7 +1479,7 @@
         and their ids. Followed by a newline.
         """
         header = self._state_file.readline()
-        assert header == '#bazaar dirstate flat format 2\n', \
+        assert header == DirState.HEADER_FORMAT_3, \
             'invalid header line: %r' % (header,)
         adler_line = self._state_file.readline()
         assert adler_line.startswith('adler32: '), 'missing adler32 checksum'
@@ -1794,7 +1804,7 @@
             # Remove it, its meaningless.
             block = self._find_block(current_old[0])
             entry_index, present = self._find_entry_index(current_old[0], block[1])
-            assert present
+            assert present, 'could not find entry for %s' % (current_old,)
             block[1].pop(entry_index)
             # if we have an id_index in use, remove this key from it for this id.
             if self._id_index is not None:
@@ -1806,10 +1816,10 @@
         for update_key in all_remaining_keys:
             update_block_index, present = \
                 self._find_block_index_from_key(update_key)
-            assert present
+            assert present, 'could not find block for %s' % (update_key,)
             update_entry_index, present = \
                 self._find_entry_index(update_key, self._dirblocks[update_block_index][1])
-            assert present
+            assert present, 'could not find entry for %s' % (update_key,)
             update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1]
             # it must not be absent at the moment
             assert update_tree_details[0][0] != 'a' # absent
@@ -1859,9 +1869,14 @@
                     # the test for existing kinds is different: this can be
                     # factored out to a helper though.
                     other_block_index, present = self._find_block_index_from_key(other_key)
-                    assert present
-                    other_entry_index, present = self._find_entry_index(other_key, self._dirblocks[other_block_index][1])
-                    assert present
+                    if not present:
+                        import pdb; pdb.set_trace()
+                    assert present, 'could not find block for %s' % (other_key,)
+                    other_entry_index, present = self._find_entry_index(other_key,
+                                            self._dirblocks[other_block_index][1])
+                    if not present:
+                        import pdb; pdb.set_trace()
+                    assert present, 'could not find entry for %s' % (other_key,)
                     assert path_utf8 is not None
                     self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
                         ('r', path_utf8, 0, False, '')
@@ -1874,10 +1889,10 @@
                     # records.
                     update_block_index, present = \
                         self._find_block_index_from_key(other_key)
-                    assert present
+                    assert present, 'could not find block for %s' % (other_key,)
                     update_entry_index, present = \
                         self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
-                    assert present
+                    assert present, 'could not find entry for %s' % (other_key,)
                     update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
                     if update_details[0] in ('r', 'a'): # relocated, absent
                         # its a pointer or absent in lookup_index's tree, use

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2007-02-25 14:45:50 +0000
+++ b/bzrlib/tests/test_dirstate.py	2007-02-27 00:05:15 +0000
@@ -69,8 +69,8 @@
             ('d', '', 0, False, packed_stat),
             ]
         dirblocks = []
-        dirblocks.append(('', [root_entry_direntry]))
-        dirblocks.append(('', []))
+        dirblocks.append(([''], [root_entry_direntry]))
+        dirblocks.append(([''], []))
         state = self.create_empty_dirstate()
         try:
             state._set_data([], dirblocks)
@@ -140,10 +140,10 @@
             ('f', null_sha, 40, False, packed_stat),
             ]
         dirblocks = []
-        dirblocks.append(('', [root_entry]))
-        dirblocks.append(('', [a_entry, b_entry, c_entry, d_entry]))
-        dirblocks.append(('a', [e_entry, f_entry]))
-        dirblocks.append(('b', [g_entry, h_entry]))
+        dirblocks.append(([''], [root_entry]))
+        dirblocks.append(([''], [a_entry, b_entry, c_entry, d_entry]))
+        dirblocks.append((['a'], [e_entry, f_entry]))
+        dirblocks.append((['b'], [g_entry, h_entry]))
         state = dirstate.DirState.initialize('dirstate')
         try:
             state._set_data([], dirblocks)
@@ -685,7 +685,7 @@
     def test_get_line_with_2_rows(self):
         state = self.create_dirstate_with_root_and_subdir()
         try:
-            self.assertEqual(['#bazaar dirstate flat format 2\n',
+            self.assertEqual(['#bazaar dirstate flat format 3\n',
                 'adler32: -1327947603\n',
                 'num_entries: 2\n',
                 '0\x00\n\x00'
@@ -785,7 +785,7 @@
             state._get_block_entry_index(dirname, basename, tree_index))
         if dir_present:
             block = state._dirblocks[block_index]
-            self.assertEqual(dirname, block[0])
+            self.assertEqual(dirname.split('/'), block[0])
         if dir_present and file_present:
             row = state._dirblocks[block_index][1][row_index]
             self.assertEqual(dirname, row[0][0])

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-02-26 21:51:04 +0000
+++ b/bzrlib/workingtree_4.py	2007-02-27 00:05:15 +0000
@@ -292,9 +292,10 @@
         # and should be profiled - RBC 20070216
         parent_ies = {'' : inv.root}
         for block in state._dirblocks[1:]: # skip the root
-            dirname = block[0]
+            split_dirname = block[0]
+            dirname = osutils.pathjoin(*split_dirname)
             try:
-                parent_ie = parent_ies[block[0]]
+                parent_ie = parent_ies[dirname]
             except KeyError:
                 # all the paths in this block are not versioned in this tree
                 continue
@@ -592,7 +593,8 @@
                 from_key, old_entry_details = old_entry
                 cur_details = old_entry_details[0]
                 # remove the old row
-                to_key = ((to_block[0],) + from_key[1:3])
+                dirname = osutils.pathjoin(*to_block[0])
+                to_key = (dirname, from_key[1], from_key[2])
                 minikind = cur_details[0]
                 move_one(old_entry, from_path_utf8=from_rel_utf8,
                          minikind=minikind,
@@ -966,9 +968,8 @@
             # first check: is the path one to remove - it or its children
             delete_block = False
             for path in paths_to_unversion:
-                if (block[0].startswith(path) and
-                    (len(block[0]) == len(path) or
-                     block[0][len(path)] == '/')):
+                split_path = path.split('/')
+                if (block[0][:len(split_path)] == split_path):
                     # this entire block should be deleted - its the block for a
                     # path to unversion; or the child of one
                     delete_block = True
@@ -1181,7 +1182,8 @@
         # and should be profiled - RBC 20070216
         parent_ies = {'' : inv.root}
         for block in self._dirstate._dirblocks[1:]: #skip root
-            dirname = block[0]
+            split_dirname = block[0]
+            dirname = osutils.pathjoin(*split_dirname)
             try:
                 parent_ie = parent_ies[dirname]
             except KeyError:
@@ -1398,7 +1400,7 @@
         # NB: show_status depends on being able to pass in non-versioned files
         # and report them as unknown
             # TODO: handle extra trees in the dirstate.
-        if extra_trees:
+        if True or extra_trees:
             for f in super(InterDirStateTree, self)._iter_changes(
                 include_unchanged, specific_files, pb, extra_trees,
                 require_versioned):
@@ -1718,8 +1720,10 @@
                 # convert the unicode relpaths in the dir index to uf8 for
                 # comparison with dirstate data.
                 # TODO: keep the utf8 version around for giving to the caller.
-                current_dir_info = ((current_dir_info[0][0].encode('utf8'), current_dir_info[0][1]),
-                    [(line[0].encode('utf8'), line[1].encode('utf8')) + line[2:] for line in current_dir_info[1]])
+                current_dir_info = ((current_dir_info[0][0].encode('utf8').split('/'),
+                                     current_dir_info[0][1]),
+                    [(line[0].encode('utf8'), line[1].encode('utf8')) + line[2:]
+                     for line in current_dir_info[1]])
             # walk until both the directory listing and the versioned metadata
             # are exhausted. TODO: reevaluate this, perhaps we should stop when
             # the versioned data runs out.