Rev 2343: Significant steps back to operation. in sftp://bazaar.launchpad.net/%7Ebzr/bzr/dirstate/

Robert Collins robertc at robertcollins.net
Tue Feb 20 20:05:52 GMT 2007


At sftp://bazaar.launchpad.net/%7Ebzr/bzr/dirstate/

------------------------------------------------------------
revno: 2343
revision-id: robertc at robertcollins.net-20070220200447-pvv3g67iyk2vro42
parent: robertc at robertcollins.net-20070220084410-3msfwqdxfnvi0qa0
committer: Robert Collins <robertc at robertcollins.net>
branch nick: dirstate
timestamp: Wed 2007-02-21 07:04:47 +1100
message:
  Significant steps back to operation.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-02-20 08:44:10 +0000
+++ b/bzrlib/dirstate.py	2007-02-20 20:04:47 +0000
@@ -459,9 +459,9 @@
 
         :return: The block index, True if the block for the key is present.
         """
-        block_index = bisect.bisect_left(self._dirblocks, (dirname, []))
+        block_index = bisect.bisect_left(self._dirblocks, (key[0], []))
         present = (block_index < len(self._dirblocks) and
-            self._dirblocks[block_index][0] == dirname)
+            self._dirblocks[block_index][0] == key[0])
         return block_index, present
 
     def _find_dirblock_index(self, dirname):
@@ -1183,84 +1183,113 @@
         # incremental algorithm:
         # two iterators: current data and new data, both in dirblock order. 
         new_iterator = new_inv.iter_entries_by_dir()
-        # we will be modifying the dirstate, so we need a stable iterator. In future we might write one, for now we just clone the state into a list
+        # we will be modifying the dirstate, so we need a stable iterator. In
+        # future we might write one, for now we just clone the state into a
+        # list - which is a shallow copy, so each 
         old_iterator = iter(list(self._iter_entries()))
         # both must have roots so this is safe:
         current_new = new_iterator.next()
         current_old = old_iterator.next()
         def advance(iterator):
             try:
-                return old_iterator.next()
+                return iterator.next()
             except StopIteration:
                 return None
         while current_new or current_old:
             # skip entries in old that are not really there
-            if current_old and current_old[1][0] in ('relocated', 'absent'):
+            if current_old and current_old[1][0][0] in ('relocated', 'absent'):
                 current_old = advance(old_iterator)
                 continue
             if current_new:
                 # convert new into dirblock style
-                new_dirname, new_basename = os.path.split(current_new[0].encode('utf8'))
+                new_path_utf8 = current_new[0].encode('utf8')
+                new_dirname, new_basename = os.path.split(new_path_utf8)
                 new_id = current_new[1].file_id.encode('utf8')
                 new_entry_key = (new_dirname, new_basename, new_id)
+            else:
+                # for safety disable variables
+                new_path_utf8 = new_dirname = new_basename = new_id = new_entry_key = None
             # 5 cases, we dont have a value that is strictly greater than everything, so
             # we make both end conditions explicit
             if not current_old:
                 # old is finished: insert current_new into the state.
                 self.update_minimal(new_entry_key, current_new[1].kind,
-                    executable=current_new[1].executable, id_index=id_index)
+                    num_present_parents, executable=current_new[1].executable,
+                    id_index=id_index, path_utf8=new_path_utf8)
                 current_new = advance(new_iterator)
             elif not current_new:
                 # new is finished
-                import pdb;pdb.set_trace()
+                self._make_absent(num_present_parents, current_old, id_index)
+                current_old = advance(old_iterator)
             elif new_entry_key == current_old[0]:
                 # same -  common case
                 # TODO: update the record if anything significant has changed.
-                # the minimal required trigger is if the execute bit has
-                # changed.
-                if current_old[1][0][3] != current_new[1].executable:
-                    import pdb;pdb.set_trace()
+                # the minimal required trigger is if the execute bit or cached
+                # kind has changed.
+                if (current_old[1][0][3] != current_new[1].executable or
+                    current_old[1][0][0] != current_new[1].kind):
+                    self.update_minimal(current_old[0], current_new[1].kind,
+                        num_present_parents,
+                        executable=current_new[1].executable,
+                        id_index=id_index, path_utf8=new_path_utf8)
                 # both sides are dealt with, move on
                 current_old = advance(old_iterator)
                 current_new = advance(new_iterator)
             elif new_entry_key < current_old[0]:
-                # new comes before 
-                import pdb;pdb.set_trace()
+                # new comes before:
+                # add a entry for this and advance new
+                self.update_minimal(new_entry_key, current_new[1].kind,
+                    num_present_parents, executable=current_new[1].executable,
+                    id_index=id_index, path_utf8=new_path_utf8)
+                current_new = advance(new_iterator)
             else:
                 # old comes before:
-                # remove old from the state, advance old
-                # to remove old, we have two conditions.
-                # either its the last reference to this path that we are
-                # removing, or its not. If its the last reference, we remove
-                # the entire row and remove the path from the id mapping. If
-                # its not the last reference, we just set it to absent.
-                last_reference = True
-                for lookup_index in xrange(1, num_present_parents + 1):
-                    if current_old[1][lookup_index] not in ('absent', 'relocated'):
-                        last_reference = False
-                        break
-                if not last_reference:
-                    import pdb;pdb.set_trace()
-                    # common case, theres a parent at this path
-                    current_old[1][0] = DirState.NULL_PARENT_DETAILS
-                else:
-                    # there are no more references at this path
-                    id_index[current_old[0][2]].remove(current_old[0])
-                    # are there others (which will need to be changed
-                    # from relocated to absent for index 0)?
-                    if len(id_index[current_old[0][2]]):
-                        import pdb;pdb.set_trace()
-                    block = self._find_block(current_old[0])
-                    entry_index, present = self._find_entry_index(key, block)
-                    assert present
-                    block.pop(entry_index)
+                self._make_absent(num_present_parents, current_old, id_index)
                 current_old = advance(old_iterator)
         self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
-    def update_minimal(self, key, kind, executable=False, fingerprint='',
-        packed_stat=None, size=0, id_index=None):
+    def _make_absent(self, num_present_parents, current_old, id_index):
+        # remove old from the state, advance old
+        # to remove old, we have two conditions:
+        # either its the last reference to this path that we are
+        # removing, or its not. If its the last reference, we remove
+        # the entire row and remove the path from the id mapping. If
+        # its not the last reference, we just set it to absent.
+        last_reference = True
+        for lookup_index in xrange(1, num_present_parents + 1):
+            if current_old[1][lookup_index] not in ('absent', 'relocated'):
+                last_reference = False
+                break
+        if not last_reference:
+            # common case, theres a parent at this path
+            current_old[1][0] = DirState.NULL_PARENT_DETAILS
+        else:
+            # there are no more references at this path
+            id_index[current_old[0][2]].remove(current_old[0])
+            # are there others (which will need to be changed
+            # from relocated to absent for index 0)?
+            for update_key in id_index[current_old[0][2]]:
+                # update the entry for 0 to say absent: there is a parent at
+                # that path, but nothing in this tree for that file id anymore.
+                update_block_index, present = \
+                    self._find_block_index_from_key(update_key)
+                assert present
+                update_entry_index, present = \
+                    self._find_entry_index(update_key, self._dirblocks[update_block_index][1])
+                assert present
+                update_tree_details = self._dirblocks[update_block_index][1][update_entry_index][1]
+                # it must currently be relocated
+                assert update_tree_details[0][0] == 'relocated'
+                update_tree_details[0] = DirState.NULL_PARENT_DETAILS
+            block = self._find_block(current_old[0])
+            entry_index, present = self._find_entry_index(current_old[0], block[1])
+            assert present
+            block[1].pop(entry_index)
+
+    def update_minimal(self, key, kind, num_present_parents, executable=False,
+        fingerprint='', packed_stat=None, size=0, id_index=None,
+        path_utf8=None):
         """Update an entry to the state in tree 0."""
-        self._read_dirblocks_if_needed()
         if key[0:2] == ('', ''):
             block = self._root_entries
         else:
@@ -1269,22 +1298,82 @@
             packed_stat = DirState.NULLSTAT
         entry_index, present = self._find_entry_index(key, block)
         new_details = (kind, fingerprint, size, executable, packed_stat)
+        assert id_index, 'need an id index to do updates for now !'
         if not present:
             # new entry, synthesis cross reference here,
-            assert id_index, 'need an id index to generate a new entry'
-            existing_keys = id_index.setdefault(key, set())
+            existing_keys = id_index.setdefault(key[2], set())
             if not existing_keys:
                 # not currently in the state, simplest case
                 new_entry = key, [new_details] + self._empty_parent_info()
             else:
-                import pdb;pdb.set_trace()
+                # present at one or more existing other paths.
+                # grab one of them and use it to generate parent
+                # relocation/absent entries.
+                new_entry = key, [new_details]
+                for other_key in existing_keys:
+                    # change the record at other to be a pointer to this new
+                    # record. The loop looks similar to the change to
+                    # relocations when updating an existing record but its not:
+                    # the test for existing kinds is different: this can be
+                    # factored out to a helper though.
+                    other_block_index, present = self._find_block_index_from_key(other_key)
+                    assert present
+                    other_entry_index, present = self._find_entry_index(other_key, self._dirblocks[other_block_index][1])
+                    assert present
+                    self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
+                        ('relocated', path_utf8, 0, False, '')
 
+                for lookup_index in xrange(1, num_present_parents + 1):
+                    # grab any one entry, use it to find the right path.
+                    # TODO: optimise this to reduce memory use in highly 
+                    # fragmented situations by reusing the relocation
+                    # records.
+                    update_block_index, present = \
+                        self._find_block_index_from_key(other_key)
+                    assert present
+                    update_entry_index, present = \
+                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
+                    assert present
+                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
+                    if update_details[0] in ('relocated', 'absent'):
+                        # its a pointer or absent in lookup_index's tree, use
+                        # it as is.
+                        new_entry[1].append(update_details)
+                    else:
+                        # we have the right key, make a pointer to it.
+                        pointer_path = os.path.join(other_key[0:2])
+                        new_details.append(('relocated', pointer_path, 0, False, ''))
             block.insert(entry_index, new_entry)
             existing_keys.add(key)
         else:
             # Does the new state matter? 
-            import pdb;pdb.set_trace()
             block[entry_index][1][0] = new_details
+            # parents cannot be affected by what we do.
+            # other occurences of this id can be found 
+            # from the id index.
+            # ---
+            # tree index consistency: All other paths for this id in this tree
+            # index must point to the correct path. We have to loop here because
+            # we may have passed entries in the state with this file id already
+            # that were absent - where parent entries are - and they need to be
+            # converted to relocated.
+            assert path_utf8 is not None
+            for entry_key in id_index.setdefault(key[2], set()):
+                # TODO:PROFILING: It might be faster to just update
+                # rather than checking if we need to, and then overwrite
+                # the one we are located at.
+                if entry_key != key:
+                    # this file id is at a different path in one of the
+                    # other trees, so put absent pointers there
+                    # This is the vertical axis in the matrix, all pointing
+                    # to the real path.
+                    block_index, present = self._find_block_index_from_key(entry_key)
+                    assert present
+                    entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1])
+                    assert present
+                    self._dirblocks[block_index][1][entry_index][1][0] = \
+                        ('relocated', path_utf8, 0, False, '')
+
         self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
 

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-02-20 08:44:10 +0000
+++ b/bzrlib/workingtree_4.py	2007-02-20 20:04:47 +0000
@@ -282,15 +282,13 @@
         return state._get_entry(0, fileid_utf8=file_id, path_utf8=path)
 
     def get_file_sha1(self, file_id, path=None, stat_value=None):
-        #if not path:
-        #    path = self.inventory.id2path(file_id)
-        #    # now lookup row by path
-        row, parents = self._get_row(file_id=file_id, path=path)
-        assert row is not None, 'what error should this raise'
+        # check file id is valid unconditionally.
+        entry, parents = self._get_entry(file_id=file_id, path=path)
+        assert entry is not None, 'what error should this raise'
         # TODO:
         # if row stat is valid, use cached sha1, else, get a new sha1.
         if path is None:
-            path = (row[0] + '/' + row[1]).strip('/').decode('utf8')
+            path = os.path.join(entry[0][0:2]).decode('utf8')
         return self._hashcache.get_sha1(path, stat_value)
 
     def _get_inventory(self):
@@ -329,9 +327,8 @@
     def id2path(self, fileid):
         state = self.current_dirstate()
         fileid_utf8 = fileid.encode('utf8')
-        for row, parents in state._iter_rows():
-            if row[3] == fileid_utf8:
-                return (row[0] + '/' + row[1]).decode('utf8').strip('/')
+        key, tree_details = state._get_entry(0, fileid_utf8=fileid_utf8)
+        return os.path.join(*key[0:2]).decode('utf8')
 
     @needs_read_lock
     def __iter__(self):
@@ -341,12 +338,13 @@
         and the working file exists.
         """
         result = []
-        for row, parents in self.current_dirstate()._iter_rows():
-            if row[0] == '/':
+        for key, tree_details in self.current_dirstate()._iter_entries():
+            if tree_details[0][0] in ('absent', 'relocated'):
+                # not relevant to the working tree
                 continue
-            path = pathjoin(self.basedir, row[0].decode('utf8'), row[1].decode('utf8'))
+            path = pathjoin(self.basedir, key[0].decode('utf8'), key[1].decode('utf8'))
             if osutils.lexists(path):
-                result.append(row[3].decode('utf8'))
+                result.append(key[2].decode('utf8'))
         return iter(result)
 
     @needs_read_lock
@@ -522,11 +520,10 @@
     @needs_read_lock
     def path2id(self, path):
         """Return the id for path in this tree."""
-        state = self.current_dirstate()
-        row = self._get_row(path=path)
-        if row == (None, None):
+        entry = self._get_entry(path=path)
+        if entry == (None, None):
             return None
-        return row[0][3].decode('utf8')
+        return entry[0][2].decode('utf8')
 
     def read_working_inventory(self):
         """Read the working inventory.



More information about the bazaar-commits mailing list