Rev 2399: Simplify update_minimal a bit more, by making id_index a in http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate

John Arbash Meinel john at arbash-meinel.com
Sun Feb 25 22:08:51 GMT 2007


At http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate

------------------------------------------------------------
revno: 2399
revision-id: john at arbash-meinel.com-20070225220748-zhn0hzx6zo92vfcz
parent: robertc at robertcollins.net-20070225215219-xrrl2gv0hfw6ijzh
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Sun 2007-02-25 16:07:48 -0600
message:
  Simplify update_minimal a bit more, by making id_index a
  cached variable, which is updated on mutating operations
  also use this to implement a faster id2path, and add direct tests
  since id2path now uses a more complex implementation.
modified:
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/tests/test_workingtree_4.py test_workingtree_4.p-20070223025758-531n3tznl3zacv2o-1
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-02-25 21:08:22 +0000
+++ b/bzrlib/dirstate.py	2007-02-25 22:07:48 +0000
@@ -281,6 +281,7 @@
         self._state_file = None
         self._filename = path
         self._lock_token = None
+        self._id_index = None
         self._end_of_header = None
         self._bisect_page_size = DirState.BISECT_PAGE_SIZE
 
@@ -1162,7 +1163,8 @@
             assert entry[0][2] and entry[1][tree_index][0] not in ('a', 'r'), 'unversioned entry?!?!'
             if fileid_utf8:
                 if entry[0][2] != fileid_utf8:
-                    raise BzrError('integrity error ? : mismatching tree_index, file_id and path')
+                    raise errors.BzrError('integrity error ? : mismatching'
+                                          ' tree_index, file_id and path')
             return entry
         else:
             for entry in self._iter_entries():
@@ -1255,10 +1257,12 @@
 
     def _get_id_index(self):
         """Get an id index of self._dirblocks."""
-        id_index = {}
-        for key, tree_details in self._iter_entries():
-            id_index.setdefault(key[2], set()).add(key)
-        return id_index
+        if self._id_index is None:
+            id_index = {}
+            for key, tree_details in self._iter_entries():
+                id_index.setdefault(key[2], set()).add(key)
+            self._id_index = id_index
+        return self._id_index
 
     def _get_output_lines(self, lines):
         """format lines for final output.
@@ -1506,9 +1510,8 @@
         entry = self._get_entry(0, path_utf8=path)
         # mark the old path absent, and insert a new root path
         self._make_absent(entry)
-        id_index = self._get_id_index()
         self.update_minimal(('', '', new_id), 'd',
-            path_utf8='', id_index=id_index, packed_stat=entry[1][0][4])
+            path_utf8='', packed_stat=entry[1][0][4])
         self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
     def set_parent_trees(self, trees, ghosts):
@@ -1649,6 +1652,7 @@
         self._ghosts = list(ghosts)
         self._header_state = DirState.IN_MEMORY_MODIFIED
         self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+        self._id_index = id_index
 
     def set_state_from_inventory(self, new_inv):
         """Set new_inv as the current state. 
@@ -1660,9 +1664,6 @@
         """
         self._read_dirblocks_if_needed()
         # sketch:
-        #  generate a byid index of the dirstate
-        id_index = self._get_id_index()
-
         # incremental algorithm:
         # two iterators: current data and new data, both in dirblock order. 
         new_iterator = new_inv.iter_entries_by_dir()
@@ -1701,11 +1702,11 @@
                 # old is finished: insert current_new into the state.
                 self.update_minimal(new_entry_key, current_new_minikind,
                     executable=current_new[1].executable,
-                    id_index=id_index, path_utf8=new_path_utf8)
+                    path_utf8=new_path_utf8)
                 current_new = advance(new_iterator)
             elif not current_new:
                 # new is finished
-                self._make_absent(current_old, id_index)
+                self._make_absent(current_old)
                 current_old = advance(old_iterator)
             elif new_entry_key == current_old[0]:
                 # same -  common case
@@ -1716,7 +1717,7 @@
                     current_old[1][0][0] != current_new_minikind):
                     self.update_minimal(current_old[0], current_new_minikind,
                         executable=current_new[1].executable,
-                        id_index=id_index, path_utf8=new_path_utf8)
+                        path_utf8=new_path_utf8)
                 # both sides are dealt with, move on
                 current_old = advance(old_iterator)
                 current_new = advance(new_iterator)
@@ -1725,19 +1726,17 @@
                 # add a entry for this and advance new
                 self.update_minimal(new_entry_key, current_new_minikind,
                     executable=current_new[1].executable,
-                    id_index=id_index, path_utf8=new_path_utf8)
+                    path_utf8=new_path_utf8)
                 current_new = advance(new_iterator)
             else:
                 # old comes before:
-                self._make_absent(current_old, id_index)
+                self._make_absent(current_old)
                 current_old = advance(old_iterator)
         self._dirblock_state = DirState.IN_MEMORY_MODIFIED
 
-    def _make_absent(self, current_old, id_index=None):
+    def _make_absent(self, current_old):
         """Mark current_old - an entry - as absent for tree 0.
 
-        :param id_index: An index from fileid_utf8 to sets of keys, used by
-            some functions. If provided it will be updated if needed.
         :return: True if this was the last details entry for they entry key:
             that is, if the underlying block has had the entry removed, thus
             shrinking in length.
@@ -1763,8 +1762,8 @@
             assert present
             block[1].pop(entry_index)
             # if we have an id_index in use, remove this key from it for this id.
-            if id_index is not None:
-                id_index[current_old[0][2]].remove(current_old[0])
+            if self._id_index is not None:
+                self._id_index[current_old[0][2]].remove(current_old[0])
         # update all remaining keys for this id to record it as absent. The
         # existing details may either be the record we are making as deleted
         # (if there were other trees with the id present at this path), or may
@@ -1784,7 +1783,7 @@
         return last_reference
 
     def update_minimal(self, key, minikind, executable=False, fingerprint='',
-                       packed_stat=None, size=0, id_index=None, path_utf8=None):
+                       packed_stat=None, size=0, path_utf8=None):
         """Update an entry to the state in tree 0.
 
         This will either create a new entry at 'key' or update an existing one.
@@ -1798,8 +1797,6 @@
         :param fingerprint: Simple fingerprint for new entry.
         :param packed_stat: packed stat value for new entry.
         :param size: Size information for new entry
-        :param id_index: A mapping from file_id => key, as returned by
-                self._get_id_index
         :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
                 extra computation.
         """
@@ -1808,7 +1805,7 @@
             packed_stat = DirState.NULLSTAT
         entry_index, present = self._find_entry_index(key, block)
         new_details = (minikind, fingerprint, size, executable, packed_stat)
-        assert id_index is not None, 'need an id index to do updates for now !'
+        id_index = self._get_id_index()
         if not present:
             # new entry, synthesis cross reference here,
             existing_keys = id_index.setdefault(key[2], set())

=== modified file 'bzrlib/tests/test_workingtree_4.py'
--- a/bzrlib/tests/test_workingtree_4.py	2007-02-23 05:12:06 +0000
+++ b/bzrlib/tests/test_workingtree_4.py	2007-02-25 22:07:48 +0000
@@ -418,3 +418,19 @@
         tree.unlock()
         second_parent_tree.unlock()
         self.assertIsInstance(optimiser, workingtree_4.InterDirStateTree)
+
+    def test_id2path(self):
+        tree = self.make_workingtree('tree')
+        self.build_tree(['tree/a'])
+        tree.add(['a'], ['a-id'])
+        self.assertEqual(u'a', tree.id2path('a-id'))
+        self.assertIs(None, tree.id2path('a'))
+        tree.commit('a')
+
+        tree.rename_one('a', u'b\xb5rry')
+        self.assertEqual(u'b\xb5rry', tree.id2path('a-id'))
+        tree.commit(u'b\xb5rry')
+        tree.unversion(['a-id'])
+        self.assertEqual(None, tree.id2path('a-id'))
+        self.assertEqual(None, tree.id2path('b-id'))
+

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2007-02-25 21:08:22 +0000
+++ b/bzrlib/workingtree_4.py	2007-02-25 22:07:48 +0000
@@ -342,18 +342,22 @@
                     self.basedir, row[0].decode('utf8'), row[1].decode('utf8')))
 
     @needs_read_lock
-    def id2path(self, fileid):
-        fileid = osutils.safe_file_id(fileid)
-        inv = self._get_inventory()
-        return inv.id2path(fileid)
-        # TODO: jam 20070222 At present dirstate is very slow at id => path,
-        #       while inventory is very fast at it. So for now, just generate
-        #       the inventory and do the id => path check.
-        #       In the future, we want to make dirstate better at id=>path
-        #       checks so that we don't have to create the inventory.
-        # state = self.current_dirstate()
-        # key, tree_details = state._get_entry(0, fileid_utf8=fileid)
-        # return os.path.join(*key[0:2]).decode('utf8')
+    def id2path(self, file_id):
+        file_id = osutils.safe_file_id(file_id)
+        state = self.current_dirstate()
+        possible_dir_name_ids = state._get_id_index().get(file_id, None)
+        if not possible_dir_name_ids:
+            return None
+        for dir_name_id in possible_dir_name_ids:
+            (block_index, entry_index, dir_present,
+             file_present) = state._get_block_entry_index(dir_name_id[0],
+                                                          dir_name_id[1], 0)
+            if file_present:
+                entry = state._dirblocks[block_index][1][entry_index]
+                assert entry[1][0][0] not in ('a', 'r')
+                path_utf8 = osutils.pathjoin(entry[0][0], entry[0][1])
+                return path_utf8.decode('utf8')
+        return None
 
     @needs_read_lock
     def __iter__(self):
@@ -536,7 +540,6 @@
                         fingerprint=old_entry_details[0][1],
                         packed_stat=old_entry_details[0][4],
                         size=old_entry_details[0][2],
-                        id_index=state._get_id_index(),
                         path_utf8=from_rel.encode('utf8')))
                 # create new row in current block
                 state.update_minimal(to_key,
@@ -545,7 +548,6 @@
                         fingerprint=old_entry_details[0][1],
                         packed_stat=old_entry_details[0][4],
                         size=old_entry_details[0][2],
-                        id_index=state._get_id_index(),
                         path_utf8=to_rel.encode('utf8'))
                 added_entry_index, _ = state._find_entry_index(to_key, to_block[1])
                 new_entry = to_block[1][added_entry_index]



More information about the bazaar-commits mailing list