Rev 2399: Simplify update_minimal a bit more, by making id_index a in http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate
John Arbash Meinel
john at arbash-meinel.com
Sun Feb 25 22:08:51 GMT 2007
At http://bazaar.launchpad.net/%7Ebzr/bzr/dirstate
------------------------------------------------------------
revno: 2399
revision-id: john at arbash-meinel.com-20070225220748-zhn0hzx6zo92vfcz
parent: robertc at robertcollins.net-20070225215219-xrrl2gv0hfw6ijzh
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate
timestamp: Sun 2007-02-25 16:07:48 -0600
message:
Simplify update_minimal a bit more, by making id_index a
cached variable, which is updated on mutating operations
also use this to implement a faster id2path, and add direct tests
since id2path now uses a more complex implementation.
modified:
bzrlib/dirstate.py dirstate.py-20060728012006-d6mvoihjb3je9peu-1
bzrlib/tests/test_workingtree_4.py test_workingtree_4.p-20070223025758-531n3tznl3zacv2o-1
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
-------------- next part --------------
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2007-02-25 21:08:22 +0000
+++ b/bzrlib/dirstate.py 2007-02-25 22:07:48 +0000
@@ -281,6 +281,7 @@
self._state_file = None
self._filename = path
self._lock_token = None
+ self._id_index = None
self._end_of_header = None
self._bisect_page_size = DirState.BISECT_PAGE_SIZE
@@ -1162,7 +1163,8 @@
assert entry[0][2] and entry[1][tree_index][0] not in ('a', 'r'), 'unversioned entry?!?!'
if fileid_utf8:
if entry[0][2] != fileid_utf8:
- raise BzrError('integrity error ? : mismatching tree_index, file_id and path')
+ raise errors.BzrError('integrity error ? : mismatching'
+ ' tree_index, file_id and path')
return entry
else:
for entry in self._iter_entries():
@@ -1255,10 +1257,12 @@
def _get_id_index(self):
"""Get an id index of self._dirblocks."""
- id_index = {}
- for key, tree_details in self._iter_entries():
- id_index.setdefault(key[2], set()).add(key)
- return id_index
+ if self._id_index is None:
+ id_index = {}
+ for key, tree_details in self._iter_entries():
+ id_index.setdefault(key[2], set()).add(key)
+ self._id_index = id_index
+ return self._id_index
def _get_output_lines(self, lines):
"""format lines for final output.
@@ -1506,9 +1510,8 @@
entry = self._get_entry(0, path_utf8=path)
# mark the old path absent, and insert a new root path
self._make_absent(entry)
- id_index = self._get_id_index()
self.update_minimal(('', '', new_id), 'd',
- path_utf8='', id_index=id_index, packed_stat=entry[1][0][4])
+ path_utf8='', packed_stat=entry[1][0][4])
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
def set_parent_trees(self, trees, ghosts):
@@ -1649,6 +1652,7 @@
self._ghosts = list(ghosts)
self._header_state = DirState.IN_MEMORY_MODIFIED
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
+ self._id_index = id_index
def set_state_from_inventory(self, new_inv):
"""Set new_inv as the current state.
@@ -1660,9 +1664,6 @@
"""
self._read_dirblocks_if_needed()
# sketch:
- # generate a byid index of the dirstate
- id_index = self._get_id_index()
-
# incremental algorithm:
# two iterators: current data and new data, both in dirblock order.
new_iterator = new_inv.iter_entries_by_dir()
@@ -1701,11 +1702,11 @@
# old is finished: insert current_new into the state.
self.update_minimal(new_entry_key, current_new_minikind,
executable=current_new[1].executable,
- id_index=id_index, path_utf8=new_path_utf8)
+ path_utf8=new_path_utf8)
current_new = advance(new_iterator)
elif not current_new:
# new is finished
- self._make_absent(current_old, id_index)
+ self._make_absent(current_old)
current_old = advance(old_iterator)
elif new_entry_key == current_old[0]:
# same - common case
@@ -1716,7 +1717,7 @@
current_old[1][0][0] != current_new_minikind):
self.update_minimal(current_old[0], current_new_minikind,
executable=current_new[1].executable,
- id_index=id_index, path_utf8=new_path_utf8)
+ path_utf8=new_path_utf8)
# both sides are dealt with, move on
current_old = advance(old_iterator)
current_new = advance(new_iterator)
@@ -1725,19 +1726,17 @@
# add a entry for this and advance new
self.update_minimal(new_entry_key, current_new_minikind,
executable=current_new[1].executable,
- id_index=id_index, path_utf8=new_path_utf8)
+ path_utf8=new_path_utf8)
current_new = advance(new_iterator)
else:
# old comes before:
- self._make_absent(current_old, id_index)
+ self._make_absent(current_old)
current_old = advance(old_iterator)
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
- def _make_absent(self, current_old, id_index=None):
+ def _make_absent(self, current_old):
"""Mark current_old - an entry - as absent for tree 0.
- :param id_index: An index from fileid_utf8 to sets of keys, used by
- some functions. If provided it will be updated if needed.
:return: True if this was the last details entry for they entry key:
that is, if the underlying block has had the entry removed, thus
shrinking in length.
@@ -1763,8 +1762,8 @@
assert present
block[1].pop(entry_index)
# if we have an id_index in use, remove this key from it for this id.
- if id_index is not None:
- id_index[current_old[0][2]].remove(current_old[0])
+ if self._id_index is not None:
+ self._id_index[current_old[0][2]].remove(current_old[0])
# update all remaining keys for this id to record it as absent. The
# existing details may either be the record we are making as deleted
# (if there were other trees with the id present at this path), or may
@@ -1784,7 +1783,7 @@
return last_reference
def update_minimal(self, key, minikind, executable=False, fingerprint='',
- packed_stat=None, size=0, id_index=None, path_utf8=None):
+ packed_stat=None, size=0, path_utf8=None):
"""Update an entry to the state in tree 0.
This will either create a new entry at 'key' or update an existing one.
@@ -1798,8 +1797,6 @@
:param fingerprint: Simple fingerprint for new entry.
:param packed_stat: packed stat value for new entry.
:param size: Size information for new entry
- :param id_index: A mapping from file_id => key, as returned by
- self._get_id_index
:param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
extra computation.
"""
@@ -1808,7 +1805,7 @@
packed_stat = DirState.NULLSTAT
entry_index, present = self._find_entry_index(key, block)
new_details = (minikind, fingerprint, size, executable, packed_stat)
- assert id_index is not None, 'need an id index to do updates for now !'
+ id_index = self._get_id_index()
if not present:
# new entry, synthesis cross reference here,
existing_keys = id_index.setdefault(key[2], set())
=== modified file 'bzrlib/tests/test_workingtree_4.py'
--- a/bzrlib/tests/test_workingtree_4.py 2007-02-23 05:12:06 +0000
+++ b/bzrlib/tests/test_workingtree_4.py 2007-02-25 22:07:48 +0000
@@ -418,3 +418,19 @@
tree.unlock()
second_parent_tree.unlock()
self.assertIsInstance(optimiser, workingtree_4.InterDirStateTree)
+
+ def test_id2path(self):
+ tree = self.make_workingtree('tree')
+ self.build_tree(['tree/a'])
+ tree.add(['a'], ['a-id'])
+ self.assertEqual(u'a', tree.id2path('a-id'))
+ self.assertIs(None, tree.id2path('a'))
+ tree.commit('a')
+
+ tree.rename_one('a', u'b\xb5rry')
+ self.assertEqual(u'b\xb5rry', tree.id2path('a-id'))
+ tree.commit(u'b\xb5rry')
+ tree.unversion(['a-id'])
+ self.assertEqual(None, tree.id2path('a-id'))
+ self.assertEqual(None, tree.id2path('b-id'))
+
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2007-02-25 21:08:22 +0000
+++ b/bzrlib/workingtree_4.py 2007-02-25 22:07:48 +0000
@@ -342,18 +342,22 @@
self.basedir, row[0].decode('utf8'), row[1].decode('utf8')))
@needs_read_lock
- def id2path(self, fileid):
- fileid = osutils.safe_file_id(fileid)
- inv = self._get_inventory()
- return inv.id2path(fileid)
- # TODO: jam 20070222 At present dirstate is very slow at id => path,
- # while inventory is very fast at it. So for now, just generate
- # the inventory and do the id => path check.
- # In the future, we want to make dirstate better at id=>path
- # checks so that we don't have to create the inventory.
- # state = self.current_dirstate()
- # key, tree_details = state._get_entry(0, fileid_utf8=fileid)
- # return os.path.join(*key[0:2]).decode('utf8')
+ def id2path(self, file_id):
+ file_id = osutils.safe_file_id(file_id)
+ state = self.current_dirstate()
+ possible_dir_name_ids = state._get_id_index().get(file_id, None)
+ if not possible_dir_name_ids:
+ return None
+ for dir_name_id in possible_dir_name_ids:
+ (block_index, entry_index, dir_present,
+ file_present) = state._get_block_entry_index(dir_name_id[0],
+ dir_name_id[1], 0)
+ if file_present:
+ entry = state._dirblocks[block_index][1][entry_index]
+ assert entry[1][0][0] not in ('a', 'r')
+ path_utf8 = osutils.pathjoin(entry[0][0], entry[0][1])
+ return path_utf8.decode('utf8')
+ return None
@needs_read_lock
def __iter__(self):
@@ -536,7 +540,6 @@
fingerprint=old_entry_details[0][1],
packed_stat=old_entry_details[0][4],
size=old_entry_details[0][2],
- id_index=state._get_id_index(),
path_utf8=from_rel.encode('utf8')))
# create new row in current block
state.update_minimal(to_key,
@@ -545,7 +548,6 @@
fingerprint=old_entry_details[0][1],
packed_stat=old_entry_details[0][4],
size=old_entry_details[0][2],
- id_index=state._get_id_index(),
path_utf8=to_rel.encode('utf8'))
added_entry_index, _ = state._find_entry_index(to_key, to_block[1])
new_entry = to_block[1][added_entry_index]
More information about the bazaar-commits
mailing list