[MERGE] lazy inventory writes

Richard Wilbur richard.wilbur at gmail.com
Thu Oct 12 19:18:04 BST 2006


On Thu, 2006-10-05 at 18:01 +1000, Robert Collins wrote:
> WorkingTree currently writes the inventory on nearly every mutation
> operation. This patch adds the facility for us to write it only on
> unlock, and updates set_root_id to use this. This combined with a minor
> change to tree initialisation gives us only one inventory write to disk
> during tree creation.
> 
> Soon to follow, something about lazy inventory reading.
> 
> Rob and John

This looks like a nice start in a good direction.


> === modified file
> 'bzrlib/tests/workingtree_implementations/test_locking.py'
> ---
> bzrlib/tests/workingtree_implementations/test_locking.py    2006-09-27
> 22:12:53 +0000
> +++
> bzrlib/tests/workingtree_implementations/test_locking.py    2006-10-05
> 06:58:02 +0000
> @@ -100,6 +100,52 @@
>          self.assertTrue(wt.branch.is_locked())
>          wt.branch.unlock()
>          
> +    def _test_unlock_with_lock_method(self, methodname):
> +        """Create a tree and then test its unlocking behaviour.
> +
> +        :param methodname: The lock method to use to establish locks.
> +        """
> +        # when unlocking the last lock count from tree_write_lock,
> +        # the tree should do a flush().
> +        # we test that by changing the inventory using set_root_id
> +        tree = self.make_branch_and_tree('tree')
> +        # prepare for a series of changes that will modify the 
> +        # inventory
> +        getattr(tree, methodname)()
> +        # note that we dont have a try:finally here because of two
> reasons:
> +        # firstly there will only be errors reported if the test
> fails, and 
> +        # when it fails thats ok as long as the test suite cleanup
> still works,
> +        # which it will as the lock objects are released (thats wher
> ethe 
                                                                   ^^^^
  ^^^^ expect you meant 'where the'


> +        # warning comes from.  Secondly, its hard in this test to be 
                                            ^^^
contraction for it is == it's (possessive pronoun == its)


> +        # sure that we've got the right interactions between
> try:finally
> +        # and the lock/unlocks we are doing.
> +        getattr(tree, methodname)()
> +        # this should really do something within the public api
> +        # e.g. mkdir('foo') but all the mutating methods at the
> +        # moment trigger inventory writes and thus will not 
> +        # let us trigger a read-when-dirty situation.
> +        old_root = tree.get_root_id()
> +        tree.set_root_id('new-root')
> +        # to detect that the inventory is written by unlock, we
> +        # first check that it was not written yet.
> +        reference_tree = tree.bzrdir.open_workingtree()
> +        self.assertEqual(old_root, reference_tree.get_root_id())
> +        # now unlock the second held lock, which should do nothing.
> +        tree.unlock()
> +        reference_tree = tree.bzrdir.open_workingtree()
> +        self.assertEqual(old_root, reference_tree.get_root_id())
> +        # unlocking the first lock we took will now flush.
> +        tree.unlock()
> +        # and check it was written using another reference tree
> +        reference_tree = tree.bzrdir.open_workingtree()
> +        self.assertEqual('new-root', reference_tree.get_root_id())
> 

The comments in this section contain a good number of well-punctuated
but un-capitalized sentences?  Some other comments amount to phrases
punctuated as sentences, and still others, un-punctuated sentences.

It is a nice working narrative, but it would read more easily with some
help.

> [...]


> === modified file 'bzrlib/workingtree.py'
> --- bzrlib/workingtree.py       2006-09-27 20:27:46 +0000
> +++ bzrlib/workingtree.py       2006-10-05 07:42:06 +0000

> @@ -301,9 +308,14 @@
>              hc.write()
>  
>          if _inventory is None:
> -            self._set_inventory(self.read_working_inventory())
> +            self._inventory_is_modified = False
> +            self.read_working_inventory()
>          else:
> -            self._set_inventory(_inventory)
> +            # the caller of __init__ has provided an inventory,
> +            # we assume they know what they are doing - as its only
> +            # the Format factory and creation methods that are
> +            # permitted to do this.
> +            self._set_inventory(_inventory, dirty=False)
>  
>      branch = property(
>          fget=lambda self: self._branch,
> @@ -324,9 +336,19 @@
>          self._control_files.break_lock()
>          self.branch.break_lock()
>  
> -    def _set_inventory(self, inv):
> +    def _set_inventory(self, inv, dirty):
> +        """Set the internal cached inventory.
> +
> +        :param inv: The inventory to set.
> +        :param dirty: A boolean indicating whether the inventory is
> the same
> +            logical inventory as whats on disk. If True the inventory
> is not
> +            the same and should be written to disk or data will be
> lost, if
> +            False then the inventory is the same as that on disk and
> any
> +            serialisation would be unneeded overhead.
> +        """
>          assert inv.root is not None
>          self._inventory = inv
> +        self._inventory_is_modified = dirty
>  

_set_inventory() parameter 'dirty' is used as a keyword parameter in
several invocations but not declared as one in method definition.  (What
should the default value be?)

[...]
> @@ -843,6 +865,17 @@
>          else:
>              return '?'
>  
> +    def flush(self):
> +        """Write the in memory inventory to disk."""

How about """Write the inventory from memory to disk."""?
Alternatively, """Commit the inventory to disk."""?

> +        # TODO: Maybe this should only write on dirty ?
> +        if self._control_files._lock_mode != 'w':
> +            raise errors.NotWriteLocked(self)
> +        sio = StringIO()
> +        bzrlib.xml5.serializer_v5.write_inventory(self._inventory,
> sio)
> +        sio.seek(0)
> +        self._control_files.put('inventory', sio)
> +        self._inventory_is_modified = False
> +
>      def list_files(self, include_root=False):
>          """Recursively list all files as (path, class, kind, id,
> entry).
>  
[...]
> @@ -1018,7 +1054,7 @@
>                              ["rename rolled back"])
>          except:
>              # restore the inventory on error
> -            self._set_inventory(orig_inv)
> +            self._set_inventory(orig_inv, dirty=original_modified)
>              raise
>          self._write_inventory(inv)
>          return result
> @@ -1464,12 +1500,21 @@
>          
>      @needs_read_lock
>      def read_working_inventory(self):
> -        """Read the working inventory."""
> +        """Read the working inventory.
> +        
> +        :raises errors.InventoryModified: When the current in memory
> +            inventory has been modified, read_working_inventory will
> +            fail.
The phrase "read_working_inventory will fail" seems a good deal less
specific than the goal of the declaration.
If you take the verb "raises" to begin the thought, it could read more
easily as
 :raises errors.InventoryModified: when the current inventory has
uncommitted modifications.

Another possibility,
 :raises errors.InventoryModified: when the inventory currently cached
in memory has uncommitted modifications.


> +        """
> +        # conceptually this should be an implementation detail of the
> tree. 
> +        # XXX: Deprecate this.
>          # ElementTree does its own conversion from UTF-8, so open in
>          # binary.
> +        if self._inventory_is_modified:
> +            raise errors.InventoryModified(self)
>          result = bzrlib.xml5.serializer_v5.read_inventory(
>              self._control_files.get('inventory'))
> -        self._set_inventory(result)
> +        self._set_inventory(result, dirty=False)
>          return result
>  
>      @needs_tree_write_lock
> @@ -1555,16 +1600,31 @@
>      @needs_tree_write_lock
>      def set_root_id(self, file_id):
>          """Set the root id for this tree."""
> -        inv = self.read_working_inventory()
> +        # for compatability 
> +        if file_id is None:
> +            symbol_versioning.warn(symbol_versioning.zero_twelve
> +                % 'WorkingTree.set_root_id with fileid=None',
> +                DeprecationWarning,
> +                stacklevel=3)
> +            file_id = ROOT_ID
> +        inv = self._inventory
>          orig_root_id = inv.root.file_id
> +        # TODO: it might be nice to exit early if there was nothing
> +        # to do, saving us from trigger a sync on unlock.
                                          ^- 'ing'
> +        self._inventory_is_modified = True
> +        # we preserve the root inventory entry object, but
> +        # unlinkit from the byid index
>          del inv._byid[inv.root.file_id]
>          inv.root.file_id = file_id
> +        # and link it into the index with the new changed id.
>          inv._byid[inv.root.file_id] = inv.root
> +        # and finally update all children to reference the new id.
           # Finally, update ...
> +        # XXX: this should be safe to just look at the root.children
           # XXX: It should ...
-or-
           # XXX: In this case, it should ...
> +        # list, not the WHOLE INVENTORY.
>          for fid in inv:
>              entry = inv[fid]
>              if entry.parent_id == orig_root_id:
>                  entry.parent_id = inv.root.file_id
> -        self._write_inventory(inv)
>  
>      def unlock(self):
>          """See Branch.unlock.
> @@ -1667,12 +1727,8 @@
>      @needs_tree_write_lock
>      def _write_inventory(self, inv):
>          """Write inventory as the current inventory."""
           """Set inv as the current inventory and commit to disk."""
-or-
           """Set inv as the current inventory and flush."""
> -        sio = StringIO()
> -        bzrlib.xml5.serializer_v5.write_inventory(inv, sio)
> -        sio.seek(0)
> -        self._control_files.put('inventory', sio)
> -        self._set_inventory(inv)
> -        mutter('wrote working inventory')
> +        self._set_inventory(inv, dirty=True)
> +        self.flush()
>  
>      def set_conflicts(self, arg):
>          raise UnsupportedOperation(self.set_conflicts, self)
[...]
> @@ -2038,8 +2100,10 @@
>                  wt.set_parent_trees([(revision_id, basis_tree)])
>              build_tree(basis_tree, wt)
>          finally:
> +            # unlock in this order so that the unlock-triggers-flush
> in
> +            # WorkingTree is given a chance to fire.
Without the first word of the sentence capitalized, it is natural to
look above in the code in an attempt to find the beginning of the
sentence.  If this is supposed to refer to the method named unlock(),
instead of the common English verb, the parentheses would help.
> 
> +            control_files.unlock()
>              wt.unlock()
> -            control_files.unlock()
>          return wt
>  
>      def __init__(self):

I like where this is going.  Look forward to your comments.

Sincerely,

Richard
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 191 bytes
Desc: This is a digitally signed message part
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20061012/f60cdef7/attachment.pgp 


More information about the bazaar mailing list