[MERGE] Unique roots for bzr

John Arbash Meinel john at arbash-meinel.com
Sun Oct 8 07:55:24 BST 2006


-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Aaron Bentley wrote:
> Hi all,
> 
> This patch implements unique roots for new trees.  It is based my
> original nested-trees work in May/June, which I have kept up-to-date
> with current bzr over the past months.
> 
> It's intended to have the most minimal effect on commandline behaviour.
>  The root is added as part of 'bzr init', so that you don't need to run
> 'bzr add .' in order to do an empty commit.
> 
> 
> It also has no root for the empty tree.  This is still a bone of
> contention between me and Robert.  Martin expressed a slight preference
> for this approach, while John said "whatever makes the code cleanest".
> 
> I believe it's cleanest to special case just the 'add' of the tree root,
> e.g. in transform.build_tree.  If the empty tree had a root, we would
> have to handle the fact that the first commit deletes the existing root
> and creates a new one.
> 
> So while support for rootless empty trees is by no means strong, it
> seems to be a slight majority, and I'm therefore leaving the code as is.
> 
> Aaron

v- What are these changes for? It seems like we should have only 1 api
for creating an inventory. And if anything this would have come from
before when bundles were a plugin, and so they had to conform to
multiple apis.

It would be better to just clean this up.

- -        root_id = base_inv.root.file_id
         try:
- -            # New inventories have a unique root_id
- -            inv = Inventory(root_id, self.revision_id)
+            inv = Inventory(None, self.revision_id)
         except TypeError:
             inv = Inventory(revision_id=self.revision_id)
- -        inv.root.revision = self.get_last_changed(root_id)

         def add_entry(file_id):
             path = self.id2path(file_id)
             if path is None:
                 return
- -            parent_path = dirname(path)
- -            if parent_path == u'':
- -                parent_id = root_id
+            if path == '':
+                parent_id = None
             else:
+                parent_path = dirname(path)
                 parent_id = self.path2id(parent_path)

             kind = self.get_kind(file_id)
@@ -690,8 +687,6 @@

         sorted_entries = self.sorted_path_id()
         for path, file_id in sorted_entries:
- -            if file_id == inv.root.file_id:
- -                continue
             add_entry(file_id)

         return inv

=== modified file 'bzrlib/bzrdir.py'
- --- bzrlib/bzrdir.py	2006-09-29 13:09:33 +0000
+++ bzrlib/bzrdir.py	2006-10-02 12:58:41 +0000
@@ -680,7 +680,12 @@
         # TODO: jam 20060426 we probably need a test in here in the
         #       case that the newly sprouted branch is a remote one
         if result_repo is None or result_repo.make_working_trees():
- -            result.create_workingtree()
+            wt = result.create_workingtree()
+            if wt.inventory.root is None:
+                try:
+                    wt.set_root_id(self.open_workingtree.get_root_id())
+                except errors.NoWorkingTree:
+                    pass
         return result

^- What is this doing? It certainly seems like it is doing something we
want to avoid. (Namely, creating a new disk file, and then reading it
back in). If this is necessary, then I think we have a problem elsewhere.


v- How can you get a root entry after you just skipped the first entry?

@@ -1650,6 +1655,8 @@
             entries = inv.iter_entries()
             entries.next()
             for path, ie in entries:
+                if inv.is_root(ie.file_id):
+                    continue
                 assert getattr(ie, 'revision', None) is not None, \
                     'no revision on {%s} in {%s}' % \
                     (file_id, rev.revision_id)
@@ -1668,9 +1675,10 @@
         mutter('converting texts of revision {%s}',
                rev_id)
         parent_invs = map(self._load_updated_inventory, present_parents)


v- And why here are you not skipping the first one, but you skip the
first one in other contexts? I know the overhead of a function call for
each file is more than a single skip, but I think you mentioned
something about 'is_root()' being able to return True for nested trees,
or something like that.

- -        entries = inv.iter_entries()
- -        entries.next()
- -        for path, ie in entries:
+        for file_id in inv:
+            if inv.is_root(file_id):
+                continue
+            ie = inv[file_id]
             self._convert_file_version(rev, ie, parent_invs)

     def _convert_file_version(self, rev, ie, parent_invs):

...


v- This looks like a change that was done for debugging that should be
reverted.
- -
- -            self.builder.record_entry_contents(ie, self.parent_invs,
- -                path, self.work_tree)
+            try:
+                self.builder.record_entry_contents(ie, self.parent_invs,
+                    path, self.work_tree)
+            except:
+                raise
+                raise repr((self.builder.new_inventory.root,
+                self.work_inv.root, ie))
             # describe the nature of the change that has occurred
relative to
             # the basis inventory.
             if (self.basis_inv.has_id(ie.file_id)):



...
v- Why are you updating a deprecated function? I can see updating the
call to changes_from, but it doesn't seem like you want to update the
compare_trees api.

 @deprecated_function(zero_nine)
 def compare_trees(old_tree, new_tree, want_unchanged=False,
                   specific_files=None, extra_trees=None,
- -                  require_versioned=False):
+                  require_versioned=False, include_root=False):
     """compare_trees was deprecated in 0.10. Please see
Tree.changes_from."""
     return new_tree.changes_from(old_tree,
         want_unchanged=want_unchanged,
         specific_files=specific_files,
         extra_trees=extra_trees,
- -        require_versioned=require_versioned)
+        require_versioned=require_versioned,
+        include_root=include_root)


 def _compare_trees(old_tree, new_tree, want_unchanged, specific_file_ids,
                    include_root):

     from osutils import is_inside_any
- -
     old_inv = old_tree.inventory
     new_inv = new_tree.inventory
     delta = TreeDelta()

=== modified file 'bzrlib/errors.py'
- --- bzrlib/errors.py	2006-09-27 20:27:46 +0000
+++ bzrlib/errors.py	2006-10-02 12:58:02 +0000
@@ -964,6 +964,20 @@
     """Tree transform is malformed %(conflicts)r"""

v- I think this is better done as:
class NoFinalPath(BzrNewError):
    """No final name for trans_id %(trans_id)r
file-id: %(file_id)r
root trans-id: %(root_trans_id)r
"""

(Which is why we want to change from using docstrings, but in the short
term, I think it is better to not overload str.)

+class NoFinalPath(BzrNewError):
+    """No final name for trans_id %(trans_id)r"""
+
+    def __init__(self, trans_id, transform):
+        self.trans_id = trans_id
+        self.file_id = transform.final_file_id(trans_id)
+        self.root_trans_id = transform.root
+
+    def __str__():
+        return (BzrNewError.str(self) +
+                "\nfile-id: %(file_id)r"
+                "\nroot trans-id: %(root_trans_id)r" % self.__dict__)
+
+
 class BzrBadParameter(BzrNewError):
     """A bad parameter : %(param)s is not usable.


=== modified file 'bzrlib/info.py'
- --- bzrlib/info.py	2006-09-08 18:52:17 +0000
+++ bzrlib/info.py	2006-10-02 12:58:03 +0000
@@ -210,9 +210,10 @@
     print '  %8d ignored' % ignore_cnt


v- Any reason you changed this to be a standard loop rather than just doing:
dir_cnt = sum(1 for path, ie in entries
              if ie.kind == 'directory' and not
work_inv.is_root(ie.file_id))

     dir_cnt = 0
- -    entries = work_inv.iter_entries()
- -    entries.next()
- -    dir_cnt = sum(1 for path, ie in entries if ie.kind == 'directory')
+    for file_id in work_inv:
+        if (work_inv.get_file_kind(file_id) == 'directory' and
+            not work_inv.is_root(file_id)):
+            dir_cnt += 1
     print '  %8d versioned %s' \
           % (dir_cnt,
              plural(dir_cnt, 'subdirectory', 'subdirectories'))


...

v- Is it possible for us to not default to an id with capital letters?
It means that we default to having a path which must be escaped in the
repository, which causes problems with older versions of Apache, etc.
One of the nice things about bzr-0.9 is that it sanitizes new file ids
to avoid these sorts of problems. It means that people might eventually
have problems with their setups, but we avoid provoking it from the start.

     >>> inv = Inventory('TREE_ROOT-12345678-12345678')
     >>> inv.add(InventoryFile('123-123', 'hello.c', ROOT_ID))
+    Traceback (most recent call last):
+    BzrError: parent_id {TREE_ROOT} not in inventory
+    >>> inv.add(InventoryFile('123-123', 'hello.c',
'TREE_ROOT-12345678-12345678'))
     InventoryFile('123-123', 'hello.c',
parent_id='TREE_ROOT-12345678-12345678', sha1=None, len=None)
     """
     def __init__(self, root_id=ROOT_ID, revision_id=None):

...

v- This seems pretty weird, what are you trying to do here?

+        try:
+            self.assertIs(dir.open_branch().last_revision(), None)
+        except errors.NotBranchError:
+            pass
         target = self.sproutOrSkip(dir, self.get_url('target'))
         self.assertNotEqual(dir.transport.base, target.transport.base)
+        # testing inventory isn't reasonable for repositories
         self.assertDirectoriesEqual(dir.root_transport,
target.root_transport,
                                     ['./.bzr/repository/inventory.knit',
+                                     './.bzr/inventory'
                                      ])
+        try:
+            # If we happen to have a tree, we'll guarantee everything
+            # except for the tree root is the same.
+            inventory_f = file(dir.transport.base+'inventory', 'rb')
+            self.assertContainsRe(inventory_f.read(),
+                                  '<inventory file_id="TREE_ROOT[^"]*"'
+                                  ' format="5">\n</inventory>\n')
+            inventory_f.close()
+        except IOError, e:
+            if e.errno != errno.ENOENT:
+                raise

^- I thought Knit2 would have an inventory format="6".


     def test_sprout_bzrdir_with_repository_to_shared(self):
         tree = self.make_branch_and_tree('commit_tree')

=== modified file
'bzrlib/tests/repository_implementations/test_fileid_involved.py'
- --- bzrlib/tests/repository_implementations/test_fileid_involved.py
2006-09-09 18:52:57 +0000
+++ bzrlib/tests/repository_implementations/test_fileid_involved.py
2006-10-02 12:58:13 +0000
@@ -176,9 +176,10 @@
     def fileids_altered_by_revision_ids(self, revision_ids):
         """This is a wrapper to strip TREE_ROOT if it occurs"""
         repo = self.branch.repository
+        root_id = self.branch.basis_tree().inventory.root.file_id
         result = repo.fileids_altered_by_revision_ids(revision_ids)
- -        if 'TREE_ROOT' in result:
- -            del result['TREE_ROOT']
+        if root_id in result:
+            del result[root_id]
         return result

^- Isn't this bogus? I would think that you need root_id to be in the
result of 'fileids_involved_by', considering you need to merge the root
inventory knit, or you lose information.


     def test_fileids_altered_by_revision_ids(self):

...


v- If a WorkingTree gets a root id at creation time, shouldn't a
MemoryTree do the same? In fact, the only tree that should be allowed to
not have a root id is the "empty" tree.

=== modified file 'bzrlib/tests/test_memorytree.py'
- --- bzrlib/tests/test_memorytree.py	2006-09-15 02:03:15 +0000
+++ bzrlib/tests/test_memorytree.py	2006-10-02 12:58:08 +0000
@@ -86,7 +86,8 @@
         branch = self.make_branch('branch')
         tree = MemoryTree.create_on_branch(branch)
         tree.lock_write()
- -        tree.add(['afile', 'adir'], None, ['file', 'directory'])
+        tree.add(['', 'afile', 'adir'], None,
+                 ['directory', 'file', 'directory'])
         self.assertEqual('afile', tree.id2path(tree.path2id('afile')))
         self.assertEqual('adir', tree.id2path(tree.path2id('adir')))
         self.assertFalse(tree.has_filename('afile'))
@@ -97,7 +98,8 @@
         branch = self.make_branch('branch')
         tree = MemoryTree.create_on_branch(branch)
         tree.lock_write()
- -        tree.add(['foo'], ids=['foo-id'], kinds=['file'])
+        tree.add(['', 'foo'], ids=['root-id', 'foo-id'],
+                  kinds=['directory', 'file'])
         tree.put_file_bytes_non_atomic('foo-id', 'barshoom')
         self.assertEqual('barshoom', tree.get_file('foo-id').read())
         tree.unlock()
@@ -106,7 +108,8 @@
         branch = self.make_branch('branch')
         tree = MemoryTree.create_on_branch(branch)
         tree.lock_write()
- -        tree.add(['foo'], ids=['foo-id'], kinds=['file'])
+        tree.add(['', 'foo'], ids=['root-id', 'foo-id'],
+                 kinds=['directory', 'file'])
         tree.put_file_bytes_non_atomic('foo-id', 'first-content')
         tree.put_file_bytes_non_atomic('foo-id', 'barshoom')
         self.assertEqual('barshoom', tree.get_file('foo-id').read())
@@ -121,7 +124,8 @@
         branch = self.make_branch('branch')
         tree = MemoryTree.create_on_branch(branch)
         tree.lock_write()
- -        tree.add(['foo'], ids=['foo-id'], kinds=['file'])
+        tree.add(['', 'foo'], ids=['root-id', 'foo-id'],
+                 kinds=['directory', 'file'])
         tree.put_file_bytes_non_atomic('foo-id', 'barshoom')
         revision_id = tree.commit('message baby')
         # the parents list for the tree should have changed.
@@ -136,7 +140,8 @@
         branch = self.make_branch('branch')
         tree = MemoryTree.create_on_branch(branch)
         tree.lock_write()
- -        tree.add(['foo'], ids=['foo-id'], kinds=['file'])
+        tree.add(['', 'foo'], ids=['root-id', 'foo-id'],
+                 kinds=['directory', 'file'])
         tree.unversion(['foo-id'])
         self.assertFalse(tree.has_id('foo-id'))
         tree.unlock()

...

It is a side thing, but if we are going to start using it, I think it is
important that 'bzrlib.workingtree.get_root_id()' return a nice file id.


So a few comments, but I think this is mergable.


John
=:->
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.5 (Darwin)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFFKKDcJdeBCYSNAAMRApnZAKCpeeqWhdoo6t5wsciLTGQmSSDHOQCfexxB
EY0DitNDraewFT+V7cTRxlw=
=aoAj
-----END PGP SIGNATURE-----





More information about the bazaar mailing list