Rev 4588: (jam) Get bundles working with --2a (bug #393349) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Tue Aug 4 17:20:12 BST 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4588 [merge]
revision-id: pqm at pqm.ubuntu.com-20090804162005-kyldsbg8c018fknc
parent: pqm at pqm.ubuntu.com-20090804144859-bgjydda2yp4422it
parent: john at arbash-meinel.com-20090804141009-uety2n17v1atk5ok
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Tue 2009-08-04 17:20:05 +0100
message:
  (jam) Get bundles working with --2a (bug #393349)
added:
  bzrlib/tests/per_repository/test_merge_directive.py test_send.py-20090717144100-x6fgufcynx6yu5b6-1
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/bundle/serializer/v4.py v10.py-20070611062757-5ggj7k18s9dej0fr-1
  bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
  bzrlib/chk_serializer.py       chk_serializer.py-20081002064345-2tofdfj2eqq01h4b-1
  bzrlib/send.py                 send.py-20090521192735-j7cdb33ykmtmzx4w-1
  bzrlib/serializer.py           serializer.py-20090402143702-wmkh9cfjhwpju0qi-1
  bzrlib/tests/per_repository/__init__.py __init__.py-20060131092037-9564957a7d4a841b
  bzrlib/tests/test_bundle.py    test.py-20050630184834-092aa401ab9f039c
=== modified file 'NEWS'
--- a/NEWS	2009-08-04 12:35:07 +0000
+++ b/NEWS	2009-08-04 14:10:09 +0000
@@ -54,6 +54,12 @@
 * ``bzr revert .`` no longer generates an InconsistentDelta error when
   there are missing subtrees. (Robert Collins, #367632)
 
+* ``bzr send`` now generates valid bundles with ``--2a`` formats. However,
+  due to internal changes necessary to support this, older clients will
+  fail when trying to insert them. For newer clients, the bundle can be
+  used to apply the changes to any rich-root compatible format.
+  (John Arbash Meinel, #393349)
+
 * Cope with FTP servers that don't support restart/append by falling back
   to reading and then rewriting the whole file, such as TahoeLAFS.  (This
   fallback may be slow for some access patterns.)  (Nils Durner, #294709)

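As a rough illustration of what the NEWS entry above means at the API level, here is a minimal sketch that creates a merge directive (the object behind ``bzr send``) from one branch and installs its revisions into another repository, mirroring the new per_repository test added later in this patch. The 'source' and 'target' locations and the use of time.time() are placeholders, not part of the patch.

    import time

    from bzrlib import branch, merge_directive

    source = branch.Branch.open('source')   # placeholder path
    target = branch.Branch.open('target')   # placeholder path

    directive = merge_directive.MergeDirective2.from_objects(
        source.repository, source.last_revision(),
        time=time.time(), timezone=0,
        target_branch=target.base)

    # Installing the revisions copies the bundled data into the target,
    # which can be any rich-root compatible format.
    directive.install_revisions(target.repository)
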
=== modified file 'bzrlib/bundle/serializer/v4.py'
--- a/bzrlib/bundle/serializer/v4.py	2009-06-10 03:56:49 +0000
+++ b/bzrlib/bundle/serializer/v4.py	2009-08-04 14:08:32 +0000
@@ -22,12 +22,14 @@
     diff,
     errors,
     iterablefile,
+    lru_cache,
     multiparent,
     osutils,
     pack,
     revision as _mod_revision,
+    serializer,
     trace,
-    serializer,
+    ui,
     )
 from bzrlib.bundle import bundle_data, serializer as bundle_serializer
 from bzrlib import bencode
@@ -315,12 +317,83 @@
     def write_revisions(self):
         """Write bundle records for all revisions and signatures"""
         inv_vf = self.repository.inventories
-        revision_order = [key[-1] for key in multiparent.topo_iter_keys(inv_vf,
-            self.revision_keys)]
+        topological_order = [key[-1] for key in multiparent.topo_iter_keys(
+                                inv_vf, self.revision_keys)]
+        revision_order = topological_order
         if self.target is not None and self.target in self.revision_ids:
+            # Make sure the target revision is always the last entry
+            revision_order = list(topological_order)
             revision_order.remove(self.target)
             revision_order.append(self.target)
-        self._add_mp_records_keys('inventory', inv_vf, [(revid,) for revid in revision_order])
+        if self.repository._serializer.support_altered_by_hack:
+            # For repositories with support_altered_by_hack set,
+            # inventories.make_mpdiffs() contains all the data about the tree
+            # shape. Formats without support_altered_by_hack require
+            # chk_bytes/etc, so we use a different code path.
+            self._add_mp_records_keys('inventory', inv_vf,
+                                      [(revid,) for revid in topological_order])
+        else:
+            # Inventories should always be added in pure-topological order, so
+            # that we can apply the mpdiff for the child to the parent texts.
+            self._add_inventory_mpdiffs_from_serializer(topological_order)
+        self._add_revision_texts(revision_order)
+
+    def _add_inventory_mpdiffs_from_serializer(self, revision_order):
+        """Generate mpdiffs by serializing inventories.
+
+        The current repository only has part of the tree shape information in
+        the 'inventories' vf. So we use serializer.write_inventory_to_string to
+        get a 'full' representation of the tree shape, and then generate
+        mpdiffs on that data stream. This stream can then be reconstructed on
+        the other side.
+        """
+        inventory_key_order = [(r,) for r in revision_order]
+        parent_map = self.repository.inventories.get_parent_map(
+                            inventory_key_order)
+        missing_keys = set(inventory_key_order).difference(parent_map)
+        if missing_keys:
+            raise errors.RevisionNotPresent(list(missing_keys)[0],
+                                            self.repository.inventories)
+        inv_to_str = self.repository._serializer.write_inventory_to_string
+        # Make sure that we grab the parent texts first
+        just_parents = set()
+        map(just_parents.update, parent_map.itervalues())
+        just_parents.difference_update(parent_map)
+        # Ignore ghost parents
+        present_parents = self.repository.inventories.get_parent_map(
+                            just_parents)
+        ghost_keys = just_parents.difference(present_parents)
+        needed_inventories = list(present_parents) + inventory_key_order
+        needed_inventories = [k[-1] for k in needed_inventories]
+        all_lines = {}
+        for inv in self.repository.iter_inventories(needed_inventories):
+            revision_id = inv.revision_id
+            key = (revision_id,)
+            as_bytes = inv_to_str(inv)
+            # The sha1 is validated against the xml/textual form, not the
+            # form-in-the-repository.
+            sha1 = osutils.sha_string(as_bytes)
+            as_lines = osutils.split_lines(as_bytes)
+            del as_bytes
+            all_lines[key] = as_lines
+            if key in just_parents:
+                # We don't transmit those entries
+                continue
+            # Create an mpdiff for this text, and add it to the output
+            parent_keys = parent_map[key]
+            # See the comment in VF.make_mpdiffs about how this affects
+            # ordering when there are ghosts present. I think we have a latent
+            # bug
+            parent_lines = [all_lines[p_key] for p_key in parent_keys
+                            if p_key not in ghost_keys]
+            diff = multiparent.MultiParent.from_lines(
+                as_lines, parent_lines)
+            text = ''.join(diff.to_patch())
+            parent_ids = [k[-1] for k in parent_keys]
+            self.bundle.add_multiparent_record(text, sha1, parent_ids,
+                                               'inventory', revision_id, None)
+
+    def _add_revision_texts(self, revision_order):
         parent_map = self.repository.get_parent_map(revision_order)
         revision_to_str = self.repository._serializer.write_revision_to_string
         revisions = self.repository.get_revisions(revision_order)
@@ -543,30 +616,104 @@
             vf_records.append((key, parents, meta['sha1'], d_func(text)))
         versionedfile.add_mpdiffs(vf_records)
 
+    def _get_parent_inventory_texts(self, inventory_text_cache,
+                                    inventory_cache, parent_ids):
+        cached_parent_texts = {}
+        remaining_parent_ids = []
+        for parent_id in parent_ids:
+            p_text = inventory_text_cache.get(parent_id, None)
+            if p_text is None:
+                remaining_parent_ids.append(parent_id)
+            else:
+                cached_parent_texts[parent_id] = p_text
+        ghosts = ()
+        # TODO: Use inventory_cache to grab inventories we already have in
+        #       memory
+        if remaining_parent_ids:
+            # first determine what keys are actually present in the local
+            # inventories object (don't use revisions as they haven't been
+            # installed yet.)
+            parent_keys = [(r,) for r in remaining_parent_ids]
+            present_parent_map = self._repository.inventories.get_parent_map(
+                                        parent_keys)
+            present_parent_ids = []
+            ghosts = set()
+            for p_id in remaining_parent_ids:
+                if (p_id,) in present_parent_map:
+                    present_parent_ids.append(p_id)
+                else:
+                    ghosts.add(p_id)
+            to_string = self._source_serializer.write_inventory_to_string
+            for parent_inv in self._repository.iter_inventories(
+                                    present_parent_ids):
+                p_text = to_string(parent_inv)
+                inventory_cache[parent_inv.revision_id] = parent_inv
+                cached_parent_texts[parent_inv.revision_id] = p_text
+                inventory_text_cache[parent_inv.revision_id] = p_text
+
+        parent_texts = [cached_parent_texts[parent_id]
+                        for parent_id in parent_ids
+                         if parent_id not in ghosts]
+        return parent_texts
+
     def _install_inventory_records(self, records):
-        if self._info['serializer'] == self._repository._serializer.format_num:
+        if (self._info['serializer'] == self._repository._serializer.format_num
+            and self._repository._serializer.support_altered_by_hack):
             return self._install_mp_records_keys(self._repository.inventories,
                 records)
-        for key, metadata, bytes in records:
-            revision_id = key[-1]
-            parent_ids = metadata['parents']
-            parents = [self._repository.get_inventory(p)
-                       for p in parent_ids]
-            p_texts = [self._source_serializer.write_inventory_to_string(p)
-                       for p in parents]
-            target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
-                p_texts)
-            sha1 = osutils.sha_strings(target_lines)
-            if sha1 != metadata['sha1']:
-                raise errors.BadBundle("Can't convert to target format")
-            target_inv = self._source_serializer.read_inventory_from_string(
-                ''.join(target_lines))
-            self._handle_root(target_inv, parent_ids)
-            try:
-                self._repository.add_inventory(revision_id, target_inv,
-                                               parent_ids)
-            except errors.UnsupportedInventoryKind:
-                raise errors.IncompatibleRevision(repr(self._repository))
+        # Use a 10MB text cache, since these are string xml inventories. Note
+        # that 10MB is fairly small for large projects (a single inventory can
+        # be >5MB). Another possibility is to cache 10-20 inventory texts
+        # instead
+        inventory_text_cache = lru_cache.LRUSizeCache(10*1024*1024)
+        # Also cache the in-memory representation. This allows us to create
+        # inventory deltas to apply rather than calling add_inventory from
+        # scratch each time.
+        inventory_cache = lru_cache.LRUCache(10)
+        pb = ui.ui_factory.nested_progress_bar()
+        try:
+            num_records = len(records)
+            for idx, (key, metadata, bytes) in enumerate(records):
+                pb.update('installing inventory', idx, num_records)
+                revision_id = key[-1]
+                parent_ids = metadata['parents']
+                # Note: This assumes the local ghosts are identical to the
+                #       ghosts in the source, as the Bundle serialization
+                #       format doesn't record ghosts.
+                p_texts = self._get_parent_inventory_texts(inventory_text_cache,
+                                                           inventory_cache,
+                                                           parent_ids)
+                # Why does to_lines() take strings as the source? It seems
+                # that it would have to cast them to a list of lines, which
+                # we get back as lines and then cast back to a string.
+                target_lines = multiparent.MultiParent.from_patch(bytes
+                            ).to_lines(p_texts)
+                inv_text = ''.join(target_lines)
+                del target_lines
+                sha1 = osutils.sha_string(inv_text)
+                if sha1 != metadata['sha1']:
+                    raise errors.BadBundle("Can't convert to target format")
+                # Add this to the cache so we don't have to extract it again.
+                inventory_text_cache[revision_id] = inv_text
+                target_inv = self._source_serializer.read_inventory_from_string(
+                    inv_text)
+                self._handle_root(target_inv, parent_ids)
+                parent_inv = None
+                if parent_ids:
+                    parent_inv = inventory_cache.get(parent_ids[0], None)
+                try:
+                    if parent_inv is None:
+                        self._repository.add_inventory(revision_id, target_inv,
+                                                       parent_ids)
+                    else:
+                        delta = target_inv._make_delta(parent_inv)
+                        self._repository.add_inventory_by_delta(parent_ids[0],
+                            delta, revision_id, parent_ids)
+                except errors.UnsupportedInventoryKind:
+                    raise errors.IncompatibleRevision(repr(self._repository))
+                inventory_cache[revision_id] = target_inv
+        finally:
+            pb.finished()
 
     def _handle_root(self, target_inv, parent_ids):
         revision_id = target_inv.revision_id

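The heart of the new writer and installer paths above is a round trip through multiparent diffs of the serialized inventory text. Below is a small self-contained sketch of that round trip, using only calls that appear in the patch; the two inventory strings are illustrative stand-ins for write_inventory_to_string output.

    from bzrlib import multiparent, osutils

    parent_text = '<inventory revision_id="parent-rev">\n</inventory>\n'
    child_text = '<inventory revision_id="child-rev">\n</inventory>\n'

    # Writer side: diff the child text against its parent text(s) and note
    # the sha1 of the child so the receiver can validate its reconstruction.
    diff = multiparent.MultiParent.from_lines(
        osutils.split_lines(child_text), [osutils.split_lines(parent_text)])
    patch = ''.join(diff.to_patch())
    expected_sha1 = osutils.sha_string(child_text)

    # Reader side: rebuild the child text from the patch plus the parent
    # texts (passed as strings, the way _install_inventory_records does).
    rebuilt = ''.join(
        multiparent.MultiParent.from_patch(patch).to_lines([parent_text]))
    assert osutils.sha_string(rebuilt) == expected_sha1
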
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2009-07-16 23:28:49 +0000
+++ b/bzrlib/chk_map.py	2009-08-04 14:10:09 +0000
@@ -60,6 +60,9 @@
 # We are caching bytes so len(value) is perfectly accurate
 _page_cache = lru_cache.LRUSizeCache(_PAGE_CACHE_SIZE)
 
+def clear_cache():
+    _page_cache.clear()
+
 # If a ChildNode falls below this many bytes, we check for a remap
 _INTERESTING_NEW_SIZE = 50
 # If a ChildNode shrinks by more than this amount, we check for a remap

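The new clear_cache() hook simply empties the module-level CHK page cache. The new per_repository test calls it before installing a merge directive so the CHK pages have to be rebuilt from the transmitted data instead of being served from memory; a minimal sketch of that usage:

    from bzrlib import chk_map

    # Drop all cached CHK pages; subsequent reads must come from the
    # repository (or from freshly installed bundle contents).
    chk_map.clear_cache()
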
=== modified file 'bzrlib/chk_serializer.py'
--- a/bzrlib/chk_serializer.py	2009-07-01 10:46:27 +0000
+++ b/bzrlib/chk_serializer.py	2009-07-22 20:22:21 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2008 Canonical Ltd
+# Copyright (C) 2008, 2009 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -21,8 +21,8 @@
     cache_utf8,
     inventory,
     revision as _mod_revision,
-    xml5,
     xml6,
+    xml7,
     )
 
 
@@ -131,7 +131,7 @@
         return self.read_revision_from_string(f.read())
 
 
-class CHKSerializerSubtree(BEncodeRevisionSerializer1, xml6.Serializer_v6):
+class CHKSerializerSubtree(BEncodeRevisionSerializer1, xml7.Serializer_v7):
     """A CHKInventory based serializer that supports tree references"""
 
     supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])
@@ -152,14 +152,14 @@
             return inventory.TreeReference(file_id, name, parent_id, revision,
                                            reference_revision)
         else:
-            return xml6.Serializer_v6._unpack_entry(self, elt)
+            return xml7.Serializer_v7._unpack_entry(self, elt)
 
     def __init__(self, node_size, search_key_name):
         self.maximum_size = node_size
         self.search_key_name = search_key_name
 
 
-class CHKSerializer(xml5.Serializer_v5):
+class CHKSerializer(xml6.Serializer_v6):
     """A CHKInventory based serializer with 'plain' behaviour."""
 
     format_num = '9'

=== modified file 'bzrlib/send.py'
--- a/bzrlib/send.py	2009-07-15 07:32:26 +0000
+++ b/bzrlib/send.py	2009-07-17 14:41:02 +0000
@@ -77,6 +77,9 @@
                        submit_branch)
 
         if mail_to is None or format is None:
+            # TODO: jam 20090716 we open the submit_branch here, but we *don't*
+            #       pass it down into the format creation, so it will have to
+            #       open it again
             submit_br = Branch.open(submit_branch)
             submit_config = submit_br.get_config()
             if mail_to is None:
@@ -126,7 +129,6 @@
         if revision_id == NULL_REVISION:
             raise errors.BzrCommandError('No revisions to submit.')
         if format is None:
-            # TODO: Query submit branch for its preferred format
             format = format_registry.get()
         directive = format(branch, revision_id, submit_branch,
             public_branch, no_patch, no_bundle, message, base_revision_id)

=== modified file 'bzrlib/serializer.py'
--- a/bzrlib/serializer.py	2009-06-15 19:04:38 +0000
+++ b/bzrlib/serializer.py	2009-07-29 17:44:34 +0000
@@ -27,10 +27,26 @@
     squashes_xml_invalid_characters = False
 
     def write_inventory(self, inv, f):
-        """Write inventory to a file"""
+        """Write inventory to a file.
+
+        Note: this is a *whole inventory* operation, and should only be used
+        sparingly, as it does not scale well with large trees.
+        """
         raise NotImplementedError(self.write_inventory)
 
     def write_inventory_to_string(self, inv):
+        """Produce a simple string representation of an inventory.
+
+        Note: this is a *whole inventory* operation, and should only be used
+        sparingly, as it does not scale well with large trees.
+
+        The requirement for the contents of the string is that it can be passed
+        to read_inventory_from_string and the result is an identical inventory
+        in memory.
+
+        (All serializers as of 2009-07-29 produce XML, but this is not mandated
+        by the interface.)
+        """
         raise NotImplementedError(self.write_inventory_to_string)
 
     def read_inventory_from_string(self, string, revision_id=None,
@@ -52,6 +68,7 @@
         raise NotImplementedError(self.read_inventory_from_string)
 
     def read_inventory(self, f, revision_id=None):
+        """See read_inventory_from_string."""
         raise NotImplementedError(self.read_inventory)
 
     def write_revision(self, rev, f):

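The docstrings added above pin down a contract: whatever write_inventory_to_string produces must round-trip through read_inventory_from_string. A rough sketch of that contract follows; the repository location '.' and revision id 'some-rev-id' are placeholders.

    from bzrlib import repository

    repo = repository.Repository.open('.')
    repo.lock_read()
    try:
        inv = repo.get_inventory('some-rev-id')
        # Whole-inventory operations; per the new docstrings, use sparingly.
        text = repo._serializer.write_inventory_to_string(inv)
        inv2 = repo._serializer.read_inventory_from_string(text)
        assert inv2.root.file_id == inv.root.file_id
    finally:
        repo.unlock()
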
=== modified file 'bzrlib/tests/per_repository/__init__.py'
--- a/bzrlib/tests/per_repository/__init__.py	2009-07-10 06:45:04 +0000
+++ b/bzrlib/tests/per_repository/__init__.py	2009-07-22 17:22:06 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2006, 2007, 2008 Canonical Ltd
+# Copyright (C) 2006, 2007, 2008, 2009 Canonical Ltd
 # Authors: Robert Collins <robert.collins at canonical.com>
 #          and others
 #
@@ -867,6 +867,7 @@
         'test_has_revisions',
         'test_is_write_locked',
         'test_iter_reverse_revision_history',
+        'test_merge_directive',
         'test_pack',
         'test_reconcile',
         'test_refresh_data',

=== added file 'bzrlib/tests/per_repository/test_merge_directive.py'
--- a/bzrlib/tests/per_repository/test_merge_directive.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/per_repository/test_merge_directive.py	2009-07-22 17:22:06 +0000
@@ -0,0 +1,73 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Tests for how merge directives interact with various repository formats.
+
+Bundles contain the serialized form, so changes in serialization based on the
+repository format affect the final bundle.
+"""
+
+from bzrlib import (
+    chk_map,
+    errors,
+    merge_directive,
+    tests,
+    )
+
+from bzrlib.tests.per_repository import TestCaseWithRepository
+
+
+class TestMergeDirective(TestCaseWithRepository):
+
+    def make_two_branches(self):
+        builder = self.make_branch_builder('source')
+        builder.start_series()
+        builder.build_snapshot('A', None, [
+            ('add', ('', 'root-id', 'directory', None)),
+            ('add', ('f', 'f-id', 'file', 'initial content\n')),
+            ])
+        builder.build_snapshot('B', 'A', [
+            ('modify', ('f-id', 'new content\n')),
+            ])
+        builder.finish_series()
+        b1 = builder.get_branch()
+        b2 = b1.bzrdir.sprout('target', revision_id='A').open_branch()
+        return b1, b2
+
+    def create_merge_directive(self, source_branch, submit_url):
+        return merge_directive.MergeDirective2.from_objects(
+            source_branch.repository,
+            source_branch.last_revision(),
+            time=1247775710, timezone=0,
+            target_branch=submit_url)
+
+    def test_create_merge_directive(self):
+        source_branch, target_branch = self.make_two_branches()
+        directive = self.create_merge_directive(source_branch,
+                                                target_branch.base)
+        self.assertIsInstance(directive, merge_directive.MergeDirective2)
+
+
+    def test_create_and_install_directive(self):
+        source_branch, target_branch = self.make_two_branches()
+        directive = self.create_merge_directive(source_branch,
+                                                target_branch.base)
+        chk_map.clear_cache()
+        directive.install_revisions(target_branch.repository)
+        rt = target_branch.repository.revision_tree('B')
+        rt.lock_read()
+        self.assertEqualDiff('new content\n', rt.get_file_text('f-id'))
+        rt.unlock()

=== modified file 'bzrlib/tests/test_bundle.py'
--- a/bzrlib/tests/test_bundle.py	2009-07-20 04:26:55 +0000
+++ b/bzrlib/tests/test_bundle.py	2009-08-04 14:10:09 +0000
@@ -50,6 +50,22 @@
 from bzrlib.transform import TreeTransform
 
 
+def get_text(vf, key):
+    """Get the fulltext for a given revision id that is present in the vf"""
+    stream = vf.get_record_stream([key], 'unordered', True)
+    record = stream.next()
+    return record.get_bytes_as('fulltext')
+
+
+def get_inventory_text(repo, revision_id):
+    """Get the fulltext for the inventory at revision id"""
+    repo.lock_read()
+    try:
+        return get_text(repo.inventories, (revision_id,))
+    finally:
+        repo.unlock()
+
+
 class MockTree(object):
     def __init__(self):
         from bzrlib.inventory import InventoryDirectory, ROOT_ID
@@ -558,8 +574,9 @@
         self.tree1 = self.make_branch_and_tree('b1')
         self.b1 = self.tree1.branch
 
-        open('b1/one', 'wb').write('one\n')
-        self.tree1.add('one')
+        self.build_tree_contents([('b1/one', 'one\n')])
+        self.tree1.add('one', 'one-id')
+        self.tree1.set_root_id('root-id')
         self.tree1.commit('add one', rev_id='a@cset-0-1')
 
         bundle = self.get_valid_bundle('null:', 'a@cset-0-1')
@@ -576,8 +593,8 @@
                 , 'b1/sub/sub/'
                 , 'b1/sub/sub/nonempty.txt'
                 ])
-        open('b1/sub/sub/emptyfile.txt', 'wb').close()
-        open('b1/dir/nolastnewline.txt', 'wb').write('bloop')
+        self.build_tree_contents([('b1/sub/sub/emptyfile.txt', ''),
+                                  ('b1/dir/nolastnewline.txt', 'bloop')])
         tt = TreeTransform(self.tree1)
         tt.new_file('executable', tt.root, '#!/bin/sh\n', 'exe-1', True)
         tt.apply()
@@ -616,7 +633,8 @@
 
         bundle = self.get_valid_bundle('a@cset-0-2', 'a@cset-0-3')
         self.assertRaises((errors.TestamentMismatch,
-            errors.VersionedFileInvalidChecksum), self.get_invalid_bundle,
+            errors.VersionedFileInvalidChecksum,
+            errors.BadBundle), self.get_invalid_bundle,
             'a@cset-0-2', 'a@cset-0-3')
         # Check a rollup bundle
         bundle = self.get_valid_bundle('null:', 'a@cset-0-3')
@@ -646,9 +664,10 @@
                           verbose=False)
         bundle = self.get_valid_bundle('a@cset-0-5', 'a@cset-0-6')
         other = self.get_checkout('a@cset-0-5')
-        tree1_inv = self.tree1.branch.repository.get_inventory_xml(
-            'a@cset-0-5')
-        tree2_inv = other.branch.repository.get_inventory_xml('a@cset-0-5')
+        tree1_inv = get_inventory_text(self.tree1.branch.repository,
+                                       'a@cset-0-5')
+        tree2_inv = get_inventory_text(other.branch.repository,
+                                       'a@cset-0-5')
         self.assertEqualDiff(tree1_inv, tree2_inv)
         other.rename_one('sub/dir/nolastnewline.txt', 'sub/nolastnewline.txt')
         other.commit('rename file', rev_id='a@cset-0-6b')
@@ -1317,7 +1336,7 @@
         new_text = self.get_raw(StringIO(''.join(bundle_txt)))
         new_text = new_text.replace('<file file_id="exe-1"',
                                     '<file executable="y" file_id="exe-1"')
-        new_text = new_text.replace('B222', 'B237')
+        new_text = new_text.replace('B260', 'B275')
         bundle_txt = StringIO()
         bundle_txt.write(serializer._get_bundle_header('4'))
         bundle_txt.write('\n')
@@ -1429,6 +1448,200 @@
         return 'metaweave'
 
 
+class V4_2aBundleTester(V4BundleTester):
+
+    def bzrdir_format(self):
+        return '2a'
+
+    def get_invalid_bundle(self, base_rev_id, rev_id):
+        """Create a bundle from base_rev_id -> rev_id in built-in branch.
+        Munge the text so that it's invalid.
+
+        :return: The in-memory bundle
+        """
+        from bzrlib.bundle import serializer
+        bundle_txt, rev_ids = self.create_bundle_text(base_rev_id, rev_id)
+        new_text = self.get_raw(StringIO(''.join(bundle_txt)))
+        # We are going to be replacing some text to set the executable bit on a
+        # file. Make sure the text replacement actually works correctly.
+        self.assertContainsRe(new_text, '(?m)B244\n\ni 1\n<inventory')
+        new_text = new_text.replace('<file file_id="exe-1"',
+                                    '<file executable="y" file_id="exe-1"')
+        new_text = new_text.replace('B244', 'B259')
+        bundle_txt = StringIO()
+        bundle_txt.write(serializer._get_bundle_header('4'))
+        bundle_txt.write('\n')
+        bundle_txt.write(new_text.encode('bz2'))
+        bundle_txt.seek(0)
+        bundle = read_bundle(bundle_txt)
+        self.valid_apply_bundle(base_rev_id, bundle)
+        return bundle
+
+    def make_merged_branch(self):
+        builder = self.make_branch_builder('source')
+        builder.start_series()
+        builder.build_snapshot('a@cset-0-1', None, [
+            ('add', ('', 'root-id', 'directory', None)),
+            ('add', ('file', 'file-id', 'file', 'original content\n')),
+            ])
+        builder.build_snapshot('a@cset-0-2a', ['a@cset-0-1'], [
+            ('modify', ('file-id', 'new-content\n')),
+            ])
+        builder.build_snapshot('a@cset-0-2b', ['a@cset-0-1'], [
+            ('add', ('other-file', 'file2-id', 'file', 'file2-content\n')),
+            ])
+        builder.build_snapshot('a@cset-0-3', ['a@cset-0-2a', 'a@cset-0-2b'], [
+            ('add', ('other-file', 'file2-id', 'file', 'file2-content\n')),
+            ])
+        builder.finish_series()
+        self.b1 = builder.get_branch()
+        self.b1.lock_read()
+        self.addCleanup(self.b1.unlock)
+
+    def make_bundle_just_inventories(self, base_revision_id,
+                                     target_revision_id,
+                                     revision_ids):
+        sio = StringIO()
+        writer = v4.BundleWriteOperation(base_revision_id, target_revision_id,
+                                         self.b1.repository, sio)
+        writer.bundle.begin()
+        writer._add_inventory_mpdiffs_from_serializer(revision_ids)
+        writer.bundle.end()
+        sio.seek(0)
+        return sio
+
+    def test_single_inventory_multiple_parents_as_xml(self):
+        self.make_merged_branch()
+        sio = self.make_bundle_just_inventories('a@cset-0-1', 'a@cset-0-3',
+                                                ['a@cset-0-3'])
+        reader = v4.BundleReader(sio, stream_input=False)
+        records = list(reader.iter_records())
+        self.assertEqual(1, len(records))
+        (bytes, metadata, repo_kind, revision_id,
+         file_id) = records[0]
+        self.assertIs(None, file_id)
+        self.assertEqual('a@cset-0-3', revision_id)
+        self.assertEqual('inventory', repo_kind)
+        self.assertEqual({'parents': ['a@cset-0-2a', 'a@cset-0-2b'],
+                          'sha1': '09c53b0c4de0895e11a2aacc34fef60a6e70865c',
+                          'storage_kind': 'mpdiff',
+                         }, metadata)
+        # We should have an mpdiff that takes some lines from both parents.
+        self.assertEqualDiff(
+            'i 1\n'
+            '<inventory format="10" revision_id="a@cset-0-3">\n'
+            '\n'
+            'c 0 1 1 2\n'
+            'c 1 3 3 2\n', bytes)
+
+    def test_single_inv_no_parents_as_xml(self):
+        self.make_merged_branch()
+        sio = self.make_bundle_just_inventories('null:', 'a@cset-0-1',
+                                                ['a@cset-0-1'])
+        reader = v4.BundleReader(sio, stream_input=False)
+        records = list(reader.iter_records())
+        self.assertEqual(1, len(records))
+        (bytes, metadata, repo_kind, revision_id,
+         file_id) = records[0]
+        self.assertIs(None, file_id)
+        self.assertEqual('a@cset-0-1', revision_id)
+        self.assertEqual('inventory', repo_kind)
+        self.assertEqual({'parents': [],
+                          'sha1': 'a13f42b142d544aac9b085c42595d304150e31a2',
+                          'storage_kind': 'mpdiff',
+                         }, metadata)
+        # With no parents, the mpdiff is just the full inventory text.
+        self.assertEqualDiff(
+            'i 4\n'
+            '<inventory format="10" revision_id="a@cset-0-1">\n'
+            '<directory file_id="root-id" name=""'
+                ' revision="a@cset-0-1" />\n'
+            '<file file_id="file-id" name="file" parent_id="root-id"'
+                ' revision="a@cset-0-1"'
+                ' text_sha1="09c2f8647e14e49e922b955c194102070597c2d1"'
+                ' text_size="17" />\n'
+            '</inventory>\n'
+            '\n', bytes)
+
+    def test_multiple_inventories_as_xml(self):
+        self.make_merged_branch()
+        sio = self.make_bundle_just_inventories('a@cset-0-1', 'a@cset-0-3',
+            ['a@cset-0-2a', 'a@cset-0-2b', 'a@cset-0-3'])
+        reader = v4.BundleReader(sio, stream_input=False)
+        records = list(reader.iter_records())
+        self.assertEqual(3, len(records))
+        revision_ids = [rev_id for b, m, k, rev_id, f in records]
+        self.assertEqual(['a@cset-0-2a', 'a@cset-0-2b', 'a@cset-0-3'],
+                         revision_ids)
+        metadata_2a = records[0][1]
+        self.assertEqual({'parents': ['a@cset-0-1'],
+                          'sha1': '1e105886d62d510763e22885eec733b66f5f09bf',
+                          'storage_kind': 'mpdiff',
+                         }, metadata_2a)
+        metadata_2b = records[1][1]
+        self.assertEqual({'parents': ['a@cset-0-1'],
+                          'sha1': 'f03f12574bdb5ed2204c28636c98a8547544ccd8',
+                          'storage_kind': 'mpdiff',
+                         }, metadata_2b)
+        metadata_3 = records[2][1]
+        self.assertEqual({'parents': ['a@cset-0-2a', 'a@cset-0-2b'],
+                          'sha1': '09c53b0c4de0895e11a2aacc34fef60a6e70865c',
+                          'storage_kind': 'mpdiff',
+                         }, metadata_3)
+        bytes_2a = records[0][0]
+        self.assertEqualDiff(
+            'i 1\n'
+            '<inventory format="10" revision_id="a@cset-0-2a">\n'
+            '\n'
+            'c 0 1 1 1\n'
+            'i 1\n'
+            '<file file_id="file-id" name="file" parent_id="root-id"'
+                ' revision="a@cset-0-2a"'
+                ' text_sha1="50f545ff40e57b6924b1f3174b267ffc4576e9a9"'
+                ' text_size="12" />\n'
+            '\n'
+            'c 0 3 3 1\n', bytes_2a)
+        bytes_2b = records[1][0]
+        self.assertEqualDiff(
+            'i 1\n'
+            '<inventory format="10" revision_id="a@cset-0-2b">\n'
+            '\n'
+            'c 0 1 1 2\n'
+            'i 1\n'
+            '<file file_id="file2-id" name="other-file" parent_id="root-id"'
+                ' revision="a@cset-0-2b"'
+                ' text_sha1="b46c0c8ea1e5ef8e46fc8894bfd4752a88ec939e"'
+                ' text_size="14" />\n'
+            '\n'
+            'c 0 3 4 1\n', bytes_2b)
+        bytes_3 = records[2][0]
+        self.assertEqualDiff(
+            'i 1\n'
+            '<inventory format="10" revision_id="a@cset-0-3">\n'
+            '\n'
+            'c 0 1 1 2\n'
+            'c 1 3 3 2\n', bytes_3)
+
+    def test_creating_bundle_preserves_chk_pages(self):
+        self.make_merged_branch()
+        target = self.b1.bzrdir.sprout('target',
+                                       revision_id='a@cset-0-2a').open_branch()
+        bundle_txt, rev_ids = self.create_bundle_text('a@cset-0-2a',
+                                                      'a@cset-0-3')
+        self.assertEqual(['a@cset-0-2b', 'a@cset-0-3'], rev_ids)
+        bundle = read_bundle(bundle_txt)
+        target.lock_write()
+        self.addCleanup(target.unlock)
+        install_bundle(target.repository, bundle)
+        inv1 = self.b1.repository.inventories.get_record_stream([
+            ('a@cset-0-3',)], 'unordered',
+            True).next().get_bytes_as('fulltext')
+        inv2 = target.repository.inventories.get_record_stream([
+            ('a@cset-0-3',)], 'unordered',
+            True).next().get_bytes_as('fulltext')
+        self.assertEqualDiff(inv1, inv2)
+
+
 class MungedBundleTester(object):
 
     def build_test_bundle(self):

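For reference, the record layout the V4_2aBundleTester assertions rely on can be inspected directly with the bundle reader. A short sketch follows, where 'bundle_file' is assumed to be an open file-like object containing a format-4 bundle (for example the StringIO built by make_bundle_just_inventories above):

    from bzrlib.bundle.serializer import v4

    reader = v4.BundleReader(bundle_file, stream_input=False)
    for record in reader.iter_records():
        bytes, metadata, repo_kind, revision_id, file_id = record
        # repo_kind is e.g. 'inventory' or 'revision'; metadata carries the
        # parents, sha1 and storage_kind checked in the tests above.
        print repo_kind, revision_id, metadata.get('storage_kind')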


