Rev 2699: Speed up installing revisions from Bundle 4 in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
Wed Aug 15 02:08:24 BST 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2699
revision-id: pqm at pqm.ubuntu.com-20070815010819-0q50hsyvbvu1qs3k
parent: pqm at pqm.ubuntu.com-20070814221506-6rw0b0oolfdeqrdw
parent: aaron.bentley at utoronto.ca-20070814235636-q8zmreothkjf4mnv
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2007-08-15 02:08:19 +0100
message:
  Speed up installing revisions from Bundle 4
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/bundle/bundle_data.py   read_changeset.py-20050619171944-c0d95aa685537640
  bzrlib/bundle/serializer/v4.py v10.py-20070611062757-5ggj7k18s9dej0fr-1
  bzrlib/diff.py                 diff.py-20050309040759-26944fbbf2ebbf36
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/merge_directive.py      merge_directive.py-20070228184838-ja62280spt1g7f4x-1
  bzrlib/multiparent.py          __init__.py-20070410133617-n1jdhcc1n1mibarp-1
  bzrlib/tests/test_bundle.py    test.py-20050630184834-092aa401ab9f039c
  bzrlib/tests/test_multiparent.py test_multiparent.py-20070410133617-n1jdhcc1n1mibarp-4
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
  bzrlib/weave.py                knit.py-20050627021749-759c29984154256b
    ------------------------------------------------------------
    revno: 2520.4.154
    merged: aaron.bentley at utoronto.ca-20070814235636-q8zmreothkjf4mnv
    parent: aaron.bentley at utoronto.ca-20070814233803-sve7n1ffyxahtbi3
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: bzr.mpbundle
    timestamp: Tue 2007-08-14 19:56:36 -0400
    message:
      Fix syntax
    ------------------------------------------------------------
    revno: 2520.4.153
    merged: aaron.bentley at utoronto.ca-20070814233803-sve7n1ffyxahtbi3
    parent: aaron.bentley at utoronto.ca-20070814233548-ctlr8sb1lcufb3ny
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: bzr.mpbundle
    timestamp: Tue 2007-08-14 19:38:03 -0400
    message:
      Update NEWS
    ------------------------------------------------------------
    revno: 2520.4.152
    merged: aaron.bentley at utoronto.ca-20070814233548-ctlr8sb1lcufb3ny
    parent: abentley at panoramicfeedback.com-20070727194336-vowoapmp4oae10ik
    parent: pqm at pqm.ubuntu.com-20070814221506-6rw0b0oolfdeqrdw
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: bzr.mpbundle
    timestamp: Tue 2007-08-14 19:35:48 -0400
    message:
      Merge bzr.dev
    ------------------------------------------------------------
    revno: 2520.4.151
    merged: abentley at panoramicfeedback.com-20070727194336-vowoapmp4oae10ik
    parent: abentley at panoramicfeedback.com-20070727193913-26vhslrniy7sd6q1
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Fri 2007-07-27 15:43:36 -0400
    message:
      Style fixes from review
    ------------------------------------------------------------
    revno: 2520.4.150
    merged: abentley at panoramicfeedback.com-20070727193913-26vhslrniy7sd6q1
    parent: abentley at panoramicfeedback.com-20070727171448-nbuzv9zxhvci7dzo
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Fri 2007-07-27 15:39:13 -0400
    message:
      Test that non-Weave uses left_matching_blocks for add_lines
    ------------------------------------------------------------
    revno: 2520.4.149
    merged: abentley at panoramicfeedback.com-20070727171448-nbuzv9zxhvci7dzo
    parent: abentley at panoramicfeedback.com-20070727135329-02ettz9f9g02keb5
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Fri 2007-07-27 13:14:48 -0400
    message:
      Adjust NEWS to mention 'send' command
    ------------------------------------------------------------
    revno: 2520.4.148
    merged: abentley at panoramicfeedback.com-20070727135329-02ettz9f9g02keb5
    parent: abentley at panoramicfeedback.com-20070727133254-pb0j7mfanlyub4vy
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Fri 2007-07-27 09:53:29 -0400
    message:
      Updates from review
    ------------------------------------------------------------
    revno: 2520.4.147
    merged: abentley at panoramicfeedback.com-20070727133254-pb0j7mfanlyub4vy
    parent: abentley at panoramicfeedback.com-20070724174758-gebodfcenj53hoi2
    parent: pqm at pqm.ubuntu.com-20070727061532-14ly852y2g2dbcb8
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Fri 2007-07-27 09:32:54 -0400
    message:
      Merge from bzr.dev
    ------------------------------------------------------------
    revno: 2520.4.146
    merged: abentley at panoramicfeedback.com-20070724174758-gebodfcenj53hoi2
    parent: abentley at panoramicfeedback.com-20070724165924-uxl0k3sd36k31zqh
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Tue 2007-07-24 13:47:58 -0400
    message:
      Avoid get_matching_blocks for un-annotated text
    ------------------------------------------------------------
    revno: 2520.4.145
    merged: abentley at panoramicfeedback.com-20070724165924-uxl0k3sd36k31zqh
    parent: abentley at panoramicfeedback.com-20070724162533-cjf27biedp34bhc7
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Tue 2007-07-24 12:59:24 -0400
    message:
      Add memory_friendly toggle, be memory-unfriendly for merge directives
    ------------------------------------------------------------
    revno: 2520.4.144
    merged: abentley at panoramicfeedback.com-20070724162533-cjf27biedp34bhc7
    parent: abentley at panoramicfeedback.com-20070724151629-zcorv1ovxgh0oixs
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Tue 2007-07-24 12:25:33 -0400
    message:
      Make Reconstructor use cached versions
    ------------------------------------------------------------
    revno: 2520.4.143
    merged: abentley at panoramicfeedback.com-20070724151629-zcorv1ovxgh0oixs
    parent: abentley at panoramicfeedback.com-20070724145423-j9y3kussedy4y6q2
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Tue 2007-07-24 11:16:29 -0400
    message:
      Fix invalid bundle test
    ------------------------------------------------------------
    revno: 2520.4.142
    merged: abentley at panoramicfeedback.com-20070724145423-j9y3kussedy4y6q2
    parent: abentley at panoramicfeedback.com-20070724135633-fd408vp7tl1tddm8
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Tue 2007-07-24 10:54:23 -0400
    message:
      Clean up installation of inventory records
    ------------------------------------------------------------
    revno: 2520.4.141
    merged: abentley at panoramicfeedback.com-20070724135633-fd408vp7tl1tddm8
    parent: aaron.bentley at utoronto.ca-20070724114440-c0cc1qqequkw6qru
    committer: Aaron Bentley <abentley at panoramicfeedback.com>
    branch nick: bzr.mpbundle
    timestamp: Tue 2007-07-24 09:56:33 -0400
    message:
      More batch operations adding mpdiffs
    ------------------------------------------------------------
    revno: 2520.4.140
    merged: aaron.bentley at utoronto.ca-20070724114440-c0cc1qqequkw6qru
    parent: aaron.bentley at utoronto.ca-20070724014150-tk0u7xdx3b87xka1
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: bzr.mpbundle
    timestamp: Tue 2007-07-24 07:44:40 -0400
    message:
      Use matching blocks from mpdiff for knit delta creation
    ------------------------------------------------------------
    revno: 2520.4.139
    merged: aaron.bentley at utoronto.ca-20070724014150-tk0u7xdx3b87xka1
    parent: aaron.bentley at utoronto.ca-20070724013952-isph0r8pe5hgavw1
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: bzr.mpbundle
    timestamp: Mon 2007-07-23 21:41:50 -0400
    message:
      Support Multiparent.get_matching_blocks
    ------------------------------------------------------------
    revno: 2520.4.138
    merged: aaron.bentley at utoronto.ca-20070724013952-isph0r8pe5hgavw1
    parent: aaron.bentley at utoronto.ca-20070724013350-ldez6w3pqsa9ffe1
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: bzr.mpbundle
    timestamp: Mon 2007-07-23 21:39:52 -0400
    message:
      Fix benign off-by-one error generating mpdiffs
    ------------------------------------------------------------
    revno: 2520.4.137
    merged: aaron.bentley at utoronto.ca-20070724013350-ldez6w3pqsa9ffe1
    parent: abentley at panoramicfeedback.com-20070719181611-etp4itjrl2tci9kl
    parent: pqm at pqm.ubuntu.com-20070723214204-jwd5cwzvq93wwlgs
    committer: Aaron Bentley <aaron.bentley at utoronto.ca>
    branch nick: bzr.mpbundle
    timestamp: Mon 2007-07-23 21:33:50 -0400
    message:
      Merge bzr.dev
=== modified file 'NEWS'
--- a/NEWS	2007-08-14 19:29:56 +0000
+++ b/NEWS	2007-08-14 23:38:03 +0000
@@ -1,3 +1,10 @@
+IN DEVELOPMENT
+
+  IMPROVEMENTS:
+
+  * ``pull`` and ``merge`` are much faster at installing bundle format 4.
+    (Aaron Bentley)
+
 bzr 0.90rc1 2007-08-14
 
   BUGFIXES:
@@ -92,8 +99,6 @@
       (Ian Clatworthy)
 
     * New bundle and merge directive formats are faster to generate, and
-      more robust against email mangling.  New `send` command replaces
-      `bundle-revisions` and `merge-directive`.  (Aaron Bentley)
 
     * Annotate merge now works when there are local changes. (Aaron Bentley)
 

=== modified file 'bzrlib/bundle/bundle_data.py'
--- a/bzrlib/bundle/bundle_data.py	2007-06-28 14:14:16 +0000
+++ b/bzrlib/bundle/bundle_data.py	2007-07-27 13:53:29 +0000
@@ -457,8 +457,12 @@
                         ' (unrecognized action): %r' % action_line)
             valid_actions[action](kind, extra, lines)
 
-    def install_revisions(self, target_repo):
-        """Install revisions and return the target revision"""
+    def install_revisions(self, target_repo, stream_input=True):
+        """Install revisions and return the target revision
+
+        :param target_repo: The repository to install into
+        :param stream_input: Ignored by this implementation.
+        """
         apply_bundle.install_bundle(target_repo, self)
         return self.target
 

=== modified file 'bzrlib/bundle/serializer/v4.py'
--- a/bzrlib/bundle/serializer/v4.py	2007-08-08 02:57:22 +0000
+++ b/bzrlib/bundle/serializer/v4.py	2007-08-14 23:56:36 +0000
@@ -144,13 +144,23 @@
     body
     """
 
-    def __init__(self, fileobj):
+    def __init__(self, fileobj, stream_input=True):
+        """Constructor
+
+        :param fileobj: a file containing a bzip-encoded container
+        :param stream_input: If True, the BundleReader streams input rather than
+            reading it all into memory at once.  Reading it into memory all at
+            once is (currently) faster.
+        """
         line = fileobj.readline()
         if line != '\n':
             fileobj.readline()
         self.patch_lines = []
-        self._container = pack.ContainerReader(
-            iterablefile.IterableFile(self.iter_decode(fileobj)))
+        if stream_input:
+            source_file = iterablefile.IterableFile(self.iter_decode(fileobj))
+        else:
+            source_file = StringIO(bz2.decompress(fileobj.read()))
+        self._container = pack.ContainerReader(source_file)
 
     @staticmethod
     def iter_decode(fileobj):
@@ -382,11 +392,17 @@
     def install(self, repository):
         return self.install_revisions(repository)
 
-    def install_revisions(self, repository):
-        """Install this bundle's revisions into the specified repository"""
+    def install_revisions(self, repository, stream_input=True):
+        """Install this bundle's revisions into the specified repository
+
+        :param repository: The repository to install into
+        :param stream_input: If True, will stream input rather than reading it
+            all into memory at once.  Reading it into memory all at once is
+            (currently) faster.
+        """
         repository.lock_write()
         try:
-            ri = RevisionInstaller(self.get_bundle_reader(),
+            ri = RevisionInstaller(self.get_bundle_reader(stream_input),
                                    self._serializer, repository)
             return ri.install()
         finally:
@@ -399,9 +415,15 @@
         """
         return None, self.target, 'inapplicable'
 
-    def get_bundle_reader(self):
+    def get_bundle_reader(self, stream_input=True):
+        """Return a new BundleReader for the associated bundle
+
+        :param stream_input: If True, the BundleReader streams input rather than
+            reading it all into memory at once.  Reading it into memory all at
+            once is (currently) faster.
+        """
         self._fileobj.seek(0)
-        return BundleReader(self._fileobj)
+        return BundleReader(self._fileobj, stream_input)
 
     def _get_real_revisions(self):
         if self.__real_revisions is None:
@@ -448,6 +470,8 @@
         current_file = None
         current_versionedfile = None
         pending_file_records = []
+        inventory_vf = None
+        pending_inventory_records = []
         added_inv = set()
         target_revision = None
         for bytes, metadata, repo_kind, revision_id, file_id in\
@@ -455,24 +479,31 @@
             if repo_kind == 'info':
                 assert self._info is None
                 self._handle_info(metadata)
-            if repo_kind != 'file':
+            if ((repo_kind, file_id) != ('file', current_file) and
+                len(pending_file_records) > 0):
                 self._install_mp_records(current_versionedfile,
                     pending_file_records)
                 current_file = None
                 current_versionedfile = None
                 pending_file_records = []
-                if repo_kind == 'inventory':
-                    self._install_inventory(revision_id, metadata, bytes)
-                if repo_kind == 'revision':
-                    target_revision = revision_id
-                    self._install_revision(revision_id, metadata, bytes)
-                if repo_kind == 'signature':
-                    self._install_signature(revision_id, metadata, bytes)
+            if len(pending_inventory_records) > 0 and repo_kind != 'inventory':
+                self._install_inventory_records(inventory_vf,
+                                                pending_inventory_records)
+                pending_inventory_records = []
+            if repo_kind == 'inventory':
+                if inventory_vf is None:
+                    inventory_vf = self._repository.get_inventory_weave()
+                if revision_id not in inventory_vf:
+                    pending_inventory_records.append((revision_id, metadata,
+                                                      bytes))
+            if repo_kind == 'revision':
+                target_revision = revision_id
+                self._install_revision(revision_id, metadata, bytes)
+            if repo_kind == 'signature':
+                self._install_signature(revision_id, metadata, bytes)
             if repo_kind == 'file':
-                if file_id != current_file:
-                    self._install_mp_records(current_versionedfile,
-                        pending_file_records)
-                    current_file = file_id
+                current_file = file_id
+                if current_versionedfile is None:
                     current_versionedfile = \
                         self._repository.weave_store.get_weave_or_empty(
                         file_id, self._repository.get_transaction())
@@ -501,30 +532,28 @@
                       records if r not in versionedfile]
         versionedfile.add_mpdiffs(vf_records)
 
-    def _install_inventory(self, revision_id, metadata, text):
-        vf = self._repository.get_inventory_weave()
-        if revision_id in vf:
-            return
-        parent_ids = metadata['parents']
+    def _install_inventory_records(self, vf, records):
         if self._info['serializer'] == self._repository._serializer.format_num:
-            return self._install_mp_records(vf, [(revision_id, metadata,
-                                                  text)])
-        parents = [self._repository.get_inventory(p)
-                   for p in parent_ids]
-        parent_texts = [self._source_serializer.write_inventory_to_string(p)
-                        for p in parents]
-        target_lines = multiparent.MultiParent.from_patch(text).to_lines(
-            parent_texts)
-        sha1 = osutils.sha_strings(target_lines)
-        if sha1 != metadata['sha1']:
-            raise errors.BadBundle("Can't convert to target format")
-        target_inv = self._source_serializer.read_inventory_from_string(
-            ''.join(target_lines))
-        self._handle_root(target_inv, parent_ids)
-        try:
-            self._repository.add_inventory(revision_id, target_inv, parent_ids)
-        except errors.UnsupportedInventoryKind:
-            raise errors.IncompatibleRevision(repr(self._repository))
+            return self._install_mp_records(vf, records)
+        for revision_id, metadata, bytes in records:
+            parent_ids = metadata['parents']
+            parents = [self._repository.get_inventory(p)
+                       for p in parent_ids]
+            p_texts = [self._source_serializer.write_inventory_to_string(p)
+                       for p in parents]
+            target_lines = multiparent.MultiParent.from_patch(bytes).to_lines(
+                p_texts)
+            sha1 = osutils.sha_strings(target_lines)
+            if sha1 != metadata['sha1']:
+                raise errors.BadBundle("Can't convert to target format")
+            target_inv = self._source_serializer.read_inventory_from_string(
+                ''.join(target_lines))
+            self._handle_root(target_inv, parent_ids)
+            try:
+                self._repository.add_inventory(revision_id, target_inv,
+                                               parent_ids)
+            except errors.UnsupportedInventoryKind:
+                raise errors.IncompatibleRevision(repr(self._repository))
 
     def _handle_root(self, target_inv, parent_ids):
         revision_id = target_inv.revision_id
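
The stream_input flag added above lets callers trade memory for speed: when it is False, BundleReader decompresses the whole bzip2-encoded container into a StringIO up front instead of feeding it through an incremental decoder, which the docstring notes is currently the faster path. A minimal standalone sketch of the two strategies using only the standard library (read_all_at_once and decode_chunks are illustrative names, not bzrlib API):

    import bz2
    import io

    def read_all_at_once(fileobj):
        # Decompress the whole payload in one call: simplest and, per the
        # bundle code's docstring, currently faster, at the cost of holding
        # the decompressed bundle in memory.
        return io.BytesIO(bz2.decompress(fileobj.read()))

    def decode_chunks(fileobj, chunk_size=65536):
        # Incremental decompression: yield plain chunks as they become
        # available, so the full payload never has to sit in memory at once.
        decompressor = bz2.BZ2Decompressor()
        while True:
            chunk = fileobj.read(chunk_size)
            if not chunk:
                break
            data = decompressor.decompress(chunk)
            if data:
                yield data

    payload = bz2.compress(b'a bundle-like payload\n' * 1000)
    streamed = b''.join(decode_chunks(io.BytesIO(payload)))
    assert streamed == read_all_at_once(io.BytesIO(payload)).read()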

=== modified file 'bzrlib/diff.py'
--- a/bzrlib/diff.py	2007-07-09 07:38:03 +0000
+++ b/bzrlib/diff.py	2007-07-27 13:32:54 +0000
@@ -14,6 +14,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+import difflib
 import os
 import re
 import sys
@@ -48,6 +49,16 @@
 # invoke callbacks on an object.  That object can either accumulate a
 # list, write them out directly, etc etc.
 
+
+class _PrematchedMatcher(difflib.SequenceMatcher):
+    """Allow SequenceMatcher operations to use predetermined blocks"""
+
+    def __init__(self, matching_blocks):
+        difflib.SequenceMatcher(self, None, None)
+        self.matching_blocks = matching_blocks
+        self.opcodes = None
+
+
 def internal_diff(old_filename, oldlines, new_filename, newlines, to_file,
                   allow_binary=False, sequence_matcher=None,
                   path_encoding='utf8'):
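
_PrematchedMatcher works because difflib.SequenceMatcher caches its results: when matching_blocks is already populated, get_matching_blocks() and anything built on it, such as get_opcodes(), return the stored blocks instead of re-running the matching algorithm. The knit code can therefore hand it blocks recovered from a multiparent diff and skip the diff entirely. A small illustration of that caching behaviour with the plain standard-library class (a demonstration only, not bzrlib's subclass):

    import difflib

    old_lines = ['a\n', 'b\n', 'c\n']
    new_lines = ['a\n', 'x\n', 'c\n']

    # Normal use: the matcher computes the blocks itself.
    matcher = difflib.SequenceMatcher(None, old_lines, new_lines)
    print(matcher.get_matching_blocks())
    # three blocks: (0, 0, 1), (2, 2, 1) and the terminating (3, 3, 0)

    # Pre-seeded use: supply blocks computed elsewhere.  Because the cached
    # matching_blocks list is reused, get_opcodes() is derived from it
    # without any further sequence matching.
    preseeded = difflib.SequenceMatcher(None, old_lines, new_lines)
    preseeded.matching_blocks = [(0, 0, 1), (2, 2, 1), (3, 3, 0)]
    preseeded.opcodes = None   # reset cached opcodes, as _PrematchedMatcher does
    print(preseeded.get_opcodes())
    # [('equal', 0, 1, 0, 1), ('replace', 1, 2, 1, 2), ('equal', 2, 3, 2, 3)]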

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-08-08 02:26:02 +0000
+++ b/bzrlib/knit.py	2007-08-14 23:35:48 +0000
@@ -78,6 +78,7 @@
 """)
 from bzrlib import (
     cache_utf8,
+    diff,
     errors,
     osutils,
     patiencediff,
@@ -641,29 +642,32 @@
     __contains__ = has_version
 
     def _merge_annotations(self, content, parents, parent_texts={},
-                           delta=None, annotated=None):
+                           delta=None, annotated=None,
+                           left_matching_blocks=None):
         """Merge annotations for content.  This is done by comparing
         the annotations based on changed to the text.
         """
+        if left_matching_blocks is not None:
+            delta_seq = diff._PrematchedMatcher(left_matching_blocks)
+        else:
+            delta_seq = None
         if annotated:
-            delta_seq = None
             for parent_id in parents:
                 merge_content = self._get_content(parent_id, parent_texts)
-                seq = patiencediff.PatienceSequenceMatcher(
-                                   None, merge_content.text(), content.text())
-                if delta_seq is None:
-                    # setup a delta seq to reuse.
-                    delta_seq = seq
+                if (parent_id == parents[0] and delta_seq is not None):
+                    seq = delta_seq
+                else:
+                    seq = patiencediff.PatienceSequenceMatcher(
+                        None, merge_content.text(), content.text())
                 for i, j, n in seq.get_matching_blocks():
                     if n == 0:
                         continue
-                    # this appears to copy (origin, text) pairs across to the new
-                    # content for any line that matches the last-checked parent.
-                    # FIXME: save the sequence control data for delta compression
-                    # against the most relevant parent rather than rediffing.
+                    # this appears to copy (origin, text) pairs across to the
+                    # new content for any line that matches the last-checked
+                    # parent.
                     content._lines[j:j+n] = merge_content._lines[i:i+n]
         if delta:
-            if not annotated:
+            if delta_seq is None:
                 reference_content = self._get_content(parents[0], parent_texts)
                 new_texts = content.text()
                 old_texts = reference_content.text()
@@ -729,11 +733,13 @@
         self._check_add(version_id, lines)
         return self._add(version_id, lines[:], parents, self.delta, parent_texts)
 
-    def _add_lines(self, version_id, parents, lines, parent_texts):
+    def _add_lines(self, version_id, parents, lines, parent_texts,
+                   left_matching_blocks=None):
         """See VersionedFile.add_lines."""
         self._check_add(version_id, lines)
         self._check_versions_present(parents)
-        return self._add(version_id, lines[:], parents, self.delta, parent_texts)
+        return self._add(version_id, lines[:], parents, self.delta,
+                         parent_texts, left_matching_blocks)
 
     def _check_add(self, version_id, lines):
         """check that version_id and lines are safe to add."""
@@ -747,7 +753,8 @@
         self._check_lines_not_unicode(lines)
         self._check_lines_are_lines(lines)
 
-    def _add(self, version_id, lines, parents, delta, parent_texts):
+    def _add(self, version_id, lines, parents, delta, parent_texts,
+             left_matching_blocks=None):
         """Add a set of lines on top of version specified by parents.
 
         If delta is true, compress the text as a line-delta against
@@ -797,8 +804,9 @@
         lines = self.factory.make(lines, version_id)
         if delta or (self.factory.annotated and len(present_parents) > 0):
             # Merge annotations from parent texts if so is needed.
-            delta_hunks = self._merge_annotations(lines, present_parents, parent_texts,
-                                                  delta, self.factory.annotated)
+            delta_hunks = self._merge_annotations(lines, present_parents,
+                parent_texts, delta, self.factory.annotated,
+                left_matching_blocks)
 
         if delta:
             options.append('line-delta')
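
With left_matching_blocks available, _merge_annotations can reuse the blocks for the leftmost parent (via _PrematchedMatcher) instead of running PatienceSequenceMatcher against it, both when copying annotations across and when building the line-delta. The annotation-copying step amounts to the slice assignment in the loop above; below is a standalone sketch of that idea, using plain (origin, line) tuples rather than bzrlib's KnitContent classes (propagate_annotations is an illustrative helper, not bzrlib code):

    def propagate_annotations(parent_annotated, child_lines, blocks,
                              child_version):
        # parent_annotated: (origin, line) pairs for the parent text
        # child_lines: plain lines of the new text
        # blocks: SequenceMatcher-style (i, j, n) matching blocks
        annotated = [(child_version, line) for line in child_lines]
        for i, j, n in blocks:
            if n == 0:
                continue
            # Lines that match the parent keep the parent's origins.
            annotated[j:j + n] = parent_annotated[i:i + n]
        return annotated

    parent = [('rev-1', 'a\n'), ('rev-1', 'b\n')]
    child = ['a\n', 'b\n', 'c\n']
    blocks = [(0, 0, 2), (2, 3, 0)]
    print(propagate_annotations(parent, child, blocks, 'rev-2'))
    # [('rev-1', 'a\n'), ('rev-1', 'b\n'), ('rev-2', 'c\n')]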

=== modified file 'bzrlib/merge_directive.py'
--- a/bzrlib/merge_directive.py	2007-08-13 22:17:57 +0000
+++ b/bzrlib/merge_directive.py	2007-08-14 23:35:48 +0000
@@ -190,7 +190,7 @@
                     StringIO(self.get_raw_bundle()))
                 # We don't use the bundle's target revision, because
                 # MergeDirective.revision_id is authoritative.
-                info.install_revisions(target_repo)
+                info.install_revisions(target_repo, stream_input=False)
             else:
                 source_branch = _mod_branch.Branch.open(self.source_branch)
                 target_repo.fetch(source_branch.repository, self.revision_id)

=== modified file 'bzrlib/multiparent.py'
--- a/bzrlib/multiparent.py	2007-07-04 03:38:28 +0000
+++ b/bzrlib/multiparent.py	2007-07-24 16:25:33 +0000
@@ -106,7 +106,7 @@
                 if block is None:
                     continue
                 i, j, n = block
-                while j + n < cur_line:
+                while j + n <= cur_line:
                     block = cur_block[p] = next_block(p)
                     if block is None:
                         break
@@ -136,6 +136,13 @@
             diff.hunks.append(new_text)
         return diff
 
+    def get_matching_blocks(self, parent, parent_len):
+        for hunk in self.hunks:
+            if not isinstance(hunk, ParentText) or hunk.parent != parent:
+                continue
+            yield (hunk.parent_pos, hunk.child_pos, hunk.num_lines)
+        yield parent_len, self.num_lines(), 0
+
     def to_lines(self, parents=()):
         """Contruct a fulltext from this diff and its parents"""
         mpvf = MultiMemoryVersionedFile()
@@ -483,8 +490,7 @@
             pass
         diff = self.get_diff(version_id)
         lines = []
-        reconstructor = _Reconstructor(self, self._lines,
-                                       self._parents)
+        reconstructor = _Reconstructor(self, self._lines, self._parents)
         reconstructor.reconstruct_version(lines, version_id)
         self._lines[version_id] = lines
         return lines
@@ -594,6 +600,9 @@
         while len(pending_reqs) > 0:
             req_version_id, req_start, req_end = pending_reqs.pop()
             # lazily allocate cursors for versions
+            if req_version_id in self.lines:
+                lines.extend(self.lines[req_version_id][req_start:req_end])
+                continue
             try:
                 start, end, kind, data, iterator = self.cursor[req_version_id]
             except KeyError:
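
MultiParent.get_matching_blocks, added above, converts the ParentText hunks that reference one particular parent into SequenceMatcher-style (parent_pos, child_pos, num_lines) triples, finishing with the conventional zero-length block; that is exactly the format add_lines now accepts as a left_matching_blocks hint. A rough standalone equivalent using toy hunk tuples instead of bzrlib's NewText/ParentText classes (matching_blocks here is an illustrative function, not the bzrlib method):

    # Each hunk is either ('new', [lines]) or
    # ('parent', parent_index, parent_pos, child_pos, num_lines).
    def matching_blocks(hunks, parent, parent_len, child_len):
        for hunk in hunks:
            if hunk[0] != 'parent' or hunk[1] != parent:
                continue
            yield (hunk[2], hunk[3], hunk[4])
        # SequenceMatcher-style terminating block.
        yield (parent_len, child_len, 0)

    # A five-line child built from parent 0: one new line, lines 1-3 of
    # the parent, then one more new line.
    hunks = [('new', ['x\n']),
             ('parent', 0, 1, 1, 3),
             ('new', ['y\n'])]
    print(list(matching_blocks(hunks, parent=0, parent_len=4, child_len=5)))
    # [(1, 1, 3), (4, 5, 0)]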

=== modified file 'bzrlib/tests/test_bundle.py'
--- a/bzrlib/tests/test_bundle.py	2007-07-25 22:54:16 +0000
+++ b/bzrlib/tests/test_bundle.py	2007-08-14 23:35:48 +0000
@@ -1250,7 +1250,7 @@
         new_text = self.get_raw(StringIO(''.join(bundle_txt)))
         new_text = new_text.replace('<file file_id="exe-1"',
                                     '<file executable="y" file_id="exe-1"')
-        new_text = new_text.replace('B372', 'B387')
+        new_text = new_text.replace('B222', 'B237')
         bundle_txt = StringIO()
         bundle_txt.write(serializer._get_bundle_header('4'))
         bundle_txt.write('\n')
@@ -1459,7 +1459,27 @@
             'storage_kind':'fulltext'}, 'file', 'revid', 'fileid')
         writer.end()
         fileobj.seek(0)
-        record_iter = v4.BundleReader(fileobj).iter_records()
+        reader = v4.BundleReader(fileobj, stream_input=True)
+        record_iter = reader.iter_records()
+        record = record_iter.next()
+        self.assertEqual((None, {'foo': 'bar', 'storage_kind': 'header'},
+            'info', None, None), record)
+        record = record_iter.next()
+        self.assertEqual(("Record body", {'storage_kind': 'fulltext',
+                          'parents': ['1', '3']}, 'file', 'revid', 'fileid'),
+                          record)
+
+    def test_roundtrip_record_memory_hungry(self):
+        fileobj = StringIO()
+        writer = v4.BundleWriter(fileobj)
+        writer.begin()
+        writer.add_info_record(foo='bar')
+        writer._add_record("Record body", {'parents': ['1', '3'],
+            'storage_kind':'fulltext'}, 'file', 'revid', 'fileid')
+        writer.end()
+        fileobj.seek(0)
+        reader = v4.BundleReader(fileobj, stream_input=False)
+        record_iter = reader.iter_records()
         record = record_iter.next()
         self.assertEqual((None, {'foo': 'bar', 'storage_kind': 'header'},
             'info', None, None), record)

=== modified file 'bzrlib/tests/test_multiparent.py'
--- a/bzrlib/tests/test_multiparent.py	2007-06-26 20:05:54 +0000
+++ b/bzrlib/tests/test_multiparent.py	2007-07-24 01:41:50 +0000
@@ -49,6 +49,11 @@
                           multiparent.ParentText(0, 1, 2, 3)],
                          diff.hunks)
 
+        diff = multiparent.MultiParent.from_lines(LINES_2, [LINES_1])
+        self.assertEqual([multiparent.ParentText(0, 0, 0, 1),
+                          multiparent.ParentText(0, 2, 1, 3)],
+                         diff.hunks)
+
     def test_compare_two_parents(self):
         diff = multiparent.MultiParent.from_lines(LINES_1, [LINES_2, LINES_3])
         self.assertEqual([multiparent.ParentText(1, 0, 0, 4),
@@ -64,6 +69,15 @@
                           multiparent.ParentText(0, 3, 4, 1)],
                          diff.hunks)
 
+    def test_get_matching_blocks(self):
+        diff = multiparent.MultiParent.from_lines(LINES_1, [LINES_2])
+        self.assertEqual([(0, 0, 1), (1, 2, 3), (4, 5, 0)],
+                         list(diff.get_matching_blocks(0, len(LINES_2))))
+
+        diff = multiparent.MultiParent.from_lines(LINES_2, [LINES_1])
+        self.assertEqual([(0, 0, 1), (2, 1, 3), (5, 4, 0)],
+                         list(diff.get_matching_blocks(0, len(LINES_1))))
+
     def test_range_iterator(self):
         diff = multiparent.MultiParent.from_lines(LINES_1, [LINES_2, LINES_3])
         diff.hunks.append(multiparent.NewText(['q\n']))

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2007-07-25 00:52:21 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2007-07-27 19:39:13 +0000
@@ -36,7 +36,7 @@
                            )
 from bzrlib.knit import KnitVersionedFile, \
      KnitAnnotateFactory
-from bzrlib.tests import TestCaseWithTransport
+from bzrlib.tests import TestCaseWithTransport, TestSkipped
 from bzrlib.tests.HTTPTestUtil import TestCaseWithWebserver
 from bzrlib.trace import mutter
 from bzrlib.transport import get_transport
@@ -126,6 +126,25 @@
             (errors.BzrBadParameterUnicode, NotImplementedError),
             vf.add_lines_with_ghosts, 'a', [], ['a\n', u'b\n', 'c\n'])
 
+    def test_add_follows_left_matching_blocks(self):
+        """If we change left_matching_blocks, delta changes
+
+        Note: There are multiple correct deltas in this case, because
+        we start with 1 "a" and we get 3.
+        """
+        vf = self.get_file()
+        if isinstance(vf, WeaveFile):
+            raise TestSkipped("WeaveFile ignores left_matching_blocks")
+        vf.add_lines('1', [], ['a\n'])
+        vf.add_lines('2', ['1'], ['a\n', 'a\n', 'a\n'],
+                     left_matching_blocks=[(0, 0, 1), (1, 3, 0)])
+        self.assertEqual([(1, 1, 2, [('2', 'a\n'), ('2', 'a\n')])],
+                         vf.get_delta('2')[3])
+        vf.add_lines('3', ['1'], ['a\n', 'a\n', 'a\n'],
+                     left_matching_blocks=[(0, 2, 1), (1, 3, 0)])
+        self.assertEqual([(0, 0, 2, [('3', 'a\n'), ('3', 'a\n')])],
+                         vf.get_delta('3')[3])
+
     def test_inline_newline_throws(self):
         # \r characters are not permitted in lines being added
         vf = self.get_file()

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2007-07-25 21:26:30 +0000
+++ b/bzrlib/versionedfile.py	2007-07-27 19:43:36 +0000
@@ -125,7 +125,8 @@
             new_full[-1] = new_full[-1][:-1]
         self.add_lines(version_id, parents, new_full)
 
-    def add_lines(self, version_id, parents, lines, parent_texts=None):
+    def add_lines(self, version_id, parents, lines, parent_texts=None,
+                  left_matching_blocks=None):
         """Add a single text on top of the versioned file.
 
         Must raise RevisionAlreadyPresent if the new version is
@@ -138,6 +139,9 @@
              version_id to allow delta optimisations. 
              VERY IMPORTANT: the texts must be those returned
              by add_lines or data corruption can be caused.
+        :param left_matching_blocks: a hint about which areas are common
+            between the text and its left-hand-parent.  The format is
+            the SequenceMatcher.get_matching_blocks format.
         :return: An opaque representation of the inserted version which can be
                  provided back to future add_lines calls in the parent_texts
                  dictionary.
@@ -145,9 +149,11 @@
         version_id = osutils.safe_revision_id(version_id)
         parents = [osutils.safe_revision_id(v) for v in parents]
         self._check_write_ok()
-        return self._add_lines(version_id, parents, lines, parent_texts)
+        return self._add_lines(version_id, parents, lines, parent_texts,
+                               left_matching_blocks)
 
-    def _add_lines(self, version_id, parents, lines, parent_texts):
+    def _add_lines(self, version_id, parents, lines, parent_texts,
+                   left_matching_blocks):
         """Helper to do the class specific add_lines."""
         raise NotImplementedError(self.add_lines)
 
@@ -298,17 +304,31 @@
         mpdiff.  mpdiff should be a MultiParent instance.
         """
         vf_parents = {}
-        for version, parents, expected_sha1, mpdiff in records:
-            mpvf = multiparent.MultiMemoryVersionedFile()
-            needed_parents = [p for p in parents if not mpvf.has_version(p)]
-            parent_lines = self._get_lf_split_line_list(needed_parents)
-            for parent_id, lines in zip(needed_parents, parent_lines):
-                mpvf.add_version(lines, parent_id, [])
-            mpvf.add_diff(mpdiff, version, parents)
-            lines = mpvf.get_line_list([version])[0]
-            version_text = self.add_lines(version, parents, lines, vf_parents)
+        mpvf = multiparent.MultiMemoryVersionedFile()
+        versions = []
+        for version, parent_ids, expected_sha1, mpdiff in records:
+            versions.append(version)
+            mpvf.add_diff(mpdiff, version, parent_ids)
+        needed_parents = set()
+        for version, parent_ids, expected_sha1, mpdiff in records:
+            needed_parents.update(p for p in parent_ids
+                                  if not mpvf.has_version(p))
+        for parent_id, lines in zip(needed_parents,
+                                 self._get_lf_split_line_list(needed_parents)):
+            mpvf.add_version(lines, parent_id, [])
+        for (version, parent_ids, expected_sha1, mpdiff), lines in\
+            zip(records, mpvf.get_line_list(versions)):
+            if len(parent_ids) == 1:
+                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
+                    mpvf.get_diff(parent_ids[0]).num_lines()))
+            else:
+                left_matching_blocks = None
+            version_text = self.add_lines(version, parent_ids, lines,
+                vf_parents, left_matching_blocks=left_matching_blocks)
             vf_parents[version] = version_text
-            if expected_sha1 != self.get_sha1(version):
+        for (version, parent_ids, expected_sha1, mpdiff), sha1 in\
+             zip(records, self.get_sha1s(versions)):
+            if expected_sha1 != sha1:
                 raise errors.VersionedFileInvalidChecksum(version)
 
     def get_sha1(self, version_id):
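
The rewritten add_mpdiffs above is where most of the batching happens: all diffs are registered with one MultiMemoryVersionedFile, the needed parent texts are fetched in a single _get_lf_split_line_list call, every text is rebuilt with one get_line_list call, single-parent diffs pass their matching blocks straight through as left_matching_blocks, and the checksums are verified at the end against one get_sha1s call rather than per version. That final check compares each expected sha1 with the SHA-1 of the version's concatenated lines (osutils.sha_strings computes the same kind of digest). A hedged sketch of the batch verification step only, with hashlib standing in for the bzrlib helpers (sha_lines and verify_batch are illustrative names):

    import hashlib

    def sha_lines(lines):
        # SHA-1 hex digest of the concatenated lines of one version.
        return hashlib.sha1(b''.join(lines)).hexdigest()

    def verify_batch(records, line_lists):
        # records: (version, parents, expected_sha1, mpdiff) tuples, as
        # passed to add_mpdiffs; line_lists: the reconstructed lines for
        # each version, in the same order.  Returns the versions whose
        # checksum did not match.
        bad = []
        for record, lines in zip(records, line_lists):
            version, _, expected_sha1, _ = record
            if sha_lines(lines) != expected_sha1:
                bad.append(version)
        return bad

    records = [('rev-1', [], hashlib.sha1(b'a\nb\n').hexdigest(), None)]
    print(verify_batch(records, [[b'a\n', b'b\n']]))   # []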

=== modified file 'bzrlib/weave.py'
--- a/bzrlib/weave.py	2007-06-23 05:14:25 +0000
+++ b/bzrlib/weave.py	2007-07-24 11:44:40 +0000
@@ -449,7 +449,8 @@
         """Please use Weave.clone_text now."""
         return self.clone_text(new_rev_id, old_rev_id, parents)
 
-    def _add_lines(self, version_id, parents, lines, parent_texts):
+    def _add_lines(self, version_id, parents, lines, parent_texts,
+                   left_matching_blocks=None):
         """See VersionedFile.add_lines."""
         return self._add(version_id, lines, map(self._lookup, parents))
 
@@ -1109,7 +1110,8 @@
             # new file, save it
             self._save()
 
-    def _add_lines(self, version_id, parents, lines, parent_texts):
+    def _add_lines(self, version_id, parents, lines, parent_texts,
+        left_matching_blocks=None):
         """Add a version and save the weave."""
         self.check_not_reserved_id(version_id)
         result = super(WeaveFile, self)._add_lines(version_id, parents, lines,
