Rev 2927: (robertc) Reduce list copying during text construction, decreasing the time to extract single texts with many deltas. (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Mon Oct 22 22:25:22 BST 2007


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 2927
revision-id: pqm at pqm.ubuntu.com-20071022212520-al7xlieh3d7ng370
parent: pqm at pqm.ubuntu.com-20071022204528-m4i3ievs46d19324
parent: robertc at robertcollins.net-20071022195326-f3cae7e3e36spmno
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2007-10-22 22:25:20 +0100
message:
  (robertc) Reduce list copying during text construction, decreasing the time to extract single texts with many deltas. (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
    ------------------------------------------------------------
    revno: 2921.2.2
    merged: robertc at robertcollins.net-20071022195326-f3cae7e3e36spmno
    parent: robertc at robertcollins.net-20071022060819-eojcv6pifohuxyns
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: knits
    timestamp: Tue 2007-10-23 05:53:26 +1000
    message:
      Review feedback.
    ------------------------------------------------------------
    revno: 2921.2.1
    merged: robertc at robertcollins.net-20071022060819-eojcv6pifohuxyns
    parent: pqm at pqm.ubuntu.com-20071019201226-6z006xotgfe7zmu8
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: knits
    timestamp: Mon 2007-10-22 16:08:19 +1000
    message:
      * Knit text reconstruction now avoids making copies of the lines list for
        interim texts when building a single text. The new ``apply_delta`` method
        on ``KnitContent`` aids this by allowing modification of the revision id
        such objects represent. (Robert Collins)
=== modified file 'NEWS'
--- a/NEWS	2007-10-22 15:54:47 +0000
+++ b/NEWS	2007-10-22 21:25:20 +0000
@@ -55,6 +55,11 @@
    * Inventory serialisation no longer double-sha's the content.
      (Robert Collins)
 
+   * Knit text reconstruction now avoids making copies of the lines list for
+     interim texts when building a single text. The new ``apply_delta`` method
+     on ``KnitContent`` aids this by allowing modification of the revision id
+     such objects represent. (Robert Collins)
+
    * Pack indices are now partially parsed for specific key lookup using a
      bisection approach. (Robert Collins)
 
@@ -78,7 +83,6 @@
    * Show encodings, locale and list of plugins in the traceback message.
      (Martin Pool, #63894)
 
-
   DOCUMENTATION:
 
    * New *Bazaar in Five Minutes* guide.  (Matthew Revell)

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-10-17 04:41:40 +0000
+++ b/bzrlib/knit.py	2007-10-22 19:53:26 +0000
@@ -138,6 +138,10 @@
         """Return a list of (origin, text) tuples."""
         return list(self.annotate_iter())
 
+    def apply_delta(self, delta, new_version_id):
+        """Apply delta to this object to become new_version_id."""
+        raise NotImplementedError(self.apply_delta)
+
     def line_delta_iter(self, new_lines):
         """Generate line-based delta from this content to new_lines."""
         new_texts = new_lines.text()
@@ -189,6 +193,14 @@
         """Yield tuples of (origin, text) for each content line."""
         return iter(self._lines)
 
+    def apply_delta(self, delta, new_version_id):
+        """Apply delta to this object to become new_version_id."""
+        offset = 0
+        lines = self._lines
+        for start, end, count, delta_lines in delta:
+            lines[offset+start:offset+end] = delta_lines
+            offset = offset + (start - end) + count
+
     def strip_last_line_newline(self):
         line = self._lines[-1][1].rstrip('\n')
         self._lines[-1] = (self._lines[-1][0], line)
@@ -225,6 +237,15 @@
         for line in self._lines:
             yield self._version_id, line
 
+    def apply_delta(self, delta, new_version_id):
+        """Apply delta to this object to become new_version_id."""
+        offset = 0
+        lines = self._lines
+        for start, end, count, delta_lines in delta:
+            lines[offset+start:offset+end] = delta_lines
+            offset = offset + (start - end) + count
+        self._version_id = new_version_id
+
     def copy(self):
         return PlainKnitContent(self._lines[:], self._version_id)
 
@@ -1004,6 +1025,12 @@
         the requested versions and content_map contains the KnitContents.
         Both dicts take version_ids as their keys.
         """
+        # FUTURE: This function could be improved for the 'extract many' case
+        # by tracking each component and only doing the copy when the number of
+        # children that need to apply deltas to it is > 1 or it is part of the
+        # final output.
+        version_ids = list(version_ids)
+        multiple_versions = len(version_ids) != 1
         record_map = self._get_record_map(version_ids)
 
         text_map = {}
@@ -1029,13 +1056,18 @@
                         content = self.factory.parse_fulltext(data, version_id)
                     elif method == 'line-delta':
                         delta = self.factory.parse_line_delta(data, version_id)
-                        content = content.copy()
-                        content._lines = self._apply_delta(content._lines,
-                                                           delta)
-                    content_map[component_id] = content
+                        if multiple_versions:
+                            # only doing this when we want multiple versions
+                            # output avoids list copies - which reference and
+                            # dereference many strings.
+                            content = content.copy()
+                        content.apply_delta(delta, version_id)
+                    if multiple_versions:
+                        content_map[component_id] = content
 
             if 'no-eol' in self._index.get_options(version_id):
-                content = content.copy()
+                if multiple_versions:
+                    content = content.copy()
                 content.strip_last_line_newline()
             final_content[version_id] = content
 
@@ -1052,16 +1084,6 @@
             text_map[version_id] = text
         return text_map, final_content
 
-    @staticmethod
-    def _apply_delta(lines, delta):
-        """Apply delta to lines."""
-        lines = list(lines)
-        offset = 0
-        for start, end, count, delta_lines in delta:
-            lines[offset+start:offset+end] = delta_lines
-            offset = offset + (start - end) + count
-        return lines
-
     def iter_lines_added_or_present_in_versions(self, version_ids=None, 
                                                 pb=None):
         """See VersionedFile.iter_lines_added_or_present_in_versions()."""




More information about the bazaar-commits mailing list