Rev 2927: (robertc) Reduce list copying during text construction decreasing the time to extract single texts with many deltas. (Robert Collins) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Mon Oct 22 22:25:22 BST 2007
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 2927
revision-id: pqm at pqm.ubuntu.com-20071022212520-al7xlieh3d7ng370
parent: pqm at pqm.ubuntu.com-20071022204528-m4i3ievs46d19324
parent: robertc at robertcollins.net-20071022195326-f3cae7e3e36spmno
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Mon 2007-10-22 22:25:20 +0100
message:
(robertc) Reduce list copying during text construction decreasing the time to extract single texts with many deltas. (Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/knit.py knit.py-20051212171256-f056ac8f0fbe1bd9
------------------------------------------------------------
revno: 2921.2.2
merged: robertc at robertcollins.net-20071022195326-f3cae7e3e36spmno
parent: robertc at robertcollins.net-20071022060819-eojcv6pifohuxyns
committer: Robert Collins <robertc at robertcollins.net>
branch nick: knits
timestamp: Tue 2007-10-23 05:53:26 +1000
message:
Review feedback.
------------------------------------------------------------
revno: 2921.2.1
merged: robertc at robertcollins.net-20071022060819-eojcv6pifohuxyns
parent: pqm at pqm.ubuntu.com-20071019201226-6z006xotgfe7zmu8
committer: Robert Collins <robertc at robertcollins.net>
branch nick: knits
timestamp: Mon 2007-10-22 16:08:19 +1000
message:
* Knit text reconstruction now avoids making copies of the lines list for
interim texts when building a single text. The new ``apply_delta`` method
on ``KnitContent`` aids this by allowing modification of the revision id
such objects represent. (Robert Collins)
=== modified file 'NEWS'
--- a/NEWS 2007-10-22 15:54:47 +0000
+++ b/NEWS 2007-10-22 21:25:20 +0000
@@ -55,6 +55,11 @@
* Inventory serialisation no longer double-sha's the content.
(Robert Collins)
+ * Knit text reconstruction now avoids making copies of the lines list for
+ interim texts when building a single text. The new ``apply_delta`` method
+ on ``KnitContent`` aids this by allowing modification of the revision id
+ such objects represent. (Robert Collins)
+
* Pack indices are now partially parsed for specific key lookup using a
bisection approach. (Robert Collins)
@@ -78,7 +83,6 @@
* Show encodings, locale and list of plugins in the traceback message.
(Martin Pool, #63894)
-
DOCUMENTATION:
* New *Bazaar in Five Minutes* guide. (Matthew Revell)
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2007-10-17 04:41:40 +0000
+++ b/bzrlib/knit.py 2007-10-22 19:53:26 +0000
@@ -138,6 +138,10 @@
"""Return a list of (origin, text) tuples."""
return list(self.annotate_iter())
+ def apply_delta(self, delta, new_version_id):
+ """Apply delta to this object to become new_version_id."""
+ raise NotImplementedError(self.apply_delta)
+
def line_delta_iter(self, new_lines):
"""Generate line-based delta from this content to new_lines."""
new_texts = new_lines.text()
@@ -189,6 +193,14 @@
"""Yield tuples of (origin, text) for each content line."""
return iter(self._lines)
+ def apply_delta(self, delta, new_version_id):
+ """Apply delta to this object to become new_version_id."""
+ offset = 0
+ lines = self._lines
+ for start, end, count, delta_lines in delta:
+ lines[offset+start:offset+end] = delta_lines
+ offset = offset + (start - end) + count
+
def strip_last_line_newline(self):
line = self._lines[-1][1].rstrip('\n')
self._lines[-1] = (self._lines[-1][0], line)
@@ -225,6 +237,15 @@
for line in self._lines:
yield self._version_id, line
+ def apply_delta(self, delta, new_version_id):
+ """Apply delta to this object to become new_version_id."""
+ offset = 0
+ lines = self._lines
+ for start, end, count, delta_lines in delta:
+ lines[offset+start:offset+end] = delta_lines
+ offset = offset + (start - end) + count
+ self._version_id = new_version_id
+
def copy(self):
return PlainKnitContent(self._lines[:], self._version_id)
@@ -1004,6 +1025,12 @@
the requested versions and content_map contains the KnitContents.
Both dicts take version_ids as their keys.
"""
+ # FUTURE: This function could be improved for the 'extract many' case
+ # by tracking each component and only doing the copy when the number of
+ # children that need to apply deltas to it is > 1 or it is part of the
+ # final output.
+ version_ids = list(version_ids)
+ multiple_versions = len(version_ids) != 1
record_map = self._get_record_map(version_ids)
text_map = {}
@@ -1029,13 +1056,18 @@
content = self.factory.parse_fulltext(data, version_id)
elif method == 'line-delta':
delta = self.factory.parse_line_delta(data, version_id)
- content = content.copy()
- content._lines = self._apply_delta(content._lines,
- delta)
- content_map[component_id] = content
+ if multiple_versions:
+ # only doing this when we want multiple versions
+ # output avoids list copies - which reference and
+ # dereference many strings.
+ content = content.copy()
+ content.apply_delta(delta, version_id)
+ if multiple_versions:
+ content_map[component_id] = content
if 'no-eol' in self._index.get_options(version_id):
- content = content.copy()
+ if multiple_versions:
+ content = content.copy()
content.strip_last_line_newline()
final_content[version_id] = content
@@ -1052,16 +1084,6 @@
text_map[version_id] = text
return text_map, final_content
- @staticmethod
- def _apply_delta(lines, delta):
- """Apply delta to lines."""
- lines = list(lines)
- offset = 0
- for start, end, count, delta_lines in delta:
- lines[offset+start:offset+end] = delta_lines
- offset = offset + (start - end) + count
- return lines
-
def iter_lines_added_or_present_in_versions(self, version_ids=None,
pb=None):
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
More information about the bazaar-commits
mailing list