Rev 15: Factor out a get_matching_blocks style function. in http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk
Robert Collins
robertc at robertcollins.net
Thu Jul 24 02:51:53 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/groupcompress/trunk
------------------------------------------------------------
revno: 15
revision-id: robertc at robertcollins.net-20080724015151-vul2s7bru8fqytjl
parent: robertc at robertcollins.net-20080721124131-0eoo6jo4bm52yoaw
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Thu 2008-07-24 11:51:51 +1000
message:
Factor out a get_matching_blocks style function.
modified:
groupcompress.py groupcompress.py-20080705181503-ccbxd6xuy1bdnrpu-8
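
The new method returns matching blocks in the same shape as the standard
library's difflib.SequenceMatcher.get_matching_blocks(): (old_start,
new_start, length) triples, terminated by a zero-length sentinel. A minimal
illustration of that output shape using difflib only (the plugin builds its
blocks from self.line_locations rather than a SequenceMatcher):

    import difflib

    old_lines = ["a\n", "b\n", "c\n", "d\n"]
    new_lines = ["a\n", "b\n", "x\n", "d\n"]
    blocks = difflib.SequenceMatcher(None, old_lines,
                                     new_lines).get_matching_blocks()
    # blocks == [Match(a=0, b=0, size=2),   # old_lines[0:2] == new_lines[0:2]
    #            Match(a=3, b=3, size=1),   # old_lines[3:4] == new_lines[3:4]
    #            Match(a=4, b=4, size=0)]   # sentinel: (len(old), len(new), 0)
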
=== modified file 'groupcompress.py'
--- a/groupcompress.py 2008-07-21 12:41:31 +0000
+++ b/groupcompress.py 2008-07-24 01:51:51 +0000
@@ -123,6 +123,55 @@
         self.line_locations = {}
         self.labels_deltas = {}

+    def get_matching_blocks(self, lines):
+        """Return the ranges in lines which match self.lines.
+
+        :param lines: lines to compress
+        :return: A list of (old_start, new_start, length) tuples which reflect
+            a region in self.lines that is present in lines. The last element
+            of the list is always (old_len, new_len, 0) to provide an end point
+            for generating instructions from the matching blocks list.
+        """
+        result = []
+        pos = 0
+        line_locations = self.line_locations
+        range_len = 0
+        range_start = 0
+        flush_range = self.flush_range
+        copy_ends = None
+        # We either copy a range (while there are reusable lines) or we
+        # insert new lines. To find reusable lines we traverse
+        while pos < len(lines):
+            line = lines[pos]
+            if line not in line_locations:
+                if copy_ends:
+                    result.append((min(copy_ends) - range_len, range_start, range_len))
+                    range_start = pos
+                    range_len = 1
+                    copy_ends = None
+                else:
+                    range_len += 1
+            else:
+                locations, next = line_locations[line]
+                if copy_ends:
+                    next_locations = locations.intersection(copy_ends)
+                    if len(next_locations):
+                        # range continues
+                        range_len += 1
+                        copy_ends = set(loc + 1 for loc in next_locations)
+                        pos += 1
+                        continue
+                if copy_ends:
+                    result.append((min(copy_ends) - range_len, range_start, range_len))
+                range_len = 1
+                copy_ends = next
+                range_start = pos
+            pos += 1
+        if copy_ends:
+            result.append((min(copy_ends) - range_len, range_start, range_len))
+        result.append((len(self.lines), len(lines), 0))
+        return result
+
     def compress(self, key, lines, expected_sha):
         """Compress lines with label key.
@@ -149,45 +198,24 @@
         index_lines = [False, False]
         pos = 0
         line_locations = self.line_locations
-        accumulator = []
-        copying = False
         range_len = 0
         range_start = 0
         flush_range = self.flush_range
         copy_ends = None
+        blocks = self.get_matching_blocks(lines)
+        current_pos = 0
         # We either copy a range (while there are reusable lines) or we
         # insert new lines. To find reusable lines we traverse
-        while pos < len(lines):
-            line = lines[pos]
-            if line not in line_locations:
-                if copying:
-                    flush_range(copying, range_start, copy_ends, range_len,
-                        lines, new_lines, index_lines)
-                    copying = False
-                    range_start = pos
-                    range_len = 1
-                else:
-                    range_len += 1
-            else:
-                locations, next = line_locations[line]
-                if copying:
-                    next_locations = locations.intersection(copy_ends)
-                    if len(next_locations):
-                        # range continues
-                        range_len += 1
-                        copy_ends = set(loc + 1 for loc in next_locations)
-                        pos += 1
-                        continue
-                # New copy range starts here:
-                flush_range(copying, range_start, copy_ends, range_len, lines,
-                    new_lines, index_lines)
-                range_len = 1
-                copy_ends = next
-                range_start = pos
-                copying = True
-            pos += 1
-        flush_range(copying, range_start, copy_ends, range_len, lines,
-            new_lines, index_lines)
+        for old_start, new_start, range_len in blocks:
+            if new_start != current_pos:
+                # non-matching region
+                flush_range(False, current_pos, None, new_start - current_pos,
+                    lines, new_lines, index_lines)
+            current_pos = new_start + range_len
+            if not range_len:
+                continue
+            flush_range(True, new_start, [old_start + range_len], range_len,
+                lines, new_lines, index_lines)
         delta_start = (self.endpoint, len(self.lines))
         self.output_lines(new_lines, index_lines)
         trim_encoding_newline(lines)
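
Read on its own, the rewritten loop in compress() above amounts to the
following sketch; emit_insert and emit_copy are hypothetical stand-ins for
the two flush_range calls:

    def apply_blocks(blocks, lines, emit_insert, emit_copy):
        # Gaps between matching blocks are new content to insert; each block
        # itself is a copy of `length` lines starting at old_start in the
        # previously stored text.
        current_pos = 0
        for old_start, new_start, length in blocks:
            if new_start != current_pos:
                emit_insert(lines[current_pos:new_start])
            current_pos = new_start + length
            if length:
                emit_copy(old_start, length)
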