Rev 3918: Implement make_delta and apply_delta. in http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam
John Arbash Meinel
john at arbash-meinel.com
Fri Mar 27 20:12:29 GMT 2009
At http://bzr.arbash-meinel.com/branches/bzr/brisbane/vilajam
------------------------------------------------------------
revno: 3918
revision-id: john at arbash-meinel.com-20090327201212-1ykalx15yr1cquxt
parent: john at arbash-meinel.com-20090327193046-9i5yqw3estnvgv5a
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: vilajam
timestamp: Fri 2009-03-27 15:12:12 -0500
message:
Implement make_delta and apply_delta.
Update the permuted tests so that both implementations are tested.
-------------- next part --------------
=== modified file 'bzrlib/_groupcompress_py.py'
--- a/bzrlib/_groupcompress_py.py 2009-03-27 19:30:46 +0000
+++ b/bzrlib/_groupcompress_py.py 2009-03-27 20:12:12 +0000
@@ -206,10 +206,12 @@
out_lines.append(copy_bytes)
index_lines.append(False)
- def make_delta(self, new_lines, soft=False):
+ def make_delta(self, new_lines, bytes_length=None, soft=False):
"""Compute the delta for this content versus the original content."""
- # reserved for content type, content length, target_len
- out_lines = ['', '', '']
+ if bytes_length is None:
+ bytes_length = sum(map(len, new_lines))
+ # reserved for content type, content length
+ out_lines = ['', '', encode_base128_int(bytes_length)]
index_lines = [False, False, False]
blocks = self.get_matching_blocks(new_lines, soft=soft)
current_line_num = 0
@@ -226,6 +228,33 @@
return out_lines, index_lines
+def encode_base128_int(val):
+ """Convert an integer into a 7-bit lsb encoding."""
+ bytes = []
+ count = 0
+ while val >= 0x80:
+ bytes.append(chr((val | 0x80) & 0xFF))
+ val >>= 7
+ bytes.append(chr(val))
+ return ''.join(bytes)
+
+
+def decode_base128_int(bytes):
+ """Decode an integer from a 7-bit lsb encoding."""
+ offset = 0
+ val = 0
+ shift = 0
+ bval = ord(bytes[offset])
+ while bval >= 0x80:
+ val |= (bval & 0x7F) << shift
+ shift += 7
+ offset += 1
+ bval = ord(bytes[offset])
+ val |= bval << shift
+ offset += 1
+ return val, offset
+
+
def encode_copy_instruction(offset, length):
"""Convert this offset into a control code and bytes."""
copy_command = 0x80
@@ -258,27 +287,87 @@
return ''.join(copy_bytes)
+def decode_copy_instruction(bytes, cmd, pos):
+ """Decode a copy instruction from the next few bytes.
+
+ A copy instruction is a variable number of bytes, so we will parse the
+ bytes we care about, and return the new position, as well as the offset and
+ length referred to in the bytes.
+
+ :param bytes: A string of bytes
+ :param cmd: The command code
+ :param pos: The position in bytes right after the copy command
+ :return: (offset, length, newpos)
+ The offset of the copy start, the number of bytes to copy, and the
+ position after the last byte of the copy
+ """
+ if cmd & 0x80 != 0x80:
+ raise ValueError('copy instructions must have bit 0x80 set')
+ offset = 0
+ length = 0
+ if (cmd & 0x01):
+ offset = ord(bytes[pos])
+ pos += 1
+ if (cmd & 0x02):
+ offset = offset | (ord(bytes[pos]) << 8)
+ pos += 1
+ if (cmd & 0x04):
+ offset = offset | (ord(bytes[pos]) << 16)
+ pos += 1
+ if (cmd & 0x08):
+ offset = offset | (ord(bytes[pos]) << 24)
+ pos += 1
+ if (cmd & 0x10):
+ length = ord(bytes[pos])
+ pos += 1
+ if (cmd & 0x20):
+ length = length | (ord(bytes[pos]) << 8)
+ pos += 1
+ if (cmd & 0x40):
+ length = length | (ord(bytes[pos]) << 16)
+ pos += 1
+ if length == 0:
+ length = 65536
+ return (offset, length, pos)
+
def make_delta(source_bytes, target_bytes):
"""Create a delta from source to target."""
# TODO: The checks below may not be a the right place yet.
- if not isinstance(source_bytes, str):
+ if type(source_bytes) is not str:
raise TypeError('source is not a str')
- if not isinstance(target_bytes, str):
+ if type(target_bytes) is not str:
raise TypeError('target is not a str')
line_locations = EquivalenceTable([])
- return None
+ source_lines = osutils.split_lines(source_bytes)
+ line_locations.extend_lines(source_lines, [True]*len(source_lines))
+ delta, _ = line_locations.make_delta(osutils.split_lines(target_bytes),
+ bytes_length=len(target_bytes))
+ return ''.join(delta)
def apply_delta(basis, delta):
"""Apply delta to this object to become new_version_id."""
+ if type(basis) is not str:
+ raise TypeError('basis is not a str')
+ if type(delta) is not str:
+ raise TypeError('delta is not a str')
+ target_length, pos = decode_base128_int(delta)
lines = []
- last_offset = 0
- # eq ranges occur where gaps occur
- # start, end refer to offsets in basis
- for op, start, count, delta_lines in delta:
- if op == 'c':
- lines.append(basis[start:start+count])
- else:
- lines.extend(delta_lines)
- return lines
+ len_delta = len(delta)
+ while pos < len_delta:
+ cmd = ord(delta[pos])
+ pos += 1
+ if cmd & 0x80:
+ offset, length, pos = decode_copy_instruction(delta, cmd, pos)
+ lines.append(basis[offset:offset+length])
+ else: # Insert of 'cmd' bytes
+ if cmd == 0:
+ raise ValueError('Command == 0 not supported yet')
+ lines.append(delta[pos:pos+cmd])
+ pos += cmd
+ bytes = ''.join(lines)
+ if len(bytes) != target_length:
+ raise ValueError('Delta claimed to be %d long, but ended up'
+ ' %d long' % (target_length, len(bytes)))
+ return bytes
=== modified file 'bzrlib/groupcompress.py'
--- a/bzrlib/groupcompress.py 2009-03-27 19:30:46 +0000
+++ b/bzrlib/groupcompress.py 2009-03-27 20:12:12 +0000
@@ -55,33 +55,6 @@
_null_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
-def encode_base128_int(val):
- """Convert an integer into a 7-bit lsb encoding."""
- bytes = []
- count = 0
- while val >= 0x80:
- bytes.append(chr((val | 0x80) & 0xFF))
- val >>= 7
- bytes.append(chr(val))
- return ''.join(bytes)
-
-
-def decode_base128_int(bytes):
- """Decode an integer from a 7-bit lsb encoding."""
- offset = 0
- val = 0
- shift = 0
- bval = ord(bytes[offset])
- while bval >= 0x80:
- val |= (bval & 0x7F) << shift
- shift += 7
- offset += 1
- bval = ord(bytes[offset])
- val |= bval << shift
- offset += 1
- return val, offset
-
-
def sort_gc_optimal(parent_map):
"""Sort and group the keys in parent_map into groupcompress order.
@@ -784,8 +757,7 @@
bytes_length = len(bytes)
new_lines = osutils.split_lines(bytes)
out_lines, index_lines = self.line_locations.make_delta(new_lines,
- soft=soft)
- out_lines[2] = encode_base128_int(bytes_length)
+ bytes_length=bytes_length, soft=soft)
delta_length = sum(map(len, out_lines))
if delta_length > max_delta_size:
# The delta is longer than the fulltext, insert a fulltext
@@ -1809,6 +1781,8 @@
from bzrlib._groupcompress_py import (
apply_delta,
+ encode_base128_int,
+ decode_base128_int,
encode_copy_instruction,
EquivalenceTable,
)
=== modified file 'bzrlib/tests/test__groupcompress.py'
--- a/bzrlib/tests/test__groupcompress.py 2009-03-27 19:30:46 +0000
+++ b/bzrlib/tests/test__groupcompress.py 2009-03-27 20:12:12 +0000
@@ -14,7 +14,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-"""Tests for the pyrex extension of groupcompress"""
+"""Tests for the python and pyrex extensions of groupcompress"""
from bzrlib import (
groupcompress,
@@ -138,16 +138,35 @@
ident_delta = self.make_delta(_text3, _text3)
self.assertEqual('\x87\x01\x90\x87', ident_delta)
+ def assertDeltaIn(self, delta1, delta2, delta):
+ """Make sure that the delta bytes match one of the expectations."""
+ # In general, the python delta matcher gives different results than the
+ # pyrex delta matcher. Both should be valid deltas, though.
+ if delta not in (delta1, delta2):
+ self.fail("Delta bytes:\n"
+ " %r\n"
+ "not in %r\n"
+ " or %r"
+ % (delta, delta1, delta2))
+
def test_make_delta(self):
delta = self.make_delta(_text1, _text2)
- self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n', delta)
+ self.assertDeltaIn(
+ 'N\x90/\x1fdiffer from\nagainst other text\n',
+ 'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13',
+ delta)
delta = self.make_delta(_text2, _text1)
- self.assertEqual('M\x90/\x1ebe matched\nagainst other text\n', delta)
+ self.assertDeltaIn(
+ 'M\x90/\x1ebe matched\nagainst other text\n',
+ 'M\x90\x1d\x1dwhich is meant to be matched\n\x91;\x13',
+ delta)
delta = self.make_delta(_text3, _text1)
self.assertEqual('M\x90M', delta)
delta = self.make_delta(_text3, _text2)
- self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n',
- delta)
+ self.assertDeltaIn(
+ 'N\x90/\x1fdiffer from\nagainst other text\n',
+ 'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13',
+ delta)
def test_apply_delta_is_typesafe(self):
self.apply_delta(_text1, 'M\x90M')
@@ -263,3 +282,133 @@
self.assertEqual(_fourth_text,
self._gc_module.apply_delta(source, fifth_delta))
self.assertEqual('\x80\x01\x91\xa7\x7f\x01\n', fifth_delta)
+
+
+class TestCopyInstruction(tests.TestCase):
+
+ def assertEncode(self, expected, offset, length):
+ bytes = _groupcompress_py.encode_copy_instruction(offset, length)
+ if expected != bytes:
+ self.assertEqual([hex(ord(e)) for e in expected],
+ [hex(ord(b)) for b in bytes])
+
+ def assertDecode(self, exp_offset, exp_length, exp_newpos, bytes, pos):
+ cmd = ord(bytes[pos])
+ pos += 1
+ out = _groupcompress_py.decode_copy_instruction(bytes, cmd, pos)
+ self.assertEqual((exp_offset, exp_length, exp_newpos), out)
+
+ def test_encode_no_length(self):
+ self.assertEncode('\x80', 0, None)
+ self.assertEncode('\x81\x01', 1, None)
+ self.assertEncode('\x81\x0a', 10, None)
+ self.assertEncode('\x81\xff', 255, None)
+ self.assertEncode('\x82\x01', 256, None)
+ self.assertEncode('\x83\x01\x01', 257, None)
+ self.assertEncode('\x8F\xff\xff\xff\xff', 0xFFFFFFFF, None)
+ self.assertEncode('\x8E\xff\xff\xff', 0xFFFFFF00, None)
+ self.assertEncode('\x8D\xff\xff\xff', 0xFFFF00FF, None)
+ self.assertEncode('\x8B\xff\xff\xff', 0xFF00FFFF, None)
+ self.assertEncode('\x87\xff\xff\xff', 0x00FFFFFF, None)
+ self.assertEncode('\x8F\x04\x03\x02\x01', 0x01020304, None)
+
+ def test_encode_no_offset(self):
+ self.assertEncode('\x90\x01', 0, 1)
+ self.assertEncode('\x90\x0a', 0, 10)
+ self.assertEncode('\x90\xff', 0, 255)
+ self.assertEncode('\xA0\x01', 0, 256)
+ self.assertEncode('\xB0\x01\x01', 0, 257)
+ self.assertEncode('\xB0\xff\xff', 0, 0xFFFF)
+ # Special case, if copy == 64KiB, then we store exactly 0
+ # Note that this puns with a copy of exactly 0 bytes, but we don't care
+ # about that, as we would never actually copy 0 bytes
+ self.assertEncode('\x80', 0, 64*1024)
+
+ def test_encode(self):
+ self.assertEncode('\x91\x01\x01', 1, 1)
+ self.assertEncode('\x91\x09\x0a', 9, 10)
+ self.assertEncode('\x91\xfe\xff', 254, 255)
+ self.assertEncode('\xA2\x02\x01', 512, 256)
+ self.assertEncode('\xB3\x02\x01\x01\x01', 258, 257)
+ self.assertEncode('\xB0\x01\x01', 0, 257)
+ # Special case, if copy == 64KiB, then we store exactly 0
+ # Note that this puns with a copy of exactly 0 bytes, but we don't care
+ # about that, as we would never actually copy 0 bytes
+ self.assertEncode('\x81\x0a', 10, 64*1024)
+
+ def test_decode_no_length(self):
+ # If length is 0, it is interpreted as 64KiB
+ # The shortest possible instruction is a copy of 64KiB from offset 0
+ self.assertDecode(0, 65536, 1, '\x80', 0)
+ self.assertDecode(1, 65536, 2, '\x81\x01', 0)
+ self.assertDecode(10, 65536, 2, '\x81\x0a', 0)
+ self.assertDecode(255, 65536, 2, '\x81\xff', 0)
+ self.assertDecode(256, 65536, 2, '\x82\x01', 0)
+ self.assertDecode(257, 65536, 3, '\x83\x01\x01', 0)
+ self.assertDecode(0xFFFFFFFF, 65536, 5, '\x8F\xff\xff\xff\xff', 0)
+ self.assertDecode(0xFFFFFF00, 65536, 4, '\x8E\xff\xff\xff', 0)
+ self.assertDecode(0xFFFF00FF, 65536, 4, '\x8D\xff\xff\xff', 0)
+ self.assertDecode(0xFF00FFFF, 65536, 4, '\x8B\xff\xff\xff', 0)
+ self.assertDecode(0x00FFFFFF, 65536, 4, '\x87\xff\xff\xff', 0)
+ self.assertDecode(0x01020304, 65536, 5, '\x8F\x04\x03\x02\x01', 0)
+
+ def test_decode_no_offset(self):
+ self.assertDecode(0, 1, 2, '\x90\x01', 0)
+ self.assertDecode(0, 10, 2, '\x90\x0a', 0)
+ self.assertDecode(0, 255, 2, '\x90\xff', 0)
+ self.assertDecode(0, 256, 2, '\xA0\x01', 0)
+ self.assertDecode(0, 257, 3, '\xB0\x01\x01', 0)
+ self.assertDecode(0, 65535, 3, '\xB0\xff\xff', 0)
+ # Special case, if copy == 64KiB, then we store exactly 0
+ # Note that this puns with a copy of exactly 0 bytes, but we don't care
+ # about that, as we would never actually copy 0 bytes
+ self.assertDecode(0, 65536, 1, '\x80', 0)
+
+ def test_decode(self):
+ self.assertDecode(1, 1, 3, '\x91\x01\x01', 0)
+ self.assertDecode(9, 10, 3, '\x91\x09\x0a', 0)
+ self.assertDecode(254, 255, 3, '\x91\xfe\xff', 0)
+ self.assertDecode(512, 256, 3, '\xA2\x02\x01', 0)
+ self.assertDecode(258, 257, 5, '\xB3\x02\x01\x01\x01', 0)
+ self.assertDecode(0, 257, 3, '\xB0\x01\x01', 0)
+
+ def test_decode_not_start(self):
+ self.assertDecode(1, 1, 6, 'abc\x91\x01\x01def', 3)
+ self.assertDecode(9, 10, 5, 'ab\x91\x09\x0ade', 2)
+ self.assertDecode(254, 255, 6, 'not\x91\xfe\xffcopy', 3)
+
+
+class TestBase128Int(tests.TestCase):
+
+ def assertEqualEncode(self, bytes, val):
+ self.assertEqual(bytes, _groupcompress_py.encode_base128_int(val))
+
+ def assertEqualDecode(self, val, num_decode, bytes):
+ self.assertEqual((val, num_decode),
+ _groupcompress_py.decode_base128_int(bytes))
+
+ def test_encode(self):
+ self.assertEqualEncode('\x01', 1)
+ self.assertEqualEncode('\x02', 2)
+ self.assertEqualEncode('\x7f', 127)
+ self.assertEqualEncode('\x80\x01', 128)
+ self.assertEqualEncode('\xff\x01', 255)
+ self.assertEqualEncode('\x80\x02', 256)
+ self.assertEqualEncode('\xff\xff\xff\xff\x0f', 0xFFFFFFFF)
+
+ def test_decode(self):
+ self.assertEqualDecode(1, 1, '\x01')
+ self.assertEqualDecode(2, 1, '\x02')
+ self.assertEqualDecode(127, 1, '\x7f')
+ self.assertEqualDecode(128, 2, '\x80\x01')
+ self.assertEqualDecode(255, 2, '\xff\x01')
+ self.assertEqualDecode(256, 2, '\x80\x02')
+ self.assertEqualDecode(0xFFFFFFFF, 5, '\xff\xff\xff\xff\x0f')
+
+ def test_decode_with_trailing_bytes(self):
+ self.assertEqualDecode(1, 1, '\x01abcdef')
+ self.assertEqualDecode(127, 1, '\x7f\x01')
+ self.assertEqualDecode(128, 2, '\x80\x01abcdef')
+ self.assertEqualDecode(255, 2, '\xff\x01\xff')
+
+
=== modified file 'bzrlib/tests/test_groupcompress.py'
--- a/bzrlib/tests/test_groupcompress.py 2009-03-27 19:30:46 +0000
+++ b/bzrlib/tests/test_groupcompress.py 2009-03-27 20:12:12 +0000
@@ -279,88 +279,6 @@
self.assertEqual(sum(map(len, expected_lines)), end_point)
-class TestEncodeCopyInstruction(tests.TestCase):
-
- def assertCopyInstruction(self, expected, offset, length):
- bytes = groupcompress.encode_copy_instruction(offset, length)
- if expected != bytes:
- self.assertEqual([hex(ord(e)) for e in expected],
- [hex(ord(b)) for b in bytes])
-
- def test_encode_no_length(self):
- self.assertCopyInstruction('\x80', 0, None)
- self.assertCopyInstruction('\x81\x01', 1, None)
- self.assertCopyInstruction('\x81\x0a', 10, None)
- self.assertCopyInstruction('\x81\xff', 255, None)
- self.assertCopyInstruction('\x82\x01', 256, None)
- self.assertCopyInstruction('\x83\x01\x01', 257, None)
- self.assertCopyInstruction('\x8F\xff\xff\xff\xff', 0xFFFFFFFF, None)
- self.assertCopyInstruction('\x8E\xff\xff\xff', 0xFFFFFF00, None)
- self.assertCopyInstruction('\x8D\xff\xff\xff', 0xFFFF00FF, None)
- self.assertCopyInstruction('\x8B\xff\xff\xff', 0xFF00FFFF, None)
- self.assertCopyInstruction('\x87\xff\xff\xff', 0x00FFFFFF, None)
- self.assertCopyInstruction('\x8F\x04\x03\x02\x01', 0x01020304, None)
-
- def test_encode_no_offset(self):
- self.assertCopyInstruction('\x90\x01', 0, 1)
- self.assertCopyInstruction('\x90\x0a', 0, 10)
- self.assertCopyInstruction('\x90\xff', 0, 255)
- self.assertCopyInstruction('\xA0\x01', 0, 256)
- self.assertCopyInstruction('\xB0\x01\x01', 0, 257)
- self.assertCopyInstruction('\xB0\x01\x01', 0, 257)
- self.assertCopyInstruction('\xB0\xff\xff', 0, 0xFFFF)
- # Special case, if copy == 64KiB, then we store exactly 0
- # Note that this puns with a copy of exactly 0 bytes, but we don't care
- # about that, as we would never actually copy 0 bytes
- self.assertCopyInstruction('\x80', 0, 64*1024)
-
- def test_encode(self):
- self.assertCopyInstruction('\x91\x01\x01', 1, 1)
- self.assertCopyInstruction('\x91\x09\x0a', 9, 10)
- self.assertCopyInstruction('\x91\xfe\xff', 254, 255)
- self.assertCopyInstruction('\xA2\x02\x01', 512, 256)
- self.assertCopyInstruction('\xB3\x02\x01\x01\x01', 258, 257)
- self.assertCopyInstruction('\xB0\x01\x01', 0, 257)
- # Special case, if copy == 64KiB, then we store exactly 0
- # Note that this puns with a copy of exactly 0 bytes, but we don't care
- # about that, as we would never actually copy 0 bytes
- self.assertCopyInstruction('\x81\x0a', 10, 64*1024)
-
-
-class TestBase128Int(tests.TestCase):
-
- def assertEqualEncode(self, bytes, val):
- self.assertEqual(bytes, groupcompress.encode_base128_int(val))
-
- def assertEqualDecode(self, val, num_decode, bytes):
- self.assertEqual((val, num_decode),
- groupcompress.decode_base128_int(bytes))
-
- def test_encode(self):
- self.assertEqualEncode('\x01', 1)
- self.assertEqualEncode('\x02', 2)
- self.assertEqualEncode('\x7f', 127)
- self.assertEqualEncode('\x80\x01', 128)
- self.assertEqualEncode('\xff\x01', 255)
- self.assertEqualEncode('\x80\x02', 256)
- self.assertEqualEncode('\xff\xff\xff\xff\x0f', 0xFFFFFFFF)
-
- def test_decode(self):
- self.assertEqualDecode(1, 1, '\x01')
- self.assertEqualDecode(2, 1, '\x02')
- self.assertEqualDecode(127, 1, '\x7f')
- self.assertEqualDecode(128, 2, '\x80\x01')
- self.assertEqualDecode(255, 2, '\xff\x01')
- self.assertEqualDecode(256, 2, '\x80\x02')
- self.assertEqualDecode(0xFFFFFFFF, 5, '\xff\xff\xff\xff\x0f')
-
- def test_decode_with_trailing_bytes(self):
- self.assertEqualDecode(1, 1, '\x01abcdef')
- self.assertEqualDecode(127, 1, '\x7f\x01')
- self.assertEqualDecode(128, 2, '\x80\x01abcdef')
- self.assertEqualDecode(255, 2, '\xff\x01\xff')
-
-
class TestGroupCompressBlock(tests.TestCase):
def make_block(self, key_to_text):
More information about the bazaar-commits
mailing list