Rev 3461: * Knit record serialisation is now stricter on what it will accept, to guard against potential internal bugs, or broken input; in http://people.ubuntu.com/~robertc/baz2.0/mpdiff

Fri May 30 03:22:02 BST 2008

At http://people.ubuntu.com/~robertc/baz2.0/mpdiff

------------------------------------------------------------
revno: 3461
revision-id: robertc at robertcollins.net-20080530022141-r8vfuzpdv1cyb5fp
parent: pqm at pqm.ubuntu.com-20080529210000-bycgfufmrqq63tki
committer: Robert Collins <robertc at robertcollins.net>
branch nick: mpdiff
timestamp: Fri 2008-05-30 12:21:41 +1000
message:
   * Knit record serialisation is now stricter on what it will accept, to
     guard against potential internal bugs, or broken input. (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
=== modified file 'NEWS'

--- a/NEWS	2008-05-29 21:00:00 +0000
+++ b/NEWS	2008-05-30 02:21:41 +0000
@@ -115,6 +115,9 @@
 
   INTERNALS:
 
+    * Knit record serialisation is now stricter on what it will accept, to
+      guard against potential internal bugs, or broken input. (Robert Collins)
+
   API CHANGES:
 
     * ``Branch.abspath`` is deprecated; use the Tree or Transport 

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2008-05-12 02:40:40 +0000
+++ b/bzrlib/knit.py	2008-05-30 02:21:41 +0000
@@ -1193,10 +1193,16 @@
                 for i, j, n in seq.get_matching_blocks():
                     if n == 0:
                         continue
-                    # this appears to copy (origin, text) pairs across to the
-                    # new content for any line that matches the last-checked
+                    # this copies (origin, text) pairs across to the new
+                    # content for any line that matches the last-checked
                     # parent.
                     content._lines[j:j+n] = merge_content._lines[i:i+n]
+            if content._lines and content._lines[-1][1][-1] != '\n':
+                # The copied annotation was from a line without a trailing EOL,
+                # reinstate one for the content object, to ensure correct
+                # serialization.
+                line = content._lines[-1][1] + '\n'
+                content._lines[-1] = (content._lines[-1][0], line)
         if delta:
             if delta_seq is None:
                 reference_content = self._get_content(parents[0], parent_texts)
@@ -1339,6 +1345,10 @@
             delta = self._check_should_delta(present_parents)
 
         content = self.factory.make(lines, version_id)
+        if 'no-eol' in options:
+            # Hint to the content object that its text() call should strip the
+            # EOL.
+            content._should_strip_eol = True
         if delta or (self.factory.annotated and len(present_parents) > 0):
             # Merge annotations from parent texts if needed.
             delta_hunks = self._merge_annotations(content, present_parents,
@@ -2697,6 +2707,8 @@
                                      digest)],
             dense_lines or lines,
             ["end %s\n" % version_id]))
+        if lines and lines[-1][-1] != '\n':
+            raise ValueError('corrupt lines value %r' % lines)
         compressed_bytes = bytes_to_gzip(bytes)
         return len(compressed_bytes), compressed_bytes
 

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2008-05-12 02:40:40 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2008-05-30 02:21:41 +0000
@@ -530,6 +530,27 @@
         self.assertRaises(errors.ReservedId, vf.get_lines, 'b:')
         self.assertRaises(errors.ReservedId, vf.get_text, 'b:')
 
+    def test_add_unchanged_last_line_noeol_snapshot(self):
+        """Add a text with an unchanged last line with no eol should work."""
+        # Test adding this in a number of chain lengths; because the interface
+        # for VersionedFile does not allow forcing a specific chain length, we
+        # just use a small base to get the first snapshot, then a much longer
+        # first line for the next add (which will make the third add snapshot)
+        # and so on. 20 has been chosen as an arbitrary figure - knits use 200
+        # as a capped delta length, but ideally we would have some way of
+        # tuning the test to the store (e.g. keep going until a snapshot
+        # happens).
+        for length in range(20):
+            vf = self.get_file('case-%d' % length)
+            prefix = 'step-%d'
+            parents = []
+            for step in range(length):
+                version = prefix % step
+                vf.add_lines(version, parents, (['prelude \n'] * step) + ['line'])
+                parents = [version]
+            vf.add_lines('no-eol', parents, ['line'])
+            self.assertEqualDiff('line', vf.get_text('no-eol'))
+
     def test_make_mpdiffs(self):
         from bzrlib import multiparent
         vf = self.get_file('foo')