Rev 3034: Don't include the pack container length in the lengths given by get_data_stream. in http://people.ubuntu.com/~robertc/baz2.0/pack.read-locks

Mon Nov 26 22:33:39 GMT 2007

At http://people.ubuntu.com/~robertc/baz2.0/pack.read-locks

------------------------------------------------------------
revno: 3034
revision-id:robertc at robertcollins.net-20071126223325-wua9x81prpxueom3
parent: robertc at robertcollins.net-20071126211526-zp59n29oqm2m8s99
committer: Robert Collins <robertc at robertcollins.net>
branch nick: pack.read-locks
timestamp: Tue 2007-11-27 09:33:25 +1100
message:
  Don't include the pack container length in the lengths given by get_data_stream.
modified:
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/tests/test_knit.py      test_knit.py-20051212171302-95d4c00dd5f11f2b
=== modified file 'bzrlib/knit.py'

--- a/bzrlib/knit.py	2007-11-26 21:01:29 +0000
+++ b/bzrlib/knit.py	2007-11-26 22:33:25 +0000
@@ -616,14 +616,14 @@
         # build a list of results to return, plus instructions for data to
         # read from the file
         copy_queue_records = []
-        result_version_list = []
+        temp_version_list = []
         while ready_to_send:
             # XXX: pushing and popping lists may be a bit inefficient
             version_id = ready_to_send.pop(0)
             (index_memo, options, parents) = version_index[version_id]
             copy_queue_records.append((version_id, index_memo))
             none, data_pos, data_size = index_memo
-            result_version_list.append((version_id, options, data_size,
+            temp_version_list.append((version_id, options, data_size,
                 parents))
             if version_id in deferred:
                 # now we can send all the children of this revision - we could
@@ -632,18 +632,20 @@
                 ready_to_send[:0] = deferred.pop(version_id)
         assert len(deferred) == 0, \
             "Still have compressed child versions waiting to be sent"
-        # XXX:
-        # From here down to the return should really be logic in the returned
-        # callable -- in a class that adapts read_records_iter_raw to read
-        # requests.
+        # XXX: The stream format is such that we cannot stream it - we have to
+        # know the length of all the data a-priori.
         raw_datum = []
+        result_version_list = []
         for (version_id, raw_data), \
             (version_id2, options, _, parents) in \
             izip(self._data.read_records_iter_raw(copy_queue_records),
-                 result_version_list):
+                 temp_version_list):
             assert version_id == version_id2, \
                 'logic error, inconsistent results'
             raw_datum.append(raw_data)
+            result_version_list.append(
+                (version_id, options, len(raw_data), parents))
+        # provide a callback to get data incrementally.
         pseudo_file = StringIO(''.join(raw_datum))
         def read(length):
             if length is None:

=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2007-11-26 07:20:17 +0000
+++ b/bzrlib/tests/test_knit.py	2007-11-26 22:33:25 +0000
@@ -2685,3 +2685,37 @@
             set(index.iter_parents(['tip'])))
         self.assertEqual(set(),
             set(index.iter_parents([])))
+
+
+class TestPackKnits(KnitTests):
+    """Tests that use a _PackAccess and KnitGraphIndex."""
+
+    def test_get_data_stream_packs_ignores_pack_overhead(self):
+        # Packs have an encoding overhead that should not be included in the
+        # 'size' field of a data stream, because it is not returned by the
+        # raw_reading functions - that is why index_memos are opaque, and
+        # get_data_stream was abusing this.
+        packname = 'test.pack'
+        transport = self.get_transport()
+        def write_data(bytes):
+            transport.append_bytes(packname, bytes)
+        writer = pack.ContainerWriter(write_data)
+        writer.begin()
+        index = InMemoryGraphIndex(2)
+        knit_index = KnitGraphIndex(index, add_callback=index.add_nodes,
+            deltas=True)
+        indices = {index:(transport, packname)}
+        access = _PackAccess(indices, writer=(writer, index))
+        k = KnitVersionedFile('test', get_transport('.'),
+            delta=True, create=True, index=knit_index, access_method=access)
+        # insert something into the knit
+        k.add_lines('text-1', [], ["foo\n"])
+        # get a data stream for it
+        stream = k.get_data_stream(['text-1'])
+        # if the stream has been incorrectly assembled, we will get a short read
+        # when reading from the stream (as streams have no trailer)
+        expected_length = stream[1][0][2]
+        # we use -1 to do the read, so that if a trailer is added this test
+        # will fail and we'll adjust it to handle that case correctly, rather
+        # than allowing an over-read that is bogus.
+        self.assertEqual(expected_length, len(stream[2](-1)))