Rev 4204: (jam) Change Repository.iter_files_bytes() to return chunked strings. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Wed Mar 25 19:44:28 GMT 2009


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 4204
revision-id: pqm at pqm.ubuntu.com-20090325192730-yt4por4rlu0x7gvi
parent: pqm at pqm.ubuntu.com-20090325184331-1up3t0gh14ttr5zm
parent: john at arbash-meinel.com-20090325162734-ugfiu6gxisdc2m4p
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2009-03-25 19:27:30 +0000
message:
  (jam) Change Repository.iter_files_bytes() to return chunked strings.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/revisiontree.py         revisiontree.py-20060724012533-bg8xyryhxd0o0i0h-1
  bzrlib/tests/test_bundle.py    test.py-20050630184834-092aa401ab9f039c
  bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
    ------------------------------------------------------------
    revno: 4202.1.1
    revision-id: john at arbash-meinel.com-20090325162734-ugfiu6gxisdc2m4p
    parent: pqm at pqm.ubuntu.com-20090325042012-23a6pm0mraw7g2kg
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: iter_files_bytes_chunked
    timestamp: Wed 2009-03-25 11:27:34 -0500
    message:
      Update Repository.iter_files_bytes() to return an iterable of bytestrings.
      Several places in the code expected it to return exactly a string, so these have
      been updated as well.
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
      bzrlib/revisiontree.py         revisiontree.py-20060724012533-bg8xyryhxd0o0i0h-1
      bzrlib/tests/test_bundle.py    test.py-20050630184834-092aa401ab9f039c
      bzrlib/workingtree_4.py        workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'NEWS'
--- a/NEWS	2009-03-25 04:20:12 +0000
+++ b/NEWS	2009-03-25 16:27:34 +0000
@@ -227,6 +227,12 @@
   whether the repository can efficiently generate deltas between trees
   regardless of tree size. (Robert Collins)
 
+* ``Repository.iter_files_bytes()`` now properly returns an "iterable of
+  byte strings" (aka 'chunked') for the content. It previously was
+  returning a plain string, which worked, but performed very poorly when
+  building a working tree (file.writelines(str) is very inefficient). This
+  can have a large effect on ``bzr checkout`` times. (John Arbash Meinel)
+
 * The smart server verb ``Repository.get_parent_map`` can now include
   information about ghosts when the special revision ``include-missing:``
   is in the requested parents map list. With this flag, ghosts are

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2009-03-25 04:20:12 +0000
+++ b/bzrlib/repository.py	2009-03-25 16:27:34 +0000
@@ -1854,7 +1854,7 @@
         for record in self.texts.get_record_stream(text_keys, 'unordered', True):
             if record.storage_kind == 'absent':
                 raise errors.RevisionNotPresent(record.key, self)
-            yield text_keys[record.key], record.get_bytes_as('fulltext')
+            yield text_keys[record.key], record.get_bytes_as('chunked')
 
     def _generate_text_key_index(self, text_key_references=None,
         ancestors=None):

=== modified file 'bzrlib/revisiontree.py'
--- a/bzrlib/revisiontree.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/revisiontree.py	2009-03-25 16:27:34 +0000
@@ -64,7 +64,8 @@
         return self._revision_id
 
     def get_file_text(self, file_id, path=None):
-        return list(self.iter_files_bytes([(file_id, None)]))[0][1]
+        _, content = list(self.iter_files_bytes([(file_id, None)]))[0]
+        return ''.join(content)
 
     def get_file(self, file_id, path=None):
         return StringIO(self.get_file_text(file_id))

=== modified file 'bzrlib/tests/test_bundle.py'
--- a/bzrlib/tests/test_bundle.py	2009-03-23 14:59:43 +0000
+++ b/bzrlib/tests/test_bundle.py	2009-03-25 16:27:34 +0000
@@ -1342,10 +1342,14 @@
         install_bundle(target_repo, serializer.read(s))
         target_repo.lock_read()
         self.addCleanup(target_repo.unlock)
+        # Turn the 'iterators_of_bytes' back into simple strings for comparison
+        repo_texts = dict((i, ''.join(content)) for i, content
+                          in target_repo.iter_files_bytes(
+                                [('fileid-2', 'rev1', '1'),
+                                 ('fileid-2', 'rev2', '2')]))
         self.assertEqual({'1':'contents1\nstatic\n',
-            '2':'contents2\nstatic\n'},
-            dict(target_repo.iter_files_bytes(
-                [('fileid-2', 'rev1', '1'), ('fileid-2', 'rev2', '2')])))
+                          '2':'contents2\nstatic\n'},
+                         repo_texts)
         rtree = target_repo.revision_tree('rev2')
         inventory_vf = target_repo.inventories
         # If the inventory store has a graph, it must match the revision graph.

=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py	2009-03-25 04:20:12 +0000
+++ b/bzrlib/workingtree_4.py	2009-03-25 16:27:34 +0000
@@ -1715,7 +1715,8 @@
         return self.inventory[file_id].text_size
 
     def get_file_text(self, file_id, path=None):
-        return list(self.iter_files_bytes([(file_id, None)]))[0][1]
+        _, content = list(self.iter_files_bytes([(file_id, None)]))[0]
+        return ''.join(content)
 
     def get_reference_revision(self, file_id, path=None):
         return self.inventory[file_id].reference_revision




More information about the bazaar-commits mailing list