Rev 4204: (jam) Change Repository.iter_files_bytes() to return chunked strings. in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Wed Mar 25 19:44:28 GMT 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4204
revision-id: pqm at pqm.ubuntu.com-20090325192730-yt4por4rlu0x7gvi
parent: pqm at pqm.ubuntu.com-20090325184331-1up3t0gh14ttr5zm
parent: john at arbash-meinel.com-20090325162734-ugfiu6gxisdc2m4p
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2009-03-25 19:27:30 +0000
message:
(jam) Change Repository.iter_files_bytes() to return chunked strings.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/revisiontree.py revisiontree.py-20060724012533-bg8xyryhxd0o0i0h-1
bzrlib/tests/test_bundle.py test.py-20050630184834-092aa401ab9f039c
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
------------------------------------------------------------
revno: 4202.1.1
revision-id: john at arbash-meinel.com-20090325162734-ugfiu6gxisdc2m4p
parent: pqm at pqm.ubuntu.com-20090325042012-23a6pm0mraw7g2kg
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: iter_files_bytes_chunked
timestamp: Wed 2009-03-25 11:27:34 -0500
message:
Update Repository.iter_files_bytes() to return an iterable of bytestrings.
Several places in the code expected it to return exactly a string, so these have
been updated as well.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/revisiontree.py revisiontree.py-20060724012533-bg8xyryhxd0o0i0h-1
bzrlib/tests/test_bundle.py test.py-20050630184834-092aa401ab9f039c
bzrlib/workingtree_4.py workingtree_4.py-20070208044105-5fgpc5j3ljlh5q6c-1
=== modified file 'NEWS'
--- a/NEWS 2009-03-25 04:20:12 +0000
+++ b/NEWS 2009-03-25 16:27:34 +0000
@@ -227,6 +227,12 @@
whether the repository can efficiently generate deltas between trees
regardless of tree size. (Robert Collins)
+* ``Repository.iter_files_bytes()`` now properly returns an "iterable of
+ byte strings" (aka 'chunked') for the content. It previously was
+ returning a plain string, which worked, but performed very poorly when
+ building a working tree (file.writelines(str) is very inefficient). This
+ can have a large effect on ``bzr checkout`` times. (John Arbash Meinel)
+
* The smart server verb ``Repository.get_parent_map`` can now include
information about ghosts when the special revision ``include-missing:``
is in the requested parents map list. With this flag, ghosts are
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2009-03-25 04:20:12 +0000
+++ b/bzrlib/repository.py 2009-03-25 16:27:34 +0000
@@ -1854,7 +1854,7 @@
for record in self.texts.get_record_stream(text_keys, 'unordered', True):
if record.storage_kind == 'absent':
raise errors.RevisionNotPresent(record.key, self)
- yield text_keys[record.key], record.get_bytes_as('fulltext')
+ yield text_keys[record.key], record.get_bytes_as('chunked')
def _generate_text_key_index(self, text_key_references=None,
ancestors=None):
=== modified file 'bzrlib/revisiontree.py'
--- a/bzrlib/revisiontree.py 2009-03-23 14:59:43 +0000
+++ b/bzrlib/revisiontree.py 2009-03-25 16:27:34 +0000
@@ -64,7 +64,8 @@
return self._revision_id
def get_file_text(self, file_id, path=None):
- return list(self.iter_files_bytes([(file_id, None)]))[0][1]
+ _, content = list(self.iter_files_bytes([(file_id, None)]))[0]
+ return ''.join(content)
def get_file(self, file_id, path=None):
return StringIO(self.get_file_text(file_id))
=== modified file 'bzrlib/tests/test_bundle.py'
--- a/bzrlib/tests/test_bundle.py 2009-03-23 14:59:43 +0000
+++ b/bzrlib/tests/test_bundle.py 2009-03-25 16:27:34 +0000
@@ -1342,10 +1342,14 @@
install_bundle(target_repo, serializer.read(s))
target_repo.lock_read()
self.addCleanup(target_repo.unlock)
+ # Turn the 'iterators_of_bytes' back into simple strings for comparison
+ repo_texts = dict((i, ''.join(content)) for i, content
+ in target_repo.iter_files_bytes(
+ [('fileid-2', 'rev1', '1'),
+ ('fileid-2', 'rev2', '2')]))
self.assertEqual({'1':'contents1\nstatic\n',
- '2':'contents2\nstatic\n'},
- dict(target_repo.iter_files_bytes(
- [('fileid-2', 'rev1', '1'), ('fileid-2', 'rev2', '2')])))
+ '2':'contents2\nstatic\n'},
+ repo_texts)
rtree = target_repo.revision_tree('rev2')
inventory_vf = target_repo.inventories
# If the inventory store has a graph, it must match the revision graph.
=== modified file 'bzrlib/workingtree_4.py'
--- a/bzrlib/workingtree_4.py 2009-03-25 04:20:12 +0000
+++ b/bzrlib/workingtree_4.py 2009-03-25 16:27:34 +0000
@@ -1715,7 +1715,8 @@
return self.inventory[file_id].text_size
def get_file_text(self, file_id, path=None):
- return list(self.iter_files_bytes([(file_id, None)]))[0][1]
+ _, content = list(self.iter_files_bytes([(file_id, None)]))[0]
+ return ''.join(content)
def get_reference_revision(self, file_id, path=None):
return self.inventory[file_id].reference_revision
More information about the bazaar-commits mailing list