Rev 3568: Hack together a bunch of changes to allow inserting texts in reverse topological order during the generic fetch. in http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/reordered
John Arbash Meinel
john at arbash-meinel.com
Fri Jul 25 20:42:51 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.7-dev/reordered
------------------------------------------------------------
revno: 3568
revision-id: john at arbash-meinel.com-20080725194239-7phiux160is29uth
parent: pqm at pqm.ubuntu.com-20080721151553-11iasd1407hkznk1
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: reordered
timestamp: Fri 2008-07-25 14:42:39 -0500
message:
Hack together a bunch of changes to allow inserting texts in reverse topological order during the generic fetch.
The biggest thing is that we can't compact the lines into byte-strings, as
doing so grossly bloats memory. (_get_content_maps() shares strings between
revisions of the same text; ''.join() allocates new strings for everything
and doesn't free the shared memory.)
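As a minimal illustration (plain Python, not bzrlib code) of why lists of
lines stay cheap while joined strings do not:

    # Two revisions of a text kept as lists of lines can share the
    # unchanged line objects; only the touched line costs new memory.
    base_lines = ['line %d\n' % i for i in range(1000)]
    new_lines = list(base_lines)           # copies the list, shares the strings
    new_lines[10] = 'a changed line\n'     # only this line is a new allocation

    assert new_lines[0] is base_lines[0]   # unchanged lines are the same objects

    # ''.join() builds an independent byte-string for each revision, so
    # nothing is shared and memory roughly doubles per text held.
    base_text = ''.join(base_lines)
    new_text = ''.join(new_lines)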
Also, GenericRepoFetcher should request fulltexts rather than deltas. We
would have to unpack the deltas on the receiving side, and that requires
building each text up from its delta chain, which is very inefficient.
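As a rough sketch of that point, using the get_record_stream() /
get_bytes_as() calls that appear in the patch below (the repository and key
variables are placeholders, not actual fetcher code):

    stream = from_repository.texts.get_record_stream(
        text_keys, 'topological', True)    # True = include the delta closure
    for record in stream:
        if record.storage_kind == 'absent':
            continue
        # Asking for the fulltext here is the cheap path; if we shipped raw
        # deltas instead, the receiver would have to rebuild every fulltext
        # from its delta chain before it could insert anything.
        text = record.get_bytes_as('fulltext')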
The one thing we *could* do is keep a text cache. But again, going back to
a single string bloats memory compared with, say, lines or even chunks.
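A hypothetical cache along those lines, keeping lists of lines rather than
joined strings (all names here, _text_cache and build_lines, are made up for
illustration):

    _text_cache = {}

    def get_cached_lines(key, build_lines):
        """Return the lines for key, building them at most once."""
        try:
            return _text_cache[key]
        except KeyError:
            lines = build_lines(key)       # e.g. extracted from the knit
            _text_cache[key] = lines       # store the list, never ''.join() it
            return lines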
-------------- next part --------------
=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py 2008-06-25 10:06:48 +0000
+++ b/bzrlib/fetch.py 2008-07-25 19:42:39 +0000
@@ -179,7 +179,7 @@
to_texts = self.to_repository.texts
from_texts = self.from_repository.texts
to_texts.insert_record_stream(from_texts.get_record_stream(
- text_keys, 'topological', False))
+ text_keys, 'topological', True))
# Cause an error if a text occurs after we have done the
# copy.
text_keys = None
@@ -239,7 +239,7 @@
# corrupt.
to_weave.insert_record_stream(from_weave.get_record_stream(
[(rev_id,) for rev_id in revs],
- 'topological', False))
+ 'topological', True))
finally:
child_pb.finished()
@@ -261,29 +261,22 @@
"""
def _fetch_revision_texts(self, revs, pb):
- """Fetch revision object texts"""
- count = 0
- total = len(revs)
- for rev in revs:
- pb.update('copying revisions', count, total)
- try:
- sig_text = self.from_repository.get_signature_text(rev)
- self.to_repository.add_signature_text(rev, sig_text)
- except errors.NoSuchRevision:
- # not signed.
- pass
- self._copy_revision(rev)
- count += 1
- # fixup inventory if needed:
- # this is expensive because we have no inverse index to current ghosts.
- # but on local disk its a few seconds and sftp push is already insane.
- # so we just-do-it.
- # FIXME: repository should inform if this is needed.
+ # may need to be a InterRevisionStore call here.
+ to_sf = self.to_repository.signatures
+ from_sf = self.from_repository.signatures
+ # A missing signature is just skipped.
+ to_sf.insert_record_stream(filter_absent(from_sf.get_record_stream(
+ [(rev_id,) for rev_id in revs],
+ 'unordered', False)))
+ self._fetch_just_revision_texts(revs)
self.to_repository.reconcile()
- def _copy_revision(self, rev_id):
- rev = self.from_repository.get_revision(rev_id)
- self.to_repository.add_revision(rev_id, rev)
+ def _fetch_just_revision_texts(self, version_ids):
+ to_rf = self.to_repository.revisions
+ from_rf = self.from_repository.revisions
+ to_rf.insert_record_stream(from_rf.get_record_stream(
+ [(rev_id,) for rev_id in version_ids],
+ 'topological', False))
class KnitRepoFetcher(RepoFetcher):
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-07-18 03:07:07 +0000
+++ b/bzrlib/knit.py 2008-07-25 19:42:39 +0000
@@ -1208,7 +1208,7 @@
needed_from_fallback - absent_keys)
for key in present_keys:
yield FulltextContentFactory(key, global_map[key], None,
- ''.join(text_map[key]))
+ text_map.pop(key))
else:
for source, keys in source_keys:
if source is parent_maps[0]:
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2008-07-16 14:23:05 +0000
+++ b/bzrlib/repository.py 2008-07-25 19:42:39 +0000
@@ -1532,7 +1532,11 @@
texts = {}
for record in stream:
if record.storage_kind != 'absent':
- texts[record.key] = record.get_bytes_as('fulltext')
+ bytes = record.get_bytes_as('fulltext')
+ if isinstance(bytes, list):
+ texts[record.key] = ''.join(bytes)
+ else:
+ texts[record.key] = bytes
else:
raise errors.NoSuchRevision(self, record.key)
for key in keys: