Rev 3193: * ``Repository.get_data_stream`` is now deprecated in favour of ``Repository.get_data_stream_for_search`` in http://people.ubuntu.com/~robertc/baz2.0/search-results
Robert Collins
robertc at robertcollins.net
Thu Jan 17 05:31:24 GMT 2008
At http://people.ubuntu.com/~robertc/baz2.0/search-results
------------------------------------------------------------
revno: 3193
revision-id:robertc at robertcollins.net-20080117053053-2vx6ff0yr2bo9f2v
parent: robertc at robertcollins.net-20080117031420-zrmkezusngok2437
committer: Robert Collins <robertc at robertcollins.net>
branch nick: SearchResultInGetDataStream
timestamp: Thu 2008-01-17 16:30:53 +1100
message:
* ``Repository.get_data_stream`` is now deprecated in favour of
``Repository.get_data_stream_for_search`` which allows less network
traffic when requesting data streams over a smart server. (Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/fetch.py fetch.py-20050818234941-26fea6105696365d
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repofmt/knitrepo.py knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/smart/repository.py repository.py-20061128022038-vr5wy5bubyb8xttk-1
bzrlib/tests/repository_implementations/test_repository.py test_repository.py-20060131092128-ad07f494f5c9d26c
bzrlib/tests/test_remote.py test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
=== modified file 'NEWS'
--- a/NEWS 2008-01-17 03:14:20 +0000
+++ b/NEWS 2008-01-17 05:30:53 +0000
@@ -206,6 +206,10 @@
* Reduce selftest overhead to establish test names by memoization.
(Vincent Ladeuil)
+ * ``Repository.get_data_stream`` is now deprecated in favour of
+ ``Repository.get_data_stream_for_search`` which allows less network
+ traffic when requesting data streams over a smart server. (Robert Collins)
+
* Repository has a new method ``has_revisions`` which signals the presence
of many revisions by returning a set of the revisions listed which are
present. This can be done by index queries without reading data for parent
=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py 2008-01-17 03:14:20 +0000
+++ b/bzrlib/fetch.py 2008-01-17 05:30:53 +0000
@@ -34,7 +34,6 @@
import bzrlib
import bzrlib.errors as errors
from bzrlib.errors import InstallFailed
-from bzrlib.graph import SearchResult
from bzrlib.progress import ProgressPhase
from bzrlib.revision import is_null, NULL_REVISION
from bzrlib.symbol_versioning import (deprecated_function,
@@ -133,14 +132,18 @@
pp = ProgressPhase('Transferring', 4, self.pb)
try:
pp.next_phase()
- revs = self._revids_to_fetch().get_keys()
- if not revs:
+ search = self._revids_to_fetch()
+ if search is None:
return
- self._fetch_everything_for_revisions(revs, pp)
+ if getattr(self, '_fetch_everything_for_search', None) is not None:
+ self._fetch_everything_for_search(search, pp)
+ else:
+ # backward compatibility
+ self._fetch_everything_for_revisions(search.get_keys, pp)
finally:
self.pb.clear()
- def _fetch_everything_for_revisions(self, revs, pp):
+ def _fetch_everything_for_search(self, search, pp):
"""Fetch all data for the given set of revisions."""
# The first phase is "file". We pass the progress bar for it directly
# into item_keys_introduced_by, which has more information about how
@@ -153,6 +156,7 @@
phase = 'file'
pb = bzrlib.ui.ui_factory.nested_progress_bar()
try:
+ revs = search.get_keys()
data_to_fetch = self.from_repository.item_keys_introduced_by(revs, pb)
for knit_kind, file_id, revisions in data_to_fetch:
if knit_kind != phase:
@@ -194,10 +198,10 @@
mutter('fetch up to rev {%s}', self._last_revision)
if self._last_revision is NULL_REVISION:
# explicit limit of no revisions needed
- return SearchResult(set(), set(), 0, set())
+ return None
if (self._last_revision is not None and
self.to_repository.has_revision(self._last_revision)):
- return SearchResult(set(), set(), 0, set())
+ return None
try:
return self.to_repository.search_missing_revision_ids(
self.from_repository, self._last_revision,
@@ -409,8 +413,8 @@
class RemoteToOtherFetcher(GenericRepoFetcher):
- def _fetch_everything_for_revisions(self, revs, pp):
- data_stream = self.from_repository.get_data_stream(revs)
+ def _fetch_everything_for_search(self, search, pp):
+ data_stream = self.from_repository.get_data_stream_for_search(search)
self.to_repository.insert_data_stream(data_stream)
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2008-01-14 22:48:07 +0000
+++ b/bzrlib/remote.py 2008-01-17 05:30:53 +0000
@@ -691,6 +691,30 @@
"""RemoteRepositories never create working trees by default."""
return False
+ def revision_ids_to_search_result(self, result_set):
+ """Convert a set of revision ids to a graph SearchResult."""
+ result_parents = set()
+ for parents in self.get_graph().get_parent_map(
+ result_set).itervalues():
+ result_parents.update(parents)
+ included_keys = result_set.intersection(result_parents)
+ start_keys = result_set.difference(included_keys)
+ exclude_keys = result_parents.difference(result_set)
+ result = graph.SearchResult(start_keys, exclude_keys,
+ len(result_set), result_set)
+ return result
+
+ @needs_read_lock
+ def search_missing_revision_ids(self, other, revision_id=None, find_ghosts=True):
+ """Return the revision ids that other has that this does not.
+
+ These are returned in topological order.
+
+ revision_id: only return revision ids included by revision_id.
+ """
+ return repository.InterRepository.get(
+ other, self).search_missing_revision_ids(revision_id, find_ghosts)
+
def fetch(self, source, revision_id=None, pb=None):
if self.has_same_location(source):
# check that last_revision is in 'from' and then return a
@@ -941,7 +965,8 @@
self._ensure_real()
return self._real_repository.has_signature_for_revision_id(revision_id)
- def get_data_stream(self, revision_ids):
+ def get_data_stream_for_search(self, search):
+ revision_ids = search.get_keys()
REQUEST_NAME = 'Repository.stream_revisions_chunked'
path = self.bzrdir._path_for_remote_call(self._client)
response, protocol = self._client.call_expecting_body(
@@ -955,7 +980,7 @@
"bad request u'%s'" % REQUEST_NAME)):
protocol.cancel_read_body()
self._ensure_real()
- return self._real_repository.get_data_stream(revision_ids)
+ return self._real_repository.get_data_stream_for_search(search)
else:
raise errors.UnexpectedSmartServerResponse(response)
=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py 2008-01-11 05:28:46 +0000
+++ b/bzrlib/repofmt/knitrepo.py 2008-01-17 05:30:53 +0000
@@ -157,10 +157,20 @@
except errors.RevisionNotPresent:
raise errors.NoSuchRevision(self, revision_id)
+ @symbol_versioning.deprecated_method(symbol_versioning.one_two)
@needs_read_lock
def get_data_stream(self, revision_ids):
- """See Repository.get_data_stream."""
- item_keys = self.item_keys_introduced_by(revision_ids)
+ """See Repository.get_data_stream.
+
+ Deprecated in 1.2 for get_data_stream_for_search.
+ """
+ search_result = self.revision_ids_to_search_result(set(revision_ids))
+ return self.get_data_stream_for_search(search_result)
+
+ @needs_read_lock
+ def get_data_stream_for_search(self, search):
+ """See Repository.get_data_stream_for_search."""
+ item_keys = self.item_keys_introduced_by(search.get_keys())
for knit_kind, file_id, versions in item_keys:
name = (knit_kind,)
if knit_kind == 'file':
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2008-01-17 03:14:20 +0000
+++ b/bzrlib/repository.py 2008-01-17 05:30:53 +0000
@@ -813,6 +813,16 @@
def get_data_stream(self, revision_ids):
raise NotImplementedError(self.get_data_stream)
+ def get_data_stream_for_search(self, search_result):
+ """Get a data stream that can be inserted to a repository.
+
+ :param search_result: A bzrlib.graph.SearchResult selecting the
+ revisions to get.
+ :return: A data stream that can be inserted into a repository using
+ insert_data_stream.
+ """
+ raise NotImplementedError(self.get_data_stream_for_search)
+
def insert_data_stream(self, stream):
"""XXX What does this really do?
@@ -1780,6 +1790,19 @@
"""Return an object suitable for checking versioned files."""
return _VersionedFileChecker(self)
+ def revision_ids_to_search_result(self, result_set):
+ """Convert a set of revision ids to a graph SearchResult."""
+ result_parents = set()
+ for parents in self.get_graph().get_parent_map(
+ result_set).itervalues():
+ result_parents.update(parents)
+ included_keys = result_set.intersection(result_parents)
+ start_keys = result_set.difference(included_keys)
+ exclude_keys = result_parents.difference(result_set)
+ result = graph.SearchResult(start_keys, exclude_keys,
+ len(result_set), result_set)
+ return result
+
@needs_write_lock
def set_make_working_trees(self, new_value):
"""Set the policy flag for making working trees when creating branches.
@@ -1866,7 +1889,8 @@
depend on the revision index being consistent.
"""
raise NotImplementedError(self.revision_graph_can_have_wrong_parents)
-
+
+
# remove these delegates a while after bzr 0.15
def __make_delegated(name, from_module):
def _deprecated_repository_forwarder():
@@ -2398,20 +2422,7 @@
else:
source_ids = self.source.all_revision_ids()
result_set = set(source_ids).difference(target_ids)
- return self._set_to_search_result(result_set, self.source)
-
- def _set_to_search_result(self, result_set, repository):
- """Convert a set of revision ids to a graph SearchResult."""
- result_parents = set()
- for parents in repository.get_graph().get_parent_map(
- result_set).itervalues():
- result_parents.update(parents)
- included_keys = result_set.intersection(result_parents)
- start_keys = result_set.difference(included_keys)
- exclude_keys = result_parents.difference(result_set)
- result = graph.SearchResult(start_keys, exclude_keys,
- len(result_set), result_set)
- return result
+ return self.source.revision_ids_to_search_result(result_set)
@staticmethod
def _same_model(source, target):
@@ -2598,7 +2609,7 @@
# that against the revision records.
result_set = set(
self.source._eliminate_revisions_not_present(required_revisions))
- return self._set_to_search_result(result_set, self.source)
+ return self.source.revision_ids_to_search_result(result_set)
class InterKnitRepo(InterSameDataRepository):
@@ -2667,7 +2678,7 @@
# that against the revision records.
result_set = set(
self.source._eliminate_revisions_not_present(required_revisions))
- return self._set_to_search_result(result_set, self.source)
+ return self.source.revision_ids_to_search_result(result_set)
class InterPackRepo(InterSameDataRepository):
@@ -2758,7 +2769,7 @@
# we do not have a revision as that would be pointless.
target_ids = set(self.target.all_revision_ids())
result_set = set(source_ids).difference(target_ids)
- return self._set_to_search_result(result_set, self.source)
+ return self.source.revision_ids_to_search_result(result_set)
class InterModel1and2(InterRepository):
@@ -2863,10 +2874,10 @@
@needs_write_lock
def fetch(self, revision_id=None, pb=None, find_ghosts=False):
"""See InterRepository.fetch()."""
- revision_ids = self.target.missing_revision_ids(self.source,
+ revision_ids = self.target.search_missing_revision_ids(self.source,
revision_id, find_ghosts=find_ghosts).get_keys()
revision_ids = tsort.topo_sort(
- self.get_graph().get_parent_map(revision_ids))
+ self.source.get_graph().get_parent_map(revision_ids))
def revisions_iterator():
for current_revision_id in revision_ids:
revision = self.source.get_revision(current_revision_id)
=== modified file 'bzrlib/smart/repository.py'
--- a/bzrlib/smart/repository.py 2008-01-14 22:48:07 +0000
+++ b/bzrlib/smart/repository.py 2008-01-17 05:30:53 +0000
@@ -316,7 +316,8 @@
repository.unlock()
def _do_repository_request(self, repository, revision_ids):
- stream = repository.get_data_stream(revision_ids)
+ stream = repository.get_data_stream_for_search(
+ repository.revision_ids_to_search_result(set(revision_ids)))
buffer = StringIO()
pack = ContainerSerialiser()
buffer.write(pack.begin())
@@ -334,7 +335,8 @@
def do_repository_request(self, repository, *revision_ids):
repository.lock_read()
try:
- stream = repository.get_data_stream(revision_ids)
+ stream = repository.get_data_stream_for_search(
+ repository.revision_ids_to_search_result(set(revision_ids)))
except Exception:
repository.unlock()
raise
=== modified file 'bzrlib/tests/repository_implementations/test_repository.py'
--- a/bzrlib/tests/repository_implementations/test_repository.py 2008-01-11 05:08:20 +0000
+++ b/bzrlib/tests/repository_implementations/test_repository.py 2008-01-17 05:30:53 +0000
@@ -23,6 +23,7 @@
from bzrlib import (
bzrdir,
errors,
+ graph,
remote,
repository,
)
@@ -35,6 +36,7 @@
)
from bzrlib.revision import NULL_REVISION, Revision
from bzrlib.smart import server
+from bzrlib.symbol_versioning import one_two
from bzrlib.tests import (
KnownFailure,
TestCaseWithTransport,
@@ -389,7 +391,23 @@
repo._format.rich_root_data
repo._format.supports_tree_reference
- def test_get_data_stream(self):
+ def test_get_data_stream_deprecated(self):
+ # If get_data_stream is present it must be deprecated
+ tree = self.make_branch_and_tree('t')
+ self.build_tree(['t/foo'])
+ tree.add('foo', 'file1')
+ tree.commit('message', rev_id='rev_id')
+ repo = tree.branch.repository
+ try:
+ stream = self.applyDeprecated(one_two, repo.get_data_stream,
+ ['rev_id'])
+ except NotImplementedError:
+ raise TestNotApplicable("%s doesn't support get_data_stream"
+ % repo._format)
+ except AttributeError:
+ pass
+
+ def test_get_data_stream_for_search(self):
# Make a repo with a revision
tree = self.make_branch_and_tree('t')
self.build_tree(['t/foo'])
@@ -398,8 +416,10 @@
repo = tree.branch.repository
# Get a data stream (a file-like object) for that revision
+ search = graph.SearchResult(set(['rev_id']), set([NULL_REVISION]), 1,
+ set(['rev_id']))
try:
- stream = repo.get_data_stream(['rev_id'])
+ stream = repo.get_data_stream_for_search(search)
except NotImplementedError:
raise TestNotApplicable("%s doesn't support get_data_stream"
% repo._format)
@@ -441,10 +461,12 @@
source_repo = tree.branch.repository
dest_repo = self.make_repository('dest')
try:
- stream = source_repo.get_data_stream(['rev_id'])
+ stream = source_repo.get_data_stream_for_search(
+ dest_repo.search_missing_revision_ids(source_repo,
+ revision_id='rev_id'))
except NotImplementedError, e:
# Not all repositories support streaming.
- self.assertContainsRe(str(e), 'get_data_stream')
+ self.assertContainsRe(str(e), 'get_data_stream_for_search')
raise TestSkipped('This format does not support streaming.')
dest_repo.lock_write()
=== modified file 'bzrlib/tests/test_remote.py'
--- a/bzrlib/tests/test_remote.py 2008-01-15 12:45:43 +0000
+++ b/bzrlib/tests/test_remote.py 2008-01-17 05:30:53 +0000
@@ -28,6 +28,7 @@
from bzrlib import (
bzrdir,
errors,
+ graph,
pack,
remote,
repository,
@@ -867,7 +868,8 @@
transport_path = 'quack'
repo, client = self.setup_fake_client_and_repository(
responses, transport_path)
- stream = repo.get_data_stream(['revid'])
+ search = graph.SearchResult(set(['revid']), set(), 1, set(['revid']))
+ stream = repo.get_data_stream_for_search(search)
self.assertRaises(errors.SmartProtocolError, list, stream)
def test_backwards_compatibility(self):
@@ -881,7 +883,8 @@
responses, 'path')
self.mock_called = False
repo._real_repository = MockRealRepository(self)
- repo.get_data_stream(['revid'])
+ search = graph.SearchResult(set(['revid']), set(), 1, set(['revid']))
+ repo.get_data_stream_for_search(search)
self.assertTrue(self.mock_called)
self.failIf(client.expecting_body,
"The protocol has been left in an unclean state that will cause "
@@ -894,8 +897,8 @@
def __init__(self, test):
self.test = test
- def get_data_stream(self, revision_ids):
- self.test.assertEqual(['revid'], revision_ids)
+ def get_data_stream_for_search(self, search):
+ self.test.assertEqual(set(['revid']), search.get_keys())
self.test.mock_called = True
=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py 2008-01-11 05:08:20 +0000
+++ b/bzrlib/tests/test_repository.py 2008-01-17 05:30:53 +0000
@@ -32,6 +32,7 @@
UnknownFormatError,
UnsupportedFormatError,
)
+from bzrlib import graph
from bzrlib.index import GraphIndex, InMemoryGraphIndex
from bzrlib.repository import RepositoryFormat
from bzrlib.smart import server
@@ -757,7 +758,9 @@
"""
broken_repo = self.make_broken_repository()
empty_repo = self.make_repository('empty-repo')
- stream = broken_repo.get_data_stream(['rev1a', 'rev2', 'rev3'])
+ search = graph.SearchResult(set(['rev1a', 'rev2', 'rev3']),
+ set(), 3, ['rev1a', 'rev2', 'rev3'])
+ stream = broken_repo.get_data_stream_for_search(search)
empty_repo.lock_write()
self.addCleanup(empty_repo.unlock)
empty_repo.start_write_group()
More information about the bazaar-commits mailing list