Rev 3195: * New smart method ``Repository.get_parent_map`` for getting revision in http://people.ubuntu.com/~robertc/baz2.0/search-results
Robert Collins
robertc at robertcollins.net
Fri Jan 18 03:43:35 GMT 2008
At http://people.ubuntu.com/~robertc/baz2.0/search-results
------------------------------------------------------------
revno: 3195
revision-id: robertc at robertcollins.net-20080118034319-tp2b9j8u4h1uxspx
parent: robertc at robertcollins.net-20080118034035-ay3xbm04pypsago2
committer: Robert Collins <robertc at robertcollins.net>
branch nick: Repository.get_parent_map
timestamp: Fri 2008-01-18 14:43:19 +1100
message:
* New smart method ``Repository.get_parent_map`` for getting revision
parent data. This returns additional parent information topologically
adjacent to the requested data to reduce round trip latency impacts.
(Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/graph.py graph_walker.py-20070525030359-y852guab65d4wtn0-1
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/smart/repository.py repository.py-20061128022038-vr5wy5bubyb8xttk-1
bzrlib/tests/test_remote.py test_remote.py-20060720103555-yeeg2x51vn0rbtdp-2
=== modified file 'NEWS'
--- a/NEWS 2008-01-18 02:36:57 +0000
+++ b/NEWS 2008-01-18 03:43:19 +0000
@@ -25,6 +25,11 @@
* ``merge --preview`` produces a diff of the changes merge would make,
but does not actually perform the merge. (Aaron Bentley)
+ * New smart method ``Repository.get_parent_map`` for getting revision
+ parent data. This returns additional parent information topologically
+ adjacent to the requested data to reduce round trip latency impacts.
+ (Robert Collins)
+
* New smart method, ``Repository.stream_revisions_chunked``, for fetching
revision data that streams revision data via a chunked encoding. This
avoids buffering large amounts of revision data on the server and on the
=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py 2008-01-18 02:36:57 +0000
+++ b/bzrlib/graph.py 2008-01-18 03:43:19 +0000
@@ -544,8 +544,16 @@
# exclude keys for them. However, while we could have a second
# look-ahead result buffer and shuffle things around, this method
# is typically only called once per search - when memoising the
- # results of the search.
- found, ghosts, next, parents = self._do_query(self._next_query)
+ # results of the search.
+ # XXX: TODO: NOTE: Note that we cannot supply the current search to
+ # the call to get_parent_map in this case because it leads to
+ # infinite recursion. (remote.g_p_m -> get_result ->
+ # remote.get_parent_map) - the revisions needed to determine what
+ # keys to exclude to recreate current state are precisely those
+ # being queried. This is likely a problem that will need to be
+ # addressed (perhaps by extending the search recipe to have two
+ # exclude lists - exclude_from and exclude_at). - RBC 20080118
+ found, ghosts, next, parents = self._do_query(self._next_query, False)
# pretend we didn't query: perhaps we should tweak _do_query to be
# entirely stateless?
self.seen.difference_update(next)
@@ -618,7 +626,7 @@
# repeated when ghosts are filled.
self._stopped_keys.update(ghosts)
- def _do_query(self, revisions):
+ def _do_query(self, revisions, provide_self=True):
"""Query for revisions.
Adds revisions to the seen set.
@@ -632,7 +640,12 @@
# revisions may contain nodes that point to other nodes in revisions:
# we want to filter them out.
self.seen.update(revisions)
- parent_map = self._parents_provider.get_parent_map(revisions, self)
+ if provide_self:
+ current_search = self
+ else:
+ current_search = None
+ parent_map = self._parents_provider.get_parent_map(revisions,
+ current_search)
for rev_id, parents in parent_map.iteritems():
found_parents.add(rev_id)
parents_of_found.update(p for p in parents if p not in self.seen)
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2008-01-18 02:36:57 +0000
+++ b/bzrlib/remote.py 2008-01-18 03:43:19 +0000
@@ -767,7 +767,8 @@
ancestry = self._parents_map
missing_revisions = set(key for key in keys if key not in ancestry)
if missing_revisions:
- self._parents_map.update(self._get_parent_map(missing_revisions))
+ self._parents_map.update(
+ self._get_parent_map(missing_revisions, current_search))
return dict((k, ancestry[k]) for k in keys if k in ancestry)
def _response_is_unknown_method(self, response, verb):
@@ -788,7 +789,7 @@
return True
return False
- def _get_parent_map(self, keys):
+ def _get_parent_map(self, keys, current_search):
"""Helper for get_parent_map that performs the RPC."""
keys = set(keys)
if NULL_REVISION in keys:
@@ -798,12 +799,18 @@
return found_parents
else:
found_parents = {}
+ if current_search is not None:
+ recipe = current_search.get_result().get_recipe()
+ else:
+ recipe = (set(), set(), 0)
+ body = self._serialise_search_recipe(recipe)
path = self.bzrdir._path_for_remote_call(self._client)
for key in keys:
assert type(key) is str
verb = 'Repository.get_parent_map'
- response = self._client.call_expecting_body(
- verb, path, *keys)
+ args = (path,) + tuple(keys)
+ response = self._client.call_with_body_bytes_expecting_body(
+ verb, args, self._serialise_search_recipe(recipe))
if self._response_is_unknown_method(response, verb):
# Server that does not support this method, get the whole graph.
response = self._client.call_expecting_body(
@@ -968,11 +975,7 @@
def get_data_stream_for_search(self, search):
REQUEST_NAME = 'Repository.stream_revisions_chunked'
path = self.bzrdir._path_for_remote_call(self._client)
- recipe = search.get_recipe()
- start_keys = ' '.join(recipe[0])
- stop_keys = ' '.join(recipe[1])
- count = str(recipe[2])
- body = '\n'.join((start_keys, stop_keys, count))
+ body = self._serialise_search_recipe(search.get_recipe())
response, protocol = self._client.call_with_body_bytes_expecting_body(
REQUEST_NAME, (path,), body)
@@ -1033,6 +1036,17 @@
def _make_parents_provider(self):
return self
+ def _serialise_search_recipe(self, recipe):
+ """Serialise a graph search recipe.
+
+ :param recipe: A search recipe (start, stop, count).
+ :return: Serialised bytes.
+ """
+ start_keys = ' '.join(recipe[0])
+ stop_keys = ' '.join(recipe[1])
+ count = str(recipe[2])
+ return '\n'.join((start_keys, stop_keys, count))
+
class RemoteBranchLockableFiles(LockableFiles):
"""A 'LockableFiles' implementation that talks to a smart server.
=== modified file 'bzrlib/smart/repository.py'
--- a/bzrlib/smart/repository.py 2008-01-17 07:47:52 +0000
+++ b/bzrlib/smart/repository.py 2008-01-18 03:43:19 +0000
@@ -58,33 +58,77 @@
# is expected)
return None
+ def recreate_search(self, repository, recipe_bytes):
+ lines = recipe_bytes.split('\n')
+ start_keys = set(lines[0].split(' '))
+ exclude_keys = set(lines[1].split(' '))
+ revision_count = int(lines[2])
+ repository.lock_read()
+ try:
+ search = repository.get_graph()._make_breadth_first_searcher(
+ start_keys)
+ while True:
+ try:
+ next_revs = search.next()
+ except StopIteration:
+ break
+ search.stop_searching_any(exclude_keys.intersection(next_revs))
+ search_result = search.get_result()
+ if search_result.get_recipe()[2] != revision_count:
+ # we got back a different amount of data than expected, this
+ # gets reported as NoSuchRevision, because less revisions
+ # indicates missing revisions, and more should never happen as
+ # the excludes list considers ghosts and ensures that ghost
+ # filling races are not a problem.
+ return (None, FailedSmartServerResponse(('NoSuchRevision',)))
+ return (search, None)
+ finally:
+ repository.unlock()
+
class SmartServerRepositoryGetParentMap(SmartServerRepositoryRequest):
+ """Bzr 1.2+ - get parent data for revisions during a graph search."""
def do_repository_request(self, repository, *revision_ids):
- repository.lock_read()
- try:
- return self._do_repository_request(repository, revision_ids)
- finally:
- repository.unlock()
-
- def _do_repository_request(self, repository, revision_ids):
"""Get parent details for some revisions.
All the parents for revision_ids are returned. Additionally up to 64KB
of additional parent data found by performing a breadth first search
- from revision_ids is returned.
+ from revision_ids is returned. The verb takes a body containing the
+ current search state, see do_body for details.
:param repository: The repository to query in.
:param revision_ids: The utf8 encoded revision_id to answer for.
+ """
+ self._revision_ids = revision_ids
+ return None # Signal that we want a body.
+
+ def do_body(self, body_bytes):
+ """Process the current search state and perform the parent lookup.
+
:return: A smart server response where the body contains an utf8
encoded flattened list of the parents of the revisions, (the same
format as Repository.get_revision_graph).
"""
+ repository = self._repository
+ repository.lock_read()
+ try:
+ return self._do_repository_request(body_bytes)
+ finally:
+ repository.unlock()
+
+ def _do_repository_request(self, body_bytes):
+ repository = self._repository
+ revision_ids = set(self._revision_ids)
+ search, error = self.recreate_search(repository, body_bytes)
+ if error is not None:
+ return error
+ # TODO might be nice to start up the search again; but thats not
+ # written or tested yet.
+ queried_revs = set(search.get_result().get_keys())
lines = []
repo_graph = repository.get_graph()
result = {}
- queried_revs = set()
size_so_far = 0
next_revs = revision_ids
first_loop_done = False
@@ -342,31 +386,15 @@
"""Bzr 1.1+ streaming pull."""
def do_body(self, body_bytes):
- lines = body_bytes.split('\n')
- start_keys = set(lines[0].split(' '))
- exclude_keys = set(lines[1].split(' '))
- revision_count = int(lines[2])
repository = self._repository
repository.lock_read()
try:
- search = repository.get_graph()._make_breadth_first_searcher(
- start_keys)
- while True:
- try:
- next_revs = search.next()
- except StopIteration:
- break
- search.stop_searching_any(exclude_keys.intersection(next_revs))
- search_result = search.get_result()
- if search_result.get_recipe()[2] != revision_count:
- # we got back a different amount of data than expected, this
- # gets reported as NoSuchRevision, because less revisions
- # indicates missing revisions, and more should never happen as
- # the excludes list considers ghosts and ensures that ghost
- # filling races are not a problem.
- return FailedSmartServerResponse(('NoSuchRevision',))
- stream = repository.get_data_stream_for_search(search_result)
+ search, error = self.recreate_search(repository, body_bytes)
+ if error is not None:
+ return error
+ stream = repository.get_data_stream_for_search(search.get_result())
except Exception:
+ # On non-error, unlocking is done by the body stream handler.
repository.unlock()
raise
return SuccessfulSmartServerResponse(('ok',),
=== modified file 'bzrlib/tests/test_remote.py'
--- a/bzrlib/tests/test_remote.py 2008-01-17 07:47:52 +0000
+++ b/bzrlib/tests/test_remote.py 2008-01-18 03:43:19 +0000
@@ -599,8 +599,8 @@
parents = graph.get_parent_map([r1])
self.assertEqual({r1: (NULL_REVISION,)}, parents)
self.assertEqual(
- [('call_expecting_body', 'Repository.get_parent_map',
- ('quack/', r2))],
+ [('call_with_body_bytes_expecting_body',
+ 'Repository.get_parent_map', ('quack/', r2), '\n\n0')],
client._calls)
repo.unlock()
# now we call again, and it should use the second response.
@@ -609,10 +609,10 @@
parents = graph.get_parent_map([r1])
self.assertEqual({r1: (NULL_REVISION,)}, parents)
self.assertEqual(
- [('call_expecting_body', 'Repository.get_parent_map',
- ('quack/', r2)),
- ('call_expecting_body', 'Repository.get_parent_map',
- ('quack/', r1))
+ [('call_with_body_bytes_expecting_body',
+ 'Repository.get_parent_map', ('quack/', r2), '\n\n0'),
+ ('call_with_body_bytes_expecting_body',
+ 'Repository.get_parent_map', ('quack/', r1), '\n\n0'),
],
client._calls)
repo.unlock()
More information about the bazaar-commits
mailing list