Rev 5903: (jelmer) Add limit argument to Branch.fetch(). (Jelmer Vernooij) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Fri May 20 13:28:53 UTC 2011


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5903 [merge]
revision-id: pqm at pqm.ubuntu.com-20110520132835-3rf01eu5mbkz3zos
parent: pqm at pqm.ubuntu.com-20110520123724-hamrkqa9gtyazxyl
parent: jelmer at samba.org-20110520082515-lb5zok499a6q3g7g
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Fri 2011-05-20 13:28:35 +0000
message:
  (jelmer) Add limit argument to Branch.fetch(). (Jelmer Vernooij)
modified:
  bzrlib/branch.py               branch.py-20050309040759-e4baf4e0d046576e
  bzrlib/fetch.py                fetch.py-20050818234941-26fea6105696365d
  bzrlib/graph.py                graph_walker.py-20070525030359-y852guab65d4wtn0-1
  bzrlib/plugins/weave_fmt/repository.py presplitout.py-20070125045333-wfav3tsh73oxu3zk-1
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/repofmt/knitrepo.py     knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/per_interbranch/test_fetch.py test_fetch.py-20110326014954-0viktxgz0w71zcld-1
  bzrlib/tests/per_interrepository/test_interrepository.py test_interrepository.py-20060220061411-1ec13fa99e5e3eee
  bzrlib/vf_repository.py        vf_repository.py-20110502151858-yh9nnoxpokg86unk-1
  doc/en/release-notes/bzr-2.4.txt bzr2.4.txt-20110114053217-k7ym9jfz243fddjm-1
=== modified file 'bzrlib/branch.py'
--- a/bzrlib/branch.py	2011-05-20 12:37:24 +0000
+++ b/bzrlib/branch.py	2011-05-20 13:28:35 +0000
@@ -669,15 +669,16 @@
         raise errors.UnsupportedOperation(self.get_reference_info, self)
 
     @needs_write_lock
-    def fetch(self, from_branch, last_revision=None):
+    def fetch(self, from_branch, last_revision=None, limit=None):
         """Copy revisions from from_branch into this branch.
 
         :param from_branch: Where to copy from.
         :param last_revision: What revision to stop at (None for at the end
                               of the branch.
+        :param limit: Optional rough limit of revisions to fetch
         :return: None
         """
-        return InterBranch.get(from_branch, self).fetch(last_revision)
+        return InterBranch.get(from_branch, self).fetch(last_revision, limit=limit)
 
     def get_bound_location(self):
         """Return the URL of the branch we are bound to.
@@ -3251,10 +3252,11 @@
         raise NotImplementedError(self.copy_content_into)
 
     @needs_write_lock
-    def fetch(self, stop_revision=None):
+    def fetch(self, stop_revision=None, limit=None):
         """Fetch revisions.
 
         :param stop_revision: Last revision to fetch
+        :param limit: Optional rough limit of revisions to fetch
         """
         raise NotImplementedError(self.fetch)
 
@@ -3298,7 +3300,7 @@
             self.source.tags.merge_to(self.target.tags)
 
     @needs_write_lock
-    def fetch(self, stop_revision=None):
+    def fetch(self, stop_revision=None, limit=None):
         if self.target.base == self.source.base:
             return (0, [])
         self.source.lock_read()
@@ -3309,6 +3311,7 @@
             fetch_spec_factory.source_repo = self.source.repository
             fetch_spec_factory.target_repo = self.target.repository
             fetch_spec_factory.target_repo_kind = fetch.TargetRepoKinds.PREEXISTING
+            fetch_spec_factory.limit = limit
             fetch_spec = fetch_spec_factory.make_fetch_spec()
             return self.target.repository.fetch(self.source.repository,
                 fetch_spec=fetch_spec)

=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py	2011-04-27 10:55:49 +0000
+++ b/bzrlib/fetch.py	2011-05-20 08:25:15 +0000
@@ -28,7 +28,7 @@
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
 from bzrlib import (
-    graph,
+    graph as _mod_graph,
     tsort,
     versionedfile,
     )
@@ -160,13 +160,13 @@
         elif self._last_revision == NULL_REVISION:
             # fetch_spec is None + last_revision is null => empty fetch.
             # explicit limit of no revisions needed
-            return graph.EmptySearchResult()
+            return _mod_graph.EmptySearchResult()
         elif self._last_revision is not None:
-            return graph.NotInOtherForRevs(self.to_repository,
+            return _mod_graph.NotInOtherForRevs(self.to_repository,
                 self.from_repository, [self._last_revision],
                 find_ghosts=self.find_ghosts).execute()
         else: # self._last_revision is None:
-            return graph.EverythingNotInOther(self.to_repository,
+            return _mod_graph.EverythingNotInOther(self.to_repository,
                 self.from_repository,
                 find_ghosts=self.find_ghosts).execute()
 
@@ -371,24 +371,28 @@
         self.source_repo = None
         self.target_repo = None
         self.target_repo_kind = None
+        self.limit = None
 
     def add_revision_ids(self, revision_ids):
         """Add revision_ids to the set of revision_ids to be fetched."""
         self._explicit_rev_ids.update(revision_ids)
-        
+
     def make_fetch_spec(self):
         """Build a SearchResult or PendingAncestryResult or etc."""
         if self.target_repo_kind is None or self.source_repo is None:
             raise AssertionError(
                 'Incomplete FetchSpecFactory: %r' % (self.__dict__,))
         if len(self._explicit_rev_ids) == 0 and self.source_branch is None:
+            if self.limit is not None:
+                raise NotImplementedError(
+                    "limit is only supported with a source branch set")
             # Caller hasn't specified any revisions or source branch
             if self.target_repo_kind == TargetRepoKinds.EMPTY:
-                return graph.EverythingResult(self.source_repo)
+                return _mod_graph.EverythingResult(self.source_repo)
             else:
                 # We want everything not already in the target (or target's
                 # fallbacks).
-                return graph.EverythingNotInOther(
+                return _mod_graph.EverythingNotInOther(
                     self.target_repo, self.source_repo).execute()
         heads_to_fetch = set(self._explicit_rev_ids)
         if self.source_branch is not None:
@@ -411,9 +415,14 @@
             # heads_to_fetch will almost certainly be present so this doesn't
             # matter much.
             all_heads = heads_to_fetch.union(if_present_fetch)
-            return graph.PendingAncestryResult(all_heads, self.source_repo)
-        return graph.NotInOtherForRevs(self.target_repo, self.source_repo,
-            required_ids=heads_to_fetch, if_present_ids=if_present_fetch
-            ).execute()
-
-
+            ret = _mod_graph.PendingAncestryResult(all_heads, self.source_repo)
+            if self.limit is not None:
+                graph = self.source_repo.get_graph()
+                topo_order = list(graph.iter_topo_order(ret.get_keys()))
+                result_set = topo_order[:self.limit]
+                ret = self.source_repo.revision_ids_to_search_result(result_set)
+            return ret
+        else:
+            return _mod_graph.NotInOtherForRevs(self.target_repo, self.source_repo,
+                required_ids=heads_to_fetch, if_present_ids=if_present_fetch,
+                limit=self.limit).execute()

=== modified file 'bzrlib/graph.py'
--- a/bzrlib/graph.py	2011-05-18 16:42:48 +0000
+++ b/bzrlib/graph.py	2011-05-20 13:28:35 +0000
@@ -1849,7 +1849,7 @@
     """Find all revisions missing in one repo for a some specific heads."""
 
     def __init__(self, to_repo, from_repo, required_ids, if_present_ids=None,
-            find_ghosts=False):
+            find_ghosts=False, limit=None):
         """Constructor.
 
         :param required_ids: revision IDs of heads that must be found, or else
@@ -1859,12 +1859,14 @@
         :param if_present_ids: revision IDs of heads that may be absent in the
             source repository.  If present, then their ancestry not already
             found in other will be included in the search result.
+        :param limit: maximum number of revisions to fetch
         """
         self.to_repo = to_repo
         self.from_repo = from_repo
         self.find_ghosts = find_ghosts
         self.required_ids = required_ids
         self.if_present_ids = if_present_ids
+        self.limit = limit
 
     def __repr__(self):
         if len(self.required_ids) > 5:
@@ -1876,14 +1878,17 @@
         else:
             ifp_revs_repr = repr(self.if_present_ids)
 
-        return "<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r>" % (
-            self.__class__.__name__, self.from_repo, self.to_repo,
-            self.find_ghosts, reqd_revs_repr, ifp_revs_repr)
+        return ("<%s from:%r to:%r find_ghosts:%r req'd:%r if-present:%r"
+                "limit:%r>") % (
+                self.__class__.__name__, self.from_repo, self.to_repo,
+                self.find_ghosts, reqd_revs_repr, ifp_revs_repr,
+                self.limit)
 
     def execute(self):
         return self.to_repo.search_missing_revision_ids(
             self.from_repo, revision_ids=self.required_ids,
-            if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts)
+            if_present_ids=self.if_present_ids, find_ghosts=self.find_ghosts,
+            limit=self.limit)
 
 
 def collapse_linear_regions(parent_map):

=== modified file 'bzrlib/plugins/weave_fmt/repository.py'
--- a/bzrlib/plugins/weave_fmt/repository.py	2011-05-08 11:34:17 +0000
+++ b/bzrlib/plugins/weave_fmt/repository.py	2011-05-18 10:41:51 +0000
@@ -27,6 +27,8 @@
 
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
+import itertools
+
 from bzrlib import (
     xml5,
     graph as _mod_graph,
@@ -817,7 +819,8 @@
     @needs_read_lock
     def search_missing_revision_ids(self,
             revision_id=symbol_versioning.DEPRECATED_PARAMETER,
-            find_ghosts=True, revision_ids=None, if_present_ids=None):
+            find_ghosts=True, revision_ids=None, if_present_ids=None,
+            limit=None):
         """See InterRepository.search_missing_revision_ids()."""
         # we want all revisions to satisfy revision_id in source.
         # but we don't want to stat every file here and there.
@@ -863,6 +866,9 @@
             # that against the revision records.
             result_set = set(
                 self.source._eliminate_revisions_not_present(required_revisions))
+        if limit is not None:
+            topo_ordered = self.get_graph().iter_topo_order(result_set)
+            result_set = set(itertools.islice(topo_ordered, limit))
         return self.source.revision_ids_to_search_result(result_set)
 
 

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2011-05-18 16:42:48 +0000
+++ b/bzrlib/remote.py	2011-05-20 13:28:35 +0000
@@ -1606,7 +1606,8 @@
     @needs_read_lock
     def search_missing_revision_ids(self, other,
             revision_id=symbol_versioning.DEPRECATED_PARAMETER,
-            find_ghosts=True, revision_ids=None, if_present_ids=None):
+            find_ghosts=True, revision_ids=None, if_present_ids=None,
+            limit=None):
         """Return the revision ids that other has that this does not.
 
         These are returned in topological order.
@@ -1626,7 +1627,7 @@
         inter_repo = _mod_repository.InterRepository.get(other, self)
         return inter_repo.search_missing_revision_ids(
             find_ghosts=find_ghosts, revision_ids=revision_ids,
-            if_present_ids=if_present_ids)
+            if_present_ids=if_present_ids, limit=limit)
 
     def fetch(self, source, revision_id=None, find_ghosts=False,
             fetch_spec=None):

=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2011-05-08 11:34:17 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2011-05-18 10:41:51 +0000
@@ -16,6 +16,8 @@
 
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
+import itertools
+
 from bzrlib import (
     bzrdir,
     errors,
@@ -507,7 +509,8 @@
     @needs_read_lock
     def search_missing_revision_ids(self,
             revision_id=symbol_versioning.DEPRECATED_PARAMETER,
-            find_ghosts=True, revision_ids=None, if_present_ids=None):
+            find_ghosts=True, revision_ids=None, if_present_ids=None,
+            limit=None):
         """See InterRepository.search_missing_revision_ids()."""
         if symbol_versioning.deprecated_passed(revision_id):
             symbol_versioning.warn(
@@ -542,6 +545,9 @@
             # that against the revision records.
             result_set = set(
                 self.source._eliminate_revisions_not_present(required_revisions))
+        if limit is not None:
+            topo_ordered = self.source.get_graph().iter_topo_order(result_set)
+            result_set = set(itertools.islice(topo_ordered, limit))
         return self.source.revision_ids_to_search_result(result_set)
 
 

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2011-05-15 17:58:19 +0000
+++ b/bzrlib/repository.py	2011-05-18 10:24:05 +0000
@@ -16,6 +16,7 @@
 
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
+import itertools
 import time
 
 from bzrlib import (
@@ -584,7 +585,8 @@
     @needs_read_lock
     def search_missing_revision_ids(self, other,
             revision_id=symbol_versioning.DEPRECATED_PARAMETER,
-            find_ghosts=True, revision_ids=None, if_present_ids=None):
+            find_ghosts=True, revision_ids=None, if_present_ids=None,
+            limit=None):
         """Return the revision ids that other has that this does not.
 
         These are returned in topological order.
@@ -603,7 +605,7 @@
                 revision_ids = [revision_id]
         return InterRepository.get(other, self).search_missing_revision_ids(
             find_ghosts=find_ghosts, revision_ids=revision_ids,
-            if_present_ids=if_present_ids)
+            if_present_ids=if_present_ids, limit=limit)
 
     @staticmethod
     def open(base):
@@ -1711,7 +1713,8 @@
     @needs_read_lock
     def search_missing_revision_ids(self,
             revision_id=symbol_versioning.DEPRECATED_PARAMETER,
-            find_ghosts=True, revision_ids=None, if_present_ids=None):
+            find_ghosts=True, revision_ids=None, if_present_ids=None,
+            limit=None):
         """Return the revision ids that source has that target does not.
 
         :param revision_id: only return revision ids included by this
@@ -1725,6 +1728,8 @@
             to fetch for tags, which may reference absent revisions.
         :param find_ghosts: If True find missing revisions in deep history
             rather than just finding the surface difference.
+        :param limit: Maximum number of revisions to return, topologically
+            ordered
         :return: A bzrlib.graph.SearchResult.
         """
         raise NotImplementedError(self.search_missing_revision_ids)

=== modified file 'bzrlib/tests/per_interbranch/test_fetch.py'
--- a/bzrlib/tests/per_interbranch/test_fetch.py	2011-03-26 01:53:34 +0000
+++ b/bzrlib/tests/per_interbranch/test_fetch.py	2011-05-20 08:25:15 +0000
@@ -44,4 +44,52 @@
         self.addCleanup(tree.unlock)
         self.assertEqual(tree.get_file_text('foo-id'), 'hello')
 
-
+    def test_fetch_revisions_limit(self):
+        """Test fetch-revision operation with limit."""
+        builder = self.make_branch_builder('b1',
+            format=self.branch_format_from._matchingbzrdir)
+        builder.start_series()
+        builder.build_commit(rev_id='revision-1')
+        builder.build_commit(rev_id='revision-2')
+        builder.build_commit(rev_id='revision-3')
+        builder.finish_series()
+        b1 = builder.get_branch()
+        b2 = self.make_to_branch('b2')
+        b2.fetch(b1, limit=1)
+
+        # fetch does not update the last revision
+        self.assertEquals(NULL_REVISION, b2.last_revision())
+
+        self.assertEquals(
+            set(['revision-1']),
+            b2.repository.has_revisions(
+                ['revision-1', 'revision-2', 'revision-3']))
+
+    def test_fetch_revisions_limit_incremental(self):
+        """Test incremental fetch-revision operation with limit."""
+        wt = self.make_from_branch_and_tree('b1')
+        b1 = wt.branch
+        self.build_tree_contents([('b1/foo', 'hello')])
+        wt.add(['foo'], ['foo-id'])
+        wt.commit('lala!', rev_id='revision-1', allow_pointless=False)
+
+        b2 = self.make_to_branch('b2')
+        b2.fetch(b1, limit=1)
+
+        self.assertEquals(
+            set(['revision-1']),
+            b2.repository.has_revisions(
+                ['revision-1', 'revision-2', 'revision-3']))
+
+        wt.commit('hmm', rev_id='revision-2')
+        wt.commit('hmmm', rev_id='revision-3')
+
+        b2.fetch(b1, limit=1)
+
+        # fetch does not update the last revision
+        self.assertEquals(NULL_REVISION, b2.last_revision())
+
+        self.assertEquals(
+            set(['revision-1', 'revision-2']),
+            b2.repository.has_revisions(
+                ['revision-1', 'revision-2', 'revision-3']))

=== modified file 'bzrlib/tests/per_interrepository/test_interrepository.py'
--- a/bzrlib/tests/per_interrepository/test_interrepository.py	2011-03-28 04:24:47 +0000
+++ b/bzrlib/tests/per_interrepository/test_interrepository.py	2011-05-18 11:43:37 +0000
@@ -130,6 +130,17 @@
         self.assertEqual(('search', set(['rev1']), set([NULL_REVISION]), 1),
             result.get_recipe())
 
+    def test_search_missing_revision_ids_limit(self):
+        # The limit= argument makes search_missing_revision_ids() limit
+        # the results to the first X topo-sorted revisions.
+        repo_b = self.make_to_repository('rev1_only')
+        repo_a = self.bzrdir.open_repository()
+        # check the test will be valid
+        self.assertFalse(repo_b.has_revision('rev2'))
+        result = repo_b.search_missing_revision_ids(repo_a, limit=1)
+        self.assertEqual(('search', set(['rev1']), set(['null:']), 1),
+            result.get_recipe())
+
     def test_fetch_fetches_signatures_too(self):
         from_repo = self.bzrdir.open_repository()
         from_signature = from_repo.get_signature_text('rev2')

=== modified file 'bzrlib/vf_repository.py'
--- a/bzrlib/vf_repository.py	2011-05-19 09:32:38 +0000
+++ b/bzrlib/vf_repository.py	2011-05-20 13:28:35 +0000
@@ -19,6 +19,8 @@
 
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
+import itertools
+
 from bzrlib import (
     check,
     debug,
@@ -2552,7 +2554,8 @@
     @needs_read_lock
     def search_missing_revision_ids(self,
             revision_id=symbol_versioning.DEPRECATED_PARAMETER,
-            find_ghosts=True, revision_ids=None, if_present_ids=None):
+            find_ghosts=True, revision_ids=None, if_present_ids=None,
+            limit=None):
         """Return the revision ids that source has that target does not.
 
         :param revision_id: only return revision ids included by this
@@ -2582,13 +2585,20 @@
         # stop searching at found target revisions.
         if not find_ghosts and (revision_ids is not None or if_present_ids is
                 not None):
-            return self._walk_to_common_revisions(revision_ids,
+            result = self._walk_to_common_revisions(revision_ids,
                     if_present_ids=if_present_ids)
-        # generic, possibly worst case, slow code path.
-        target_ids = set(self.target.all_revision_ids())
-        source_ids = self._present_source_revisions_for(
-            revision_ids, if_present_ids)
-        result_set = set(source_ids).difference(target_ids)
+            if limit is None:
+                return result
+            result_set = result.get_keys()
+        else:
+            # generic, possibly worst case, slow code path.
+            target_ids = set(self.target.all_revision_ids())
+            source_ids = self._present_source_revisions_for(
+                revision_ids, if_present_ids)
+            result_set = set(source_ids).difference(target_ids)
+        if limit is not None:
+            topo_ordered = self.source.get_graph().iter_topo_order(result_set)
+            result_set = set(itertools.islice(topo_ordered, limit))
         return self.source.revision_ids_to_search_result(result_set)
 
     def _present_source_revisions_for(self, revision_ids, if_present_ids=None):

=== modified file 'doc/en/release-notes/bzr-2.4.txt'
--- a/doc/en/release-notes/bzr-2.4.txt	2011-05-19 18:20:37 +0000
+++ b/doc/en/release-notes/bzr-2.4.txt	2011-05-20 13:28:35 +0000
@@ -128,6 +128,9 @@
 * ``annotate_file`` has been deprecated in favor of
   ``annotate_file_revision_tree``. (Jelmer Vernooij, #775598)
 
+* ``Branch.fetch`` now takes an optional ``limit`` argument.
+  (Andrew Bennetts, Jelmer Vernooij, #750175)
+
 * ``Inter.get`` now raises ``NoCompatibleInter`` if there are no
   compatible optimisers rather than an instance of the class it is called
   on. (Jelmer Vernooij)




More information about the bazaar-commits mailing list