Rev 2669: A knit iter_parents API. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Thu Jul 19 02:20:53 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2669
revision-id: robertc at robertcollins.net-20070719012049-xgdzwfpnnmoqeqjp
parent: robertc at robertcollins.net-20070718062421-z67w9x2nak16qt7h
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Thu 2007-07-19 11:20:49 +1000
message:
  A knit iter_parents API.
modified:
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/repofmt/knitrepo.py     knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
  bzrlib/smart/repository.py     repository.py-20061128022038-vr5wy5bubyb8xttk-1
  bzrlib/tests/interversionedfile_implementations/test_join.py test_join.py-20060302012326-9b5e9b0f0a03fedc
  bzrlib/tests/test_knit.py      test_knit.py-20051212171302-95d4c00dd5f11f2b
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-07-16 06:44:27 +0000
+++ b/bzrlib/knit.py	2007-07-19 01:20:49 +0000
@@ -911,6 +911,19 @@
 
         pb.update('Walking content.', total, total)
         
+    def iter_parents(self, version_ids):
+        """Iterate through the parents for many version ids.
+
+        :param version_ids: An iterable yielding version_ids.
+        :return: An iterator that yields (version_id, parents). Requested 
+            version_ids not present in the versioned file are simply skipped.
+            The order is undefined, allowing for different optimisations in
+            the underlying implementation.
+        """
+        version_ids = [osutils.safe_revision_id(version_id) for
+            version_id in version_ids]
+        return self._index.iter_parents(version_ids)
+
     def num_versions(self):
         """See VersionedFile.num_versions()."""
         return self._index.num_versions()
@@ -1210,6 +1223,21 @@
                 graph[version] = parents
         return topo_sort(graph.items())
 
+    def iter_parents(self, version_ids):
+        """Iterate through the parents for many version ids.
+
+        :param version_ids: An iterable yielding version_ids.
+        :return: An iterator that yields (version_id, parents). Requested 
+            version_ids not present in the versioned file are simply skipped.
+            The order is undefined, allowing for different optimisations in
+            the underlying implementation.
+        """
+        for version_id in version_ids:
+            try:
+                yield version_id, tuple(self.get_parents(version_id))
+            except KeyError:
+                pass
+
     def num_versions(self):
         return len(self._history)
 
@@ -1339,15 +1367,23 @@
             raise KnitCorrupt(self, "Cannot do delta compression without "
                 "parent tracking.")
 
-    def _get_entries(self, version_ids):
+    def _get_entries(self, version_ids, check_present=False):
         """Get the entries for version_ids."""
+        version_ids = set(version_ids)
+        found_keys = set()
         if self._parents:
             for node in self._graph_index.iter_entries(version_ids):
                 yield node
+                found_keys.add(node[0])
         else:
             # adapt parentless index to the rest of the code.
             for node in self._graph_index.iter_entries(version_ids):
                 yield node[0], node[1], ()
+                found_keys.add(node[0])
+        if check_present:
+            missing_keys = version_ids.difference(found_keys)
+            if missing_keys:
+                raise RevisionNotPresent(missing_keys.pop(), self)
 
     def _present_keys(self, version_ids):
         return set([
@@ -1431,6 +1467,35 @@
         return [(key, refs[0]) for (key, value, refs) in 
             self._graph_index.iter_all_entries()]
 
+    def iter_parents(self, version_ids):
+        """Iterate through the parents for many version ids.
+
+        :param version_ids: An iterable yielding version_ids.
+        :return: An iterator that yields (version_id, parents). Requested 
+            version_ids not present in the versioned file are simply skipped.
+            The order is undefined, allowing for different optimisations in
+            the underlying implementation.
+        """
+        if self._parents:
+            all_nodes = set(self._get_entries(version_ids))
+            all_parents = set()
+            present_parents = set()
+            for node in all_nodes:
+                all_parents.update(node[2][0])
+                # any node we are querying must be present
+                present_parents.add(node[0])
+            unknown_parents = all_parents.difference(present_parents)
+            present_parents.update(self._present_keys(unknown_parents))
+            for node in all_nodes:
+                parents = []
+                for parent in node[2][0]:
+                    if parent in present_parents:
+                        parents.append(parent)
+                yield node[0], tuple(parents)
+        else:
+            for node in self._get_entries(version_ids):
+                yield node[0], ()
+
     def num_versions(self):
         return len(list(self._graph_index.iter_all_entries()))
 
@@ -1442,7 +1507,7 @@
     
     def has_version(self, version_id):
         """True if the version is in the index."""
-        return len(list(self._get_entries([version_id]))) == 1
+        return len(self._present_keys([version_id])) == 1
 
     def get_position(self, version_id):
         """Return data position and size of specified version."""
@@ -1481,17 +1546,18 @@
 
     def get_parents(self, version_id):
         """Return parents of specified version ignoring ghosts."""
-        if not self._parents:
-            return ()
-        parents = self.get_parents_with_ghosts(version_id)
-        present_parents = self._present_keys(parents)
-        return [key for key in parents if key in present_parents]
+        parents = list(self.iter_parents([version_id]))
+        if not parents:
+            # missing key
+            raise errors.RevisionNotPresent(version_id, self)
+        return parents[0][1]
 
     def get_parents_with_ghosts(self, version_id):
         """Return parents of specified version with ghosts."""
+        nodes = list(self._get_entries([version_id], check_present=True))
         if not self._parents:
             return ()
-        return self._get_node(version_id)[2][0]
+        return nodes[0][2][0]
 
     def check_versions_present(self, version_ids):
         """Check that all specified versions are present."""

=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2007-07-18 06:24:21 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2007-07-19 01:20:49 +0000
@@ -156,10 +156,10 @@
             return {}
         revision_id = osutils.safe_revision_id(revision_id)
         a_weave = self._get_revision_vf()
-        entire_graph = a_weave.get_graph()
         if revision_id is None:
             return a_weave.get_graph()
-        elif revision_id not in a_weave:
+        entire_graph = a_weave.get_graph()
+        if revision_id not in a_weave:
             raise errors.NoSuchRevision(self, revision_id)
         else:
             return a_weave.get_graph([revision_id])

=== modified file 'bzrlib/smart/repository.py'
--- a/bzrlib/smart/repository.py	2007-07-04 08:09:12 +0000
+++ b/bzrlib/smart/repository.py	2007-07-19 01:20:49 +0000
@@ -72,7 +72,7 @@
             return FailedSmartServerResponse(('nosuchrevision', revision_id), '')
 
         for revision, parents in revision_graph.items():
-            lines.append(' '.join([revision,] + parents))
+            lines.append(' '.join((revision,) + parents))
 
         return SuccessfulSmartServerResponse(('ok', ), '\n'.join(lines))
 

=== modified file 'bzrlib/tests/interversionedfile_implementations/test_join.py'
--- a/bzrlib/tests/interversionedfile_implementations/test_join.py	2006-10-11 23:08:27 +0000
+++ b/bzrlib/tests/interversionedfile_implementations/test_join.py	2007-07-19 01:20:49 +0000
@@ -278,7 +278,7 @@
         # legacy apis should behave
         self.assertEqual(['notbase'], source.get_ancestry(['notbase']))
         self.assertEqual([], source.get_parents('notbase'))
-        self.assertEqual({'notbase':[]}, source.get_graph())
+        self.assertEqual({'notbase':()}, source.get_graph())
         self.assertFalse(source.has_version('base'))
         if source_ghosts:
             # ghost data should have been preserved
@@ -293,8 +293,8 @@
         target.join(source, version_ids=['base']) 
         self.assertEqual(['base', 'notbase'], target.get_ancestry(['notbase']))
         self.assertEqual(['base'], target.get_parents('notbase'))
-        self.assertEqual({'base':[],
-                          'notbase':['base'],
+        self.assertEqual({'base':(),
+                          'notbase':('base', ),
                           },
                          target.get_graph())
         self.assertTrue(target.has_version('base'))

=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2007-07-16 06:44:27 +0000
+++ b/bzrlib/tests/test_knit.py	2007-07-19 01:20:49 +0000
@@ -464,6 +464,36 @@
         self.assertRaises(RevisionNotPresent,
             index.get_ancestry_with_ghosts, ["e"])
 
+    def test_iter_parents(self):
+        transport = MockTransport()
+        index = self.get_knit_index(transport, "filename", "w", create=True)
+        # no parents
+        index.add_version('r0', ['option'], 0, 1, [])
+        # 1 parent
+        index.add_version('r1', ['option'], 0, 1, ['r0'])
+        # 2 parents
+        index.add_version('r2', ['option'], 0, 1, ['r1', 'r0'])
+        # XXX TODO a ghost
+        # cases: each sample data individually:
+        self.assertEqual(set([('r0', ())]),
+            set(index.iter_parents(['r0'])))
+        self.assertEqual(set([('r1', ('r0', ))]),
+            set(index.iter_parents(['r1'])))
+        self.assertEqual(set([('r2', ('r1', 'r0'))]),
+            set(index.iter_parents(['r2'])))
+        # no nodes returned for a missing node
+        self.assertEqual(set(),
+            set(index.iter_parents(['missing'])))
+        # 1 node returned with missing nodes skipped
+        self.assertEqual(set([('r1', ('r0', ))]),
+            set(index.iter_parents(['ghost1', 'r1', 'ghost'])))
+        # 2 nodes returned
+        self.assertEqual(set([('r0', ()), ('r1', ('r0', ))]),
+            set(index.iter_parents(['r0', 'r1'])))
+        # 2 nodes returned, missing skipped
+        self.assertEqual(set([('r0', ()), ('r1', ('r0', ))]),
+            set(index.iter_parents(['a', 'r0', 'b', 'r1', 'c'])))
+
     def test_num_versions(self):
         transport = MockTransport([
             _KnitIndex.HEADER
@@ -1669,11 +1699,17 @@
     def test_get_parents(self):
         # get_parents ignores ghosts
         index = self.two_graph_index()
-        self.assertEqual(['tail'], index.get_parents('parent'))
+        self.assertEqual(('tail', ), index.get_parents('parent'))
+        # and errors on ghosts.
+        self.assertRaises(errors.RevisionNotPresent,
+            index.get_parents, 'ghost')
 
     def test_get_parents_with_ghosts(self):
         index = self.two_graph_index()
         self.assertEqual(('tail', 'ghost'), index.get_parents_with_ghosts('parent'))
+        # and errors on ghosts.
+        self.assertRaises(errors.RevisionNotPresent,
+            index.get_parents_with_ghosts, 'ghost')
 
     def test_check_versions_present(self):
         # ghosts should not be considered present
@@ -1790,6 +1826,39 @@
              ('tip', 'no-eol,line-delta', 0, 100, ['parent'])])
         self.assertEqual([], self.caught_entries)
 
+    def test_iter_parents(self):
+        index1 = self.make_g_index('1', 1, [
+        # no parents
+            ('r0', 'N0 100', ([], )),
+        # 1 parent
+            ('r1', '', (['r0'], ))])
+        index2 = self.make_g_index('2', 1, [
+        # 2 parents
+            ('r2', 'N0 100', (['r1', 'r0'], )),
+            ])
+        combined_index = CombinedGraphIndex([index1, index2])
+        index = KnitGraphIndex(combined_index)
+        # XXX TODO a ghost
+        # cases: each sample data individually:
+        self.assertEqual(set([('r0', ())]),
+            set(index.iter_parents(['r0'])))
+        self.assertEqual(set([('r1', ('r0', ))]),
+            set(index.iter_parents(['r1'])))
+        self.assertEqual(set([('r2', ('r1', 'r0'))]),
+            set(index.iter_parents(['r2'])))
+        # no nodes returned for a missing node
+        self.assertEqual(set(),
+            set(index.iter_parents(['missing'])))
+        # 1 node returned with missing nodes skipped
+        self.assertEqual(set([('r1', ('r0', ))]),
+            set(index.iter_parents(['ghost1', 'r1', 'ghost'])))
+        # 2 nodes returned
+        self.assertEqual(set([('r0', ()), ('r1', ('r0', ))]),
+            set(index.iter_parents(['r0', 'r1'])))
+        # 2 nodes returned, missing skipped
+        self.assertEqual(set([('r0', ()), ('r1', ('r0', ))]),
+            set(index.iter_parents(['a', 'r0', 'b', 'r1', 'c'])))
+
 
 class TestNoParentsGraphIndexKnit(KnitTests):
     """Tests for knits using KnitGraphIndex with no parents."""
@@ -1901,10 +1970,16 @@
     def test_get_parents(self):
         index = self.two_graph_index()
         self.assertEqual((), index.get_parents('parent'))
+        # and errors on ghosts.
+        self.assertRaises(errors.RevisionNotPresent,
+            index.get_parents, 'ghost')
 
     def test_get_parents_with_ghosts(self):
         index = self.two_graph_index()
         self.assertEqual((), index.get_parents_with_ghosts('parent'))
+        # and errors on ghosts.
+        self.assertRaises(errors.RevisionNotPresent,
+            index.get_parents_with_ghosts, 'ghost')
 
     def test_check_versions_present(self):
         index = self.two_graph_index()
@@ -2014,4 +2089,13 @@
              ('tip', 'no-eol,line-delta', 0, 100, [])])
         self.assertEqual([], self.caught_entries)
 
-
+    def test_iter_parents(self):
+        index = self.two_graph_index()
+        self.assertEqual(set([
+            ('tip', ()), ('tail', ()), ('parent', ()), ('separate', ())
+            ]),
+            set(index.iter_parents(['tip', 'tail', 'ghost', 'parent', 'separate'])))
+        self.assertEqual(set([('tip', ())]),
+            set(index.iter_parents(['tip'])))
+        self.assertEqual(set(),
+            set(index.iter_parents([])))

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2007-06-17 17:07:04 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2007-07-19 01:20:49 +0000
@@ -453,9 +453,9 @@
     def test_get_graph(self):
         f = self.get_file()
         graph = {
-            'v1': [],
-            'v2': ['v1'],
-            'v3': ['v2']}
+            'v1': (),
+            'v2': ('v1', ),
+            'v3': ('v2', )}
         self.build_graph(f, graph)
         self.assertEqual(graph, f.get_graph())
     
@@ -463,21 +463,21 @@
         f = self.get_file()
         complex_graph = {}
         simple_a = {
-            'c': [],
-            'b': ['c'],
-            'a': ['b'],
+            'c': (),
+            'b': ('c', ),
+            'a': ('b', ),
             }
         complex_graph.update(simple_a)
         simple_b = {
-            'c': [],
-            'b': ['c'],
+            'c': (),
+            'b': ('c', ),
             }
         complex_graph.update(simple_b)
         simple_gam = {
-            'c': [],
-            'oo': [],
-            'bar': ['oo', 'c'],
-            'gam': ['bar'],
+            'c': (),
+            'oo': (),
+            'bar': ('oo', 'c'),
+            'gam': ('bar', ),
             }
         complex_graph.update(simple_gam)
         simple_b_gam = {}
@@ -560,6 +560,37 @@
         """Open the versioned file from disk again."""
         raise NotImplementedError(self.reopen_file)
 
+    def test_iter_parents(self):
+        """iter_parents returns the parents for many nodes."""
+        f = self.get_file()
+        # sample data:
+        # no parents
+        f.add_lines('r0', [], ['a\n', 'b\n'])
+        # 1 parents
+        f.add_lines('r1', ['r0'], ['a\n', 'b\n'])
+        # 2 parents
+        f.add_lines('r2', ['r1', 'r0'], ['a\n', 'b\n'])
+        # XXX TODO a ghost
+        # cases: each sample data individually:
+        self.assertEqual(set([('r0', ())]),
+            set(f.iter_parents(['r0'])))
+        self.assertEqual(set([('r1', ('r0', ))]),
+            set(f.iter_parents(['r1'])))
+        self.assertEqual(set([('r2', ('r1', 'r0'))]),
+            set(f.iter_parents(['r2'])))
+        # no nodes returned for a missing node
+        self.assertEqual(set(),
+            set(f.iter_parents(['missing'])))
+        # 1 node returned with missing nodes skipped
+        self.assertEqual(set([('r1', ('r0', ))]),
+            set(f.iter_parents(['ghost1', 'r1', 'ghost'])))
+        # 2 nodes returned
+        self.assertEqual(set([('r0', ()), ('r1', ('r0', ))]),
+            set(f.iter_parents(['r0', 'r1'])))
+        # 2 nodes returned, missing skipped
+        self.assertEqual(set([('r0', ()), ('r1', ('r0', ))]),
+            set(f.iter_parents(['a', 'r0', 'b', 'r1', 'c'])))
+
     def test_iter_lines_added_or_present_in_versions(self):
         # test that we get at least an equalset of the lines added by
         # versions in the weave 
@@ -690,7 +721,7 @@
         # - these are ghost unaware and must not be reflect ghosts
         self.assertEqual(['notbxbfse'], vf.get_ancestry('notbxbfse'))
         self.assertEqual([], vf.get_parents('notbxbfse'))
-        self.assertEqual({'notbxbfse':[]}, vf.get_graph())
+        self.assertEqual({'notbxbfse':()}, vf.get_graph())
         self.assertFalse(self.callDeprecated([osutils._revision_id_warning],
                          vf.has_version, parent_id_unicode))
         self.assertFalse(vf.has_version(parent_id_utf8))
@@ -707,8 +738,8 @@
                             vf.add_lines, parent_id_unicode, [], [])
         self.assertEqual([parent_id_utf8, 'notbxbfse'], vf.get_ancestry(['notbxbfse']))
         self.assertEqual([parent_id_utf8], vf.get_parents('notbxbfse'))
-        self.assertEqual({parent_id_utf8:[],
-                          'notbxbfse':[parent_id_utf8],
+        self.assertEqual({parent_id_utf8:(),
+                          'notbxbfse':(parent_id_utf8, ),
                           },
                          vf.get_graph())
         self.assertTrue(self.callDeprecated([osutils._revision_id_warning],

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2007-06-28 06:16:19 +0000
+++ b/bzrlib/versionedfile.py	2007-07-19 01:20:49 +0000
@@ -332,22 +332,17 @@
         :param version_ids: Versions to select.
                             None means retrieve all versions.
         """
+        if version_ids is None:
+            return dict(self.iter_parents(self.versions()))
         result = {}
-        if version_ids is None:
-            for version in self.versions():
-                result[version] = self.get_parents(version)
-        else:
-            pending = set(osutils.safe_revision_id(v) for v in version_ids)
-            while pending:
-                version = pending.pop()
-                if version in result:
-                    continue
-                parents = self.get_parents(version)
-                for parent in parents:
-                    if parent in result:
-                        continue
-                    pending.add(parent)
+        pending = set(osutils.safe_revision_id(v) for v in version_ids)
+        while pending:
+            this_iteration = pending
+            pending = set()
+            for version, parents in self.iter_parents(this_iteration):
                 result[version] = parents
+                pending.update(parents)
+            pending.difference_update(result)
         return result
 
     def get_graph_with_ghosts(self):
@@ -443,6 +438,21 @@
         """
         raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
 
+    def iter_parents(self, version_ids):
+        """Iterate through the parents for many version ids.
+
+        :param version_ids: An iterable yielding version_ids.
+        :return: An iterator that yields (version_id, parents). Requested 
+            version_ids not present in the versioned file are simply skipped.
+            The order is undefined, allowing for different optimisations in
+            the underlying implementation.
+        """
+        for version_id in version_ids:
+            try:
+                yield version_id, tuple(self.get_parents(version_id))
+            except errors.RevisionNotPresent:
+                pass
+
     def transaction_finished(self):
         """The transaction that this file was opened in has finished.
 



More information about the bazaar-commits mailing list