Rev 1: Some basic functionality to make sure the graph implementations work. in http://bzr.arbash-meinel.com/plugins/test_graph

Mon Dec 10 19:48:01 GMT 2007

At http://bzr.arbash-meinel.com/plugins/test_graph

------------------------------------------------------------
revno: 1
revision-id:john at arbash-meinel.com-20071210194759-ii8q80gow8690x99
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: test_graph
timestamp: Mon 2007-12-10 13:47:59 -0600
message:
  Some basic functionality to make sure the graph implementations work.
added:
  __init__.py                    __init__.py-20071210194758-1pwa1q7e3wnjf418-1
  graph_testing.py               graph_testing.py-20071210194758-1pwa1q7e3wnjf418-2
-------------- next part --------------
=== added file '__init__.py'

--- a/__init__.py	1970-01-01 00:00:00 +0000
+++ b/__init__.py	2007-12-10 19:47:59 +0000
@@ -0,0 +1,37 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Some real-world graph tests and benchmarks."""
+
+version_info = (0, 1, 0, 'dev', 0)
+
+from bzrlib import commands
+
+
+class cmd_test_graph(commands.Command):
+    """Test that graph.find_difference() and .heads() return correct values.
+
+    This goes through the current branch, and uses the slow apis and compares
+    the values with the fast apis.
+    """
+
+    takes_args = ['location?']
+
+    def run(self, location='.'):
+        from graph_testing import test_all
+        test_all(location)
+
+commands.register_command(cmd_test_graph)

=== added file 'graph_testing.py'
--- a/graph_testing.py	1970-01-01 00:00:00 +0000
+++ b/graph_testing.py	2007-12-10 19:47:59 +0000
@@ -0,0 +1,152 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Basic functions for testing graph operations."""
+
+from bzrlib import (
+    branch,
+    errors,
+    tsort,
+    )
+
+
+
+class GraphTester(object):
+
+    def __init__(self, rev_graph):
+        self.rev_graph = rev_graph
+        self.head_times = {}
+        self.find_difference_times = {}
+        self.hold_graph = False
+
+    def get_interesting_nodes(self):
+        """Go through the revision graph, and look for interesting nodes.
+
+        Currently that is just defined as nodes which have more than one parent.
+        The returned list is in reverse topological order. (The newest revision
+        comes first.)
+        """
+        interesting = []
+        for seq_num, revision_id, depth, eom in tsort.merge_sort(self.rev_graph):
+            parent_ids = self.rev_graph[revision_id]
+            if len(parent_ids) > 1:
+                interesting.append(revision_id)
+        self.interesting = interesting
+
+    def get_ancestry_for_interesting(self):
+        """For each interesting node, figure out its ancestry set."""
+        ancestries = {}
+        # We go in reverse order, so that we will tend to find the ancestries in
+        # the map
+        for revision_id in reversed(self.interesting):
+            ancestry = set([revision_id])
+            needed = set([revision_id])
+            while needed:
+                next = needed.pop()
+                parent_ids = self.rev_graph[next]
+                ancestry.update(parent_ids)
+                for parent_id in parent_ids:
+                    if parent_id in ancestries:
+                        ancestry.update(ancestries[parent_id])
+                    else: # We need to probe in this direction
+                        needed.add(parent_id)
+            ancestries[revision_id] = ancestry
+        self.ancestries = ancestries
+
+    def time_functions(self, a_branch):
+        if self.hold_graph:
+            graph = a_branch.repository.get_graph()
+
+        for revision_id in reversed(interesting):
+            parent_ids = self.rev_graph[revision_id]
+            a_branch.lock_read()
+            try:
+                if not self.hold_graph:
+                    graph = a_branch.repository.get_graph()
+                t_start = time.clock()
+                heads = graph.heads(parent_ids)
+                t_heads = time.clock() - t_start
+            finally:
+                a_branch.unlock()
+            self.head_times[revision_id] = t_heads
+            if heads != set(parent_ids):
+                print 'heads() mismatch for: %s' % (revision_id,)
+            if len(parent_ids) < 2:
+                continue
+            try:
+                if not self.hold_graph:
+                    graph = a_branch.repository.get_graph()
+                t_start = time.clock()
+                left, right = graph.find_difference(parent_ids[0],
+                                                    parent_ids[1])
+                t_diff = t_start - t_start
+            finally:
+                a_branch.unlock()
+            self.find_difference_times[revision_id] = t_diff
+            left_ancestry = self.ancestries[parent_ids[0]]
+            right_ancestry = self.ancestries[parent_ids[1]]
+            left_correct = left == (left_ancestry - right_ancestry)
+            right_correct = right == (right_ancestry - left_ancestry)
+            if not left_correct and not right_correct:
+                print 'left and right incorrect for: %s' % (revision_id,)
+            elif not left_correct:
+                print 'left incorrect for: %s' % (revision_id,)
+            elif not right_correct:
+                print 'right incorrect for: %s' % (revision_id,)
+
+    def get_statistics(self, times):
+        min_time = None
+        min_rev = None
+        max_time = None
+        max_rev = None
+        total_time = 0.0
+        for revision_id, value in times.iteritems():
+            total_time += value
+            if min_time is None or value < min_time:
+                min_time = value
+                min_rev = revision_id
+            if max_time is None or value > max_time:
+                max_time = value
+                max_rev = revision_id
+        avg_time = total_time / (len(times))
+        return {'count':len(times),
+                'min_time':min_time,
+                'min_rev':min_rev,
+                'max_time':max_time,
+                'avg_time':avg_time,
+                'total_time':total_time,
+                }
+
+
+def test_all(location):
+    the_branch = branch.Branch.open(location)
+    the_branch.lock_read()
+    try:
+        rev_graph = the_branch.repository.get_revision_graph(
+                        the_branch.last_revision())
+        rev_id_map = the_branch.get_revision_id_to_revno_map()
+    finally:
+        the_branch.unlock()
+    tester = GraphTester(rev_graph)
+    tester.get_interesting_nodes()
+    tester.get_ancestry_for_interesting()
+    tester.time_functions()
+    import pprint
+    print 'heads:'
+    pprint.pprint(tester.get_statistics(tester.head_times))
+    print 'diffs:'
+    pprint.pprint(tester.get_statistics(tester.find_difference_times)
+    import pdb; pdb.set_trace()