Rev 23: Clean up the builder, start using it for big speed gains. in http://bzr.arbash-meinel.com/plugins/git

John Arbash Meinel john at arbash-meinel.com
Fri Nov 9 15:53:31 GMT 2007


At http://bzr.arbash-meinel.com/plugins/git

------------------------------------------------------------
revno: 23
revision-id:john at arbash-meinel.com-20071109155324-kg06nis3idelqva0
parent: john at arbash-meinel.com-20071109060811-toeslo9r1gnn24rr
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: git
timestamp: Fri 2007-11-09 10:53:24 -0500
message:
  Clean up the builder, start using it for big speed gains.
modified:
  tests/__init__.py              __init__.py-20070202180350-njrb42t7fnv35d1k-2
  tests/test_builder.py          test_builder.py-20071109060756-yqmwmyqdr3pqeh3s-1
  tests/test_git_repository.py   test_git_repository.-20071108234408-ygidvy5hviixghsd-5
-------------- next part --------------
=== modified file 'tests/__init__.py'
--- a/tests/__init__.py	2007-11-09 06:08:11 +0000
+++ b/tests/__init__.py	2007-11-09 15:53:24 +0000
@@ -18,11 +18,13 @@
 
 import subprocess
 import time
+import tempfile
 
 from bzrlib import (
     tests,
     trace,
     )
+from  bzrlib.plugins.git.gitlib import errors
 
 TestCase = tests.TestCase
 TestCaseInTempDir = tests.TestCaseInTempDir
@@ -60,23 +62,61 @@
 
 class GitBranchBuilder(object):
 
-    def __init__(self, stream):
+    def __init__(self, stream=None):
         self.commit_info = []
         self.stream = stream
+        self._process = None
         self._counter = 0
         self._branch = 'refs/head/master'
+        if stream is None:
+            self._marks_file = tempfile.NamedTemporaryFile(
+                prefix='tmp-git-marks')
+            self._process = subprocess.Popen(
+                ['git', 'fast-import', '--quiet',
+                 # git doesn't support '--export-marks foo';
+                 # it only supports '--export-marks=foo'
+                 # and gives an 'unknown option' error otherwise.
+                 '--export-marks='+self._marks_file.name,
+                ],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                stdin=subprocess.PIPE,
+                )
+            self.stream = self._process.stdin
+        else:
+            self._process = None
 
     def set_branch(self, branch):
         """Set the branch we are committing."""
         self._branch = branch
 
+    def _write(self, text):
+        try:
+            self.stream.write(text)
+        except IOError, e:
+            if self._process is None:
+                raise
+            raise errors.GitCommandError(self._process.returncode,
+                                         'git fast-import',
+                                         self._process.stderr.read())
+
+    def _writelines(self, lines):
+        try:
+            self.stream.writelines(lines)
+        except IOError, e:
+            if self._process is None:
+                raise
+            raise errors.GitCommandError(self._process.returncode,
+                                         'git fast-import',
+                                         self._process.stderr.read())
+
     def _create_blob(self, content):
         self._counter += 1
-        self.stream.write('blob\n')
-        self.stream.write('mark :%d\n' % (self._counter,))
-        self.stream.write('data %d\n' % (len(content),))
-        self.stream.write(content)
-        self.stream.write('\n')
+        self._write('blob\n')
+        self._write('mark :%d\n' % (self._counter,))
+        self._write('data %d\n' % (len(content),))
+        self._write(content)
+        self._write('\n')
         return self._counter
 
     def set_file(self, path, content, executable):
@@ -99,29 +139,64 @@
         """This will delete files or symlinks at the given location."""
         self.commit_info.append('D %s\n' % (path.encode('utf-8'),))
 
+    # TODO: Author
+    # TODO: Author timestamp+timezone
     def commit(self, committer, message, timestamp=None,
-               timezone='+0000', author=None):
+               timezone='+0000', author=None,
+               merge=None, base=None):
+        """Commit the new content.
+
+        :param committer: The name and address for the committer
+        :param message: The commit message
+        :param timestamp: The timestamp for the commit
+        :param timezone: The timezone of the commit, such as '+0000' or '-1000'
+        :param author: The name and address of the author (if different from
+            committer)
+        :param merge: A list of marks if this should merge in another commit
+        :param base: The mark of the base revision (primary parent) if that
+            is not the last commit.
+        :return: A mark which can be used in the future to reference this
+            commit.
+        """
         self._counter += 1
         mark = self._counter
-        self.stream.write('commit %s\n' % (branch,))
-        self.stream.write('mark :%d\n' % (mark,))
-        self.stream.write('committer %s %s %s\n'
-                          % (committer, timestamp, timezone))
+        if timestamp is None:
+            timestamp = int(time.time())
+        self._write('commit %s\n' % (self._branch,))
+        self._write('mark :%d\n' % (mark,))
+        self._write('committer %s %s %s\n'
+                    % (committer, timestamp, timezone))
         message = message.encode('UTF-8')
-        self.stream.write('data %d\n' % (len(message),))
-        self.stream.write(message)
-        self.stream.write('\n')
-        self.stream.writelines(self.commit_info)
-        self.stream.write('\n')
+        self._write('data %d\n' % (len(message),))
+        self._write(message)
+        self._write('\n')
+        if base is not None:
+            self._write('from :%d\n' % (base,))
+        if merge is not None:
+            for m in merge:
+                self._write('merge :%d\n' % (m,))
+        self._writelines(self.commit_info)
+        self._write('\n')
         self.commit_info = []
         return mark
 
-
-class GitBranchBuilder(object):
-    """This uses git-fast-import to build up something directly."""
-
-    def __init__(self, git_dir):
-        self.git_dir = git_dir
+    def finish(self):
+        """Close the stream and return the mapping of marks to git ids."""
+        self.stream.close()
+        if self._process is None:
+            return {}
+        if self._process.wait() != 0:
+            raise errors.GitCommandError(self._process.returncode,
+                                         'git fast-import',
+                                         self._process.stderr.read())
+        self._marks_file.seek(0)
+        mapping = {}
+        for line in self._marks_file:
+            mark, shasum = line.split()
+            assert mark.startswith(':')
+            mapping[int(mark[1:])] = shasum
+        self._marks_file.close()
+        return mapping
 
 
 def test_suite():

=== modified file 'tests/test_builder.py'
--- a/tests/test_builder.py	2007-11-09 06:08:11 +0000
+++ b/tests/test_builder.py	2007-11-09 15:53:24 +0000
@@ -21,18 +21,18 @@
 from bzrlib.plugins.git import tests
 
 
-class TestCommitBuilder(tests.TestCase):
+class TestGitBranchBuilder(tests.TestCase):
 
     def test__create_blob(self):
         stream = StringIO()
-        builder = tests.GitCommitBuilder(stream)
+        builder = tests.GitBranchBuilder(stream)
         self.assertEqual(1, builder._create_blob('foo\nbar\n'))
         self.assertEqualDiff('blob\nmark :1\ndata 8\nfoo\nbar\n\n',
                              stream.getvalue())
 
     def test_set_file(self):
         stream = StringIO()
-        builder = tests.GitCommitBuilder(stream)
+        builder = tests.GitBranchBuilder(stream)
         builder.set_file('foobar', 'foo\nbar\n', False)
         self.assertEqualDiff('blob\nmark :1\ndata 8\nfoo\nbar\n\n',
                              stream.getvalue())
@@ -40,7 +40,7 @@
 
     def test_set_file_unicode(self):
         stream = StringIO()
-        builder = tests.GitCommitBuilder(stream)
+        builder = tests.GitBranchBuilder(stream)
         builder.set_file(u'f\xb5/bar', 'contents\nbar\n', False)
         self.assertEqualDiff('blob\nmark :1\ndata 13\ncontents\nbar\n\n',
                              stream.getvalue())
@@ -48,7 +48,7 @@
 
     def test_set_file_executable(self):
         stream = StringIO()
-        builder = tests.GitCommitBuilder(stream)
+        builder = tests.GitBranchBuilder(stream)
         builder.set_file(u'f\xb5/bar', 'contents\nbar\n', True)
         self.assertEqualDiff('blob\nmark :1\ndata 13\ncontents\nbar\n\n',
                              stream.getvalue())
@@ -56,7 +56,7 @@
 
     def test_set_link(self):
         stream = StringIO()
-        builder = tests.GitCommitBuilder(stream)
+        builder = tests.GitBranchBuilder(stream)
         builder.set_link(u'f\xb5/bar', 'link/contents')
         self.assertEqualDiff('blob\nmark :1\ndata 13\nlink/contents\n',
                              stream.getvalue())
@@ -64,17 +64,16 @@
 
     def test_delete_entry(self):
         stream = StringIO()
-        builder = tests.GitCommitBuilder(stream)
+        builder = tests.GitBranchBuilder(stream)
         builder.delete_entry(u'path/to/f\xb5')
         self.assertEqual(['D path/to/f\xc2\xb5\n'], builder.commit_info)
 
     def test_add_and_commit(self):
         stream = StringIO()
-        builder = tests.GitCommitBuilder(stream)
+        builder = tests.GitBranchBuilder(stream)
 
         builder.set_file(u'f\xb5/bar', 'contents\nbar\n', False)
-        self.assertEqual(2, builder.commit('refs/head/master',
-                                           'Joe Foo <joe at foo.com>',
+        self.assertEqual(2, builder.commit('Joe Foo <joe at foo.com>',
                                            u'committing f\xb5/bar',
                                            timestamp=1194586400,
                                            timezone='+0100'))
@@ -88,3 +87,112 @@
                              'M 100644 :1 f\xc2\xb5/bar\n'
                              '\n',
                              stream.getvalue())
+
+    def test_commit_base(self):
+        stream = StringIO()
+        builder = tests.GitBranchBuilder(stream)
+
+        builder.set_file(u'foo', 'contents\nfoo\n', False)
+        r1 = builder.commit('Joe Foo <joe at foo.com>', u'first',
+                            timestamp=1194586400)
+        r2 = builder.commit('Joe Foo <joe at foo.com>', u'second',
+                            timestamp=1194586405)
+        r3 = builder.commit('Joe Foo <joe at foo.com>', u'third',
+                            timestamp=1194586410,
+                            base=r1)
+
+        self.assertEqualDiff('blob\nmark :1\ndata 13\ncontents\nfoo\n\n'
+                             'commit refs/head/master\n'
+                             'mark :2\n'
+                             'committer Joe Foo <joe at foo.com> 1194586400 +0000\n'
+                             'data 5\n'
+                             'first'
+                             '\n'
+                             'M 100644 :1 foo\n'
+                             '\n'
+                             'commit refs/head/master\n'
+                             'mark :3\n'
+                             'committer Joe Foo <joe at foo.com> 1194586405 +0000\n'
+                             'data 6\n'
+                             'second'
+                             '\n'
+                             '\n'
+                             'commit refs/head/master\n'
+                             'mark :4\n'
+                             'committer Joe Foo <joe at foo.com> 1194586410 +0000\n'
+                             'data 5\n'
+                             'third'
+                             '\n'
+                             'from :2\n'
+                             '\n', stream.getvalue())
+
+    def test_commit_merge(self):
+        stream = StringIO()
+        builder = tests.GitBranchBuilder(stream)
+
+        builder.set_file(u'foo', 'contents\nfoo\n', False)
+        r1 = builder.commit('Joe Foo <joe at foo.com>', u'first',
+                            timestamp=1194586400)
+        r2 = builder.commit('Joe Foo <joe at foo.com>', u'second',
+                            timestamp=1194586405)
+        r3 = builder.commit('Joe Foo <joe at foo.com>', u'third',
+                            timestamp=1194586410,
+                            base=r1)
+        r4 = builder.commit('Joe Foo <joe at foo.com>', u'Merge',
+                            timestamp=1194586415,
+                            merge=[r2])
+
+        self.assertEqualDiff('blob\nmark :1\ndata 13\ncontents\nfoo\n\n'
+                             'commit refs/head/master\n'
+                             'mark :2\n'
+                             'committer Joe Foo <joe at foo.com> 1194586400 +0000\n'
+                             'data 5\n'
+                             'first'
+                             '\n'
+                             'M 100644 :1 foo\n'
+                             '\n'
+                             'commit refs/head/master\n'
+                             'mark :3\n'
+                             'committer Joe Foo <joe at foo.com> 1194586405 +0000\n'
+                             'data 6\n'
+                             'second'
+                             '\n'
+                             '\n'
+                             'commit refs/head/master\n'
+                             'mark :4\n'
+                             'committer Joe Foo <joe at foo.com> 1194586410 +0000\n'
+                             'data 5\n'
+                             'third'
+                             '\n'
+                             'from :2\n'
+                             '\n'
+                             'commit refs/head/master\n'
+                             'mark :5\n'
+                             'committer Joe Foo <joe at foo.com> 1194586415 +0000\n'
+                             'data 5\n'
+                             'Merge'
+                             '\n'
+                             'merge :3\n'
+                             '\n', stream.getvalue())
+
+    def test_auto_timestamp(self):
+        stream = StringIO()
+        builder = tests.GitBranchBuilder(stream)
+        builder.commit('Joe Foo <joe at foo.com>', u'message')
+        self.assertContainsRe(stream.getvalue(),
+                              r'committer Joe Foo <joe at foo\.com> \d+ \+0000')
+
+
+class TestGitBranchBuilderReal(tests.TestCaseInTempDir):
+
+    def test_create_real_branch(self):
+        tests.run_git('init')
+
+        builder = tests.GitBranchBuilder()
+        builder.set_file(u'foo', 'contents\nfoo\n', False)
+        r1 = builder.commit('Joe Foo <joe at foo.com>', u'first',
+                            timestamp=1194586400)
+        mapping = builder.finish()
+        self.assertEqual({1:'44411e8e9202177dd19b6599d7a7991059fa3cb4',
+                          2: 'b0b62e674f67306fddcf72fa888c3b56df100d64',
+                         }, mapping)

=== modified file 'tests/test_git_repository.py'
--- a/tests/test_git_repository.py	2007-11-09 04:53:15 +0000
+++ b/tests/test_git_repository.py	2007-11-09 15:53:24 +0000
@@ -46,28 +46,37 @@
 
     def test_revision_graph(self):
         tests.run_git('init')
-        self.build_tree(['a'])
-        tests.run_git('add', 'a')
-        tests.run_git('commit', '-m', 'a')
-        tests.run_git('branch', 'foo')
-        self.build_tree_contents([('a', 'new a\n')])
-        tests.run_git('commit', '-a', '-m', 'new a')
-        tests.run_git('checkout', 'foo')
-        self.build_tree(['b'])
-        tests.run_git('add', 'b')
-        tests.run_git('commit', '-m', 'b')
-        tests.run_git('merge', 'master')
-
-        revisions = tests.run_git('rev-list', '--topo-order', 'HEAD')
-        revisions = [ids.convert_revision_id_git_to_bzr(r)
-                     for r in revisions.splitlines()]
-        graph = {revisions[0]:[revisions[2], revisions[1]],
-                 revisions[1]:[revisions[3]],
-                 revisions[2]:[revisions[3]],
-                 revisions[3]:[],
+        builder = tests.GitBranchBuilder()
+        file_handle = builder.set_file('a', 'text for a\n', False)
+        commit1_handle = builder.commit('Joe Foo <joe at foo.com>', u'message')
+        file2_handle = builder.set_file('a', 'new a\n', False)
+        commit2_handle = builder.commit('Joe Foo <joe at foo.com>', u'new a')
+        file3_handle = builder.set_file('b', 'text for b\n', False)
+        commit3_handle = builder.commit('Jerry Bar <jerry at foo.com>', u'b',
+                                        base=commit1_handle)
+        commit4_handle = builder.commit('Jerry Bar <jerry at foo.com>', u'merge',
+                                        base=commit3_handle,
+                                        merge=[commit2_handle],)
+
+        mapping = builder.finish()
+        commit1_id = mapping[commit1_handle]
+        commit2_id = mapping[commit2_handle]
+        commit3_id = mapping[commit3_handle]
+        commit4_id = mapping[commit4_handle]
+
+        revisions = tests.run_git('rev-list', '--topo-order',
+                                  commit4_id)
+        revisions = revisions.splitlines()
+        self.assertEqual([commit4_id, commit2_id, commit3_id, commit1_id],
+                         revisions)
+        bzr_revisions = [ids.convert_revision_id_git_to_bzr(r) for r in revisions]
+        graph = {bzr_revisions[0]:[bzr_revisions[2], bzr_revisions[1]],
+                 bzr_revisions[1]:[bzr_revisions[3]],
+                 bzr_revisions[2]:[bzr_revisions[3]],
+                 bzr_revisions[3]:[],
                 }
 
         repo = repository.Repository.open('.')
-        self.assertEqual(graph, repo.get_revision_graph(revisions[0]))
-        self.assertEqual({revisions[3]:[]},
-                         repo.get_revision_graph(revisions[3]))
+        self.assertEqual(graph, repo.get_revision_graph(bzr_revisions[0]))
+        self.assertEqual({bzr_revisions[3]:[]},
+                         repo.get_revision_graph(bzr_revisions[3]))



More information about the bazaar-commits mailing list