Rev 23: Clean up the builder, start using it for big speed gains. in http://bzr.arbash-meinel.com/plugins/git
John Arbash Meinel
john at arbash-meinel.com
Fri Nov 9 15:53:31 GMT 2007
At http://bzr.arbash-meinel.com/plugins/git
------------------------------------------------------------
revno: 23
revision-id:john at arbash-meinel.com-20071109155324-kg06nis3idelqva0
parent: john at arbash-meinel.com-20071109060811-toeslo9r1gnn24rr
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: git
timestamp: Fri 2007-11-09 10:53:24 -0500
message:
Clean up the builder, start using it for big speed gains.
modified:
tests/__init__.py __init__.py-20070202180350-njrb42t7fnv35d1k-2
tests/test_builder.py test_builder.py-20071109060756-yqmwmyqdr3pqeh3s-1
tests/test_git_repository.py test_git_repository.-20071108234408-ygidvy5hviixghsd-5
-------------- next part --------------
=== modified file 'tests/__init__.py'
--- a/tests/__init__.py 2007-11-09 06:08:11 +0000
+++ b/tests/__init__.py 2007-11-09 15:53:24 +0000
@@ -18,11 +18,13 @@
import subprocess
import time
+import tempfile
from bzrlib import (
tests,
trace,
)
+from bzrlib.plugins.git.gitlib import errors
TestCase = tests.TestCase
TestCaseInTempDir = tests.TestCaseInTempDir
@@ -60,23 +62,61 @@
class GitBranchBuilder(object):
- def __init__(self, stream):
+ def __init__(self, stream=None):
self.commit_info = []
self.stream = stream
+ self._process = None
self._counter = 0
self._branch = 'refs/head/master'
+ if stream is None:
+ self._marks_file = tempfile.NamedTemporaryFile(
+ prefix='tmp-git-marks')
+ self._process = subprocess.Popen(
+ ['git', 'fast-import', '--quiet',
+ # git fast-import does not accept '--export-marks foo';
+ # it only supports the '--export-marks=foo' form
+ # and reports an 'unknown option' error otherwise.
+ '--export-marks='+self._marks_file.name,
+ ],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ stdin=subprocess.PIPE,
+ )
+ self.stream = self._process.stdin
+ else:
+ self._process = None
def set_branch(self, branch):
"""Set the branch we are committing."""
self._branch = branch
+ def _write(self, text):
+ try:
+ self.stream.write(text)
+ except IOError, e:
+ if self._process is None:
+ raise
+ raise errors.GitCommandError(self._process.returncode,
+ 'git fast-import',
+ self._process.stderr.read())
+
+ def _writelines(self, lines):
+ try:
+ self.stream.writelines(lines)
+ except IOError, e:
+ if self._process is None:
+ raise
+ raise errors.GitCommandError(self._process.returncode,
+ 'git fast-import',
+ self._process.stderr.read())
+
def _create_blob(self, content):
self._counter += 1
- self.stream.write('blob\n')
- self.stream.write('mark :%d\n' % (self._counter,))
- self.stream.write('data %d\n' % (len(content),))
- self.stream.write(content)
- self.stream.write('\n')
+ self._write('blob\n')
+ self._write('mark :%d\n' % (self._counter,))
+ self._write('data %d\n' % (len(content),))
+ self._write(content)
+ self._write('\n')
return self._counter
def set_file(self, path, content, executable):
@@ -99,29 +139,64 @@
"""This will delete files or symlinks at the given location."""
self.commit_info.append('D %s\n' % (path.encode('utf-8'),))
+ # TODO: Author
+ # TODO: Author timestamp+timezone
def commit(self, committer, message, timestamp=None,
- timezone='+0000', author=None):
+ timezone='+0000', author=None,
+ merge=None, base=None):
+ """Commit the new content.
+
+ :param committer: The name and address for the committer
+ :param message: The commit message
+ :param timestamp: The timestamp for the commit
+ :param timezone: The timezone of the commit, such as '+0000' or '-1000'
+ :param author: The name and address of the author (if different from
+ committer)
+ :param merge: A list of marks if this should merge in another commit
+ :param base: An id for the base revision (primary parent) if that
+ is not the last commit.
+ :return: A mark which can be used in the future to reference this
+ commit.
+ """
self._counter += 1
mark = self._counter
- self.stream.write('commit %s\n' % (branch,))
- self.stream.write('mark :%d\n' % (mark,))
- self.stream.write('committer %s %s %s\n'
- % (committer, timestamp, timezone))
+ if timestamp is None:
+ timestamp = int(time.time())
+ self._write('commit %s\n' % (self._branch,))
+ self._write('mark :%d\n' % (mark,))
+ self._write('committer %s %s %s\n'
+ % (committer, timestamp, timezone))
message = message.encode('UTF-8')
- self.stream.write('data %d\n' % (len(message),))
- self.stream.write(message)
- self.stream.write('\n')
- self.stream.writelines(self.commit_info)
- self.stream.write('\n')
+ self._write('data %d\n' % (len(message),))
+ self._write(message)
+ self._write('\n')
+ if base is not None:
+ self._write('from :%d\n' % (base,))
+ if merge is not None:
+ for m in merge:
+ self._write('merge :%d\n' % (m,))
+ self._writelines(self.commit_info)
+ self._write('\n')
self.commit_info = []
return mark
-
-class GitBranchBuilder(object):
- """This uses git-fast-import to build up something directly."""
-
- def __init__(self, git_dir):
- self.git_dir = git_dir
+ def finish(self):
+ """We are finished building, close the stream, get the id mapping"""
+ self.stream.close()
+ if self._process is None:
+ return {}
+ if self._process.wait() != 0:
+ raise errors.GitCommandError(self._process.returncode,
+ 'git fast-import',
+ self._process.stderr.read())
+ self._marks_file.seek(0)
+ mapping = {}
+ for line in self._marks_file:
+ mark, shasum = line.split()
+ assert mark.startswith(':')
+ mapping[int(mark[1:])] = shasum
+ self._marks_file.close()
+ return mapping
def test_suite():
=== modified file 'tests/test_builder.py'
--- a/tests/test_builder.py 2007-11-09 06:08:11 +0000
+++ b/tests/test_builder.py 2007-11-09 15:53:24 +0000
@@ -21,18 +21,18 @@
from bzrlib.plugins.git import tests
-class TestCommitBuilder(tests.TestCase):
+class TestGitBranchBuilder(tests.TestCase):
def test__create_blob(self):
stream = StringIO()
- builder = tests.GitCommitBuilder(stream)
+ builder = tests.GitBranchBuilder(stream)
self.assertEqual(1, builder._create_blob('foo\nbar\n'))
self.assertEqualDiff('blob\nmark :1\ndata 8\nfoo\nbar\n\n',
stream.getvalue())
def test_set_file(self):
stream = StringIO()
- builder = tests.GitCommitBuilder(stream)
+ builder = tests.GitBranchBuilder(stream)
builder.set_file('foobar', 'foo\nbar\n', False)
self.assertEqualDiff('blob\nmark :1\ndata 8\nfoo\nbar\n\n',
stream.getvalue())
@@ -40,7 +40,7 @@
def test_set_file_unicode(self):
stream = StringIO()
- builder = tests.GitCommitBuilder(stream)
+ builder = tests.GitBranchBuilder(stream)
builder.set_file(u'f\xb5/bar', 'contents\nbar\n', False)
self.assertEqualDiff('blob\nmark :1\ndata 13\ncontents\nbar\n\n',
stream.getvalue())
@@ -48,7 +48,7 @@
def test_set_file_executable(self):
stream = StringIO()
- builder = tests.GitCommitBuilder(stream)
+ builder = tests.GitBranchBuilder(stream)
builder.set_file(u'f\xb5/bar', 'contents\nbar\n', True)
self.assertEqualDiff('blob\nmark :1\ndata 13\ncontents\nbar\n\n',
stream.getvalue())
@@ -56,7 +56,7 @@
def test_set_link(self):
stream = StringIO()
- builder = tests.GitCommitBuilder(stream)
+ builder = tests.GitBranchBuilder(stream)
builder.set_link(u'f\xb5/bar', 'link/contents')
self.assertEqualDiff('blob\nmark :1\ndata 13\nlink/contents\n',
stream.getvalue())
@@ -64,17 +64,16 @@
def test_delete_entry(self):
stream = StringIO()
- builder = tests.GitCommitBuilder(stream)
+ builder = tests.GitBranchBuilder(stream)
builder.delete_entry(u'path/to/f\xb5')
self.assertEqual(['D path/to/f\xc2\xb5\n'], builder.commit_info)
def test_add_and_commit(self):
stream = StringIO()
- builder = tests.GitCommitBuilder(stream)
+ builder = tests.GitBranchBuilder(stream)
builder.set_file(u'f\xb5/bar', 'contents\nbar\n', False)
- self.assertEqual(2, builder.commit('refs/head/master',
- 'Joe Foo <joe at foo.com>',
+ self.assertEqual(2, builder.commit('Joe Foo <joe at foo.com>',
u'committing f\xb5/bar',
timestamp=1194586400,
timezone='+0100'))
@@ -88,3 +87,112 @@
'M 100644 :1 f\xc2\xb5/bar\n'
'\n',
stream.getvalue())
+
+ def test_commit_base(self):
+ stream = StringIO()
+ builder = tests.GitBranchBuilder(stream)
+
+ builder.set_file(u'foo', 'contents\nfoo\n', False)
+ r1 = builder.commit('Joe Foo <joe at foo.com>', u'first',
+ timestamp=1194586400)
+ r2 = builder.commit('Joe Foo <joe at foo.com>', u'second',
+ timestamp=1194586405)
+ r3 = builder.commit('Joe Foo <joe at foo.com>', u'third',
+ timestamp=1194586410,
+ base=r1)
+
+ self.assertEqualDiff('blob\nmark :1\ndata 13\ncontents\nfoo\n\n'
+ 'commit refs/head/master\n'
+ 'mark :2\n'
+ 'committer Joe Foo <joe at foo.com> 1194586400 +0000\n'
+ 'data 5\n'
+ 'first'
+ '\n'
+ 'M 100644 :1 foo\n'
+ '\n'
+ 'commit refs/head/master\n'
+ 'mark :3\n'
+ 'committer Joe Foo <joe at foo.com> 1194586405 +0000\n'
+ 'data 6\n'
+ 'second'
+ '\n'
+ '\n'
+ 'commit refs/head/master\n'
+ 'mark :4\n'
+ 'committer Joe Foo <joe at foo.com> 1194586410 +0000\n'
+ 'data 5\n'
+ 'third'
+ '\n'
+ 'from :2\n'
+ '\n', stream.getvalue())
+
+ def test_commit_merge(self):
+ stream = StringIO()
+ builder = tests.GitBranchBuilder(stream)
+
+ builder.set_file(u'foo', 'contents\nfoo\n', False)
+ r1 = builder.commit('Joe Foo <joe at foo.com>', u'first',
+ timestamp=1194586400)
+ r2 = builder.commit('Joe Foo <joe at foo.com>', u'second',
+ timestamp=1194586405)
+ r3 = builder.commit('Joe Foo <joe at foo.com>', u'third',
+ timestamp=1194586410,
+ base=r1)
+ r4 = builder.commit('Joe Foo <joe at foo.com>', u'Merge',
+ timestamp=1194586415,
+ merge=[r2])
+
+ self.assertEqualDiff('blob\nmark :1\ndata 13\ncontents\nfoo\n\n'
+ 'commit refs/head/master\n'
+ 'mark :2\n'
+ 'committer Joe Foo <joe at foo.com> 1194586400 +0000\n'
+ 'data 5\n'
+ 'first'
+ '\n'
+ 'M 100644 :1 foo\n'
+ '\n'
+ 'commit refs/head/master\n'
+ 'mark :3\n'
+ 'committer Joe Foo <joe at foo.com> 1194586405 +0000\n'
+ 'data 6\n'
+ 'second'
+ '\n'
+ '\n'
+ 'commit refs/head/master\n'
+ 'mark :4\n'
+ 'committer Joe Foo <joe at foo.com> 1194586410 +0000\n'
+ 'data 5\n'
+ 'third'
+ '\n'
+ 'from :2\n'
+ '\n'
+ 'commit refs/head/master\n'
+ 'mark :5\n'
+ 'committer Joe Foo <joe at foo.com> 1194586415 +0000\n'
+ 'data 5\n'
+ 'Merge'
+ '\n'
+ 'merge :3\n'
+ '\n', stream.getvalue())
+
+ def test_auto_timestamp(self):
+ stream = StringIO()
+ builder = tests.GitBranchBuilder(stream)
+ builder.commit('Joe Foo <joe at foo.com>', u'message')
+ self.assertContainsRe(stream.getvalue(),
+ r'committer Joe Foo <joe at foo\.com> \d+ \+0000')
+
+
+class TestGitBranchBuilderReal(tests.TestCaseInTempDir):
+
+ def test_create_real_branch(self):
+ tests.run_git('init')
+
+ builder = tests.GitBranchBuilder()
+ builder.set_file(u'foo', 'contents\nfoo\n', False)
+ r1 = builder.commit('Joe Foo <joe at foo.com>', u'first',
+ timestamp=1194586400)
+ mapping = builder.finish()
+ self.assertEqual({1:'44411e8e9202177dd19b6599d7a7991059fa3cb4',
+ 2: 'b0b62e674f67306fddcf72fa888c3b56df100d64',
+ }, mapping)
=== modified file 'tests/test_git_repository.py'
--- a/tests/test_git_repository.py 2007-11-09 04:53:15 +0000
+++ b/tests/test_git_repository.py 2007-11-09 15:53:24 +0000
@@ -46,28 +46,37 @@
def test_revision_graph(self):
tests.run_git('init')
- self.build_tree(['a'])
- tests.run_git('add', 'a')
- tests.run_git('commit', '-m', 'a')
- tests.run_git('branch', 'foo')
- self.build_tree_contents([('a', 'new a\n')])
- tests.run_git('commit', '-a', '-m', 'new a')
- tests.run_git('checkout', 'foo')
- self.build_tree(['b'])
- tests.run_git('add', 'b')
- tests.run_git('commit', '-m', 'b')
- tests.run_git('merge', 'master')
-
- revisions = tests.run_git('rev-list', '--topo-order', 'HEAD')
- revisions = [ids.convert_revision_id_git_to_bzr(r)
- for r in revisions.splitlines()]
- graph = {revisions[0]:[revisions[2], revisions[1]],
- revisions[1]:[revisions[3]],
- revisions[2]:[revisions[3]],
- revisions[3]:[],
+ builder = tests.GitBranchBuilder()
+ file_handle = builder.set_file('a', 'text for a\n', False)
+ commit1_handle = builder.commit('Joe Foo <joe at foo.com>', u'message')
+ file2_handle = builder.set_file('a', 'new a\n', False)
+ commit2_handle = builder.commit('Joe Foo <joe at foo.com>', u'new a')
+ file3_handle = builder.set_file('b', 'text for b\n', False)
+ commit3_handle = builder.commit('Jerry Bar <jerry at foo.com>', u'b',
+ base=commit1_handle)
+ commit4_handle = builder.commit('Jerry Bar <jerry at foo.com>', u'merge',
+ base=commit3_handle,
+ merge=[commit2_handle],)
+
+ mapping = builder.finish()
+ commit1_id = mapping[commit1_handle]
+ commit2_id = mapping[commit2_handle]
+ commit3_id = mapping[commit3_handle]
+ commit4_id = mapping[commit4_handle]
+
+ revisions = tests.run_git('rev-list', '--topo-order',
+ commit4_id)
+ revisions = revisions.splitlines()
+ self.assertEqual([commit4_id, commit2_id, commit3_id, commit1_id],
+ revisions)
+ bzr_revisions = [ids.convert_revision_id_git_to_bzr(r) for r in revisions]
+ graph = {bzr_revisions[0]:[bzr_revisions[2], bzr_revisions[1]],
+ bzr_revisions[1]:[bzr_revisions[3]],
+ bzr_revisions[2]:[bzr_revisions[3]],
+ bzr_revisions[3]:[],
}
repo = repository.Repository.open('.')
- self.assertEqual(graph, repo.get_revision_graph(revisions[0]))
- self.assertEqual({revisions[3]:[]},
- repo.get_revision_graph(revisions[3]))
+ self.assertEqual(graph, repo.get_revision_graph(bzr_revisions[0]))
+ self.assertEqual({bzr_revisions[3]:[]},
+ repo.get_revision_graph(bzr_revisions[3]))
More information about the bazaar-commits mailing list