[MERGE] Readable and properly encoded diff headers
Adeodato Simó
dato at net.com.org.es
Tue Aug 15 15:15:33 BST 2006
Hi all.
While working on the commit_templates branch I noticed that the ===
headers as printed by `bzr diff` are not readable for non-ascii
filenames, since %r is used to format them. I created a branch some days
ago to fix this, and with some bits of advice from John yesterday, I
think I got it fixed now.
The attached patch changes %r to '%s' in the headers, and also encodes
the filenames in the proper encoding, not hardcoded 'utf8'. To achieve
the same for the ---/+++ headers, a wrapper around internal_diff() is
also created, and a path_encoding option is introduced for external_diff()
(maybe this last one was unnecessary and Popen encodes it to the proper
encoding, but without it it can't be tested).
Seeking +1 and inclusion in 0.10.
Mergeable URL:
http://people.debian.org/~adeodato/code/branches/bzr/fix_diff_header_encoding
Thanks,
--
Adeodato Simó dato at net.com.org.es
Debian Developer adeodato at debian.org
Listening to: Franz Ferdinand - This Fire
-------------- next part --------------
# Bazaar revision bundle v0.8
#
# message:
# Make `bzr diff` headers readable and properly encoded.
#
# _show_diff_trees():
# - use '%s' instead of %r to format filenames.
# - encode filenames to get_terminal_encoding() instead of hard-coded 'utf8'.
# - wrap internal_diff() with a diff_file() that sets path_encoding.
# - pass path_encoding parameter to external_diff().
#
# external_diff():
# - add path_encoding parameter, like internal_diff has.
#
# committer: Adeodato Simó <dato at net.com.org.es>
# date: Tue 2006-08-15 16:14:03.522000074 +0200
=== modified file bzrlib/diff.py
--- bzrlib/diff.py
+++ bzrlib/diff.py
@@ -89,7 +89,7 @@
def external_diff(old_filename, oldlines, new_filename, newlines, to_file,
- diff_opts):
+ diff_opts, path_encoding='utf8'):
"""Display a diff by calling out to the external diff program."""
# make sure our own output is properly ordered before the diff
to_file.flush()
@@ -116,9 +116,9 @@
if not diff_opts:
diff_opts = []
diffcmd = ['diff',
- '--label', old_filename,
+ '--label', old_filename.encode(path_encoding),
old_abspath,
- '--label', new_filename,
+ '--label', new_filename.encode(path_encoding),
new_abspath,
'--binary',
]
@@ -335,13 +335,18 @@
# TODO: Generation of pseudo-diffs for added/deleted files could
# be usefully made into a much faster special case.
+ encoding = bzrlib.osutils.get_terminal_encoding()
+
if external_diff_options:
assert isinstance(external_diff_options, basestring)
opts = external_diff_options.split()
def diff_file(olab, olines, nlab, nlines, to_file):
- external_diff(olab, olines, nlab, nlines, to_file, opts)
+ external_diff(olab, olines, nlab, nlines, to_file, opts,
+ path_encoding=encoding)
else:
- diff_file = internal_diff
+ def diff_file(olab, olines, nlab, nlines, to_file):
+ internal_diff(olab, olines, nlab, nlines, to_file,
+ path_encoding=encoding)
delta = new_tree.changes_from(old_tree,
specific_files=specific_files,
@@ -350,7 +355,7 @@
has_changes = 0
for path, file_id, kind in delta.removed:
has_changes = 1
- print >>to_file, '=== removed %s %r' % (kind, path.encode('utf8'))
+ print >>to_file, "=== removed %s '%s'" % (kind, path.encode(encoding))
old_name = '%s%s\t%s' % (old_label, path,
_patch_header_date(old_tree, file_id, path))
new_name = '%s%s\t%s' % (new_label, path, EPOCH_DATE)
@@ -358,7 +363,7 @@
new_name, None, None, to_file)
for path, file_id, kind in delta.added:
has_changes = 1
- print >>to_file, '=== added %s %r' % (kind, path.encode('utf8'))
+ print >>to_file, "=== added %s '%s'" % (kind, path.encode(encoding))
old_name = '%s%s\t%s' % (old_label, path, EPOCH_DATE)
new_name = '%s%s\t%s' % (new_label, path,
_patch_header_date(new_tree, file_id, path))
@@ -369,9 +374,8 @@
text_modified, meta_modified) in delta.renamed:
has_changes = 1
prop_str = get_prop_change(meta_modified)
- print >>to_file, '=== renamed %s %r => %r%s' % (
- kind, old_path.encode('utf8'),
- new_path.encode('utf8'), prop_str)
+ print >>to_file, "=== renamed %s '%s' => '%s'%s" % (kind,
+ old_path.encode(encoding), new_path.encode(encoding), prop_str)
old_name = '%s%s\t%s' % (old_label, old_path,
_patch_header_date(old_tree, file_id,
old_path))
@@ -384,7 +388,8 @@
for path, file_id, kind, text_modified, meta_modified in delta.modified:
has_changes = 1
prop_str = get_prop_change(meta_modified)
- print >>to_file, '=== modified %s %r%s' % (kind, path.encode('utf8'), prop_str)
+ print >>to_file, "=== modified %s '%s'%s" % (kind,
+ path.encode(encoding), prop_str)
old_name = '%s%s\t%s' % (old_label, path,
_patch_header_date(old_tree, file_id, path))
new_name = '%s%s\t%s' % (new_label, path,
=== modified file bzrlib/tests/blackbox/test_non_ascii.py
--- bzrlib/tests/blackbox/test_non_ascii.py
+++ bzrlib/tests/blackbox/test_non_ascii.py
@@ -342,13 +342,6 @@
# so there is nothing which would fail in ascii encoding
txt = bzr('ancestry')
- def test_diff(self):
- # TODO: jam 20060106 diff is a difficult one to test, because it
- # shouldn't encode the file contents, but it needs some sort
- # of encoding for the paths, etc which are displayed.
- open(self.info['filename'], 'ab').write('newline\n')
- txt = self.run_bzr('diff', retcode=1)[0]
-
def test_deleted(self):
bzr = self.run_bzr_decode
@@ -524,3 +517,43 @@
txt = bzr('missing', 'empty-tree', encoding='ascii', retcode=1)
self.assertEqual(-1, txt.find(msg))
self.assertNotEqual(-1, txt.find(msg.encode('ascii', 'replace')))
+
+ def get_diff_header_lines(self, retcode=1, external=False):
+ command = [ 'diff' ]
+ if external:
+ command.extend(' --diff-options -N'.split())
+ return self.run_bzr_decode(retcode=retcode, *command).splitlines()[0:3]
+
+ def test_diff_label_removed(self):
+ fname = self.info['filename']
+ self.wt.remove(fname)
+ lines = self.get_diff_header_lines()
+
+ self.assertEqual("=== removed file '%s'" % fname, lines[0])
+
+ def test_diff_label_added(self):
+ fname = self.info['filename'] + '-2'
+ file(fname, 'wt').write('\n')
+ self.wt.add(fname)
+ lines = self.get_diff_header_lines()
+
+ self.assertEqual("=== added file '%s'" % fname, lines[0])
+
+ def test_diff_label_renamed(self):
+ fname1 = self.info['filename']
+ fname2 = self.info['filename'] + '-2'
+ self.run_bzr('rename', fname1, fname2)
+ lines = self.get_diff_header_lines()
+
+ self.assertEqual("=== renamed file '%s' => '%s'" % (fname1, fname2), lines[0])
+
+ def test_diff_label_modified(self):
+ fname = self.info['filename']
+ file(fname, 'a').write('\n')
+
+ for external in (True, False):
+ lines = self.get_diff_header_lines(external=external)
+
+ self.assertEqual("=== modified file '%s'" % fname, lines[0])
+ self.failUnless(lines[1].startswith("--- %s" % fname))
+ self.failUnless(lines[2].startswith("+++ %s" % fname))
# revision id: dato at net.com.org.es-20060815141403-3d2bbbb5ddc15658
# sha1: 80b6315df7f5851847649763889302ccffd3760c
# inventory sha1: 602533dc1e8dad2f094cd70557e3f04a4d5f0423
# parent ids:
# pqm at pqm.ubuntu.com-20060815131500-a6ba92a17caa4aab
# base id: pqm at pqm.ubuntu.com-20060815131500-a6ba92a17caa4aab
# properties:
# branch-nick: fix_diff_header_encoding
More information about the bazaar
mailing list