[MERGE] Readable and properly encoded diff headers
Adeodato Simó
dato at net.com.org.es
Tue Aug 15 18:18:11 BST 2006
* John Arbash Meinel [Tue, 15 Aug 2006 11:06:23 -0500]:
> I really do believe you want to put the parameter on show_diff_trees().
> It isn't up to bzrlib/diff.py to determine what the output encoding is.
Okay, I've done this now.
> That is another thing that you aren't testing. Graceful degradation. If
> you have LANG=C, and you do a diff on the file 'جوجو.txt' what should be
> output?
Oh, thanks for spotting this. I now pass 'replace' to path.encode(), and
I've extended test_diff_label_modified to check for this as well.
Attaching bundles for these changes, present also the HTTP branch.
--
Adeodato Simó dato at net.com.org.es
Debian Developer adeodato at debian.org
Listening to: Leonard Cohen - The window
-------------- next part --------------
# Bazaar revision bundle v0.8
#
# message:
# Do not call get_terminal_encoding() in _show_diff_trees().
#
# Instead, get the preferred encoding for the paths in headers from the
# new path_encoding parameter, which cmd_diff().run sets to the terminal
# encoding now.
#
# Suggested by John Arbash Meinel after the first revision of the patch.
#
# committer: Adeodato Simó <dato at net.com.org.es>
# date: Tue 2006-08-15 18:57:09.581000090 +0200
=== modified file bzrlib/builtins.py
--- bzrlib/builtins.py
+++ bzrlib/builtins.py
@@ -1135,6 +1135,8 @@
prefix=None):
from bzrlib.diff import diff_cmd_helper, show_diff_trees
+ path_encoding = osutils.get_terminal_encoding()
+
if (prefix is None) or (prefix == '0'):
# diff -p0 format
old_label = ''
@@ -1177,11 +1179,13 @@
if (len(revision) == 1) or (revision[1].spec is None):
return diff_cmd_helper(tree1, file_list, diff_options,
revision[0],
- old_label=old_label, new_label=new_label)
+ old_label=old_label, new_label=new_label,
+ path_encoding=path_encoding)
elif len(revision) == 2:
return diff_cmd_helper(tree1, file_list, diff_options,
revision[0], revision[1],
- old_label=old_label, new_label=new_label)
+ old_label=old_label, new_label=new_label,
+ path_encoding=path_encoding)
else:
raise BzrCommandError('bzr diff --revision takes exactly one or two revision identifiers')
else:
@@ -1189,10 +1193,12 @@
return show_diff_trees(tree1, tree2, sys.stdout,
specific_files=file_list,
external_diff_options=diff_options,
- old_label=old_label, new_label=new_label)
+ old_label=old_label, new_label=new_label,
+ path_encoding=path_encoding)
else:
return diff_cmd_helper(tree1, file_list, diff_options,
- old_label=old_label, new_label=new_label)
+ old_label=old_label, new_label=new_label,
+ path_encoding=path_encoding)
class cmd_deleted(Command):
=== modified file bzrlib/diff.py
--- bzrlib/diff.py
+++ bzrlib/diff.py
@@ -244,7 +244,7 @@
def diff_cmd_helper(tree, specific_files, external_diff_options,
old_revision_spec=None, new_revision_spec=None,
- old_label='a/', new_label='b/'):
+ old_label='a/', new_label='b/', path_encoding='utf8'):
"""Helper for cmd_diff.
tree
@@ -292,13 +292,13 @@
return show_diff_trees(old_tree, new_tree, sys.stdout, specific_files,
external_diff_options,
old_label=old_label, new_label=new_label,
- extra_trees=extra_trees)
+ extra_trees=extra_trees, path_encoding=path_encoding)
def show_diff_trees(old_tree, new_tree, to_file, specific_files=None,
external_diff_options=None,
old_label='a/', new_label='b/',
- extra_trees=None):
+ extra_trees=None, path_encoding='utf8'):
"""Show in text form the changes from one tree to another.
to_files
@@ -317,7 +317,8 @@
return _show_diff_trees(old_tree, new_tree, to_file,
specific_files, external_diff_options,
old_label=old_label, new_label=new_label,
- extra_trees=extra_trees)
+ extra_trees=extra_trees,
+ path_encoding=path_encoding)
finally:
new_tree.unlock()
finally:
@@ -326,7 +327,8 @@
def _show_diff_trees(old_tree, new_tree, to_file,
specific_files, external_diff_options,
- old_label='a/', new_label='b/', extra_trees=None):
+ old_label='a/', new_label='b/', extra_trees=None,
+ path_encoding='utf8'):
# GNU Patch uses the epoch date to detect files that are being added
# or removed in a diff.
@@ -335,18 +337,16 @@
# TODO: Generation of pseudo-diffs for added/deleted files could
# be usefully made into a much faster special case.
- encoding = bzrlib.osutils.get_terminal_encoding()
-
if external_diff_options:
assert isinstance(external_diff_options, basestring)
opts = external_diff_options.split()
def diff_file(olab, olines, nlab, nlines, to_file):
external_diff(olab, olines, nlab, nlines, to_file, opts,
- path_encoding=encoding)
+ path_encoding=path_encoding)
else:
def diff_file(olab, olines, nlab, nlines, to_file):
internal_diff(olab, olines, nlab, nlines, to_file,
- path_encoding=encoding)
+ path_encoding=path_encoding)
delta = new_tree.changes_from(old_tree,
specific_files=specific_files,
@@ -355,7 +355,8 @@
has_changes = 0
for path, file_id, kind in delta.removed:
has_changes = 1
- print >>to_file, "=== removed %s '%s'" % (kind, path.encode(encoding))
+ print >>to_file, "=== removed %s '%s'" % (kind,
+ path.encode(path_encoding))
old_name = '%s%s\t%s' % (old_label, path,
_patch_header_date(old_tree, file_id, path))
new_name = '%s%s\t%s' % (new_label, path, EPOCH_DATE)
@@ -363,7 +364,8 @@
new_name, None, None, to_file)
for path, file_id, kind in delta.added:
has_changes = 1
- print >>to_file, "=== added %s '%s'" % (kind, path.encode(encoding))
+ print >>to_file, "=== added %s '%s'" % (kind,
+ path.encode(path_encoding))
old_name = '%s%s\t%s' % (old_label, path, EPOCH_DATE)
new_name = '%s%s\t%s' % (new_label, path,
_patch_header_date(new_tree, file_id, path))
@@ -374,8 +376,9 @@
text_modified, meta_modified) in delta.renamed:
has_changes = 1
prop_str = get_prop_change(meta_modified)
- print >>to_file, "=== renamed %s '%s' => '%s'%s" % (kind,
- old_path.encode(encoding), new_path.encode(encoding), prop_str)
+ print >>to_file, "=== renamed %s '%s' => '%s'%s" % (
+ kind, old_path.encode(path_encoding),
+ new_path.encode(path_encoding), prop_str)
old_name = '%s%s\t%s' % (old_label, old_path,
_patch_header_date(old_tree, file_id,
old_path))
@@ -389,7 +392,7 @@
has_changes = 1
prop_str = get_prop_change(meta_modified)
print >>to_file, "=== modified %s '%s'%s" % (kind,
- path.encode(encoding), prop_str)
+ path.encode(path_encoding), prop_str)
old_name = '%s%s\t%s' % (old_label, path,
_patch_header_date(old_tree, file_id, path))
new_name = '%s%s\t%s' % (new_label, path,
# revision id: dato at net.com.org.es-20060815165709-85a4640820ceaa36
# sha1: 174bb95f5372d6ea0f5c842c087e08044f338090
# inventory sha1: ffb8da855aee76e0b0ee310b4dbe1fe8f0aeb16b
# parent ids:
# dato at net.com.org.es-20060815141403-3d2bbbb5ddc15658
# base id: dato at net.com.org.es-20060815141403-3d2bbbb5ddc15658
# properties:
# branch-nick: fix_diff_header_encoding
-------------- next part --------------
# Bazaar revision bundle v0.8
#
# message:
# Pass 'replace' as argument when encoding paths in diff.py.
#
# committer: Adeodato Simó <dato at net.com.org.es>
# date: Tue 2006-08-15 19:14:01.638999939 +0200
=== modified file bzrlib/diff.py
--- bzrlib/diff.py
+++ bzrlib/diff.py
@@ -66,8 +66,8 @@
if sequence_matcher is None:
sequence_matcher = bzrlib.patiencediff.PatienceSequenceMatcher
ud = unified_diff(oldlines, newlines,
- fromfile=old_filename.encode(path_encoding),
- tofile=new_filename.encode(path_encoding),
+ fromfile=old_filename.encode(path_encoding, 'replace'),
+ tofile=new_filename.encode(path_encoding, 'replace'),
sequencematcher=sequence_matcher)
ud = list(ud)
@@ -116,9 +116,9 @@
if not diff_opts:
diff_opts = []
diffcmd = ['diff',
- '--label', old_filename.encode(path_encoding),
+ '--label', old_filename.encode(path_encoding, 'replace'),
old_abspath,
- '--label', new_filename.encode(path_encoding),
+ '--label', new_filename.encode(path_encoding, 'replace'),
new_abspath,
'--binary',
]
@@ -356,7 +356,7 @@
for path, file_id, kind in delta.removed:
has_changes = 1
print >>to_file, "=== removed %s '%s'" % (kind,
- path.encode(path_encoding))
+ path.encode(path_encoding, 'replace'))
old_name = '%s%s\t%s' % (old_label, path,
_patch_header_date(old_tree, file_id, path))
new_name = '%s%s\t%s' % (new_label, path, EPOCH_DATE)
@@ -365,7 +365,7 @@
for path, file_id, kind in delta.added:
has_changes = 1
print >>to_file, "=== added %s '%s'" % (kind,
- path.encode(path_encoding))
+ path.encode(path_encoding, 'replace'))
old_name = '%s%s\t%s' % (old_label, path, EPOCH_DATE)
new_name = '%s%s\t%s' % (new_label, path,
_patch_header_date(new_tree, file_id, path))
@@ -377,8 +377,8 @@
has_changes = 1
prop_str = get_prop_change(meta_modified)
print >>to_file, "=== renamed %s '%s' => '%s'%s" % (
- kind, old_path.encode(path_encoding),
- new_path.encode(path_encoding), prop_str)
+ kind, old_path.encode(path_encoding, 'replace'),
+ new_path.encode(path_encoding, 'replace'), prop_str)
old_name = '%s%s\t%s' % (old_label, old_path,
_patch_header_date(old_tree, file_id,
old_path))
@@ -392,7 +392,7 @@
has_changes = 1
prop_str = get_prop_change(meta_modified)
print >>to_file, "=== modified %s '%s'%s" % (kind,
- path.encode(path_encoding), prop_str)
+ path.encode(path_encoding, 'replace'), prop_str)
old_name = '%s%s\t%s' % (old_label, path,
_patch_header_date(old_tree, file_id, path))
new_name = '%s%s\t%s' % (new_label, path,
=== modified file bzrlib/tests/blackbox/test_non_ascii.py
--- bzrlib/tests/blackbox/test_non_ascii.py
+++ bzrlib/tests/blackbox/test_non_ascii.py
@@ -518,11 +518,16 @@
self.assertEqual(-1, txt.find(msg))
self.assertNotEqual(-1, txt.find(msg.encode('ascii', 'replace')))
- def get_diff_header_lines(self, retcode=1, external=False):
+ def get_diff_header_lines(self, retcode=1, external=False, encoding=None):
command = [ 'diff' ]
if external:
command.extend(' --diff-options -N'.split())
- return self.run_bzr_decode(retcode=retcode, *command).splitlines()[0:3]
+
+ kwargs = { 'retcode': retcode }
+ if encoding is not None:
+ kwargs['encoding'] = encoding
+
+ return self.run_bzr_decode(*command, **kwargs).splitlines()[0:3]
def test_diff_label_removed(self):
fname = self.info['filename']
@@ -549,6 +554,7 @@
def test_diff_label_modified(self):
fname = self.info['filename']
+ fname_ascii = fname.encode('ascii', 'replace')
file(fname, 'a').write('\n')
for external in (True, False):
@@ -557,3 +563,10 @@
self.assertEqual("=== modified file '%s'" % fname, lines[0])
self.failUnless(lines[1].startswith("--- %s" % fname))
self.failUnless(lines[2].startswith("+++ %s" % fname))
+
+ lines = self.get_diff_header_lines(external=external, encoding='ascii')
+
+ self.assertEqual("=== modified file '%s'" % fname_ascii, lines[0])
+ self.failUnless(lines[1].startswith("--- %s" % fname_ascii))
+ self.failUnless(lines[2].startswith("+++ %s" % fname_ascii))
+
# revision id: dato at net.com.org.es-20060815171401-48ad9dbe668fea0e
# sha1: 5aac123729c0fdd17362c301303eb6c02b6ad17b
# inventory sha1: 0559d5a224becb690afbb1e63a65e3a4e5e7530e
# parent ids:
# dato at net.com.org.es-20060815165709-85a4640820ceaa36
# base id: dato at net.com.org.es-20060815165709-85a4640820ceaa36
# properties:
# branch-nick: fix_diff_header_encoding
More information about the bazaar
mailing list