[MERGE] Readable and properly encoded diff headers

Adeodato Simó dato at net.com.org.es
Tue Aug 15 15:15:33 BST 2006


Hi all.

While working on the commit_templates branch I noticed that the ===
headers as printed by `bzr diff` are not readable for non-ascii
filenames, since %r is used to format them. I created a branch some days
ago to fix this, and with some bits of advice from John yesterday, I
think I got it fixed now.

The attached patch changes %r to '%s' in the headers, and also encodes
the filenames in the proper encoding, not hardcoded 'utf8'. To achieve
the same for the ---/+++ headers, a wrapper around internal_diff() is
also created, and a path_encoding option is introduced for external_diff()
(maybe this last one was unnecessary and Popen encodes it to the proper
encoding, but without it it can't be tested).

Seeking +1 and inclusion in 0.10.

Mergeable URL:

  http://people.debian.org/~adeodato/code/branches/bzr/fix_diff_header_encoding

Thanks,

-- 
Adeodato Simó                                     dato at net.com.org.es
Debian Developer                                  adeodato at debian.org
 
                               Listening to: Franz Ferdinand - This Fire
-------------- next part --------------
# Bazaar revision bundle v0.8
#
# message:
#   Make `bzr diff` headers readable and properly encoded.
#   
#   _show_diff_trees(): 
#     - use '%s' instead of %r to format filenames.
#     - encode filenames to get_terminal_encoding() instead of hard-coded 'utf8'.
#     - wrap internal_diff() with a diff_file() that sets path_encoding.
#     - pass path_encoding parameter to external_diff().
#   
#   external_diff():
#     - add path_encoding parameter, like internal_diff has.
#   
# committer: Adeodato Simó <dato at net.com.org.es>
# date: Tue 2006-08-15 16:14:03.522000074 +0200

=== modified file bzrlib/diff.py
--- bzrlib/diff.py
+++ bzrlib/diff.py
@@ -89,7 +89,7 @@
 
 
 def external_diff(old_filename, oldlines, new_filename, newlines, to_file,
-                  diff_opts):
+                  diff_opts, path_encoding='utf8'):
     """Display a diff by calling out to the external diff program."""
     # make sure our own output is properly ordered before the diff
     to_file.flush()
@@ -116,9 +116,9 @@
         if not diff_opts:
             diff_opts = []
         diffcmd = ['diff',
-                   '--label', old_filename,
+                   '--label', old_filename.encode(path_encoding),
                    old_abspath,
-                   '--label', new_filename,
+                   '--label', new_filename.encode(path_encoding),
                    new_abspath,
                    '--binary',
                   ]
@@ -335,13 +335,18 @@
     # TODO: Generation of pseudo-diffs for added/deleted files could
     # be usefully made into a much faster special case.
 
+    encoding = bzrlib.osutils.get_terminal_encoding()
+
     if external_diff_options:
         assert isinstance(external_diff_options, basestring)
         opts = external_diff_options.split()
         def diff_file(olab, olines, nlab, nlines, to_file):
-            external_diff(olab, olines, nlab, nlines, to_file, opts)
+            external_diff(olab, olines, nlab, nlines, to_file, opts,
+                    path_encoding=encoding)
     else:
-        diff_file = internal_diff
+        def diff_file(olab, olines, nlab, nlines, to_file):
+            internal_diff(olab, olines, nlab, nlines, to_file,
+                    path_encoding=encoding)
     
     delta = new_tree.changes_from(old_tree,
         specific_files=specific_files,
@@ -350,7 +355,7 @@
     has_changes = 0
     for path, file_id, kind in delta.removed:
         has_changes = 1
-        print >>to_file, '=== removed %s %r' % (kind, path.encode('utf8'))
+        print >>to_file, "=== removed %s '%s'" % (kind, path.encode(encoding))
         old_name = '%s%s\t%s' % (old_label, path,
                                  _patch_header_date(old_tree, file_id, path))
         new_name = '%s%s\t%s' % (new_label, path, EPOCH_DATE)
@@ -358,7 +363,7 @@
                                          new_name, None, None, to_file)
     for path, file_id, kind in delta.added:
         has_changes = 1
-        print >>to_file, '=== added %s %r' % (kind, path.encode('utf8'))
+        print >>to_file, "=== added %s '%s'" % (kind, path.encode(encoding))
         old_name = '%s%s\t%s' % (old_label, path, EPOCH_DATE)
         new_name = '%s%s\t%s' % (new_label, path,
                                  _patch_header_date(new_tree, file_id, path))
@@ -369,9 +374,8 @@
          text_modified, meta_modified) in delta.renamed:
         has_changes = 1
         prop_str = get_prop_change(meta_modified)
-        print >>to_file, '=== renamed %s %r => %r%s' % (
-                    kind, old_path.encode('utf8'),
-                    new_path.encode('utf8'), prop_str)
+        print >>to_file, "=== renamed %s '%s' => '%s'%s" % (kind,
+                old_path.encode(encoding), new_path.encode(encoding), prop_str)
         old_name = '%s%s\t%s' % (old_label, old_path,
                                  _patch_header_date(old_tree, file_id,
                                                     old_path))
@@ -384,7 +388,8 @@
     for path, file_id, kind, text_modified, meta_modified in delta.modified:
         has_changes = 1
         prop_str = get_prop_change(meta_modified)
-        print >>to_file, '=== modified %s %r%s' % (kind, path.encode('utf8'), prop_str)
+        print >>to_file, "=== modified %s '%s'%s" % (kind,
+                path.encode(encoding), prop_str)
         old_name = '%s%s\t%s' % (old_label, path,
                                  _patch_header_date(old_tree, file_id, path))
         new_name = '%s%s\t%s' % (new_label, path,

=== modified file bzrlib/tests/blackbox/test_non_ascii.py
--- bzrlib/tests/blackbox/test_non_ascii.py
+++ bzrlib/tests/blackbox/test_non_ascii.py
@@ -342,13 +342,6 @@
         #       so there is nothing which would fail in ascii encoding
         txt = bzr('ancestry')
 
-    def test_diff(self):
-        # TODO: jam 20060106 diff is a difficult one to test, because it 
-        #       shouldn't encode the file contents, but it needs some sort
-        #       of encoding for the paths, etc which are displayed.
-        open(self.info['filename'], 'ab').write('newline\n')
-        txt = self.run_bzr('diff', retcode=1)[0]
-
     def test_deleted(self):
         bzr = self.run_bzr_decode
 
@@ -524,3 +517,43 @@
         txt = bzr('missing', 'empty-tree', encoding='ascii', retcode=1)
         self.assertEqual(-1, txt.find(msg))
         self.assertNotEqual(-1, txt.find(msg.encode('ascii', 'replace')))
+
+    def get_diff_header_lines(self, retcode=1, external=False):
+        command = [ 'diff' ]
+        if external:
+            command.extend(' --diff-options -N'.split())
+        return self.run_bzr_decode(retcode=retcode, *command).splitlines()[0:3]
+
+    def test_diff_label_removed(self):
+        fname = self.info['filename']
+        self.wt.remove(fname)
+        lines = self.get_diff_header_lines()
+
+        self.assertEqual("=== removed file '%s'" % fname, lines[0])
+
+    def test_diff_label_added(self):
+        fname = self.info['filename'] + '-2'
+        file(fname, 'wt').write('\n')
+        self.wt.add(fname)
+        lines = self.get_diff_header_lines()
+
+        self.assertEqual("=== added file '%s'" % fname, lines[0])
+
+    def test_diff_label_renamed(self):
+        fname1 = self.info['filename']
+        fname2 = self.info['filename'] + '-2'
+        self.run_bzr('rename', fname1, fname2)
+        lines = self.get_diff_header_lines()
+
+        self.assertEqual("=== renamed file '%s' => '%s'" % (fname1, fname2), lines[0])
+
+    def test_diff_label_modified(self):
+        fname = self.info['filename']
+        file(fname, 'a').write('\n')
+
+        for external in (True, False):
+            lines = self.get_diff_header_lines(external=external)
+
+            self.assertEqual("=== modified file '%s'" % fname, lines[0])
+            self.failUnless(lines[1].startswith("--- %s" % fname))
+            self.failUnless(lines[2].startswith("+++ %s" % fname))

# revision id: dato at net.com.org.es-20060815141403-3d2bbbb5ddc15658
# sha1: 80b6315df7f5851847649763889302ccffd3760c
# inventory sha1: 602533dc1e8dad2f094cd70557e3f04a4d5f0423
# parent ids:
#   pqm at pqm.ubuntu.com-20060815131500-a6ba92a17caa4aab
# base id: pqm at pqm.ubuntu.com-20060815131500-a6ba92a17caa4aab
# properties:
#   branch-nick: fix_diff_header_encoding



More information about the bazaar mailing list