[MERGE] Readable and properly encoded diff headers

Adeodato Simó dato at net.com.org.es
Tue Aug 15 18:18:11 BST 2006


* John Arbash Meinel [Tue, 15 Aug 2006 11:06:23 -0500]:

> I really do believe you want to put the parameter on show_diff_trees().
> It isn't up to bzrlib/diff.py to determine what the output encoding is.

Okay, I've done this now.

> That is another thing that you aren't testing. Graceful degradation. If
> you have LANG=C, and you do a diff on the file 'جوجو.txt' what should be
> output?

Oh, thanks for spotting this. I now pass 'replace' to path.encode(), and
I've extended test_diff_label_modified to check for this as well.

Attaching bundles for these changes, present also the HTTP branch.

-- 
Adeodato Simó                                     dato at net.com.org.es
Debian Developer                                  adeodato at debian.org
 
                                Listening to: Leonard Cohen - The window
-------------- next part --------------
# Bazaar revision bundle v0.8
#
# message:
#   Do not call get_terminal_encoding() in _show_diff_trees().
#   
#   Instead, get the preferred encoding for the paths in headers from the
#   new path_encoding parameter, which cmd_diff().run sets to the terminal
#   encoding now.
#   
#   Suggested by John Arbash Meinel after the first revision of the patch.
#   
# committer: Adeodato Simó <dato at net.com.org.es>
# date: Tue 2006-08-15 18:57:09.581000090 +0200

=== modified file bzrlib/builtins.py
--- bzrlib/builtins.py
+++ bzrlib/builtins.py
@@ -1135,6 +1135,8 @@
             prefix=None):
         from bzrlib.diff import diff_cmd_helper, show_diff_trees
 
+        path_encoding = osutils.get_terminal_encoding()
+
         if (prefix is None) or (prefix == '0'):
             # diff -p0 format
             old_label = ''
@@ -1177,11 +1179,13 @@
             if (len(revision) == 1) or (revision[1].spec is None):
                 return diff_cmd_helper(tree1, file_list, diff_options,
                                        revision[0], 
-                                       old_label=old_label, new_label=new_label)
+                                       old_label=old_label, new_label=new_label,
+                                       path_encoding=path_encoding)
             elif len(revision) == 2:
                 return diff_cmd_helper(tree1, file_list, diff_options,
                                        revision[0], revision[1],
-                                       old_label=old_label, new_label=new_label)
+                                       old_label=old_label, new_label=new_label,
+                                       path_encoding=path_encoding)
             else:
                 raise BzrCommandError('bzr diff --revision takes exactly one or two revision identifiers')
         else:
@@ -1189,10 +1193,12 @@
                 return show_diff_trees(tree1, tree2, sys.stdout, 
                                        specific_files=file_list,
                                        external_diff_options=diff_options,
-                                       old_label=old_label, new_label=new_label)
+                                       old_label=old_label, new_label=new_label,
+                                       path_encoding=path_encoding)
             else:
                 return diff_cmd_helper(tree1, file_list, diff_options,
-                                       old_label=old_label, new_label=new_label)
+                                       old_label=old_label, new_label=new_label,
+                                       path_encoding=path_encoding)
 
 
 class cmd_deleted(Command):

=== modified file bzrlib/diff.py
--- bzrlib/diff.py
+++ bzrlib/diff.py
@@ -244,7 +244,7 @@
 
 def diff_cmd_helper(tree, specific_files, external_diff_options, 
                     old_revision_spec=None, new_revision_spec=None,
-                    old_label='a/', new_label='b/'):
+                    old_label='a/', new_label='b/', path_encoding='utf8'):
     """Helper for cmd_diff.
 
    tree 
@@ -292,13 +292,13 @@
     return show_diff_trees(old_tree, new_tree, sys.stdout, specific_files,
                            external_diff_options,
                            old_label=old_label, new_label=new_label,
-                           extra_trees=extra_trees)
+                           extra_trees=extra_trees, path_encoding=path_encoding)
 
 
 def show_diff_trees(old_tree, new_tree, to_file, specific_files=None,
                     external_diff_options=None,
                     old_label='a/', new_label='b/',
-                    extra_trees=None):
+                    extra_trees=None, path_encoding='utf8'):
     """Show in text form the changes from one tree to another.
 
     to_files
@@ -317,7 +317,8 @@
             return _show_diff_trees(old_tree, new_tree, to_file,
                                     specific_files, external_diff_options,
                                     old_label=old_label, new_label=new_label,
-                                    extra_trees=extra_trees)
+                                    extra_trees=extra_trees,
+                                    path_encoding=path_encoding)
         finally:
             new_tree.unlock()
     finally:
@@ -326,7 +327,8 @@
 
 def _show_diff_trees(old_tree, new_tree, to_file,
                      specific_files, external_diff_options, 
-                     old_label='a/', new_label='b/', extra_trees=None):
+                     old_label='a/', new_label='b/', extra_trees=None,
+                     path_encoding='utf8'):
 
     # GNU Patch uses the epoch date to detect files that are being added
     # or removed in a diff.
@@ -335,18 +337,16 @@
     # TODO: Generation of pseudo-diffs for added/deleted files could
     # be usefully made into a much faster special case.
 
-    encoding = bzrlib.osutils.get_terminal_encoding()
-
     if external_diff_options:
         assert isinstance(external_diff_options, basestring)
         opts = external_diff_options.split()
         def diff_file(olab, olines, nlab, nlines, to_file):
             external_diff(olab, olines, nlab, nlines, to_file, opts,
-                    path_encoding=encoding)
+                    path_encoding=path_encoding)
     else:
         def diff_file(olab, olines, nlab, nlines, to_file):
             internal_diff(olab, olines, nlab, nlines, to_file,
-                    path_encoding=encoding)
+                    path_encoding=path_encoding)
     
     delta = new_tree.changes_from(old_tree,
         specific_files=specific_files,
@@ -355,7 +355,8 @@
     has_changes = 0
     for path, file_id, kind in delta.removed:
         has_changes = 1
-        print >>to_file, "=== removed %s '%s'" % (kind, path.encode(encoding))
+        print >>to_file, "=== removed %s '%s'" % (kind,
+                path.encode(path_encoding))
         old_name = '%s%s\t%s' % (old_label, path,
                                  _patch_header_date(old_tree, file_id, path))
         new_name = '%s%s\t%s' % (new_label, path, EPOCH_DATE)
@@ -363,7 +364,8 @@
                                          new_name, None, None, to_file)
     for path, file_id, kind in delta.added:
         has_changes = 1
-        print >>to_file, "=== added %s '%s'" % (kind, path.encode(encoding))
+        print >>to_file, "=== added %s '%s'" % (kind,
+                path.encode(path_encoding))
         old_name = '%s%s\t%s' % (old_label, path, EPOCH_DATE)
         new_name = '%s%s\t%s' % (new_label, path,
                                  _patch_header_date(new_tree, file_id, path))
@@ -374,8 +376,9 @@
          text_modified, meta_modified) in delta.renamed:
         has_changes = 1
         prop_str = get_prop_change(meta_modified)
-        print >>to_file, "=== renamed %s '%s' => '%s'%s" % (kind,
-                old_path.encode(encoding), new_path.encode(encoding), prop_str)
+        print >>to_file, "=== renamed %s '%s' => '%s'%s" % (
+                kind, old_path.encode(path_encoding),
+                new_path.encode(path_encoding), prop_str)
         old_name = '%s%s\t%s' % (old_label, old_path,
                                  _patch_header_date(old_tree, file_id,
                                                     old_path))
@@ -389,7 +392,7 @@
         has_changes = 1
         prop_str = get_prop_change(meta_modified)
         print >>to_file, "=== modified %s '%s'%s" % (kind,
-                path.encode(encoding), prop_str)
+                path.encode(path_encoding), prop_str)
         old_name = '%s%s\t%s' % (old_label, path,
                                  _patch_header_date(old_tree, file_id, path))
         new_name = '%s%s\t%s' % (new_label, path,

# revision id: dato at net.com.org.es-20060815165709-85a4640820ceaa36
# sha1: 174bb95f5372d6ea0f5c842c087e08044f338090
# inventory sha1: ffb8da855aee76e0b0ee310b4dbe1fe8f0aeb16b
# parent ids:
#   dato at net.com.org.es-20060815141403-3d2bbbb5ddc15658
# base id: dato at net.com.org.es-20060815141403-3d2bbbb5ddc15658
# properties:
#   branch-nick: fix_diff_header_encoding

-------------- next part --------------
# Bazaar revision bundle v0.8
#
# message:
#   Pass 'replace' as argument when encoding paths in diff.py.
#   
# committer: Adeodato Simó <dato at net.com.org.es>
# date: Tue 2006-08-15 19:14:01.638999939 +0200

=== modified file bzrlib/diff.py
--- bzrlib/diff.py
+++ bzrlib/diff.py
@@ -66,8 +66,8 @@
     if sequence_matcher is None:
         sequence_matcher = bzrlib.patiencediff.PatienceSequenceMatcher
     ud = unified_diff(oldlines, newlines,
-                      fromfile=old_filename.encode(path_encoding),
-                      tofile=new_filename.encode(path_encoding),
+                      fromfile=old_filename.encode(path_encoding, 'replace'),
+                      tofile=new_filename.encode(path_encoding, 'replace'),
                       sequencematcher=sequence_matcher)
 
     ud = list(ud)
@@ -116,9 +116,9 @@
         if not diff_opts:
             diff_opts = []
         diffcmd = ['diff',
-                   '--label', old_filename.encode(path_encoding),
+                   '--label', old_filename.encode(path_encoding, 'replace'),
                    old_abspath,
-                   '--label', new_filename.encode(path_encoding),
+                   '--label', new_filename.encode(path_encoding, 'replace'),
                    new_abspath,
                    '--binary',
                   ]
@@ -356,7 +356,7 @@
     for path, file_id, kind in delta.removed:
         has_changes = 1
         print >>to_file, "=== removed %s '%s'" % (kind,
-                path.encode(path_encoding))
+                path.encode(path_encoding, 'replace'))
         old_name = '%s%s\t%s' % (old_label, path,
                                  _patch_header_date(old_tree, file_id, path))
         new_name = '%s%s\t%s' % (new_label, path, EPOCH_DATE)
@@ -365,7 +365,7 @@
     for path, file_id, kind in delta.added:
         has_changes = 1
         print >>to_file, "=== added %s '%s'" % (kind,
-                path.encode(path_encoding))
+                path.encode(path_encoding, 'replace'))
         old_name = '%s%s\t%s' % (old_label, path, EPOCH_DATE)
         new_name = '%s%s\t%s' % (new_label, path,
                                  _patch_header_date(new_tree, file_id, path))
@@ -377,8 +377,8 @@
         has_changes = 1
         prop_str = get_prop_change(meta_modified)
         print >>to_file, "=== renamed %s '%s' => '%s'%s" % (
-                kind, old_path.encode(path_encoding),
-                new_path.encode(path_encoding), prop_str)
+                kind, old_path.encode(path_encoding, 'replace'),
+                new_path.encode(path_encoding, 'replace'), prop_str)
         old_name = '%s%s\t%s' % (old_label, old_path,
                                  _patch_header_date(old_tree, file_id,
                                                     old_path))
@@ -392,7 +392,7 @@
         has_changes = 1
         prop_str = get_prop_change(meta_modified)
         print >>to_file, "=== modified %s '%s'%s" % (kind,
-                path.encode(path_encoding), prop_str)
+                path.encode(path_encoding, 'replace'), prop_str)
         old_name = '%s%s\t%s' % (old_label, path,
                                  _patch_header_date(old_tree, file_id, path))
         new_name = '%s%s\t%s' % (new_label, path,

=== modified file bzrlib/tests/blackbox/test_non_ascii.py
--- bzrlib/tests/blackbox/test_non_ascii.py
+++ bzrlib/tests/blackbox/test_non_ascii.py
@@ -518,11 +518,16 @@
         self.assertEqual(-1, txt.find(msg))
         self.assertNotEqual(-1, txt.find(msg.encode('ascii', 'replace')))
 
-    def get_diff_header_lines(self, retcode=1, external=False):
+    def get_diff_header_lines(self, retcode=1, external=False, encoding=None):
         command = [ 'diff' ]
         if external:
             command.extend(' --diff-options -N'.split())
-        return self.run_bzr_decode(retcode=retcode, *command).splitlines()[0:3]
+
+        kwargs = { 'retcode': retcode }
+        if encoding is not None:
+            kwargs['encoding'] = encoding
+
+        return self.run_bzr_decode(*command, **kwargs).splitlines()[0:3]
 
     def test_diff_label_removed(self):
         fname = self.info['filename']
@@ -549,6 +554,7 @@
 
     def test_diff_label_modified(self):
         fname = self.info['filename']
+        fname_ascii = fname.encode('ascii', 'replace')
         file(fname, 'a').write('\n')
 
         for external in (True, False):
@@ -557,3 +563,10 @@
             self.assertEqual("=== modified file '%s'" % fname, lines[0])
             self.failUnless(lines[1].startswith("--- %s" % fname))
             self.failUnless(lines[2].startswith("+++ %s" % fname))
+
+            lines = self.get_diff_header_lines(external=external, encoding='ascii')
+
+            self.assertEqual("=== modified file '%s'" % fname_ascii, lines[0])
+            self.failUnless(lines[1].startswith("--- %s" % fname_ascii))
+            self.failUnless(lines[2].startswith("+++ %s" % fname_ascii))
+

# revision id: dato at net.com.org.es-20060815171401-48ad9dbe668fea0e
# sha1: 5aac123729c0fdd17362c301303eb6c02b6ad17b
# inventory sha1: 0559d5a224becb690afbb1e63a65e3a4e5e7530e
# parent ids:
#   dato at net.com.org.es-20060815165709-85a4640820ceaa36
# base id: dato at net.com.org.es-20060815165709-85a4640820ceaa36
# properties:
#   branch-nick: fix_diff_header_encoding



More information about the bazaar mailing list