Rev 6367: (gz) Override Py_FileSystemDefaultEncoding to utf-8 from ascii for the bzr in file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/

Patch Queue Manager pqm at pqm.ubuntu.com
Wed Dec 14 17:32:58 UTC 2011


At file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 6367 [merge]
revision-id: pqm at pqm.ubuntu.com-20111214173258-gt3ywqak3no5ybo0
parent: pqm at pqm.ubuntu.com-20111214155359-u6b2wjk5efk20g06
parent: martin.packman at canonical.com-20111214170711-giown090rjfgrsyo
committer: Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2011-12-14 17:32:58 +0000
message:
  (gz) Override Py_FileSystemDefaultEncoding to utf-8 from ascii for the bzr
   script (Martin Packman)
modified:
  bzr                            bzr.py-20050313053754-5485f144c7006fa6
  bzrlib/__init__.py             __init__.py-20050309040759-33e65acf91bbcd5d
  bzrlib/osutils.py              osutils.py-20050309040759-eeaff12fbf77ac86
  bzrlib/tests/blackbox/test_exceptions.py test_exceptions.py-20060604211237-yi2cxg0ose3xk4id-1
  bzrlib/tests/test_osutils.py   test_osutils.py-20051201224856-e48ee24c12182989
  doc/en/release-notes/bzr-2.5.txt bzr2.5.txt-20110708125756-587p0hpw7oke4h05-1
  doc/en/whats-new/whats-new-in-2.5.txt whatsnewin2.5.txt-20110711065040-xz9b4xba1qzlwp7m-1
=== modified file 'bzr'
--- a/bzr	2011-10-27 15:38:14 +0000
+++ b/bzr	2011-12-09 17:13:21 +0000
@@ -84,6 +84,10 @@
             '  Although this should be no problem for bzr itself, it might\n'
             '  cause problems with some plugins. To investigate the issue,\n'
             '  look at the output of the locale(1p) tool.\n' % e)
+    # Use better default than ascii with posix filesystems that deal in bytes
+    # natively even when the C locale or no locale at all is given. Note that
+    # we need an immortal string for the hack, hence the lack of a hyphen.
+    sys._bzr_default_fs_enc = "utf8"
 
 
 # The python2.6 release includes some libraries that have deprecation warnings

=== modified file 'bzrlib/__init__.py'
--- a/bzrlib/__init__.py	2011-12-08 09:24:06 +0000
+++ b/bzrlib/__init__.py	2011-12-14 16:26:32 +0000
@@ -37,6 +37,7 @@
 # timestamps relative to program start in the log file kept by bzrlib.trace.
 _start_time = time.time()
 
+import codecs
 import sys
 
 
@@ -131,6 +132,47 @@
 __version__ = _format_version_tuple(version_info)
 version_string = __version__
 
+
+def _patch_filesystem_default_encoding(new_enc):
+    """Change the Python process global encoding for filesystem names
+    
+    The effect is to change how open() and other builtin functions handle
+    unicode filenames on posix systems. This should only be done near startup.
+
+    The new encoding string passed to this function must survive until process
+    termination, otherwise the interpreter may access uninitialized memory.
+    The use of intern() may defer breakage but is not enough; the string
+    object should be secure against module reloading and during teardown.
+    """
+    try:
+        import ctypes
+    except ImportError:
+        return
+    pythonapi = getattr(ctypes, "pythonapi", None)
+    if pythonapi is None:
+        # Not CPython ctypes implementation
+        return
+    old_ptr = ctypes.c_void_p.in_dll(pythonapi, "Py_FileSystemDefaultEncoding")
+    new_ptr = ctypes.cast(ctypes.c_char_p(intern(new_enc)), ctypes.c_void_p)
+    old_ptr.value = new_ptr.value
+    if sys.getfilesystemencoding() != new_enc:
+        raise RuntimeError("Failed to change the filesystem default encoding")
+    return new_enc
+
+
+# When running under the bzr script, override bad filesystem default encoding.
+# This is not safe to do for all users of bzrlib, other scripts should instead
+# just ensure a usable locale is set via the $LANG variable on posix systems.
+_fs_enc = sys.getfilesystemencoding()
+if getattr(sys, "_bzr_default_fs_enc", None) is not None:
+    if (_fs_enc is None or codecs.lookup(_fs_enc).name == "ascii"):
+        _fs_enc = _patch_filesystem_default_encoding(sys._bzr_default_fs_enc)
+if _fs_enc is None:
+    _fs_enc = "ascii"
+else:
+    _fs_enc = codecs.lookup(_fs_enc).name
+
+
 # bzr has various bits of global state that are slowly being eliminated.
 # This variable is intended to permit any new state-like things to be attached
 # to a library_state.BzrLibraryState object rather than getting new global

=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py	2011-12-05 14:21:55 +0000
+++ b/bzrlib/osutils.py	2011-12-13 17:10:47 +0000
@@ -63,7 +63,7 @@
 
 
 import bzrlib
-from bzrlib import symbol_versioning
+from bzrlib import symbol_versioning, _fs_enc
 
 
 # Cross platform wall-clock time functionality with decent resolution.
@@ -293,7 +293,6 @@
 # choke on a Unicode string containing a relative path if
 # os.getcwd() returns a non-sys.getdefaultencoding()-encoded
 # string.
-_fs_enc = sys.getfilesystemencoding() or 'utf-8'
 def _posix_abspath(path):
     # jam 20060426 rather than encoding to fsencoding
     # copy posixpath.abspath, but use os.getcwdu instead
@@ -1771,7 +1770,6 @@
     """
     global _selected_dir_reader
     if _selected_dir_reader is None:
-        fs_encoding = _fs_enc.upper()
         if sys.platform == "win32" and win32utils.winver == 'Windows NT':
             # Win98 doesn't have unicode apis like FindFirstFileW
             # TODO: We possibly could support Win98 by falling back to the
@@ -1783,8 +1781,7 @@
                 _selected_dir_reader = Win32ReadDir()
             except ImportError:
                 pass
-        elif fs_encoding in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):
-            # ANSI_X3.4-1968 is a form of ASCII
+        elif _fs_enc in ('utf-8', 'ascii'):
             try:
                 from bzrlib._readdir_pyx import UTF8DirReader
                 _selected_dir_reader = UTF8DirReader()

=== modified file 'bzrlib/tests/blackbox/test_exceptions.py'
--- a/bzrlib/tests/blackbox/test_exceptions.py	2011-09-16 20:42:31 +0000
+++ b/bzrlib/tests/blackbox/test_exceptions.py	2011-12-09 17:09:06 +0000
@@ -60,6 +60,14 @@
             flags=re.MULTILINE)
         self.assertEquals(out, "")
 
+    def test_utf8_default_fs_enc(self):
+        """In the C locale bzr treats a posix filesystem as UTF-8 encoded"""
+        if os.name != "posix":
+            raise tests.TestNotApplicable("Needs system beholden to C locales")
+        out, err = self.run_bzr_subprocess(["init", "file:%C2%A7"],
+            env_changes={"LANG": "C", "LC_ALL": "C"})
+        self.assertContainsRe(out, "^Created a standalone tree .*$")
+
 
 class TestOptParseBugHandling(tests.TestCase):
     "Test that we handle http://bugs.python.org/issue2931"

=== modified file 'bzrlib/tests/test_osutils.py'
--- a/bzrlib/tests/test_osutils.py	2011-12-02 12:49:48 +0000
+++ b/bzrlib/tests/test_osutils.py	2011-12-14 17:07:11 +0000
@@ -1217,7 +1217,7 @@
         self.requireFeature(UTF8DirReaderFeature)
         self._save_platform_info()
         win32utils.winver = None # Avoid the win32 detection code
-        osutils._fs_enc = 'UTF-8'
+        osutils._fs_enc = 'utf-8'
         self.assertDirReaderIs(
             UTF8DirReaderFeature.module.UTF8DirReader)
 
@@ -1225,22 +1225,14 @@
         self.requireFeature(UTF8DirReaderFeature)
         self._save_platform_info()
         win32utils.winver = None # Avoid the win32 detection code
-        osutils._fs_enc = 'US-ASCII'
-        self.assertDirReaderIs(
-            UTF8DirReaderFeature.module.UTF8DirReader)
-
-    def test_force_walkdirs_utf8_fs_ANSI(self):
-        self.requireFeature(UTF8DirReaderFeature)
-        self._save_platform_info()
-        win32utils.winver = None # Avoid the win32 detection code
-        osutils._fs_enc = 'ANSI_X3.4-1968'
+        osutils._fs_enc = 'ascii'
         self.assertDirReaderIs(
             UTF8DirReaderFeature.module.UTF8DirReader)
 
     def test_force_walkdirs_utf8_fs_latin1(self):
         self._save_platform_info()
         win32utils.winver = None # Avoid the win32 detection code
-        osutils._fs_enc = 'latin1'
+        osutils._fs_enc = 'iso-8859-1'
         self.assertDirReaderIs(osutils.UnicodeDirReader)
 
     def test_force_walkdirs_utf8_nt(self):

=== modified file 'doc/en/release-notes/bzr-2.5.txt'
--- a/doc/en/release-notes/bzr-2.5.txt	2011-12-14 12:27:44 +0000
+++ b/doc/en/release-notes/bzr-2.5.txt	2011-12-14 17:32:58 +0000
@@ -36,6 +36,11 @@
 
 * New HPSS call for ``Repository.reconcile``. (Jelmer Vernooij, #894455)
 
+* Override the value returned by ``sys.getfilesystemencoding()`` for the bzr
+  script to utf-8 when it would otherwise be ascii on a posix system. This
+  will mean bzr works with non-ascii files when no locale or an incorrect
+  locale is set. (Martin Packman, #794353)
+
 Bug Fixes
 *********
 

=== modified file 'doc/en/whats-new/whats-new-in-2.5.txt'
--- a/doc/en/whats-new/whats-new-in-2.5.txt	2011-10-06 14:39:49 +0000
+++ b/doc/en/whats-new/whats-new-in-2.5.txt	2011-12-14 15:54:07 +0000
@@ -28,6 +28,13 @@
 format is ``long``). This a work in progress and only some options are
 supported so far.
 
+Working on a posix system without a locale
+******************************************
+
+Previously bzr needed a valid locale set to work with branches containing
+non-ascii filenames. It will now use utf-8 rather than ascii as a fallback
+encoding for interacting with the filesystem. This makes creating a working
+tree and committing to it possible for such branches in most environments.
 
 Further information
 *******************




More information about the bazaar-commits mailing list