Rev 6367: (gz) Override Py_FileSystemDefaultEncoding to utf-8 from ascii for the bzr in file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/
Patch Queue Manager
pqm at pqm.ubuntu.com
Wed Dec 14 17:32:58 UTC 2011
At file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 6367 [merge]
revision-id: pqm at pqm.ubuntu.com-20111214173258-gt3ywqak3no5ybo0
parent: pqm at pqm.ubuntu.com-20111214155359-u6b2wjk5efk20g06
parent: martin.packman at canonical.com-20111214170711-giown090rjfgrsyo
committer: Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2011-12-14 17:32:58 +0000
message:
(gz) Override Py_FileSystemDefaultEncoding to utf-8 from ascii for the bzr
script (Martin Packman)
modified:
bzr bzr.py-20050313053754-5485f144c7006fa6
bzrlib/__init__.py __init__.py-20050309040759-33e65acf91bbcd5d
bzrlib/osutils.py osutils.py-20050309040759-eeaff12fbf77ac86
bzrlib/tests/blackbox/test_exceptions.py test_exceptions.py-20060604211237-yi2cxg0ose3xk4id-1
bzrlib/tests/test_osutils.py test_osutils.py-20051201224856-e48ee24c12182989
doc/en/release-notes/bzr-2.5.txt bzr2.5.txt-20110708125756-587p0hpw7oke4h05-1
doc/en/whats-new/whats-new-in-2.5.txt whatsnewin2.5.txt-20110711065040-xz9b4xba1qzlwp7m-1
=== modified file 'bzr'
--- a/bzr 2011-10-27 15:38:14 +0000
+++ b/bzr 2011-12-09 17:13:21 +0000
@@ -84,6 +84,10 @@
' Although this should be no problem for bzr itself, it might\n'
' cause problems with some plugins. To investigate the issue,\n'
' look at the output of the locale(1p) tool.\n' % e)
+ # Use better default than ascii with posix filesystems that deal in bytes
+ # natively even when the C locale or no locale at all is given. Note that
+ # we need an immortal string for the hack, hence the lack of a hyphen.
+ sys._bzr_default_fs_enc = "utf8"
# The python2.6 release includes some libraries that have deprecation warnings
=== modified file 'bzrlib/__init__.py'
--- a/bzrlib/__init__.py 2011-12-08 09:24:06 +0000
+++ b/bzrlib/__init__.py 2011-12-14 16:26:32 +0000
@@ -37,6 +37,7 @@
# timestamps relative to program start in the log file kept by bzrlib.trace.
_start_time = time.time()
+import codecs
import sys
@@ -131,6 +132,47 @@
__version__ = _format_version_tuple(version_info)
version_string = __version__
+
+def _patch_filesystem_default_encoding(new_enc):
+ """Change the Python process global encoding for filesystem names
+
+ The effect is to change how open() and other builtin functions handle
+ unicode filenames on posix systems. This should only be done near startup.
+
+ The new encoding string passed to this function must survive until process
+ termination, otherwise the interpreter may access uninitialized memory.
+ The use of intern() may defer breakage but is not enough; the string
+ object should be secure against module reloading and during teardown.
+ """
+ try:
+ import ctypes
+ except ImportError:
+ return
+ pythonapi = getattr(ctypes, "pythonapi", None)
+ if pythonapi is None:
+ # Not CPython ctypes implementation
+ return
+ old_ptr = ctypes.c_void_p.in_dll(pythonapi, "Py_FileSystemDefaultEncoding")
+ new_ptr = ctypes.cast(ctypes.c_char_p(intern(new_enc)), ctypes.c_void_p)
+ old_ptr.value = new_ptr.value
+ if sys.getfilesystemencoding() != new_enc:
+ raise RuntimeError("Failed to change the filesystem default encoding")
+ return new_enc
+
+
+# When running under the bzr script, override bad filesystem default encoding.
+# This is not safe to do for all users of bzrlib, other scripts should instead
+# just ensure a usable locale is set via the $LANG variable on posix systems.
+_fs_enc = sys.getfilesystemencoding()
+if getattr(sys, "_bzr_default_fs_enc", None) is not None:
+ if (_fs_enc is None or codecs.lookup(_fs_enc).name == "ascii"):
+ _fs_enc = _patch_filesystem_default_encoding(sys._bzr_default_fs_enc)
+if _fs_enc is None:
+ _fs_enc = "ascii"
+else:
+ _fs_enc = codecs.lookup(_fs_enc).name
+
+
# bzr has various bits of global state that are slowly being eliminated.
# This variable is intended to permit any new state-like things to be attached
# to a library_state.BzrLibraryState object rather than getting new global
=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py 2011-12-05 14:21:55 +0000
+++ b/bzrlib/osutils.py 2011-12-13 17:10:47 +0000
@@ -63,7 +63,7 @@
import bzrlib
-from bzrlib import symbol_versioning
+from bzrlib import symbol_versioning, _fs_enc
# Cross platform wall-clock time functionality with decent resolution.
@@ -293,7 +293,6 @@
# choke on a Unicode string containing a relative path if
# os.getcwd() returns a non-sys.getdefaultencoding()-encoded
# string.
-_fs_enc = sys.getfilesystemencoding() or 'utf-8'
def _posix_abspath(path):
# jam 20060426 rather than encoding to fsencoding
# copy posixpath.abspath, but use os.getcwdu instead
@@ -1771,7 +1770,6 @@
"""
global _selected_dir_reader
if _selected_dir_reader is None:
- fs_encoding = _fs_enc.upper()
if sys.platform == "win32" and win32utils.winver == 'Windows NT':
# Win98 doesn't have unicode apis like FindFirstFileW
# TODO: We possibly could support Win98 by falling back to the
@@ -1783,8 +1781,7 @@
_selected_dir_reader = Win32ReadDir()
except ImportError:
pass
- elif fs_encoding in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):
- # ANSI_X3.4-1968 is a form of ASCII
+ elif _fs_enc in ('utf-8', 'ascii'):
try:
from bzrlib._readdir_pyx import UTF8DirReader
_selected_dir_reader = UTF8DirReader()
=== modified file 'bzrlib/tests/blackbox/test_exceptions.py'
--- a/bzrlib/tests/blackbox/test_exceptions.py 2011-09-16 20:42:31 +0000
+++ b/bzrlib/tests/blackbox/test_exceptions.py 2011-12-09 17:09:06 +0000
@@ -60,6 +60,14 @@
flags=re.MULTILINE)
self.assertEquals(out, "")
+ def test_utf8_default_fs_enc(self):
+ """In the C locale bzr treats a posix filesystem as UTF-8 encoded"""
+ if os.name != "posix":
+ raise tests.TestNotApplicable("Needs system beholden to C locales")
+ out, err = self.run_bzr_subprocess(["init", "file:%C2%A7"],
+ env_changes={"LANG": "C", "LC_ALL": "C"})
+ self.assertContainsRe(out, "^Created a standalone tree .*$")
+
class TestOptParseBugHandling(tests.TestCase):
"Test that we handle http://bugs.python.org/issue2931"
=== modified file 'bzrlib/tests/test_osutils.py'
--- a/bzrlib/tests/test_osutils.py 2011-12-02 12:49:48 +0000
+++ b/bzrlib/tests/test_osutils.py 2011-12-14 17:07:11 +0000
@@ -1217,7 +1217,7 @@
self.requireFeature(UTF8DirReaderFeature)
self._save_platform_info()
win32utils.winver = None # Avoid the win32 detection code
- osutils._fs_enc = 'UTF-8'
+ osutils._fs_enc = 'utf-8'
self.assertDirReaderIs(
UTF8DirReaderFeature.module.UTF8DirReader)
@@ -1225,22 +1225,14 @@
self.requireFeature(UTF8DirReaderFeature)
self._save_platform_info()
win32utils.winver = None # Avoid the win32 detection code
- osutils._fs_enc = 'US-ASCII'
- self.assertDirReaderIs(
- UTF8DirReaderFeature.module.UTF8DirReader)
-
- def test_force_walkdirs_utf8_fs_ANSI(self):
- self.requireFeature(UTF8DirReaderFeature)
- self._save_platform_info()
- win32utils.winver = None # Avoid the win32 detection code
- osutils._fs_enc = 'ANSI_X3.4-1968'
+ osutils._fs_enc = 'ascii'
self.assertDirReaderIs(
UTF8DirReaderFeature.module.UTF8DirReader)
def test_force_walkdirs_utf8_fs_latin1(self):
self._save_platform_info()
win32utils.winver = None # Avoid the win32 detection code
- osutils._fs_enc = 'latin1'
+ osutils._fs_enc = 'iso-8859-1'
self.assertDirReaderIs(osutils.UnicodeDirReader)
def test_force_walkdirs_utf8_nt(self):
=== modified file 'doc/en/release-notes/bzr-2.5.txt'
--- a/doc/en/release-notes/bzr-2.5.txt 2011-12-14 12:27:44 +0000
+++ b/doc/en/release-notes/bzr-2.5.txt 2011-12-14 17:32:58 +0000
@@ -36,6 +36,11 @@
* New HPSS call for ``Repository.reconcile``. (Jelmer Vernooij, #894455)
+* Override the value returned by ``sys.getfilesystemencoding()`` for the bzr
+ script to utf-8 when it would otherwise be ascii on a posix system. This
+ will mean bzr works with non-ascii files when no locale or an incorrect
+ locale is set. (Martin Packman, #794353)
+
Bug Fixes
*********
=== modified file 'doc/en/whats-new/whats-new-in-2.5.txt'
--- a/doc/en/whats-new/whats-new-in-2.5.txt 2011-10-06 14:39:49 +0000
+++ b/doc/en/whats-new/whats-new-in-2.5.txt 2011-12-14 15:54:07 +0000
@@ -28,6 +28,13 @@
format is ``long``). This is a work in progress and only some options are
supported so far.
+Working on a posix system without a locale
+******************************************
+
+Previously bzr needed a valid locale set to work with branches containing
+non-ascii filenames. It will now use utf-8 rather than ascii as a fallback
+encoding for interacting with the filesystem. This makes creating a working
+tree and committing to it possible for such branches in most environments.
Further information
*******************
More information about the bazaar-commits
mailing list