Rev 6427: (gz) Simplify osutils.get_user_encoding and remove implicit setlocale calls in file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/

Patch Queue Manager pqm at pqm.ubuntu.com
Thu Jan 5 11:39:44 UTC 2012


At file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 6427 [merge]
revision-id: pqm at pqm.ubuntu.com-20120105113943-zeel4wligxkp0tcf
parent: pqm at pqm.ubuntu.com-20120105110647-1vtwq3fi5v88ybqb
parent: martin.packman at canonical.com-20120105110819-qqwp2e1973yeomk7
committer: Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2012-01-05 11:39:43 +0000
message:
  (gz) Simplify osutils.get_user_encoding and remove implicit setlocale calls
   from the function (Martin Packman)
modified:
  bzr                            bzr.py-20050313053754-5485f144c7006fa6
  bzrlib/osutils.py              osutils.py-20050309040759-eeaff12fbf77ac86
  bzrlib/tests/test_osutils_encodings.py test_osutils_encodin-20061226013130-kkp732tpt3lm91vv-1
  doc/en/release-notes/bzr-2.5.txt bzr2.5.txt-20110708125756-587p0hpw7oke4h05-1
=== modified file 'bzr'
--- a/bzr	2012-01-03 11:38:01 +0000
+++ b/bzr	2012-01-05 10:44:12 +0000
@@ -41,26 +41,9 @@
     profile_imports.install()
     profiling = True
 
-if sys.platform == 'darwin':
-    # jameinel says this hack is to force python to honor the LANG setting,
-    # even on Darwin.  Otherwise it is apparently hardcoded to Mac-Roman,
-    # which is incorrect for the normal Terminal.app which wants UTF-8.
-    #
-    # "It might be that I should be setting the "system locale" somewhere else
-    # on the system, rather than setting LANG=en_US.UTF-8 in .bashrc.
-    # Switching to 'posix' and setting LANG worked for me."
-    #
-    # So we can remove this if someone works out the right way to tell Mac
-    # Python which encoding to use.  -- mbp 20080703
-    sys.platform = 'posix'
-    try:
-        import locale
-    finally:
-        sys.platform = 'darwin'
-else:
+
+if os.name == "posix":
     import locale
-
-if os.name == "posix":
     try:
         locale.setlocale(locale.LC_ALL, '')
     except locale.Error, e:

=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py	2012-01-03 17:11:56 +0000
+++ b/bzrlib/osutils.py	2012-01-05 10:44:12 +0000
@@ -28,6 +28,7 @@
 lazy_import(globals(), """
 from datetime import datetime
 import getpass
+import locale
 import ntpath
 import posixpath
 import select
@@ -54,8 +55,11 @@
 """)
 
 from bzrlib.symbol_versioning import (
+    DEPRECATED_PARAMETER,
     deprecated_function,
     deprecated_in,
+    deprecated_passed,
+    warn as warn_deprecated,
     )
 
 from hashlib import (
@@ -1976,76 +1980,53 @@
 _cached_user_encoding = None
 
 
-def get_user_encoding(use_cache=True):
+def get_user_encoding(use_cache=DEPRECATED_PARAMETER):
     """Find out what the preferred user encoding is.
 
     This is generally the encoding that is used for command line parameters
     and file contents. This may be different from the terminal encoding
     or the filesystem encoding.
 
-    :param  use_cache:  Enable cache for detected encoding.
-                        (This parameter is turned on by default,
-                        and required only for selftesting)
-
     :return: A string defining the preferred user encoding
     """
     global _cached_user_encoding
-    if _cached_user_encoding is not None and use_cache:
+    if deprecated_passed(use_cache):
+        warn_deprecated("use_cache should only have been used for tests",
+            DeprecationWarning, stacklevel=2) 
+    if _cached_user_encoding is not None:
         return _cached_user_encoding
 
-    if sys.platform == 'darwin':
-        # python locale.getpreferredencoding() always return
-        # 'mac-roman' on darwin. That's a lie.
-        sys.platform = 'posix'
-        try:
-            if os.environ.get('LANG', None) is None:
-                # If LANG is not set, we end up with 'ascii', which is bad
-                # ('mac-roman' is more than ascii), so we set a default which
-                # will give us UTF-8 (which appears to work in all cases on
-                # OSX). Users are still free to override LANG of course, as
-                # long as it give us something meaningful. This work-around
-                # *may* not be needed with python 3k and/or OSX 10.5, but will
-                # work with them too -- vila 20080908
-                os.environ['LANG'] = 'en_US.UTF-8'
-            import locale
-        finally:
-            sys.platform = 'darwin'
+    if os.name == 'posix' and getattr(locale, 'CODESET', None) is not None:
+        # Use the existing locale settings and call nl_langinfo directly
+        # rather than going through getpreferredencoding. This avoids
+        # <http://bugs.python.org/issue6202> on OSX Python 2.6 and the
+        # possibility of the setlocale call throwing an error.
+        user_encoding = locale.nl_langinfo(locale.CODESET)
     else:
-        import locale
+        # GZ 2011-12-19: On windows could call GetACP directly instead.
+        user_encoding = locale.getpreferredencoding(False)
 
     try:
-        user_encoding = locale.getpreferredencoding()
-    except locale.Error, e:
-        sys.stderr.write('bzr: warning: %s\n'
-                         '  Could not determine what text encoding to use.\n'
-                         '  This error usually means your Python interpreter\n'
-                         '  doesn\'t support the locale set by $LANG (%s)\n'
-                         "  Continuing with ascii encoding.\n"
-                         % (e, os.environ.get('LANG')))
-        user_encoding = 'ascii'
-
-    # Windows returns 'cp0' to indicate there is no code page. So we'll just
-    # treat that as ASCII, and not support printing unicode characters to the
-    # console.
-    #
-    # For python scripts run under vim, we get '', so also treat that as ASCII
-    if user_encoding in (None, 'cp0', ''):
-        user_encoding = 'ascii'
-    else:
-        # check encoding
-        try:
-            codecs.lookup(user_encoding)
-        except LookupError:
+        user_encoding = codecs.lookup(user_encoding).name
+    except LookupError:
+        if user_encoding not in ("", "cp0"):
             sys.stderr.write('bzr: warning:'
                              ' unknown encoding %s.'
                              ' Continuing with ascii encoding.\n'
                              % user_encoding
                             )
-            user_encoding = 'ascii'
-
-    if use_cache:
-        _cached_user_encoding = user_encoding
-
+        user_encoding = 'ascii'
+    else:
+        # Get 'ascii' when setlocale has not been called or LANG=C or unset.
+        if user_encoding == 'ascii':
+            if sys.platform == 'darwin':
+                # OSX is special-cased in Python to have a UTF-8 filesystem
+                # encoding and previously had LANG set here if not present.
+                user_encoding = 'utf-8'
+            # GZ 2011-12-19: Maybe UTF-8 should be the default in this case
+            #                for some other posix platforms as well.
+
+    _cached_user_encoding = user_encoding
     return user_encoding
 
 
@@ -2053,28 +2034,6 @@
     return get_terminal_encoding()
 
 
-_message_encoding = None
-
-
-def get_message_encoding():
-    """Return the encoding used for messages
-
-    While the message encoding is a general setting it should usually only be
-    needed for decoding system error strings such as from OSError instances.
-    """
-    global _message_encoding
-    if _message_encoding is None:
-        if os.name == "posix":
-            import locale
-            # This is a process-global setting that can change, but should in
-            # general just get set once at process startup then be constant.
-            _message_encoding = locale.getlocale(locale.LC_MESSAGES)[1]
-        else:
-            # On windows want the result of GetACP() which this boils down to.
-            _message_encoding = get_user_encoding()
-    return _message_encoding or "ascii"
-
-
 def get_host_name():
     """Return the current unicode host name.
 

=== modified file 'bzrlib/tests/test_osutils_encodings.py'
--- a/bzrlib/tests/test_osutils_encodings.py	2011-12-02 13:36:43 +0000
+++ b/bzrlib/tests/test_osutils_encodings.py	2012-01-05 10:37:58 +0000
@@ -17,9 +17,7 @@
 """Tests for the osutils wrapper."""
 
 import codecs
-import errno
 import locale
-import os
 import sys
 
 from bzrlib import (
@@ -170,70 +168,35 @@
 
     def setUp(self):
         TestCase.setUp(self)
-        self.overrideAttr(locale, 'getpreferredencoding')
+        self.overrideAttr(osutils, '_cached_user_encoding', None)
+        self.overrideAttr(locale, 'getpreferredencoding', self.get_encoding)
+        self.overrideAttr(locale, 'CODESET', None)
         self.overrideAttr(sys, 'stderr', StringIOWrapper())
 
+    def get_encoding(self, do_setlocale=True):
+        return self._encoding
+
     def test_get_user_encoding(self):
-        def f():
-            return 'user_encoding'
-
-        locale.getpreferredencoding = f
+        self._encoding = 'user_encoding'
         fake_codec.add('user_encoding')
-        self.assertEquals('user_encoding',
-                          osutils.get_user_encoding(use_cache=False))
+        self.assertEquals('iso8859-1', # fake_codec maps to latin-1
+                          osutils.get_user_encoding())
         self.assertEquals('', sys.stderr.getvalue())
 
     def test_user_cp0(self):
-        def f():
-            return 'cp0'
-
-        locale.getpreferredencoding = f
-        self.assertEquals('ascii', osutils.get_user_encoding(use_cache=False))
+        self._encoding = 'cp0'
+        self.assertEquals('ascii', osutils.get_user_encoding())
         self.assertEquals('', sys.stderr.getvalue())
 
     def test_user_cp_unknown(self):
-        def f():
-            return 'cp-unknown'
-
-        locale.getpreferredencoding = f
-        self.assertEquals('ascii', osutils.get_user_encoding(use_cache=False))
+        self._encoding = 'cp-unknown'
+        self.assertEquals('ascii', osutils.get_user_encoding())
         self.assertEquals('bzr: warning: unknown encoding cp-unknown.'
                           ' Continuing with ascii encoding.\n',
                           sys.stderr.getvalue())
 
     def test_user_empty(self):
         """Running bzr from a vim script gives '' for a preferred locale"""
-        def f():
-            return ''
-
-        locale.getpreferredencoding = f
-        self.assertEquals('ascii', osutils.get_user_encoding(use_cache=False))
+        self._encoding = ''
+        self.assertEquals('ascii', osutils.get_user_encoding())
         self.assertEquals('', sys.stderr.getvalue())
-
-    def test_user_locale_error(self):
-        def f():
-            raise locale.Error, 'unsupported locale'
-
-        locale.getpreferredencoding = f
-        self.overrideEnv('LANG', 'BOGUS')
-        self.assertEquals('ascii', osutils.get_user_encoding(use_cache=False))
-        self.assertEquals('bzr: warning: unsupported locale\n'
-                          '  Could not determine what text encoding to use.\n'
-                          '  This error usually means your Python interpreter\n'
-                          '  doesn\'t support the locale set by $LANG (BOGUS)\n'
-                          '  Continuing with ascii encoding.\n',
-                          sys.stderr.getvalue())
-
-
-class TestMessageEncoding(TestCase):
-    """Tests for getting the encoding used by system messages"""
-
-    def test_get_message_encoding(self):
-        encoding_name = osutils.get_message_encoding()
-        "".decode(encoding_name) # should be a valid encoding name
-
-    def test_get_message_encoding_decodes_strerror(self):
-        encoding_name = osutils.get_message_encoding()
-        for number, name in errno.errorcode.iteritems():
-            string = os.strerror(number)
-            string.decode(encoding_name)

=== modified file 'doc/en/release-notes/bzr-2.5.txt'
--- a/doc/en/release-notes/bzr-2.5.txt	2012-01-05 11:06:47 +0000
+++ b/doc/en/release-notes/bzr-2.5.txt	2012-01-05 11:39:43 +0000
@@ -107,6 +107,9 @@
   a dictionary which is not a supported use case for the configuration
   stacks). (Vincent Ladeuil, #908050)
 
+* Stop altering ``sys.platform`` on OSX when initialising the locale.
+  (Martin Packman, #570495)
+
 Documentation
 *************
 
@@ -126,6 +129,9 @@
 * ``Repository.get_commit_builder`` now takes a ``config_stack``
   rather than a ``config`` argument. (Jelmer Vernooij)
 
+* Scripts using bzrlib should now ensure setlocale is called on posix
+  platforms if they need a non-ascii user encoding. (Martin Packman)
+
 * Send formats now accept a new optional argument ``submit_branch``,
   which can be None or a Branch object for the submit branch location.
   (Jelmer Vernooij)




More information about the bazaar-commits mailing list