Rev 2198: Be strict about unicode passed to transport.put_{bytes, file} and SmartClient.call_with_body_bytes, fixing part of TestLockableFiles_RemoteLockDir.test_read_write. in sftp://bazaar.launchpad.net/%7Ebzr/bzr/hpss/

Andrew Bennetts andrew.bennetts at canonical.com
Thu Mar 29 09:47:40 BST 2007


At sftp://bazaar.launchpad.net/%7Ebzr/bzr/hpss/

------------------------------------------------------------
revno: 2198
revision-id: andrew.bennetts at canonical.com-20070329084623-ruqx0po8q3lxw0b7
parent: robertc at robertcollins.net-20070329073949-t6qt1arpx375mjek
committer: Andrew Bennetts <andrew.bennetts at canonical.com>
branch nick: hpss
timestamp: Thu 2007-03-29 18:46:23 +1000
message:
  Be strict about unicode passed to transport.put_{bytes,file} and SmartClient.call_with_body_bytes, fixing part of TestLockableFiles_RemoteLockDir.test_read_write.
modified:
  bzrlib/smart/client.py         client.py-20061116014825-2k6ada6xgulslami-1
  bzrlib/tests/test_smart_transport.py test_ssh_transport.py-20060608202016-c25gvf1ob7ypbus6-2
  bzrlib/tests/test_transport_implementations.py test_transport_implementations.py-20051227111451-f97c5c7d5c49fce7
  bzrlib/transport/memory.py     memory.py-20051016101338-cd008dbdf69f04fc
  bzrlib/transport/remote.py     ssh.py-20060608202016-c25gvf1ob7ypbus6-1
=== modified file 'bzrlib/smart/client.py'
--- a/bzrlib/smart/client.py	2006-11-29 03:43:35 +0000
+++ b/bzrlib/smart/client.py	2007-03-29 08:46:23 +0000
@@ -40,6 +40,13 @@
 
     def call_with_body_bytes(self, method, args, body):
         """Call a method on the remote server with body bytes."""
+        if type(method) is not str:
+            raise TypeError('method must be a byte string, not %r' % (method,))
+        for arg in args:
+            if type(arg) is not str:
+                raise TypeError('args must be byte strings, not %r' % (args,))
+        if type(body) is not str:
+            raise TypeError('body must be byte string, not %r' % (body,))
         request = self._medium.get_request()
         smart_protocol = protocol.SmartClientRequestProtocolOne(request)
         smart_protocol.call_with_body_bytes((method, ) + args, body)

=== modified file 'bzrlib/tests/test_smart_transport.py'
--- a/bzrlib/tests/test_smart_transport.py	2007-02-02 19:25:20 +0000
+++ b/bzrlib/tests/test_smart_transport.py	2007-03-29 08:46:23 +0000
@@ -1307,7 +1307,7 @@
             [('a', 'b', '34')])
 
     def test_client_call_with_body_bytes_uploads(self):
-        # protocol.call_with_upload should length-prefix the bytes onto the 
+        # protocol.call_with_body_bytes should length-prefix the bytes onto the
         # wire.
         expected_bytes = "foo\n7\nabcdefgdone\n"
         input = StringIO("\n")
@@ -1382,6 +1382,42 @@
             errors.ReadingCompleted, smart_protocol.read_body_bytes)
 
 
+class TestSmartClientUnicode(tests.TestCase):
+    """SmartClient tests for unicode arguments.
+
+    Unicode arguments to call_with_body_bytes are not correct (remote method
+    names, arguments, and bodies must all be expressed as byte strings), but
+    SmartClient should gracefully reject them, rather than getting into a broken
+    state that prevents future correct calls from working.  That is, it should
+    be possible to issue more requests on the medium afterwards, rather than
+    allowing one bad call to call_with_body_bytes to cause later calls to
+    mysteriously fail with TooManyConcurrentRequests.
+    """
+
+    def assertCallDoesNotBreakMedium(self, method, args, body):
+        """Call a medium with the given method, args and body, then assert that
+        the medium is left in a sane state, i.e. is capable of allowing further
+        requests.
+        """
+        input = StringIO("\n")
+        output = StringIO()
+        client_medium = medium.SmartSimplePipesClientMedium(input, output)
+        smart_client = client.SmartClient(client_medium)
+        self.assertRaises(TypeError,
+            smart_client.call_with_body_bytes, method, args, body)
+        self.assertEqual("", output.getvalue())
+        self.assertEqual(None, client_medium._current_request)
+
+    def test_call_with_body_bytes_unicode_method(self):
+        self.assertCallDoesNotBreakMedium(u'method', ('args',), 'body')
+
+    def test_call_with_body_bytes_unicode_args(self):
+        self.assertCallDoesNotBreakMedium('method', (u'args',), 'body')
+
+    def test_call_with_body_bytes_unicode_body(self):
+        self.assertCallDoesNotBreakMedium('method', ('args',), u'body')
+
+
 class LengthPrefixedBodyDecoder(tests.TestCase):
 
     # XXX: TODO: make accept_reading_trailer invoke translate_response or 

=== modified file 'bzrlib/tests/test_transport_implementations.py'
--- a/bzrlib/tests/test_transport_implementations.py	2007-03-28 07:48:37 +0000
+++ b/bzrlib/tests/test_transport_implementations.py	2007-03-29 08:46:23 +0000
@@ -22,6 +22,7 @@
 
 import os
 from cStringIO import StringIO
+from StringIO import StringIO as pyStringIO
 import stat
 import sys
 
@@ -378,6 +379,33 @@
                               dir_mode=0777, create_parent_dir=True)
         self.assertTransportMode(t, 'dir777', 0777)
 
+    def test_put_bytes_unicode(self):
+        # Expect put_bytes to raise AssertionError or UnicodeEncodeError if
+        # given unicode "bytes".  UnicodeEncodeError doesn't really make sense
+        # (we don't want to encode unicode here at all, callers should be
+        # strictly passing bytes to put_bytes), but we allow it for backwards
+        # compatibility.  At some point we should use a specific exception.
+        t = self.get_transport()
+        if t.is_readonly():
+            return
+        unicode_string = u'\u1234'
+        self.assertRaises(
+            (AssertionError, UnicodeEncodeError),
+            t.put_bytes, 'foo', unicode_string)
+
+    def test_put_file_unicode(self):
+        # Like put_bytes, except with a StringIO.StringIO of a unicode string.
+        # This situation can happen (and has) if code is careless about the type
+        # of "string" they initialise/write to a StringIO with.  We cannot use
+        # cStringIO, because it never returns unicode from read.
+        # Like put_bytes, UnicodeEncodeError isn't quite the right exception to
+        # raise, but we raise it for hysterical raisins.
+        t = self.get_transport()
+        if t.is_readonly():
+            return
+        unicode_file = pyStringIO(u'\u1234')
+        self.assertRaises(UnicodeEncodeError, t.put_file, 'foo', unicode_file)
+
     def test_put_multi(self):
         t = self.get_transport()
 

=== modified file 'bzrlib/transport/memory.py'
--- a/bzrlib/transport/memory.py	2007-03-27 06:42:26 +0000
+++ b/bzrlib/transport/memory.py	2007-03-29 08:46:23 +0000
@@ -128,7 +128,14 @@
         """See Transport.put_file()."""
         _abspath = self._abspath(relpath)
         self._check_parent(_abspath)
-        self._files[_abspath] = (f.read(), mode)
+        bytes = f.read()
+        if type(bytes) is not str:
+            # Although not strictly correct, we raise UnicodeEncodeError to be
+            # compatible with other transports.
+            raise UnicodeEncodeError(
+                'undefined', bytes, 0, 1,
+                'put_file must be given a file of bytes, not unicode.')
+        self._files[_abspath] = (bytes, mode)
 
     def mkdir(self, relpath, mode=None):
         """See Transport.mkdir()."""

=== modified file 'bzrlib/transport/remote.py'
--- a/bzrlib/transport/remote.py	2007-03-28 07:48:37 +0000
+++ b/bzrlib/transport/remote.py	2007-03-29 08:46:23 +0000
@@ -920,6 +920,12 @@
         # FIXME: upload_file is probably not safe for non-ascii characters -
         # should probably just pass all parameters as length-delimited
         # strings?
+        if type(upload_contents) is unicode:
+            # Although not strictly correct, we raise UnicodeEncodeError to be
+            # compatible with other transports.
+            raise UnicodeEncodeError(
+                'undefined', upload_contents, 0, 1,
+                'put_bytes must be given bytes, not unicode.')
         resp = self._call_with_body_bytes('put',
             (self._remote_path(relpath), self._serialise_optional_mode(mode)),
             upload_contents)




More information about the bazaar-commits mailing list