[MERGE] Fix handling of unicode paths in smart server requests

John Arbash Meinel john at arbash-meinel.com
Fri Oct 13 08:38:31 BST 2006


-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Andrew Bennetts wrote:
> http://people.ubuntu.com/~andrew/bzr/smart-server-unicode/ contains tests and
> fixes for handling of unicode paths in smart server requests.  There's some
> double-handling (UTF-8 read off the wire gets decoded, then re-encoded), but at
> least it's correct.
> 
> I haven't tested handling of non-ascii paths in responses yet, so I assume they
> are broken!  I'll take a look and add tests and if necessary fixes soon, but
> this branch is already useful as is.
> 
> The branch is against http://people.ubuntu.com/~andrew/bzr/http-smart-server/,
> because it builds on the refactoring that occurred in that branch.
> 
> I've attached a bundle relative to that branch for review.
> 
> -Andrew.
> 
> 

...

v- These tests look like they will fail with a UnicodeError when running
'make check' which runs LANG=C python ./bzr selftest.

They also are semi-likely to fail on specific platforms. But that is the
easiest way to flush it out. Have you tested 'make check'?.
	LANG=C ./bzr selftest SmartServer.\*Unicode
should flush it out as well.


> +class SmartServerRequestHandlerUnicodeTests(tests.TestCaseWithTransport):
> +    """Test unicode relpaths passed to the request handler.
> +
> +    SmartServerRequestProtocolOne decodes requests into a tuple of unicode
> +    objects.  That tuple is then passed to the request handler's
> +    dispatch_command method.  This test case is for testing that unicode paths
> +    are handled correctly by the request handler.
> +    """
> +
> +    def test_get_file(self):
> +        self.build_tree([u'\N{INTERROBANG}'], line_endings='binary')
> +        handler = smart.SmartServerRequestHandler(self.get_readonly_transport())
> +        handler.dispatch_command('get', (u'\N{INTERROBANG}',))
> +        self.assertEqual(('ok',), handler.response.args)
> +        self.assertEqual(
> +            u'contents of \N{INTERROBANG}\n'.encode('utf-8'),
> +            handler.response.body)
> +
> +    def test_has(self):
> +        self.build_tree([u'\N{INTERROBANG}'])
> +        handler = smart.SmartServerRequestHandler(self.get_readonly_transport())
> +        handler.dispatch_command('has', (u'\N{INTERROBANG}',))
> +        self.assertEqual(('yes',), handler.response.args)
> +
> +    def test_append(self):
> +        handler = smart.SmartServerRequestHandler(self.get_transport())
> +        handler.dispatch_command('append', (u'\N{INTERROBANG}', ''))
> +        handler.accept_body('abc\n')
> +        handler.end_of_body()
> +        self.assertEqual(('appended', '0'), handler.response.args)
> +        self.check_file_contents(u'\N{INTERROBANG}'.encode('utf-8'), 'abc\n')
> +
> +    def test_delete(self):
> +        self.build_tree([u'\N{INTERROBANG}'])
> +        handler = smart.SmartServerRequestHandler(self.get_transport())
> +        handler.dispatch_command('delete', (u'\N{INTERROBANG}',))
> +        self.assertEqual(('ok',), handler.response.args)
> +        self.failIfExists(u'\N{INTERROBANG}'.encode('utf-8'))
> +
> +    def test_iter_files_recursive(self):
> +        self.build_tree([
> +            u'\N{INTERROBANG}/',
> +            u'\N{INTERROBANG}/one',
> +            u'\N{INTERROBANG}/two',
> +            ])
> +        handler = smart.SmartServerRequestHandler(self.get_readonly_transport())
> +        handler.dispatch_command('iter_files_recursive', (u'\N{INTERROBANG}',))
> +        self.assertEqual('names', handler.response.args[0])
> +        self.assertEqual(set(['one', 'two']), set(handler.response.args[1:]))
> +
> +    def test_list_dir(self):
> +        self.build_tree([
> +            u'\N{INTERROBANG}/',
> +            u'\N{INTERROBANG}/one',
> +            u'\N{INTERROBANG}/two',
> +            ])
> +        handler = smart.SmartServerRequestHandler(self.get_readonly_transport())
> +        handler.dispatch_command('list_dir', (u'\N{INTERROBANG}',))
> +        self.assertEqual('names', handler.response.args[0])
> +        self.assertEqual(set(['one', 'two']), set(handler.response.args[1:]))
> +
> +    def test_mkdir(self):
> +        handler = smart.SmartServerRequestHandler(self.get_transport())
> +        handler.dispatch_command('mkdir', (u'\N{INTERROBANG}', ''))
> +        self.assertEqual(('ok',), handler.response.args)
> +        self.failUnlessExists(u'\N{INTERROBANG}'.encode('utf-8'))
> +
> +    def test_move(self):
> +        self.build_tree([u'one\N{INTERROBANG}'])
> +        handler = smart.SmartServerRequestHandler(self.get_transport())
> +        handler.dispatch_command(
> +            'move', (u'one\N{INTERROBANG}', u'two\N{INTERROBANG}',))
> +        self.assertEqual(('ok',), handler.response.args)
> +        self.failIfExists(u'one\N{INTERROBANG}'.encode('utf-8'))
> +        self.failUnlessExists(u'two\N{INTERROBANG}'.encode('utf-8'))
> +
> +    def test_put(self):
> +        handler = smart.SmartServerRequestHandler(self.get_transport())
> +        handler.dispatch_command('put', (u'\N{INTERROBANG}', ''))
> +        handler.accept_body('abc\n')
> +        handler.end_of_body()
> +        self.assertEqual(('ok',), handler.response.args)
> +        self.check_file_contents(u'\N{INTERROBANG}'.encode('utf-8'), 'abc\n')
> +
> +    def test_put_non_atomic(self):
> +        handler = smart.SmartServerRequestHandler(self.get_transport())
> +        handler.dispatch_command(
> +            'put_non_atomic', (u'\N{INTERROBANG}', '', 'F', ''))
> +        handler.accept_body('abc\n')
> +        handler.end_of_body()
> +        self.assertEqual(('ok',), handler.response.args)
> +        self.check_file_contents(u'\N{INTERROBANG}'.encode('utf-8'), 'abc\n')
> +
> +    def test_readv(self):
> +        self.build_tree([u'\N{INTERROBANG}'])
> +        handler = smart.SmartServerRequestHandler(self.get_readonly_transport())
> +        handler.dispatch_command('readv', (u'\N{INTERROBANG}',))
> +        handler.accept_body('2,3')
> +        handler.end_of_body()
> +        self.assertEqual(('readv', ), handler.response.args)
> +        # co - nte - nt of a-file is the file contents we are extracting from.
> +        self.assertEqual('nte', handler.response.body)
> +
> +    def test_rename(self):
> +        self.build_tree([u'one\N{INTERROBANG}'])
> +        handler = smart.SmartServerRequestHandler(self.get_transport())
> +        handler.dispatch_command(
> +            'rename', (u'one\N{INTERROBANG}', u'two\N{INTERROBANG}',))
> +        self.assertEqual(('ok',), handler.response.args)
> +        self.failIfExists(u'one\N{INTERROBANG}'.encode('utf-8'))
> +        self.failUnlessExists(u'two\N{INTERROBANG}'.encode('utf-8'))

...

v- This seems like it would look better without the newline.

>              bytes = self._in.read(bytes_to_read)
>              if bytes == '':
> +
>                  # Connection has been closed.
>                  self.finished = True
>                  self._out.flush()
> @@ -635,10 +636,12 @@
>          return SmartServerResponse(('ok', '1'))
>  

v- What about URL escaping? It seems like you need to be passing your
backing_transport a url_escaped utf-8 string. We generally do:

 encode('utf8') => urllib.quote() => url_string

Not
 urllib.quote() => encode('utf8') => url_string

So you may need to update you tests to include things like ' ' or '+',
or '%' which all need to be escaped for urls. (There is the helper
urlutils.escape() for stuff like this).

>      def do_has(self, relpath):
> +        relpath = relpath.encode('utf-8')
>          r = self._backing_transport.has(relpath) and 'yes' or 'no'
>          return SmartServerResponse((r,))
>  
>      def do_get(self, relpath):
> +        relpath = relpath.encode('utf-8')
>          backing_bytes = self._backing_transport.get_bytes(relpath)
>          return SmartServerResponse(('ok',), backing_bytes)
>  

Now, truth be told, we rarely need unicode over a transport. Working
trees are accessed directly, and file-ids and revision ids avoid having
unicode in their actual characters (they would be escaped anyway for
filesystem safety).

That said, we Transport wants to be unicode safe. But I want to check
that you are also url safe.

John
=:->

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.5 (Darwin)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFFL0J2JdeBCYSNAAMRAiP8AJ4pB90z8H1S4VHP4iFKbJFR2MHUugCgv6LB
hX5AoAxNEj35m85MlhMBguk=
=Gaxf
-----END PGP SIGNATURE-----




More information about the bazaar mailing list