[MERGE] Fix handling of unicode paths in smart server requests
John Arbash Meinel
john at arbash-meinel.com
Fri Oct 13 08:38:31 BST 2006
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Andrew Bennetts wrote:
> http://people.ubuntu.com/~andrew/bzr/smart-server-unicode/ contains tests and
> fixes for handling of unicode paths in smart server requests. There's some
> double-handling (UTF-8 read off the wire gets decoded, then re-encoded), but at
> least it's correct.
>
> I haven't tested handling of non-ascii paths in responses yet, so I assume they
> are broken! I'll take a look and add tests and if necessary fixes soon, but
> this branch is already useful as is.
>
> The branch is against http://people.ubuntu.com/~andrew/bzr/http-smart-server/,
> because it builds on the refactoring that occurred in that branch.
>
> I've attached a bundle relative to that branch for review.
>
> -Andrew.
>
>
...
v- These tests look like they will fail with a UnicodeError when running
'make check' which runs LANG=C python ./bzr selftest.
They also are semi-likely to fail on specific platforms. But that is the
easiest way to flush it out. Have you tested 'make check'?.
LANG=C ./bzr selftest SmartServer.\*Unicode
should flush it out as well.
> +class SmartServerRequestHandlerUnicodeTests(tests.TestCaseWithTransport):
> + """Test unicode relpaths passed to the request handler.
> +
> + SmartServerRequestProtocolOne decodes requests into a tuple of unicode
> + objects. That tuple is then passed to the request handler's
> + dispatch_command method. This test case is for testing that unicode paths
> + are handled correctly by the request handler.
> + """
> +
> + def test_get_file(self):
> + self.build_tree([u'\N{INTERROBANG}'], line_endings='binary')
> + handler = smart.SmartServerRequestHandler(self.get_readonly_transport())
> + handler.dispatch_command('get', (u'\N{INTERROBANG}',))
> + self.assertEqual(('ok',), handler.response.args)
> + self.assertEqual(
> + u'contents of \N{INTERROBANG}\n'.encode('utf-8'),
> + handler.response.body)
> +
> + def test_has(self):
> + self.build_tree([u'\N{INTERROBANG}'])
> + handler = smart.SmartServerRequestHandler(self.get_readonly_transport())
> + handler.dispatch_command('has', (u'\N{INTERROBANG}',))
> + self.assertEqual(('yes',), handler.response.args)
> +
> + def test_append(self):
> + handler = smart.SmartServerRequestHandler(self.get_transport())
> + handler.dispatch_command('append', (u'\N{INTERROBANG}', ''))
> + handler.accept_body('abc\n')
> + handler.end_of_body()
> + self.assertEqual(('appended', '0'), handler.response.args)
> + self.check_file_contents(u'\N{INTERROBANG}'.encode('utf-8'), 'abc\n')
> +
> + def test_delete(self):
> + self.build_tree([u'\N{INTERROBANG}'])
> + handler = smart.SmartServerRequestHandler(self.get_transport())
> + handler.dispatch_command('delete', (u'\N{INTERROBANG}',))
> + self.assertEqual(('ok',), handler.response.args)
> + self.failIfExists(u'\N{INTERROBANG}'.encode('utf-8'))
> +
> + def test_iter_files_recursive(self):
> + self.build_tree([
> + u'\N{INTERROBANG}/',
> + u'\N{INTERROBANG}/one',
> + u'\N{INTERROBANG}/two',
> + ])
> + handler = smart.SmartServerRequestHandler(self.get_readonly_transport())
> + handler.dispatch_command('iter_files_recursive', (u'\N{INTERROBANG}',))
> + self.assertEqual('names', handler.response.args[0])
> + self.assertEqual(set(['one', 'two']), set(handler.response.args[1:]))
> +
> + def test_list_dir(self):
> + self.build_tree([
> + u'\N{INTERROBANG}/',
> + u'\N{INTERROBANG}/one',
> + u'\N{INTERROBANG}/two',
> + ])
> + handler = smart.SmartServerRequestHandler(self.get_readonly_transport())
> + handler.dispatch_command('list_dir', (u'\N{INTERROBANG}',))
> + self.assertEqual('names', handler.response.args[0])
> + self.assertEqual(set(['one', 'two']), set(handler.response.args[1:]))
> +
> + def test_mkdir(self):
> + handler = smart.SmartServerRequestHandler(self.get_transport())
> + handler.dispatch_command('mkdir', (u'\N{INTERROBANG}', ''))
> + self.assertEqual(('ok',), handler.response.args)
> + self.failUnlessExists(u'\N{INTERROBANG}'.encode('utf-8'))
> +
> + def test_move(self):
> + self.build_tree([u'one\N{INTERROBANG}'])
> + handler = smart.SmartServerRequestHandler(self.get_transport())
> + handler.dispatch_command(
> + 'move', (u'one\N{INTERROBANG}', u'two\N{INTERROBANG}',))
> + self.assertEqual(('ok',), handler.response.args)
> + self.failIfExists(u'one\N{INTERROBANG}'.encode('utf-8'))
> + self.failUnlessExists(u'two\N{INTERROBANG}'.encode('utf-8'))
> +
> + def test_put(self):
> + handler = smart.SmartServerRequestHandler(self.get_transport())
> + handler.dispatch_command('put', (u'\N{INTERROBANG}', ''))
> + handler.accept_body('abc\n')
> + handler.end_of_body()
> + self.assertEqual(('ok',), handler.response.args)
> + self.check_file_contents(u'\N{INTERROBANG}'.encode('utf-8'), 'abc\n')
> +
> + def test_put_non_atomic(self):
> + handler = smart.SmartServerRequestHandler(self.get_transport())
> + handler.dispatch_command(
> + 'put_non_atomic', (u'\N{INTERROBANG}', '', 'F', ''))
> + handler.accept_body('abc\n')
> + handler.end_of_body()
> + self.assertEqual(('ok',), handler.response.args)
> + self.check_file_contents(u'\N{INTERROBANG}'.encode('utf-8'), 'abc\n')
> +
> + def test_readv(self):
> + self.build_tree([u'\N{INTERROBANG}'])
> + handler = smart.SmartServerRequestHandler(self.get_readonly_transport())
> + handler.dispatch_command('readv', (u'\N{INTERROBANG}',))
> + handler.accept_body('2,3')
> + handler.end_of_body()
> + self.assertEqual(('readv', ), handler.response.args)
> + # co - nte - nt of a-file is the file contents we are extracting from.
> + self.assertEqual('nte', handler.response.body)
> +
> + def test_rename(self):
> + self.build_tree([u'one\N{INTERROBANG}'])
> + handler = smart.SmartServerRequestHandler(self.get_transport())
> + handler.dispatch_command(
> + 'rename', (u'one\N{INTERROBANG}', u'two\N{INTERROBANG}',))
> + self.assertEqual(('ok',), handler.response.args)
> + self.failIfExists(u'one\N{INTERROBANG}'.encode('utf-8'))
> + self.failUnlessExists(u'two\N{INTERROBANG}'.encode('utf-8'))
...
v- This seems like it would look better without the newline.
> bytes = self._in.read(bytes_to_read)
> if bytes == '':
> +
> # Connection has been closed.
> self.finished = True
> self._out.flush()
> @@ -635,10 +636,12 @@
> return SmartServerResponse(('ok', '1'))
>
v- What about URL escaping? It seems like you need to be passing your
backing_transport a url_escaped utf-8 string. We generally do:
encode('utf8') => urllib.quote() => url_string
Not
urllib.quote() => encode('utf8') => url_string
So you may need to update you tests to include things like ' ' or '+',
or '%' which all need to be escaped for urls. (There is the helper
urlutils.escape() for stuff like this).
> def do_has(self, relpath):
> + relpath = relpath.encode('utf-8')
> r = self._backing_transport.has(relpath) and 'yes' or 'no'
> return SmartServerResponse((r,))
>
> def do_get(self, relpath):
> + relpath = relpath.encode('utf-8')
> backing_bytes = self._backing_transport.get_bytes(relpath)
> return SmartServerResponse(('ok',), backing_bytes)
>
Now, truth be told, we rarely need unicode over a transport. Working
trees are accessed directly, and file-ids and revision ids avoid having
unicode in their actual characters (they would be escaped anyway for
filesystem safety).
That said, we Transport wants to be unicode safe. But I want to check
that you are also url safe.
John
=:->
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.5 (Darwin)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org
iD8DBQFFL0J2JdeBCYSNAAMRAiP8AJ4pB90z8H1S4VHP4iFKbJFR2MHUugCgv6LB
hX5AoAxNEj35m85MlhMBguk=
=Gaxf
-----END PGP SIGNATURE-----
More information about the bazaar
mailing list