[PATH] Fix list_dir quoting in SFTPTransport and FTPTransport
John Arbash Meinel
john at arbash-meinel.com
Mon Aug 14 18:07:41 BST 2006
David Allouche wrote:
> While attempting to convert a branch producted by baz-import to knits, I
> ran into a bug of SFTPTransport.list_dir, it did not urlquote its return
> value.
>
> That actually caused Transport.copy_tree to throw an error, and there
> was a ill-conceived "fix" in iter_files_recursive to work around the
> list_dir bug, so I extended test_list_dir and test_iter_files_recursive
> to check for proper quoting. I also added a test_copy_tree because
> that's ultimately the functionality I needed.
>
> Robert Collins told me that a fix for the same problem was recently
> reviewed, but I though my patch might also be of interest, in particular
> with respect to its test changes.
>
> Please CC me in replies to this message.
>
I just merged Andrew Bennetts' fix for list_dir() not properly escaping
characters. So some of this is redundant. But some is unique, and should
be merged. It seems that he did not fix FTP, so when I submitted it, the
PQM rejected it. (His tests uncover the FTP bug, but don't fix it)
...
v- It is historical cruft, from back before the url switchover (when
Transport.list_dir() did return unicode strings.
I think these changes are redundant with Andrew Bennetts' changes.
> - self.assertEqual([u'a', u'b', u'c', u'c2'], sorted_list(u'.'))
> + # XXX: If that intends to check that list_dir gives a list of unicode,
> + # it's buggy because ['a'] == [u'a']. If that's not the intention, then
> + # it's just confusing. -- David Allouche 2006-08-11
> + self.assertEqual(
> + [u'a', u'a%2525b', u'b', u'c', u'c2'], sorted_list(u'.'))
> self.assertEqual([u'd', u'e'], sorted_list(u'c'))
>
> if not t.is_readonly():
> @@ -786,7 +792,7 @@
> os.unlink('wd/c/d')
> os.unlink('wd/b')
>
> - self.assertEqual([u'a', u'c', u'c2'], sorted_list('.'))
> + self.assertEqual([u'a', u'a%2525b', u'c', u'c2'], sorted_list('.'))
> self.assertEqual([u'e'], sorted_list(u'c'))
>
> self.assertListRaises(PathError, t.list_dir, 'q')
> @@ -886,6 +892,7 @@
> 'isolated/dir/',
> 'isolated/dir/foo',
> 'isolated/dir/bar',
> + 'isolated/dir/b%25z', # make sure quoting is correct
> 'isolated/bar'],
> transport=transport)
> paths = set(transport.iter_files_recursive())
> @@ -893,10 +900,44 @@
> self.assertEqual(paths,
> set(['isolated/dir/foo',
> 'isolated/dir/bar',
> + 'isolated/dir/b%2525z',
> 'isolated/bar']))
> sub_transport = transport.clone('isolated')
> paths = set(sub_transport.iter_files_recursive())
> - self.assertEqual(set(['dir/foo', 'dir/bar', 'bar']), paths)
> + self.assertEqual(paths,
> + set(['dir/foo', 'dir/bar', 'dir/b%2525z', 'bar']))
^- Adding a test for 'iter_files_recursive' seems useful.
> +
> + def test_copy_tree(self):
> + # TODO: test file contents and permissions are preserved. This test was
> + # added just to ensure that quoting was handled correctly.
> + # -- David Allouche 2006-08-11
> + transport = self.get_transport()
> + if not transport.listable():
> + self.assertRaises(TransportNotPossible,
> + transport.iter_files_recursive)
> + return
> + if transport.is_readonly():
> + self.assertRaises(TransportNotPossible,
> + transport.put, 'a', 'some text for a\n')
> + return
> + self.build_tree(['from/',
> + 'from/dir/',
> + 'from/dir/foo',
> + 'from/dir/bar',
> + 'from/dir/b%25z', # make sure quoting is correct
> + 'from/bar'],
> + transport=transport)
> + transport.copy_tree('from', 'to')
> + paths = set(transport.iter_files_recursive())
> + self.assertEqual(paths,
> + set(['from/dir/foo',
> + 'from/dir/bar',
> + 'from/dir/b%2525z',
> + 'from/bar',
> + 'to/dir/foo',
> + 'to/dir/bar',
> + 'to/dir/b%2525z',
> + 'to/bar',]))
>
And adding a test that copy_tree does what we expect is also useful. So
I would like to see those two tests to be added.
> def test_connect_twice_is_same_content(self):
> # check that our server (whatever it is) is accessable reliably
>
> === modified file 'bzrlib/transport/__init__.py'
> --- bzrlib/transport/__init__.py 2006-05-16 18:07:33 +0000
> +++ bzrlib/transport/__init__.py 2006-08-11 22:01:15 +0000
> @@ -289,7 +289,7 @@
>
> Note that some transports MAY allow querying on directories, but this
> is not part of the protocol. In other words, the results of
> - t.has("a_directory_name") are undefined."
> + t.has("a_directory_name") are undefined.
> """
> raise NotImplementedError(self.has)
>
I could have sworn that I saw this fix at some point in the past, but
maybe it never got merged.
>
> === modified file 'bzrlib/transport/ftp.py'
> --- bzrlib/transport/ftp.py 2006-05-22 05:32:51 +0000
> +++ bzrlib/transport/ftp.py 2006-08-11 22:01:15 +0000
> @@ -453,18 +453,21 @@
>
v- I don't think we should be calling _abspath twice. So I'd rather see
basepath = self._abspath(relpath)
mutter()
f = self._get_FTP()
I realize that wasn't what you were fixing. But it should be done.
> def list_dir(self, relpath):
> """See Transport.list_dir."""
> + mutter("FTP nlst: %s", self._abspath(relpath))
> + f = self._get_FTP()
> + basepath = self._abspath(relpath)
> try:
> - mutter("FTP nlst: %s", self._abspath(relpath))
> - f = self._get_FTP()
> - basepath = self._abspath(relpath)
> paths = f.nlst(basepath)
> - # If FTP.nlst returns paths prefixed by relpath, strip 'em
> - if paths and paths[0].startswith(basepath):
> - paths = [path[len(basepath)+1:] for path in paths]
> - # Remove . and .. if present, and return
> - return [path for path in paths if path not in (".", "..")]
> except ftplib.error_perm, e:
> self._translate_perm_error(e, relpath, extra='error with list_dir')
> + # If FTP.nlst returns paths prefixed by relpath, strip 'em
> + if paths and paths[0].startswith(basepath):
> + entries = [path[len(basepath)+1:] for path in paths]
> + else:
> + entries = paths
> + # Remove . and .. if present
> + entries = [entry for entry in entries if entry not in (".", "..")]
> + return [urllib.quote(entry) for entry in entries]
I think iterating over the same list over and over again is a little
silly. Especially since we are building up a new list each time. So I
think it would be better to do:
return [urllib.quote(entry) for entry in entries
if entry not in (".", "..")]
>
> def iter_files_recursive(self):
> """See Transport.iter_files_recursive.
> @@ -473,7 +476,7 @@
> mutter("FTP iter_files_recursive")
> queue = list(self.list_dir("."))
> while queue:
> - relpath = urllib.quote(queue.pop(0))
> + relpath = queue.pop(0)
> st = self.stat(relpath)
> if stat.S_ISDIR(st.st_mode):
> for i, basename in enumerate(self.list_dir(relpath)):
>
> === modified file 'bzrlib/transport/local.py'
> --- bzrlib/transport/local.py 2006-05-05 01:15:53 +0000
> +++ bzrlib/transport/local.py 2006-08-11 22:01:15 +0000
> @@ -229,9 +229,10 @@
> """
> path = self.abspath(relpath)
> try:
> - return [urllib.quote(entry) for entry in os.listdir(path)]
> + entries = os.listdir(path)
> except (IOError, OSError), e:
> self._translate_error(e, path)
> + return [urllib.quote(entry) for entry in entries]
>
> def stat(self, relpath):
> """Return the stat information for a file.
>
> === modified file 'bzrlib/transport/sftp.py'
> --- bzrlib/transport/sftp.py 2006-05-19 00:35:14 +0000
> +++ bzrlib/transport/sftp.py 2006-08-11 22:01:15 +0000
> @@ -486,7 +486,7 @@
> """Walk the relative paths of all files in this transport."""
> queue = list(self.list_dir('.'))
> while queue:
> - relpath = urllib.quote(queue.pop(0))
> + relpath = queue.pop(0)
> st = self.stat(relpath)
> if stat.S_ISDIR(st.st_mode):
> for i, basename in enumerate(self.list_dir(relpath)):
> @@ -600,11 +600,15 @@
> Return a list of all files at the given location.
> """
> # does anything actually use this?
> + # -- Unknown
> + # This is at least used by copy_tree for remote upgrades.
> + # -- David Allouche 2006-08-11
> path = self._remote_path(relpath)
> try:
> - return self._sftp.listdir(path)
> + entries = self._sftp.listdir(path)
> except (IOError, paramiko.SSHException), e:
> self._translate_io_exception(e, path, ': failed to list_dir')
> + return [urllib.quote(entry) for entry in entries]
>
> def rmdir(self, relpath):
> """See Transport.rmdir."""
It is also used in v4->v6 branches when doing a clone, because it is
cheaper than extracting the inventories. Overall, I like what you've
done. So +1 from me. I'll wait a couple days to see if there is any
other feedback.
John
=:->
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 254 bytes
Desc: OpenPGP digital signature
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20060814/d859c757/attachment.pgp
More information about the bazaar
mailing list