Rev 5837: (gz) Bug #656170, be more aggressive about closing file handles (Martin [gz]) in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Sat May 7 23:58:41 UTC 2011


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5837 [merge]
revision-id: pqm at pqm.ubuntu.com-20110507235836-c277kl3ugz0s8nlw
parent: pqm at pqm.ubuntu.com-20110506163615-6xc9mzcb7t2824m2
parent: gzlist at googlemail.com-20110507230911-1960n4ca2gsrv2i7
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Sat 2011-05-07 23:58:36 +0000
message:
  (gz) Bug #656170, be more aggressive about closing file handles (Martin [gz])
modified:
  bzrlib/tests/per_transport.py  test_transport_implementations.py-20051227111451-f97c5c7d5c49fce7
  bzrlib/tests/test_sftp_transport.py testsftp.py-20051027032739-247570325fec7e7e
  bzrlib/transport/__init__.py   transport.py-20050711165921-4978aa7ce1285ad5
  bzrlib/transport/sftp.py       sftp.py-20051019050329-ab48ce71b7e32dfe
  doc/en/release-notes/bzr-2.4.txt bzr2.4.txt-20110114053217-k7ym9jfz243fddjm-1
=== modified file 'bzrlib/tests/per_transport.py'
--- a/bzrlib/tests/per_transport.py	2011-04-14 07:53:38 +0000
+++ b/bzrlib/tests/per_transport.py	2011-04-20 14:27:19 +0000
@@ -207,8 +207,17 @@
                     ]
         self.build_tree(files, transport=t, line_endings='binary')
         self.assertRaises(NoSuchFile, t.get, 'c')
-        self.assertListRaises(NoSuchFile, t.get_multi, ['a', 'b', 'c'])
-        self.assertListRaises(NoSuchFile, t.get_multi, iter(['a', 'b', 'c']))
+        def iterate_and_close(func, *args):
+            for f in func(*args):
+                # We call f.read() here because things like paramiko actually
+                # spawn a thread to prefetch the content, which we want to
+                # consume before we close the handle.
+                content = f.read()
+                f.close()
+        self.assertRaises(NoSuchFile, iterate_and_close,
+                          t.get_multi, ['a', 'b', 'c'])
+        self.assertRaises(NoSuchFile, iterate_and_close,
+                          t.get_multi, iter(['a', 'b', 'c']))
 
     def test_get_directory_read_gives_ReadError(self):
         """consistent errors for read() on a file returned by get()."""
@@ -1079,8 +1088,8 @@
         self.assertListRaises(NoSuchFile, t.stat_multi, iter(['a', 'c', 'd']))
         self.build_tree(['subdir/', 'subdir/file'], transport=t)
         subdir = t.clone('subdir')
-        subdir.stat('./file')
-        subdir.stat('.')
+        st = subdir.stat('./file')
+        st = subdir.stat('.')
 
     def test_hardlink(self):
         from stat import ST_NLINK

=== modified file 'bzrlib/tests/test_sftp_transport.py'
--- a/bzrlib/tests/test_sftp_transport.py	2011-04-15 07:01:22 +0000
+++ b/bzrlib/tests/test_sftp_transport.py	2011-05-07 23:09:11 +0000
@@ -417,7 +417,7 @@
 
 
 class ReadvFile(object):
-    """An object that acts like Paramiko's SFTPFile.readv()"""
+    """An object that acts like Paramiko's SFTPFile when readv() is used"""
 
     def __init__(self, data):
         self._data = data
@@ -426,6 +426,9 @@
         for start, length in requests:
             yield self._data[start:start+length]
 
+    def close(self):
+        pass
+
 
 def _null_report_activity(*a, **k):
     pass

=== modified file 'bzrlib/transport/__init__.py'
--- a/bzrlib/transport/__init__.py	2011-04-07 10:36:24 +0000
+++ b/bzrlib/transport/__init__.py	2011-04-27 08:05:26 +0000
@@ -672,7 +672,9 @@
 
         This uses _coalesce_offsets to issue larger reads and fewer seeks.
 
-        :param fp: A file-like object that supports seek() and read(size)
+        :param fp: A file-like object that supports seek() and read(size).
+            Note that implementations are allowed to call .close() on this file
+            handle, so don't trust that you can use it for other work.
         :param offsets: A list of offsets to be read from the given file.
         :return: yield (pos, data) tuples for each request
         """
@@ -689,37 +691,33 @@
 
         # Cache the results, but only until they have been fulfilled
         data_map = {}
-        for c_offset in coalesced:
-            # TODO: jam 20060724 it might be faster to not issue seek if
-            #       we are already at the right location. This should be
-            #       benchmarked.
-            fp.seek(c_offset.start)
-            data = fp.read(c_offset.length)
-            if len(data) < c_offset.length:
-                raise errors.ShortReadvError(relpath, c_offset.start,
-                            c_offset.length, actual=len(data))
-            for suboffset, subsize in c_offset.ranges:
-                key = (c_offset.start+suboffset, subsize)
-                data_map[key] = data[suboffset:suboffset+subsize]
+        try:
+            for c_offset in coalesced:
+                # TODO: jam 20060724 it might be faster to not issue seek if
+                #       we are already at the right location. This should be
+                #       benchmarked.
+                fp.seek(c_offset.start)
+                data = fp.read(c_offset.length)
+                if len(data) < c_offset.length:
+                    raise errors.ShortReadvError(relpath, c_offset.start,
+                                c_offset.length, actual=len(data))
+                for suboffset, subsize in c_offset.ranges:
+                    key = (c_offset.start+suboffset, subsize)
+                    data_map[key] = data[suboffset:suboffset+subsize]
 
-            # Now that we've read some data, see if we can yield anything back
-            while cur_offset_and_size in data_map:
-                this_data = data_map.pop(cur_offset_and_size)
-                this_offset = cur_offset_and_size[0]
-                try:
-                    cur_offset_and_size = offset_stack.next()
-                except StopIteration:
-                    # Close the file handle as there will be no more data
-                    # The handle would normally be cleaned up as this code goes
-                    # out of scope, but as we are a generator, not all code
-                    # will re-enter once we have consumed all the expected
-                    # data. For example:
-                    #   zip(range(len(requests)), readv(foo, requests))
-                    # Will stop because the range is done, and not run the
-                    # cleanup code for the readv().
-                    fp.close()
-                    cur_offset_and_size = None
-                yield this_offset, this_data
+                # Now that we've read some data, see if we can yield anything back
+                while cur_offset_and_size in data_map:
+                    this_data = data_map.pop(cur_offset_and_size)
+                    this_offset = cur_offset_and_size[0]
+                    try:
+                        cur_offset_and_size = offset_stack.next()
+                    except StopIteration:
+                        cur_offset_and_size = None
+                    yield this_offset, this_data
+        except:
+            fp.close()
+            raise
+        fp.close()
 
     def _sort_expand_and_combine(self, offsets, upper_limit):
         """Helper for readv.

=== modified file 'bzrlib/transport/sftp.py'
--- a/bzrlib/transport/sftp.py	2010-08-24 13:03:18 +0000
+++ b/bzrlib/transport/sftp.py	2011-04-20 14:27:19 +0000
@@ -281,6 +281,8 @@
                     buffered = buffered[buffered_offset:]
                     buffered_data = [buffered]
                     buffered_len = len(buffered)
+        # now that the data stream is done, close the handle
+        fp.close()
         if buffered_len:
             buffered = ''.join(buffered_data)
             del buffered_data[:]
@@ -421,11 +423,6 @@
         :param relpath: The relative path to the file
         """
         try:
-            # FIXME: by returning the file directly, we don't pass this
-            # through to report_activity.  We could try wrapping the object
-            # before it's returned.  For readv and get_bytes it's handled in
-            # the higher-level function.
-            # -- mbp 20090126
             path = self._remote_path(relpath)
             f = self._get_sftp().file(path, mode='rb')
             if self._do_prefetch and (getattr(f, 'prefetch', None) is not None):

=== modified file 'doc/en/release-notes/bzr-2.4.txt'
--- a/doc/en/release-notes/bzr-2.4.txt	2011-05-06 15:17:33 +0000
+++ b/doc/en/release-notes/bzr-2.4.txt	2011-05-07 23:58:36 +0000
@@ -167,6 +167,10 @@
   know about so far have been fixed, but there may be fallout for edge
   cases that we are missing. (John Arbash Meinel, #759091)
 
+* ``SFTPTransport`` is more pro-active about closing file-handles. This
+  reduces the chance of having threads fail from async requests while
+  running the test suite. (John Arbash Meinel, #656170)
+
 * Standalone bzr.exe installation on Windows: user can put additional python 
   libraries into ``site-packages`` subdirectory of the installation directory,
   this might be required for "installing" extra dependencies for some plugins.




More information about the bazaar-commits mailing list