Rev 3512: Some code cleanups. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/win32_find_files

Thu Jul 17 04:46:19 BST 2008

At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/win32_find_files

------------------------------------------------------------
revno: 3512
revision-id: john at arbash-meinel.com-20080717034613-3cqwmu9mfshqwyet
parent: john at arbash-meinel.com-20080717024305-1odvs9kc7vqd3dum
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: win32_find_files
timestamp: Wed 2008-07-16 22:46:13 -0500
message:
  Some code cleanups.
  
  Remove extra comments.
  Use 64 bit integer math when possible.
  Use PyList_Append rather than foo.append()
  Use PyUnicode_AsUTF8String rather than codecs.encode()
  Make sure to raise an exception if the target directory doesn't exist.
  Seems to have made a significant performance impact.
-------------- next part --------------
=== modified file '.bzrignore'

--- a/.bzrignore	2008-07-16 22:06:22 +0000
+++ b/.bzrignore	2008-07-17 03:46:13 +0000
@@ -40,6 +40,7 @@
 doc/developers/performance.png
 bzrlib/_dirstate_helpers_c.c
 bzrlib/_knit_load_data_c.c
+bzrlib/_walkdirs_win32.c
 doc/en/release-notes/NEWS.txt
 doc/en/developer-guide/HACKING.txt
 doc/en/user-reference/bzr_man.txt
@@ -49,4 +50,3 @@
 bzrlib/_*_c.pyd
 # generated help topics
 doc/en/user-reference/*.txt
-bzrlib/_walkdirs_win32.c

=== modified file 'bzrlib/_walkdirs_win32.h'
--- a/bzrlib/_walkdirs_win32.h	2008-07-16 22:06:22 +0000
+++ b/bzrlib/_walkdirs_win32.h	2008-07-17 03:46:13 +0000
@@ -17,6 +17,8 @@
  */
 
 /* Header includes, etc for _walkdirs_win32
+ * Pyrex doesn't support #define, and defining WIN32_LEAN_AND_MEAN makes
+ * importing windows.h a lot less painful.
  */
 
 #define WIN32_LEAN_AND_MEAN

=== modified file 'bzrlib/_walkdirs_win32.pyx'
--- a/bzrlib/_walkdirs_win32.pyx	2008-07-17 02:43:05 +0000
+++ b/bzrlib/_walkdirs_win32.pyx	2008-07-17 03:46:13 +0000
@@ -21,7 +21,8 @@
     cdef struct _HANDLE:
         pass
     ctypedef _HANDLE *HANDLE
-    ctypedef unsigned int DWORD
+    ctypedef unsigned long DWORD
+    ctypedef unsigned long long __int64
     ctypedef unsigned short WCHAR
     cdef struct _FILETIME:
         DWORD dwHighDateTime
@@ -51,6 +52,7 @@
 
     cdef DWORD FILE_ATTRIBUTE_READONLY
     cdef DWORD FILE_ATTRIBUTE_DIRECTORY
+    cdef int ERROR_NO_MORE_FILES
 
     cdef int GetLastError()
 
@@ -62,9 +64,10 @@
     WCHAR *PyUnicode_AS_UNICODE(object)
     Py_ssize_t PyUnicode_GET_SIZE(object)
     object PyUnicode_FromUnicode(WCHAR *, Py_ssize_t)
-
-
-import codecs
+    int PyList_Append(object, object) except -1
+    object PyUnicode_AsUTF8String(object)
+
+
 import operator
 import stat
 
@@ -101,7 +104,6 @@
     cdef object _top
     cdef object _prefix
 
-    cdef object _utf8_encode
     cdef object _directory_kind
     cdef object _file_kind
 
@@ -112,12 +114,10 @@
         self._top = top
         self._prefix = prefix
 
-        self._utf8_encode = codecs.getencoder('utf8')
         self._directory_kind = osutils._directory_kind
         self._file_kind = osutils._formats[stat.S_IFREG]
 
-        self._pending = [(osutils.safe_utf8(prefix), None, None, None,
-                          osutils.safe_unicode(top))]
+        self._pending = [(osutils.safe_utf8(prefix), osutils.safe_unicode(top))]
         self._last_dirblock = None
 
     def __iter__(self):
@@ -141,7 +141,11 @@
         return mode_bits
 
     cdef object _get_size(self, WIN32_FIND_DATAW *data):
-        return long(data.nFileSizeLow) + (long(data.nFileSizeHigh) << 32)
+        # Pyrex casts a DWORD into a PyLong anyway, so it is safe to do << 32
+        # on a DWORD
+        cdef __int64 val
+        val = ((<__int64>data.nFileSizeHigh) << 32) + data.nFileSizeLow
+        return val
 
     cdef object _get_stat_value(self, WIN32_FIND_DATAW *data):
         """Get the filename and the stat information."""
@@ -179,12 +183,14 @@
         It also uses the epoch 1601-01-01 rather than 1970-01-01
         (taken from posixmodule.c)
         """
+        cdef __int64 val
         # NB: This gives slightly different results versus casting to a 64-bit
         #     integer and doing integer math before casting into a floating
         #     point number. But the difference is in the sub millisecond range,
         #     which doesn't seem critical here.
-        return ((ft.dwHighDateTime * 429.4967296 + ft.dwLowDateTime * 1e-7)
-                - 11644473600.0)
+        # secs between epochs: 11,644,473,600
+        val = ((<__int64>ft.dwHighDateTime) << 32) + ft.dwLowDateTime
+        return (val / 1.0e7) - 11644473600.0
 
     def _get_files_in(self, directory, relprefix):
         cdef WIN32_FIND_DATAW search_data
@@ -196,15 +202,12 @@
         top_star = directory + '*'
 
         dirblock = []
-        append = dirblock.append
 
         query = PyUnicode_AS_UNICODE(top_star)
         hFindFile = FindFirstFileW(query, &search_data)
         if hFindFile == INVALID_HANDLE_VALUE:
             # Raise an exception? This path doesn't seem to exist
-            last_err = GetLastError()
-            # Could be last_err == ERROR_FILE_NOT_FOUND
-            return []
+            raise WindowsError(GetLastError(), top_star)
 
         try:
             result = 1
@@ -214,15 +217,22 @@
                     result = FindNextFileW(hFindFile, &search_data)
                     continue
                 name_unicode = self._get_name(&search_data)
-                name_utf8 = self._utf8_encode(name_unicode)[0]
+                name_utf8 = PyUnicode_AsUTF8String(name_unicode)
                 relpath = relprefix + name_utf8
                 abspath = directory + name_unicode
-                append((relpath, name_utf8, 
-                        self._get_kind(&search_data),
-                        self._get_stat_value(&search_data),
-                        abspath))
+                PyList_Append(dirblock, 
+                    (relpath, name_utf8, 
+                     self._get_kind(&search_data),
+                     self._get_stat_value(&search_data),
+                     abspath))
 
                 result = FindNextFileW(hFindFile, &search_data)
+            # FindNextFileW sets GetLastError() == ERROR_NO_MORE_FILES when it
+            # actually finishes. If we have anything else, then we have a
+            # genuine problem
+            last_err = GetLastError()
+            if last_err != ERROR_NO_MORE_FILES:
+                raise WindowsError(last_err)
         finally:
             result = FindClose(hFindFile)
             if result == 0:
@@ -230,32 +240,22 @@
                 pass
         return dirblock
 
-        # for record in FindFilesIterator(top_star):
-        #     name = record[-2]
-        #     if name in ('.', '..'):
-        #         continue
-        #     attrib = record[0]
-        #     statvalue = osutils._Win32Stat(record)
-        #     name_utf8 = _utf8_encode(name)[0]
-        #     abspath = top_slash + name
-        #     if DIRECTORY & attrib == DIRECTORY:
-        #         kind = _directory
-        #     else:
-        #         kind = _file
-        #     append((relprefix + name_utf8, name_utf8, kind, statvalue, abspath))
-
-    def __next__(self):
+    cdef _update_pending(self):
+        """If we had a result before, add the subdirs to pending."""
         if self._last_dirblock is not None:
             # push the entries left in the dirblock onto the pending queue
             # we do this here, because we allow the user to modified the
             # queue before the next iteration
             for d in reversed(self._last_dirblock):
                 if d[2] == self._directory_kind:
-                    self._pending.append(d)
-
+                    self._pending.append((d[0], d[-1]))
+            self._last_dirblock = None
+        
+    def __next__(self):
+        self._update_pending()
         if not self._pending:
             raise StopIteration()
-        relroot, _, _, _, top = self._pending.pop()
+        relroot, top = self._pending.pop()
         # NB: At the moment Pyrex doesn't support Unicode literals, which means
         # that all of these string literals are going to be upcasted to Unicode
         # at runtime... :(

=== modified file 'bzrlib/tests/test__walkdirs_win32.py'
--- a/bzrlib/tests/test__walkdirs_win32.py	2008-07-17 02:21:33 +0000
+++ b/bzrlib/tests/test__walkdirs_win32.py	2008-07-17 03:46:13 +0000
@@ -16,6 +16,8 @@
 
 """Tests for the win32 walkdir extension."""
 
+import errno
+
 from bzrlib import tests
 
 
@@ -111,3 +113,10 @@
         third_dirblock = self._remove_stat_from_dirblock(third_dirblock)
         self.assertEqual(('c', u'./c'), dir_info)
         self.assertEqual([('c/cc', 'cc', 'file', u'./c/cc')], third_dirblock)
+
+    def test_missing_dir(self):
+        e = self.assertRaises(WindowsError, list,
+                                self.walkdirs_utf8(u'no_such_dir'))
+        self.assertEqual(errno.ENOENT, e.errno)
+        self.assertEqual(3, e.winerror)
+        self.assertEqual((3, u'no_such_dir/*'), e.args)