Rev 3512: Some code cleanups. in http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/win32_find_files
John Arbash Meinel
john at arbash-meinel.com
Thu Jul 17 04:46:19 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.6-dev/win32_find_files
------------------------------------------------------------
revno: 3512
revision-id: john at arbash-meinel.com-20080717034613-3cqwmu9mfshqwyet
parent: john at arbash-meinel.com-20080717024305-1odvs9kc7vqd3dum
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: win32_find_files
timestamp: Wed 2008-07-16 22:46:13 -0500
message:
Some code cleanups.
Remove extra comments.
Use 64 bit integer math when possible.
Use PyList_Append rather than foo.append()
Use PyUnicode_AsUTF8String rather than codecs.encode()
Make sure to raise an exception if the target directory doesn't exist.
Seems to have made a significant performance impact.
-------------- next part --------------
=== modified file '.bzrignore'
--- a/.bzrignore 2008-07-16 22:06:22 +0000
+++ b/.bzrignore 2008-07-17 03:46:13 +0000
@@ -40,6 +40,7 @@
doc/developers/performance.png
bzrlib/_dirstate_helpers_c.c
bzrlib/_knit_load_data_c.c
+bzrlib/_walkdirs_win32.c
doc/en/release-notes/NEWS.txt
doc/en/developer-guide/HACKING.txt
doc/en/user-reference/bzr_man.txt
@@ -49,4 +50,3 @@
bzrlib/_*_c.pyd
# generated help topics
doc/en/user-reference/*.txt
-bzrlib/_walkdirs_win32.c
=== modified file 'bzrlib/_walkdirs_win32.h'
--- a/bzrlib/_walkdirs_win32.h 2008-07-16 22:06:22 +0000
+++ b/bzrlib/_walkdirs_win32.h 2008-07-17 03:46:13 +0000
@@ -17,6 +17,8 @@
*/
/* Header includes, etc for _walkdirs_win32
+ * Pyrex doesn't support #define, and defining WIN32_LEAN_AND_MEAN makes
+ * importing windows.h a lot less painful.
*/
#define WIN32_LEAN_AND_MEAN
=== modified file 'bzrlib/_walkdirs_win32.pyx'
--- a/bzrlib/_walkdirs_win32.pyx 2008-07-17 02:43:05 +0000
+++ b/bzrlib/_walkdirs_win32.pyx 2008-07-17 03:46:13 +0000
@@ -21,7 +21,8 @@
cdef struct _HANDLE:
pass
ctypedef _HANDLE *HANDLE
- ctypedef unsigned int DWORD
+ ctypedef unsigned long DWORD
+ ctypedef unsigned long long __int64
ctypedef unsigned short WCHAR
cdef struct _FILETIME:
DWORD dwHighDateTime
@@ -51,6 +52,7 @@
cdef DWORD FILE_ATTRIBUTE_READONLY
cdef DWORD FILE_ATTRIBUTE_DIRECTORY
+ cdef int ERROR_NO_MORE_FILES
cdef int GetLastError()
@@ -62,9 +64,10 @@
WCHAR *PyUnicode_AS_UNICODE(object)
Py_ssize_t PyUnicode_GET_SIZE(object)
object PyUnicode_FromUnicode(WCHAR *, Py_ssize_t)
-
-
-import codecs
+ int PyList_Append(object, object) except -1
+ object PyUnicode_AsUTF8String(object)
+
+
import operator
import stat
@@ -101,7 +104,6 @@
cdef object _top
cdef object _prefix
- cdef object _utf8_encode
cdef object _directory_kind
cdef object _file_kind
@@ -112,12 +114,10 @@
self._top = top
self._prefix = prefix
- self._utf8_encode = codecs.getencoder('utf8')
self._directory_kind = osutils._directory_kind
self._file_kind = osutils._formats[stat.S_IFREG]
- self._pending = [(osutils.safe_utf8(prefix), None, None, None,
- osutils.safe_unicode(top))]
+ self._pending = [(osutils.safe_utf8(prefix), osutils.safe_unicode(top))]
self._last_dirblock = None
def __iter__(self):
@@ -141,7 +141,11 @@
return mode_bits
cdef object _get_size(self, WIN32_FIND_DATAW *data):
- return long(data.nFileSizeLow) + (long(data.nFileSizeHigh) << 32)
+ # Pyrex casts a DWORD into a PyLong anyway, so it is safe to do << 32
+ # on a DWORD
+ cdef __int64 val
+ val = ((<__int64>data.nFileSizeHigh) << 32) + data.nFileSizeLow
+ return val
cdef object _get_stat_value(self, WIN32_FIND_DATAW *data):
"""Get the filename and the stat information."""
@@ -179,12 +183,14 @@
It also uses the epoch 1601-01-01 rather than 1970-01-01
(taken from posixmodule.c)
"""
+ cdef __int64 val
# NB: This gives slightly different results versus casting to a 64-bit
# integer and doing integer math before casting into a floating
# point number. But the difference is in the sub millisecond range,
# which doesn't seem critical here.
- return ((ft.dwHighDateTime * 429.4967296 + ft.dwLowDateTime * 1e-7)
- - 11644473600.0)
+ # secs between epochs: 11,644,473,600
+ val = ((<__int64>ft.dwHighDateTime) << 32) + ft.dwLowDateTime
+ return (val / 1.0e7) - 11644473600.0
def _get_files_in(self, directory, relprefix):
cdef WIN32_FIND_DATAW search_data
@@ -196,15 +202,12 @@
top_star = directory + '*'
dirblock = []
- append = dirblock.append
query = PyUnicode_AS_UNICODE(top_star)
hFindFile = FindFirstFileW(query, &search_data)
if hFindFile == INVALID_HANDLE_VALUE:
# Raise an exception? This path doesn't seem to exist
- last_err = GetLastError()
- # Could be last_err == ERROR_FILE_NOT_FOUND
- return []
+ raise WindowsError(GetLastError(), top_star)
try:
result = 1
@@ -214,15 +217,22 @@
result = FindNextFileW(hFindFile, &search_data)
continue
name_unicode = self._get_name(&search_data)
- name_utf8 = self._utf8_encode(name_unicode)[0]
+ name_utf8 = PyUnicode_AsUTF8String(name_unicode)
relpath = relprefix + name_utf8
abspath = directory + name_unicode
- append((relpath, name_utf8,
- self._get_kind(&search_data),
- self._get_stat_value(&search_data),
- abspath))
+ PyList_Append(dirblock,
+ (relpath, name_utf8,
+ self._get_kind(&search_data),
+ self._get_stat_value(&search_data),
+ abspath))
result = FindNextFileW(hFindFile, &search_data)
+ # FindNextFileW sets GetLastError() == ERROR_NO_MORE_FILES when it
+ # actually finishes. If we have anything else, then we have a
+ # genuine problem
+ last_err = GetLastError()
+ if last_err != ERROR_NO_MORE_FILES:
+ raise WindowsError(last_err)
finally:
result = FindClose(hFindFile)
if result == 0:
@@ -230,32 +240,22 @@
pass
return dirblock
- # for record in FindFilesIterator(top_star):
- # name = record[-2]
- # if name in ('.', '..'):
- # continue
- # attrib = record[0]
- # statvalue = osutils._Win32Stat(record)
- # name_utf8 = _utf8_encode(name)[0]
- # abspath = top_slash + name
- # if DIRECTORY & attrib == DIRECTORY:
- # kind = _directory
- # else:
- # kind = _file
- # append((relprefix + name_utf8, name_utf8, kind, statvalue, abspath))
-
- def __next__(self):
+ cdef _update_pending(self):
+ """If we had a result before, add the subdirs to pending."""
if self._last_dirblock is not None:
# push the entries left in the dirblock onto the pending queue
# we do this here, because we allow the user to modified the
# queue before the next iteration
for d in reversed(self._last_dirblock):
if d[2] == self._directory_kind:
- self._pending.append(d)
-
+ self._pending.append((d[0], d[-1]))
+ self._last_dirblock = None
+
+ def __next__(self):
+ self._update_pending()
if not self._pending:
raise StopIteration()
- relroot, _, _, _, top = self._pending.pop()
+ relroot, top = self._pending.pop()
# NB: At the moment Pyrex doesn't support Unicode literals, which means
# that all of these string literals are going to be upcasted to Unicode
# at runtime... :(
=== modified file 'bzrlib/tests/test__walkdirs_win32.py'
--- a/bzrlib/tests/test__walkdirs_win32.py 2008-07-17 02:21:33 +0000
+++ b/bzrlib/tests/test__walkdirs_win32.py 2008-07-17 03:46:13 +0000
@@ -16,6 +16,8 @@
"""Tests for the win32 walkdir extension."""
+import errno
+
from bzrlib import tests
@@ -111,3 +113,10 @@
third_dirblock = self._remove_stat_from_dirblock(third_dirblock)
self.assertEqual(('c', u'./c'), dir_info)
self.assertEqual([('c/cc', 'cc', 'file', u'./c/cc')], third_dirblock)
+
+ def test_missing_dir(self):
+ e = self.assertRaises(WindowsError, list,
+ self.walkdirs_utf8(u'no_such_dir'))
+ self.assertEqual(errno.ENOENT, e.errno)
+ self.assertEqual(3, e.winerror)
+ self.assertEqual((3, u'no_such_dir/*'), e.args)
More information about the bazaar-commits
mailing list