Rev 4787: (jam) Fix bugs (#425510, #426410, #194450), in file:///home/pqm/archives/thelove/bzr/%2Btrunk/
Canonical.com Patch Queue Manager
pqm at pqm.ubuntu.com
Thu Nov 5 16:02:17 GMT 2009
At file:///home/pqm/archives/thelove/bzr/%2Btrunk/
------------------------------------------------------------
revno: 4787 [merge]
revision-id: pqm at pqm.ubuntu.com-20091105160215-rjpz3n473ixk4p3i
parent: pqm at pqm.ubuntu.com-20091104160630-zeuyqfu2frdr4vob
parent: john at arbash-meinel.com-20091104223213-foo7qmu39b26zdac
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2009-11-05 16:02:15 +0000
message:
(jam) Fix bugs (#425510, #426410, #194450),
add a custom win32 command line parser.
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/builtins.py builtins.py-20050830033751-fc01482b9ca23183
bzrlib/commands.py bzr.py-20050309040720-d10f4714595cf8c3
bzrlib/tests/test_win32utils.py test_win32utils.py-20070713181630-8xsrjymd3e8mgw23-108
bzrlib/win32utils.py win32console.py-20051021033308-123c6c929d04973d
=== modified file 'NEWS'
--- a/NEWS 2009-11-04 09:52:44 +0000
+++ b/NEWS 2009-11-04 22:29:40 +0000
@@ -29,6 +29,15 @@
allow those because XML store silently translate it anyway. (The parser
auto-translates \r\n => \n in ways that are hard for us to catch.)
+* On Windows, do glob expansion at the command-line level (as is usually
+ done in bash, etc.) This means that *all* commands get glob expansion
+ (bzr status, bzr add, bzr mv, etc). It uses a custom command line
+ parser, which allows us to know if a given section was quoted. It means
+ you can now do ``bzr ignore "*.py"``. It also means that single-quotes
+ are now treated as quote characters, e.g. ``bzr ignore '*.py'``.
+ (John Arbash Meinel, #425510, #426410, #194450)
+
+
Improvements
************
=== modified file 'bzrlib/builtins.py'
--- a/bzrlib/builtins.py 2009-11-03 20:24:25 +0000
+++ b/bzrlib/builtins.py 2009-11-04 22:32:13 +0000
@@ -655,7 +655,6 @@
if base_tree:
base_tree.lock_read()
try:
- file_list = self._maybe_expand_globs(file_list)
tree, file_list = tree_files_for_add(file_list)
added, ignored = tree.smart_add(file_list, not
no_recurse, action=action, save=not dry_run)
=== modified file 'bzrlib/commands.py'
--- a/bzrlib/commands.py 2009-10-14 20:02:28 +0000
+++ b/bzrlib/commands.py 2009-11-04 22:32:13 +0000
@@ -56,6 +56,7 @@
from bzrlib.symbol_versioning import (
deprecated_function,
deprecated_in,
+ deprecated_method,
suppress_deprecation_warnings,
)
@@ -383,18 +384,18 @@
# List of standard options directly supported
self.supported_std_options = []
+ @deprecated_method(deprecated_in((2, 1, 0)))
def _maybe_expand_globs(self, file_list):
"""Glob expand file_list if the platform does not do that itself.
+ Not used anymore, now that the bzr command-line parser globs on
+ Windows.
+
:return: A possibly empty list of unicode paths.
Introduced in bzrlib 0.18.
"""
- if not file_list:
- file_list = []
- if sys.platform == 'win32':
- file_list = win32utils.glob_expand(file_list)
- return list(file_list)
+ return file_list
def _usage(self):
"""Return single-line grammar for this command.
=== modified file 'bzrlib/tests/test_win32utils.py'
--- a/bzrlib/tests/test_win32utils.py 2009-07-03 14:26:34 +0000
+++ b/bzrlib/tests/test_win32utils.py 2009-11-04 22:12:46 +0000
@@ -17,7 +17,11 @@
import os
import sys
-from bzrlib import osutils
+from bzrlib import (
+ osutils,
+ tests,
+ win32utils,
+ )
from bzrlib.tests import (
Feature,
TestCase,
@@ -26,7 +30,6 @@
UnicodeFilenameFeature,
)
from bzrlib.win32utils import glob_expand, get_app_path
-from bzrlib import win32utils
# Features
@@ -261,3 +264,90 @@
os.makedirs(u'\u1234\\.bzr')
path = osutils.abspath(u'\u1234\\.bzr')
win32utils.set_file_attr_hidden(path)
+
+
+
+class TestUnicodeShlex(tests.TestCase):
+
+ def assertAsTokens(self, expected, line):
+ s = win32utils.UnicodeShlex(line)
+ self.assertEqual(expected, list(s))
+
+ def test_simple(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar'), (False, u'baz')],
+ u'foo bar baz')
+
+ def test_ignore_multiple_spaces(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar')
+
+ def test_ignore_leading_space(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u' foo bar')
+
+ def test_ignore_trailing_space(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar ')
+
+ def test_posix_quotations(self):
+ self.assertAsTokens([(True, u'foo bar')], u'"foo bar"')
+ self.assertAsTokens([(True, u'foo bar')], u"'foo bar'")
+ self.assertAsTokens([(True, u'foo bar')], u"'fo''o b''ar'")
+ self.assertAsTokens([(True, u'foo bar')], u'"fo""o b""ar"')
+ self.assertAsTokens([(True, u'foo bar')], u'"fo"\'o b\'"ar"')
+
+ def test_nested_quotations(self):
+ self.assertAsTokens([(True, u'foo"" bar')], u"'foo\"\" bar'")
+ self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"")
+
+ def test_empty_result(self):
+ self.assertAsTokens([], u'')
+ self.assertAsTokens([], u' ')
+
+ def test_quoted_empty(self):
+ self.assertAsTokens([(True, '')], u'""')
+ self.assertAsTokens([(True, '')], u"''")
+
+ def test_unicode_chars(self):
+ self.assertAsTokens([(False, u'f\xb5\xee'), (False, u'\u1234\u3456')],
+ u'f\xb5\xee \u1234\u3456')
+
+ def test_newline_in_quoted_section(self):
+ self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u'"foo\nbar\nbaz\n"')
+ self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u"'foo\nbar\nbaz\n'")
+
+ def test_escape_chars(self):
+ self.assertAsTokens([(False, u'foo\\bar')], u'foo\\bar')
+
+ def test_escape_quote(self):
+ self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"')
+ self.assertAsTokens([(True, u'foo\\"bar')], u"'foo\\\"bar'")
+
+ def test_double_escape(self):
+ self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\bar"')
+ self.assertAsTokens([(True, u'foo\\\\bar')], u"'foo\\\\bar'")
+ self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar")
+
+
+class Test_CommandLineToArgv(tests.TestCaseInTempDir):
+
+ def assertCommandLine(self, expected, line):
+ self.assertEqual(expected, win32utils._command_line_to_argv(line))
+
+ def test_glob_paths(self):
+ self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h'])
+ self.assertCommandLine([u'a/b.c', u'a/c.c'], 'a/*.c')
+ self.build_tree(['b/', 'b/b.c', 'b/d.c', 'b/d.h'])
+ self.assertCommandLine([u'a/b.c', u'b/b.c'], '*/b.c')
+ self.assertCommandLine([u'a/b.c', u'a/c.c', u'b/b.c', u'b/d.c'],
+ '*/*.c')
+ # Bash style, just pass through the argument if nothing matches
+ self.assertCommandLine([u'*/*.qqq'], '*/*.qqq')
+
+ def test_quoted_globs(self):
+ self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h'])
+ self.assertCommandLine([u'a/*.c'], '"a/*.c"')
+ self.assertCommandLine([u'a/*.c'], "'a/*.c'")
+
+ def test_slashes_changed(self):
+ self.assertCommandLine([u'a/*.c'], '"a\\*.c"')
+ # Expands the glob, but nothing matches
+ self.assertCommandLine([u'a/*.c'], 'a\\*.c')
+ self.assertCommandLine([u'a/foo.c'], 'a\\foo.c')
=== modified file 'bzrlib/win32utils.py'
--- a/bzrlib/win32utils.py 2009-07-08 14:37:25 +0000
+++ b/bzrlib/win32utils.py 2009-11-04 22:26:25 +0000
@@ -19,8 +19,12 @@
Only one dependency: ctypes should be installed.
"""
+import glob
import os
+import re
+import shlex
import struct
+import StringIO
import sys
@@ -422,6 +426,26 @@
+def glob_one(possible_glob):
+ """Same as glob.glob().
+
+ work around bugs in glob.glob()
+ - Python bug #1001604 ("glob doesn't return unicode with ...")
+ - failing expansion for */* with non-iso-8859-* chars
+ """
+ corrected_glob, corrected = _ensure_with_dir(possible_glob)
+ glob_files = glob.glob(corrected_glob)
+
+ if not glob_files:
+ # special case to let the normal code path handle
+ # files that do not exist, etc.
+ glob_files = [possible_glob]
+ elif corrected:
+ glob_files = [_undo_ensure_with_dir(elem, corrected)
+ for elem in glob_files]
+ return [elem.replace(u'\\', u'/') for elem in glob_files]
+
+
def glob_expand(file_list):
"""Replacement for glob expansion by the shell.
@@ -435,25 +459,10 @@
"""
if not file_list:
return []
- import glob
expanded_file_list = []
for possible_glob in file_list:
- # work around bugs in glob.glob()
- # - Python bug #1001604 ("glob doesn't return unicode with ...")
- # - failing expansion for */* with non-iso-8859-* chars
- possible_glob, corrected = _ensure_with_dir(possible_glob)
- glob_files = glob.glob(possible_glob)
-
- if glob_files == []:
- # special case to let the normal code path handle
- # files that do not exists
- expanded_file_list.append(
- _undo_ensure_with_dir(possible_glob, corrected))
- else:
- glob_files = [_undo_ensure_with_dir(elem, corrected) for elem in glob_files]
- expanded_file_list += glob_files
-
- return [elem.replace(u'\\', u'/') for elem in expanded_file_list]
+ expanded_file_list.extend(glob_one(possible_glob))
+ return expanded_file_list
def get_app_path(appname):
@@ -511,6 +520,124 @@
trace.mutter('Unable to set hidden attribute on %r: %s', path, e)
+
+class UnicodeShlex(object):
+ """This is a very simplified version of shlex.shlex.
+
+ The main change is that it supports non-ascii input streams. The internal
+ structure is quite simplified relative to shlex.shlex, since we aren't
+ trying to handle multiple input streams, etc. In fact, we don't use a
+ file-like api either.
+ """
+
+ def __init__(self, uni_string):
+ self._input = uni_string
+ self._input_iter = iter(self._input)
+ self._whitespace_match = re.compile(u'\s').match
+ self._word_match = re.compile(u'\S').match
+ self._quote_chars = u'\'"'
+ # self._quote_match = re.compile(u'[\'"]').match
+ self._escape_match = lambda x: None # Never matches
+ self._escape = '\\'
+ # State can be
+ # ' ' - after whitespace, starting a new token
+ # 'a' - after text, currently working on a token
+ # '"' - after ", currently in a "-delimited quoted section
+ # "'" - after ', currently in a '-delimited quoted section
+ # "\" - after '\', checking the next char
+ self._state = ' '
+ self._token = [] # Current token being parsed
+
+ def _get_token(self):
+ # Were there quote chars as part of this token?
+ quoted = False
+ quoted_state = None
+ for nextchar in self._input_iter:
+ if self._state == ' ':
+ if self._whitespace_match(nextchar):
+ # if self._token: return token
+ continue
+ elif nextchar in self._quote_chars:
+ self._state = nextchar # quoted state
+ elif self._word_match(nextchar):
+ self._token.append(nextchar)
+ self._state = 'a'
+ else:
+ raise AssertionError('wtttf?')
+ elif self._state in self._quote_chars:
+ quoted = True
+ if nextchar == self._state: # End of quote
+ self._state = 'a' # posix allows 'foo'bar to translate to
+ # foobar
+ elif self._state == '"' and nextchar == self._escape:
+ quoted_state = self._state
+ self._state = nextchar
+ else:
+ self._token.append(nextchar)
+ elif self._state == self._escape:
+ if nextchar == '\\':
+ self._token.append('\\')
+ elif nextchar == '"':
+ self._token.append(nextchar)
+ else:
+ self._token.append('\\' + nextchar)
+ self._state = quoted_state
+ elif self._state == 'a':
+ if self._whitespace_match(nextchar):
+ if self._token:
+ break # emit this token
+ else:
+ continue # no token to emit
+ elif nextchar in self._quote_chars:
+ # Start a new quoted section
+ self._state = nextchar
+ # escape?
+ elif (self._word_match(nextchar)
+ or nextchar in self._quote_chars
+ # or whitespace_split?
+ ):
+ self._token.append(nextchar)
+ else:
+ raise AssertionError('state == "a", char: %r'
+ % (nextchar,))
+ else:
+ raise AssertionError('unknown state: %r' % (self._state,))
+ result = ''.join(self._token)
+ self._token = []
+ if not quoted and result == '':
+ result = None
+ return quoted, result
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ quoted, token = self._get_token()
+ if token is None:
+ raise StopIteration
+ return quoted, token
+
+
+def _command_line_to_argv(command_line):
+ """Convert a Unicode command line into a set of argv arguments.
+
+ This does wildcard expansion, etc. It is intended to make wildcards act
+ closer to how they work in posix shells, versus how they work by default on
+ Windows.
+ """
+ s = UnicodeShlex(command_line)
+ # Now that we've split the content, expand globs
+ # TODO: Use 'globbing' instead of 'glob.glob', this gives us stuff like
+ # '**/' style globs
+ args = []
+ for is_quoted, arg in s:
+ if is_quoted or not glob.has_magic(arg):
+ args.append(arg.replace(u'\\', u'/'))
+ else:
+ args.extend(glob_one(arg))
+ return args
+
+
if has_ctypes and winver != 'Windows 98':
def get_unicode_argv():
LPCWSTR = ctypes.c_wchar_p
@@ -520,21 +647,19 @@
GetCommandLine = prototype(("GetCommandLineW",
ctypes.windll.kernel32))
prototype = ctypes.WINFUNCTYPE(POINTER(LPCWSTR), LPCWSTR, POINTER(INT))
- CommandLineToArgv = prototype(("CommandLineToArgvW",
- ctypes.windll.shell32))
- c = INT(0)
- pargv = CommandLineToArgv(GetCommandLine(), ctypes.byref(c))
+ command_line = GetCommandLine()
# Skip the first argument, since we only care about parameters
- argv = [pargv[i] for i in range(1, c.value)]
+ argv = _command_line_to_argv(GetCommandLine())[1:]
if getattr(sys, 'frozen', None) is None:
# Invoked via 'python.exe' which takes the form:
# python.exe [PYTHON_OPTIONS] C:\Path\bzr [BZR_OPTIONS]
# we need to get only BZR_OPTIONS part,
- # so let's using sys.argv[1:] as reference to get the tail
- # of unicode argv
- tail_len = len(sys.argv[1:])
- ix = len(argv) - tail_len
- argv = argv[ix:]
+ # We already removed 'python.exe' so we remove everything up to and
+ # including the first non-option ('-') argument.
+ for idx in xrange(len(argv)):
+ if argv[idx][:1] != '-':
+ break
+ argv = argv[idx+1:]
return argv
else:
get_unicode_argv = None
More information about the bazaar-commits
mailing list