Rev 4787: Work on doing globbing, etc for all commands on Windows.
John Arbash Meinel
john at
Wed Nov 4 22:04:07 GMT 2009
revno: 4787
revision-id: john at
parent: pqm at
committer: John Arbash Meinel <john at>
branch nick: 2.1.0b3-win32-shell-completion
timestamp: Wed 2009-11-04 16:03:46 -0600
Work on doing globbing, etc for all commands on Windows.
Implement a UnicodeShlex that takes the key bits from shlex.shlex.
Most notably, though, it works on a Unicode string, and not a collection
of input files. It also handles \ slightly differently, because backslashes
are frequently part of a path.
It also lets us know if a given section was quoted, so that we know if
we should skip glob expansion or not.
Then start expanding globs as appropriate.
-------------- next part --------------
=== modified file 'bzrlib/tests/'
--- a/bzrlib/tests/ 2009-07-03 14:26:34 +0000
+++ b/bzrlib/tests/ 2009-11-04 22:03:46 +0000
@@ -17,7 +17,11 @@
import os
import sys
-from bzrlib import osutils
+from bzrlib import (
+ osutils,
+ tests,
+ win32utils,
+ )
from bzrlib.tests import (
@@ -26,7 +30,6 @@
from bzrlib.win32utils import glob_expand, get_app_path
-from bzrlib import win32utils
# Features
@@ -261,3 +264,84 @@
path = osutils.abspath(u'\u1234\\.bzr')
+class TestUnicodeShlex(tests.TestCase):
+ def assertAsTokens(self, expected, line):
+ s = win32utils.UnicodeShlex(line)
+ self.assertEqual(expected, list(s))
+ def test_simple(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar'), (False, u'baz')],
+ u'foo bar baz')
+ def test_ignore_multiple_spaces(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar')
+ def test_ignore_leading_space(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u' foo bar')
+ def test_ignore_trailing_space(self):
+ self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar ')
+ def test_posix_quotations(self):
+ self.assertAsTokens([(True, u'foo bar')], u'"foo bar"')
+ self.assertAsTokens([(True, u'foo bar')], u"'foo bar'")
+ self.assertAsTokens([(True, u'foo bar')], u"'fo''o b''ar'")
+ self.assertAsTokens([(True, u'foo bar')], u'"fo""o b""ar"')
+ self.assertAsTokens([(True, u'foo bar')], u'"fo"\'o b\'"ar"')
+ def test_nested_quotations(self):
+ self.assertAsTokens([(True, u'foo"" bar')], u"'foo\"\" bar'")
+ self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"")
+ def test_empty_result(self):
+ self.assertAsTokens([], u'')
+ self.assertAsTokens([], u' ')
+ def test_quoted_empty(self):
+ self.assertAsTokens([(True, '')], u'""')
+ self.assertAsTokens([(True, '')], u"''")
+ def test_unicode_chars(self):
+ self.assertAsTokens([(False, u'f\xb5\xee'), (False, u'\u1234\u3456')],
+ u'f\xb5\xee \u1234\u3456')
+ def test_newline_in_quoted_section(self):
+ self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u'"foo\nbar\nbaz\n"')
+ self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u"'foo\nbar\nbaz\n'")
+ def test_escape_chars(self):
+ self.assertAsTokens([(False, u'foo\\bar')], u'foo\\bar')
+ def test_escape_quote(self):
+ self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"')
+ self.assertAsTokens([(True, u'foo\\"bar')], u"'foo\\\"bar'")
+ def test_double_escape(self):
+ self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\bar"')
+ self.assertAsTokens([(True, u'foo\\\\bar')], u"'foo\\\\bar'")
+ self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar")
+class Test_CommandLineToArgv(tests.TestCaseInTempDir):
+ def assertCommandLine(self, expected, line):
+ self.assertEqual(expected, win32utils._command_line_to_argv(line))
+ def test_glob_paths(self):
+ self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h'])
+ self.assertCommandLine([u'a/b.c', u'a/c.c'], 'a/*.c')
+ self.build_tree(['b/', 'b/b.c', 'b/d.c', 'b/d.h'])
+ self.assertCommandLine([u'a/b.c', u'b/b.c'], '*/b.c')
+ self.assertCommandLine([u'a/b.c', u'a/c.c', u'b/b.c', u'b/d.c'],
+ '*/*.c')
+ # Bash style, just pass through the argument if nothing matches
+ self.assertCommandLine([u'*/*.qqq'], '*/*.qqq')
+ def test_quoted_globs(self):
+ self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h'])
+ self.assertCommandLine([u'a/*.c'], '"a/*.c"')
+ self.assertCommandLine([u'a/*.c'], "'a/*.c'")
=== modified file 'bzrlib/'
--- a/bzrlib/ 2009-07-08 14:37:25 +0000
+++ b/bzrlib/ 2009-11-04 22:03:46 +0000
@@ -19,8 +19,12 @@
Only one dependency: ctypes should be installed.
+import glob
import os
+import re
+import shlex
import struct
+import StringIO
import sys
@@ -435,7 +439,6 @@
if not file_list:
return []
- import glob
expanded_file_list = []
for possible_glob in file_list:
# work around bugs in glob.glob()
@@ -511,6 +514,124 @@
trace.mutter('Unable to set hidden attribute on %r: %s', path, e)
+class UnicodeShlex(object):
+ """This is a very simplified version of shlex.shlex.
+ The main change is that it supports non-ascii input streams. The internal
+ structure is quite simplified relative to shlex.shlex, since we aren't
+ trying to handle multiple input streams, etc. In fact, we don't use a
+ file-like api either.
+ """
+ def __init__(self, uni_string):
+ self._input = uni_string
+ self._input_iter = iter(self._input)
+ self._whitespace_match = re.compile(u'\s').match
+ self._word_match = re.compile(u'\S').match
+ self._quote_chars = u'\'"'
+ # self._quote_match = re.compile(u'[\'"]').match
+ self._escape_match = lambda x: None # Never matches
+ self._escape = '\\'
+ # State can be
+ # ' ' - after whitespace, starting a new token
+ # 'a' - after text, currently working on a token
+ # '"' - after ", currently in a "-delimited quoted section
+ # "'" - after ', currently in a '-delimited quoted section
+ # "\" - after '\', checking the next char
+ self._state = ' '
+ self._token = [] # Current token being parsed
+ def _get_token(self):
+ # Were there quote chars as part of this token?
+ quoted = False
+ quoted_state = None
+ for nextchar in self._input_iter:
+ if self._state == ' ':
+ if self._whitespace_match(nextchar):
+ # if self._token: return token
+ continue
+ elif nextchar in self._quote_chars:
+ self._state = nextchar # quoted state
+ elif self._word_match(nextchar):
+ self._token.append(nextchar)
+ self._state = 'a'
+ else:
+ raise AssertionError('wtttf?')
+ elif self._state in self._quote_chars:
+ quoted = True
+ if nextchar == self._state: # End of quote
+ self._state = 'a' # posix allows 'foo'bar to translate to
+ # foobar
+ elif self._state == '"' and nextchar == self._escape:
+ quoted_state = self._state
+ self._state = nextchar
+ else:
+ self._token.append(nextchar)
+ elif self._state == self._escape:
+ if nextchar == '\\':
+ self._token.append('\\')
+ elif nextchar == '"':
+ self._token.append(nextchar)
+ else:
+ self._token.append('\\' + nextchar)
+ self._state = quoted_state
+ elif self._state == 'a':
+ if self._whitespace_match(nextchar):
+ if self._token:
+ break # emit this token
+ else:
+ continue # no token to emit
+ elif nextchar in self._quote_chars:
+ # Start a new quoted section
+ self._state = nextchar
+ # escape?
+ elif (self._word_match(nextchar)
+ or nextchar in self._quote_chars
+ # or whitespace_split?
+ ):
+ self._token.append(nextchar)
+ else:
+ raise AssertionError('state == "a", char: %r'
+ % (nextchar,))
+ else:
+ raise AssertionError('unknown state: %r' % (self._state,))
+ result = ''.join(self._token)
+ self._token = []
+ if not quoted and result == '':
+ result = None
+ return quoted, result
+ def __iter__(self):
+ return self
+ def next(self):
+ quoted, token = self._get_token()
+ if token is None:
+ raise StopIteration
+ return quoted, token
+def _command_line_to_argv(command_line):
+ """Convert a Unicode command line into a list of argv arguments.
+ This does wildcard expansion, etc. It is intended to make wildcards act
+ closer to how they work in posix shells, versus how they work by default on
+ Windows.
+ """
+ s = UnicodeShlex(command_line)
+ # Now that we've split the content, expand globs
+ # TODO: Use 'globbing' instead of 'glob.glob', this gives us stuff like
+ # '**/' style globs
+ args = []
+ for is_quoted, arg in s:
+ if is_quoted:
+ args.append(arg)
+ else:
+ args.extend(glob_expand([arg]))
+ return args
if has_ctypes and winver != 'Windows 98':
def get_unicode_argv():
LPCWSTR = ctypes.c_wchar_p
More information about the bazaar-commits
mailing list