Rev 5046: (mbp, for doxx) better win32 cmdline splitter in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Thu Feb 18 03:02:31 GMT 2010


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5046 [merge]
revision-id: pqm at pqm.ubuntu.com-20100218030228-dtbmt8pdtf21ndo7
parent: pqm at pqm.ubuntu.com-20100218011016-cx9drbdydd2xmu2p
parent: mbp at sourcefrog.net-20100218021548-kegv1m3k54jxjc2p
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Thu 2010-02-18 03:02:28 +0000
message:
  (mbp, for doxx) better win32 cmdline splitter
added:
  bzrlib/cmdline.py              bzrlibcmdline.py-20100202043522-83yorxx3tcigi7ap-1
  bzrlib/tests/test_cmdline.py   bzrlibteststest_cmdl-20100202043522-83yorxx3tcigi7ap-2
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/commands.py             bzr.py-20050309040720-d10f4714595cf8c3
  bzrlib/diff.py                 diff.py-20050309040759-26944fbbf2ebbf36
  bzrlib/rules.py                properties.py-20080506032617-9k06uqalkf09ck0z-1
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
  bzrlib/tests/test_commands.py  test_command.py-20051019190109-3b17be0f52eaa7a8
  bzrlib/tests/test_diff.py      testdiff.py-20050727164403-d1a3496ebb12e339
  bzrlib/tests/test_win32utils.py test_win32utils.py-20070713181630-8xsrjymd3e8mgw23-108
  bzrlib/win32utils.py           win32console.py-20051021033308-123c6c929d04973d
=== modified file 'NEWS'
--- a/NEWS	2010-02-17 15:50:09 +0000
+++ b/NEWS	2010-02-18 03:02:28 +0000
@@ -40,6 +40,10 @@
   automatically or by running ``apport-bug``.  No information is sent
   without specific permission from the user.  (Martin Pool, #515052)
 
+* Parsing of command lines, for example in ``diff --using``, no longer
+  treats backslash as an escape character on Windows.   (Gordon Tyler,
+  #392248)
+
 * Tree-shape conflicts can be resolved by providing ``--take-this`` and
   ``--take-other`` to the ``bzr resolve`` command. Just marking the conflict
   as resolved is still accessible via the ``--done`` default action.

=== added file 'bzrlib/cmdline.py'
--- a/bzrlib/cmdline.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/cmdline.py	2010-02-12 05:40:17 +0000
@@ -0,0 +1,160 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Unicode-compatible command-line splitter for all platforms."""
+
+import re
+
+
+_whitespace_match = re.compile(u'\s', re.UNICODE).match
+
+
+class _PushbackSequence(object):
+    def __init__(self, orig):
+        self._iter = iter(orig)
+        self._pushback_buffer = []
+        
+    def next(self):
+        if len(self._pushback_buffer) > 0:
+            return self._pushback_buffer.pop()
+        else:
+            return self._iter.next()
+    
+    def pushback(self, char):
+        self._pushback_buffer.append(char)
+        
+    def __iter__(self):
+        return self
+
+
+class _Whitespace(object):
+    def process(self, next_char, context):
+        if _whitespace_match(next_char):
+            if len(context.token) > 0:
+                return None
+            else:
+                return self
+        elif next_char in context.allowed_quote_chars:
+            context.quoted = True
+            return _Quotes(next_char, self)
+        elif next_char == u'\\':
+            return _Backslash(self)
+        else:
+            context.token.append(next_char)
+            return _Word()
+
+
+class _Quotes(object):
+    def __init__(self, quote_char, exit_state):
+        self.quote_char = quote_char
+        self.exit_state = exit_state
+
+    def process(self, next_char, context):
+        if next_char == u'\\':
+            return _Backslash(self)
+        elif next_char == self.quote_char:
+            return self.exit_state
+        else:
+            context.token.append(next_char)
+            return self
+
+
+class _Backslash(object):
+    # See http://msdn.microsoft.com/en-us/library/bb776391(VS.85).aspx
+    def __init__(self, exit_state):
+        self.exit_state = exit_state
+        self.count = 1
+        
+    def process(self, next_char, context):
+        if next_char == u'\\':
+            self.count += 1
+            return self
+        elif next_char in context.allowed_quote_chars:
+            # 2N backslashes followed by a quote are N backslashes
+            context.token.append(u'\\' * (self.count/2))
+            # 2N+1 backslashes followed by a quote are N backslashes followed by
+            # the quote which should not be processed as the start or end of
+            # the quoted arg
+            if self.count % 2 == 1:
+                # odd number of \ escapes the quote
+                context.token.append(next_char)
+            else:
+                # let exit_state handle next_char
+                context.seq.pushback(next_char)
+            self.count = 0
+            return self.exit_state
+        else:
+            # N backslashes not followed by a quote are just N backslashes
+            if self.count > 0:
+                context.token.append(u'\\' * self.count)
+                self.count = 0
+            # let exit_state handle next_char
+            context.seq.pushback(next_char)
+            return self.exit_state
+    
+    def finish(self, context):
+        if self.count > 0:
+            context.token.append(u'\\' * self.count)
+
+
+class _Word(object):
+    def process(self, next_char, context):
+        if _whitespace_match(next_char):
+            return None
+        elif next_char in context.allowed_quote_chars:
+            return _Quotes(next_char, self)
+        elif next_char == u'\\':
+            return _Backslash(self)
+        else:
+            context.token.append(next_char)
+            return self
+
+
+class Splitter(object):
+    def __init__(self, command_line, single_quotes_allowed):
+        self.seq = _PushbackSequence(command_line)
+        self.allowed_quote_chars = u'"'
+        if single_quotes_allowed:
+            self.allowed_quote_chars += u"'"
+    
+    def __iter__(self):
+        return self
+    
+    def next(self):
+        quoted, token = self._get_token()
+        if token is None:
+            raise StopIteration
+        return quoted, token
+    
+    def _get_token(self):
+        self.quoted = False
+        self.token = []
+        state = _Whitespace()
+        for next_char in self.seq:
+            state = state.process(next_char, self)
+            if state is None:
+                break
+        if not state is None and not getattr(state, 'finish', None) is None:
+            state.finish(self)
+        result = u''.join(self.token)
+        if not self.quoted and result == '':
+            result = None
+        return self.quoted, result
+
+
+def split(unsplit, single_quotes_allowed=True):
+    splitter = Splitter(unsplit, single_quotes_allowed=single_quotes_allowed)
+    return [arg for quoted, arg in splitter]

=== modified file 'bzrlib/commands.py'
--- a/bzrlib/commands.py	2010-02-11 01:13:46 +0000
+++ b/bzrlib/commands.py	2010-02-12 04:02:50 +0000
@@ -41,6 +41,7 @@
 import bzrlib
 from bzrlib import (
     cleanup,
+    cmdline,
     debug,
     errors,
     option,
@@ -873,11 +874,6 @@
     return ret
 
 
-def shlex_split_unicode(unsplit):
-    import shlex
-    return [u.decode('utf-8') for u in shlex.split(unsplit.encode('utf-8'))]
-
-
 def get_alias(cmd, config=None):
     """Return an expanded alias, or None if no alias exists.
 
@@ -893,7 +889,7 @@
         config = bzrlib.config.GlobalConfig()
     alias = config.get_alias(cmd)
     if (alias):
-        return shlex_split_unicode(alias)
+        return cmdline.split(alias)
     return None
 
 

=== modified file 'bzrlib/diff.py'
--- a/bzrlib/diff.py	2010-01-20 23:26:31 +0000
+++ b/bzrlib/diff.py	2010-02-02 06:30:43 +0000
@@ -31,7 +31,7 @@
 from bzrlib import (
     branch as _mod_branch,
     bzrdir,
-    commands,
+    cmdline,
     errors,
     osutils,
     patiencediff,
@@ -683,7 +683,7 @@
     @classmethod
     def from_string(klass, command_string, old_tree, new_tree, to_file,
                     path_encoding='utf-8'):
-        command_template = commands.shlex_split_unicode(command_string)
+        command_template = cmdline.split(command_string)
         if '@' not in command_string:
             command_template.extend(['@old_path', '@new_path'])
         return klass(command_template, old_tree, new_tree, to_file,

=== modified file 'bzrlib/rules.py'
--- a/bzrlib/rules.py	2009-05-07 05:08:46 +0000
+++ b/bzrlib/rules.py	2010-02-02 06:30:43 +0000
@@ -21,7 +21,7 @@
 
 from bzrlib import (
     config,
-    commands,
+    cmdline,
     errors,
     globbing,
     osutils,
@@ -81,8 +81,7 @@
         self.pattern_to_section = {}
         for s in sections:
             if s.startswith(FILE_PREFS_PREFIX):
-                file_patterns = commands.shlex_split_unicode(
-                    s[FILE_PREFS_PREFIX_LEN:])
+                file_patterns = cmdline.split(s[FILE_PREFS_PREFIX_LEN:])
                 patterns.extend(file_patterns)
                 for fp in file_patterns:
                     self.pattern_to_section[fp] = s

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2010-02-17 05:12:01 +0000
+++ b/bzrlib/tests/__init__.py	2010-02-18 02:15:48 +0000
@@ -3628,6 +3628,7 @@
         'bzrlib.tests.test_chunk_writer',
         'bzrlib.tests.test_clean_tree',
         'bzrlib.tests.test_cleanup',
+        'bzrlib.tests.test_cmdline',
         'bzrlib.tests.test_commands',
         'bzrlib.tests.test_commit',
         'bzrlib.tests.test_commit_merge',

=== added file 'bzrlib/tests/test_cmdline.py'
--- a/bzrlib/tests/test_cmdline.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_cmdline.py	2010-02-18 02:15:48 +0000
@@ -0,0 +1,93 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+from bzrlib import (
+    cmdline,
+    tests)
+
+class TestSplitter(tests.TestCase):
+
+    def assertAsTokens(self, expected, line, single_quotes_allowed=False):
+        s = cmdline.Splitter(line, single_quotes_allowed=single_quotes_allowed)
+        self.assertEqual(expected, list(s))
+
+    def test_simple(self):
+        self.assertAsTokens([(False, u'foo'), (False, u'bar'), (False, u'baz')],
+                            u'foo bar baz')
+
+    def test_ignore_multiple_spaces(self):
+        self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo  bar')
+
+    def test_ignore_leading_space(self):
+        self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'  foo bar')
+
+    def test_ignore_trailing_space(self):
+        self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar  ')
+
+    def test_posix_quotations(self):
+        self.assertAsTokens([(True, u'foo bar')], u"'foo bar'",
+            single_quotes_allowed=True)
+        self.assertAsTokens([(True, u'foo bar')], u"'fo''o b''ar'",
+            single_quotes_allowed=True)
+        self.assertAsTokens([(True, u'foo bar')], u'"fo""o b""ar"',
+            single_quotes_allowed=True)
+        self.assertAsTokens([(True, u'foo bar')], u'"fo"\'o b\'"ar"',
+            single_quotes_allowed=True)
+
+    def test_nested_quotations(self):
+        self.assertAsTokens([(True, u'foo"" bar')], u"\"foo\\\"\\\" bar\"")
+        self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"")
+        self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"",
+            single_quotes_allowed=True)
+        self.assertAsTokens([(True, u'foo"" bar')], u"'foo\"\" bar'",
+            single_quotes_allowed=True)
+
+    def test_empty_result(self):
+        self.assertAsTokens([], u'')
+        self.assertAsTokens([], u'    ')
+
+    def test_quoted_empty(self):
+        self.assertAsTokens([(True, '')], u'""')
+        self.assertAsTokens([(False, u"''")], u"''")
+        self.assertAsTokens([(True, '')], u"''", single_quotes_allowed=True)
+
+    def test_unicode_chars(self):
+        self.assertAsTokens([(False, u'f\xb5\xee'), (False, u'\u1234\u3456')],
+                             u'f\xb5\xee \u1234\u3456')
+
+    def test_newline_in_quoted_section(self):
+        self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u'"foo\nbar\nbaz\n"')
+        self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u"'foo\nbar\nbaz\n'",
+            single_quotes_allowed=True)
+
+    def test_escape_chars(self):
+        self.assertAsTokens([(False, u'foo\\bar')], u'foo\\bar')
+
+    def test_escape_quote(self):
+        self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"')
+        self.assertAsTokens([(True, u'foo\\"bar')], u'"foo\\\\\\"bar"')
+        self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\"bar"')
+
+    def test_double_escape(self):
+        self.assertAsTokens([(True, u'foo\\\\bar')], u'"foo\\\\bar"')
+        self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar")
+        
+    def test_multiple_quoted_args(self):
+        self.assertAsTokens([(True, u'x x'), (True, u'y y')],
+            u'"x x" "y y"')
+        self.assertAsTokens([(True, u'x x'), (True, u'y y')],
+            u'"x x" \'y y\'', single_quotes_allowed=True)

=== modified file 'bzrlib/tests/test_commands.py'
--- a/bzrlib/tests/test_commands.py	2009-05-23 21:01:51 +0000
+++ b/bzrlib/tests/test_commands.py	2010-01-14 13:17:33 +0000
@@ -111,7 +111,7 @@
 
     def test_unicode(self):
         my_config = self._get_config("[ALIASES]\n"
-            u"iam=whoami 'Erik B\u00e5gfors <erik at bagfors.nu>'\n")
+            u'iam=whoami "Erik B\u00e5gfors <erik at bagfors.nu>"\n')
         self.assertEqual([u'whoami', u'Erik B\u00e5gfors <erik at bagfors.nu>'],
                           commands.get_alias("iam", config=my_config))
 

=== modified file 'bzrlib/tests/test_diff.py'
--- a/bzrlib/tests/test_diff.py	2009-12-22 15:50:40 +0000
+++ b/bzrlib/tests/test_diff.py	2010-01-14 13:17:33 +0000
@@ -45,6 +45,8 @@
 from bzrlib.revisiontree import RevisionTree
 from bzrlib.revisionspec import RevisionSpec
 
+from bzrlib.tests.test_win32utils import BackslashDirSeparatorFeature
+
 
 class _AttribFeature(Feature):
 
@@ -1292,12 +1294,22 @@
             diff_obj.command_template)
 
     def test_from_string_u5(self):
-        diff_obj = DiffFromTool.from_string('diff -u\\ 5', None, None, None)
+        diff_obj = DiffFromTool.from_string('diff "-u 5"', None, None, None)
         self.addCleanup(diff_obj.finish)
         self.assertEqual(['diff', '-u 5', '@old_path', '@new_path'],
                          diff_obj.command_template)
         self.assertEqual(['diff', '-u 5', 'old-path', 'new-path'],
                          diff_obj._get_command('old-path', 'new-path'))
+        
+    def test_from_string_path_with_backslashes(self):
+        self.requireFeature(BackslashDirSeparatorFeature)
+        tool = 'C:\\Tools\\Diff.exe'
+        diff_obj = DiffFromTool.from_string(tool, None, None, None)
+        self.addCleanup(diff_obj.finish)
+        self.assertEqual(['C:\\Tools\\Diff.exe', '@old_path', '@new_path'],
+                         diff_obj.command_template)
+        self.assertEqual(['C:\\Tools\\Diff.exe', 'old-path', 'new-path'],
+                         diff_obj._get_command('old-path', 'new-path'))
 
     def test_execute(self):
         output = StringIO()

=== modified file 'bzrlib/tests/test_win32utils.py'
--- a/bzrlib/tests/test_win32utils.py	2010-01-25 17:48:22 +0000
+++ b/bzrlib/tests/test_win32utils.py	2010-02-02 06:39:31 +0000
@@ -288,70 +288,15 @@
 
 
 
-class TestUnicodeShlex(tests.TestCase):
-
-    def assertAsTokens(self, expected, line):
-        s = win32utils.UnicodeShlex(line)
-        self.assertEqual(expected, list(s))
-
-    def test_simple(self):
-        self.assertAsTokens([(False, u'foo'), (False, u'bar'), (False, u'baz')],
-                            u'foo bar baz')
-
-    def test_ignore_multiple_spaces(self):
-        self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo  bar')
-
-    def test_ignore_leading_space(self):
-        self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'  foo bar')
-
-    def test_ignore_trailing_space(self):
-        self.assertAsTokens([(False, u'foo'), (False, u'bar')], u'foo bar  ')
-
-    def test_posix_quotations(self):
-        self.assertAsTokens([(True, u'foo bar')], u'"foo bar"')
-        self.assertAsTokens([(False, u"'fo''o"), (False, u"b''ar'")],
-            u"'fo''o b''ar'")
-        self.assertAsTokens([(True, u'foo bar')], u'"fo""o b""ar"')
-        self.assertAsTokens([(True, u"fo'o"), (True, u"b'ar")],
-            u'"fo"\'o b\'"ar"')
-
-    def test_nested_quotations(self):
-        self.assertAsTokens([(True, u'foo"" bar')], u"\"foo\\\"\\\" bar\"")
-        self.assertAsTokens([(True, u'foo\'\' bar')], u"\"foo'' bar\"")
-
-    def test_empty_result(self):
-        self.assertAsTokens([], u'')
-        self.assertAsTokens([], u'    ')
-
-    def test_quoted_empty(self):
-        self.assertAsTokens([(True, '')], u'""')
-        self.assertAsTokens([(False, u"''")], u"''")
-
-    def test_unicode_chars(self):
-        self.assertAsTokens([(False, u'f\xb5\xee'), (False, u'\u1234\u3456')],
-                             u'f\xb5\xee \u1234\u3456')
-
-    def test_newline_in_quoted_section(self):
-        self.assertAsTokens([(True, u'foo\nbar\nbaz\n')], u'"foo\nbar\nbaz\n"')
-
-    def test_escape_chars(self):
-        self.assertAsTokens([(False, u'foo\\bar')], u'foo\\bar')
-
-    def test_escape_quote(self):
-        self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"')
-
-    def test_double_escape(self):
-        self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\bar"')
-        self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar")
-
 
 class Test_CommandLineToArgv(tests.TestCaseInTempDir):
 
-    def assertCommandLine(self, expected, line):
+    def assertCommandLine(self, expected, line, single_quotes_allowed=False):
         # Strictly speaking we should respect parameter order versus glob
         # expansions, but it's not really worth the effort here
-        self.assertEqual(expected,
-                         sorted(win32utils._command_line_to_argv(line)))
+        argv = win32utils._command_line_to_argv(line,
+                single_quotes_allowed=single_quotes_allowed)
+        self.assertEqual(expected, sorted(argv))
 
     def test_glob_paths(self):
         self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h'])
@@ -367,19 +312,25 @@
         self.build_tree(['a/', 'a/b.c', 'a/c.c', 'a/c.h'])
         self.assertCommandLine([u'a/*.c'], '"a/*.c"')
         self.assertCommandLine([u"'a/*.c'"], "'a/*.c'")
+        self.assertCommandLine([u'a/*.c'], "'a/*.c'",
+            single_quotes_allowed=True)
 
     def test_slashes_changed(self):
         # Quoting doesn't change the supplied args
         self.assertCommandLine([u'a\\*.c'], '"a\\*.c"')
+        self.assertCommandLine([u'a\\*.c'], "'a\\*.c'",
+            single_quotes_allowed=True)
         # Expands the glob, but nothing matches, swaps slashes
         self.assertCommandLine([u'a/*.c'], 'a\\*.c')
         self.assertCommandLine([u'a/?.c'], 'a\\?.c')
         # No glob, doesn't touch slashes
         self.assertCommandLine([u'a\\foo.c'], 'a\\foo.c')
 
-    def test_no_single_quote_supported(self):
+    def test_single_quote_support(self):
         self.assertCommandLine(["add", "let's-do-it.txt"],
             "add let's-do-it.txt")
+        self.assertCommandLine(["add", "lets do it.txt"],
+            "add 'lets do it.txt'", single_quotes_allowed=True)
 
     def test_case_insensitive_globs(self):
         self.requireFeature(tests.CaseInsCasePresFilenameFeature)

=== modified file 'bzrlib/win32utils.py'
--- a/bzrlib/win32utils.py	2010-02-04 16:06:36 +0000
+++ b/bzrlib/win32utils.py	2010-02-12 04:02:50 +0000
@@ -25,6 +25,7 @@
 import struct
 import sys
 
+from bzrlib import cmdline
 
 # Windows version
 if sys.platform == 'win32':
@@ -522,112 +523,21 @@
             trace.mutter('Unable to set hidden attribute on %r: %s', path, e)
 
 
-
-class UnicodeShlex(object):
-    """This is a very simplified version of shlex.shlex.
-
-    The main change is that it supports non-ascii input streams. The internal
-    structure is quite simplified relative to shlex.shlex, since we aren't
-    trying to handle multiple input streams, etc. In fact, we don't use a
-    file-like api either.
-    """
-
-    def __init__(self, uni_string):
-        self._input = uni_string
-        self._input_iter = iter(self._input)
-        self._whitespace_match = re.compile(u'\s').match
-        self._word_match = re.compile(u'\S').match
-        self._quote_chars = u'"'
-        # self._quote_match = re.compile(u'[\'"]').match
-        self._escape_match = lambda x: None # Never matches
-        self._escape = '\\'
-        # State can be
-        #   ' ' - after whitespace, starting a new token
-        #   'a' - after text, currently working on a token
-        #   '"' - after ", currently in a "-delimited quoted section
-        #   "\" - after '\', checking the next char
-        self._state = ' '
-        self._token = [] # Current token being parsed
-
-    def _get_token(self):
-        # Were there quote chars as part of this token?
-        quoted = False
-        quoted_state = None
-        for nextchar in self._input_iter:
-            if self._state == ' ':
-                if self._whitespace_match(nextchar):
-                    # if self._token: return token
-                    continue
-                elif nextchar in self._quote_chars:
-                    self._state = nextchar # quoted state
-                elif self._word_match(nextchar):
-                    self._token.append(nextchar)
-                    self._state = 'a'
-                else:
-                    raise AssertionError('wtttf?')
-            elif self._state in self._quote_chars:
-                quoted = True
-                if nextchar == self._state: # End of quote
-                    self._state = 'a' # posix allows 'foo'bar to translate to
-                                      # foobar
-                elif self._state == '"' and nextchar == self._escape:
-                    quoted_state = self._state
-                    self._state = nextchar
-                else:
-                    self._token.append(nextchar)
-            elif self._state == self._escape:
-                if nextchar == '\\':
-                    self._token.append('\\')
-                elif nextchar == '"':
-                    self._token.append(nextchar)
-                else:
-                    self._token.append('\\' + nextchar)
-                self._state = quoted_state
-            elif self._state == 'a':
-                if self._whitespace_match(nextchar):
-                    if self._token:
-                        break # emit this token
-                    else:
-                        continue # no token to emit
-                elif nextchar in self._quote_chars:
-                    # Start a new quoted section
-                    self._state = nextchar
-                # escape?
-                elif (self._word_match(nextchar)
-                      or nextchar in self._quote_chars
-                      # or whitespace_split?
-                      ):
-                    self._token.append(nextchar)
-                else:
-                    raise AssertionError('state == "a", char: %r'
-                                         % (nextchar,))
-            else:
-                raise AssertionError('unknown state: %r' % (self._state,))
-        result = ''.join(self._token)
-        self._token = []
-        if not quoted and result == '':
-            result = None
-        return quoted, result
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        quoted, token = self._get_token()
-        if token is None:
-            raise StopIteration
-        return quoted, token
-
-
-def _command_line_to_argv(command_line):
-    """Convert a Unicode command line into a set of argv arguments.
-
-    This does wildcard expansion, etc. It is intended to make wildcards act
-    closer to how they work in posix shells, versus how they work by default on
-    Windows.
-    """
-    s = UnicodeShlex(command_line)
-    # Now that we've split the content, expand globs
+def _command_line_to_argv(command_line, single_quotes_allowed=False):
+    """Convert a Unicode command line into a list of argv arguments.
+
+    It performs wildcard expansion to make wildcards act closer to how they
+    work in posix shells, versus how they work by default on Windows. Quoted
+    arguments are left untouched.
+
+    :param command_line: The unicode string to split into an arg list.
+    :param single_quotes_allowed: Whether single quotes are accepted as quoting
+                                  characters like double quotes. False by
+                                  default.
+    :return: A list of unicode strings.
+    """
+    s = cmdline.Splitter(command_line, single_quotes_allowed=single_quotes_allowed)
+    # Now that we've split the content, expand globs if necessary
     # TODO: Use 'globbing' instead of 'glob.glob', this gives us stuff like
     #       '**/' style globs
     args = []
@@ -641,14 +551,12 @@
 
 if has_ctypes and winver != 'Windows 98':
     def get_unicode_argv():
-        LPCWSTR = ctypes.c_wchar_p
-        INT = ctypes.c_int
-        POINTER = ctypes.POINTER
-        prototype = ctypes.WINFUNCTYPE(LPCWSTR)
-        GetCommandLine = prototype(("GetCommandLineW",
-                                    ctypes.windll.kernel32))
-        prototype = ctypes.WINFUNCTYPE(POINTER(LPCWSTR), LPCWSTR, POINTER(INT))
-        command_line = GetCommandLine()
+        prototype = ctypes.WINFUNCTYPE(ctypes.c_wchar_p)
+        GetCommandLineW = prototype(("GetCommandLineW",
+                                     ctypes.windll.kernel32))
+        command_line = GetCommandLineW()
+        if command_line is None:
+            raise ctypes.WinError()
         # Skip the first argument, since we only care about parameters
         argv = _command_line_to_argv(command_line)[1:]
         if getattr(sys, 'frozen', None) is None:




More information about the bazaar-commits mailing list