Rev 4841: (bialix) Change the commandline parser to be even closer to the plain win32 one, in file:///home/pqm/archives/thelove/bzr/2.1/

Tue May 18 22:18:20 BST 2010

At file:///home/pqm/archives/thelove/bzr/2.1/

------------------------------------------------------------
revno: 4841 [merge]
revision-id: pqm at pqm.ubuntu.com-20100518211815-qtbvq8qj4gg0v7c8
parent: pqm at pqm.ubuntu.com-20100506084624-ii9nk3id2c5210us
parent: bialix at ukr.net-20100512161942-coa5f76tyjiheu02
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: 2.1
timestamp: Tue 2010-05-18 22:18:15 +0100
message:
  (bialix) Change the commandline parser to be even closer to the plain
  	win32 one.
modified:
  bzrlib/tests/test_win32utils.py test_win32utils.py-20070713181630-8xsrjymd3e8mgw23-108
  bzrlib/win32utils.py           win32console.py-20051021033308-123c6c929d04973d
=== modified file 'bzrlib/tests/test_win32utils.py'

--- a/bzrlib/tests/test_win32utils.py	2010-02-17 17:11:16 +0000
+++ b/bzrlib/tests/test_win32utils.py	2010-05-12 16:19:42 +0000
@@ -345,9 +345,29 @@
         self.assertAsTokens([(True, u'foo"bar')], u'"foo\\"bar"')
 
     def test_double_escape(self):
-        self.assertAsTokens([(True, u'foo\\bar')], u'"foo\\\\bar"')
+        self.assertAsTokens([(True, u'foo\\\\bar')], u'"foo\\\\bar"')
         self.assertAsTokens([(False, u'foo\\\\bar')], u"foo\\\\bar")
 
+    def test_n_backslashes_handling(self):
+        # https://bugs.launchpad.net/bzr/+bug/528944
+        # What we actually care about is doubled backslashes when they
+        # represent UNC paths.
+        # In practice there are too many weird corner cases
+        # (see https://bugs.launchpad.net/tortoisebzr/+bug/569050),
+        # so reproducing every bit of Windows command-line handling
+        # may not be worth the effort.
+        self.requireFeature(BackslashDirSeparatorFeature)
+        self.assertAsTokens([(True, r'\\host\path')], r'"\\host\path"')
+        self.assertAsTokens([(False, r'\\host\path')], r'\\host\path')
+        # handling of " after the 2n and 2n+1 backslashes
+        # inside and outside the quoted string
+        self.assertAsTokens([(True, r'\\'), (False, r'*.py')], r'"\\\\" *.py')
+        self.assertAsTokens([(True, r'\\" *.py')], r'"\\\\\" *.py"')
+        self.assertAsTokens([(True, r'\\ *.py')], r'\\\\" *.py"')
+        self.assertAsTokens([(False, r'\\"'), (False, r'*.py')],
+                            r'\\\\\" *.py')
+        self.assertAsTokens([(True, u'\\\\')], u'"\\\\')
+
 
 class Test_CommandLineToArgv(tests.TestCaseInTempDir):
 

=== modified file 'bzrlib/win32utils.py'
--- a/bzrlib/win32utils.py	2010-02-17 17:11:16 +0000
+++ b/bzrlib/win32utils.py	2010-05-12 16:19:42 +0000
@@ -536,70 +536,49 @@
         # self._quote_match = re.compile(u'[\'"]').match
         self._escape_match = lambda x: None # Never matches
         self._escape = '\\'
-        # State can be
-        #   ' ' - after whitespace, starting a new token
-        #   'a' - after text, currently working on a token
-        #   '"' - after ", currently in a "-delimited quoted section
-        #   "\" - after '\', checking the next char
-        self._state = ' '
         self._token = [] # Current token being parsed
 
     def _get_token(self):
         # Were there quote chars as part of this token?
-        quoted = False
-        quoted_state = None
+        quoted = None   # state:
+                        #  None - the string is not quoted
+                        #  empty string ('') - there was quoted substring
+                        #  double quote (") - we're inside quoted chunk
+        number_of_backslashes = 0
         for nextchar in self._input_iter:
-            if self._state == ' ':
-                if self._whitespace_match(nextchar):
-                    # if self._token: return token
-                    continue
-                elif nextchar in self._quote_chars:
-                    self._state = nextchar # quoted state
-                elif self._word_match(nextchar):
-                    self._token.append(nextchar)
-                    self._state = 'a'
-                else:
-                    raise AssertionError('wtttf?')
-            elif self._state in self._quote_chars:
-                quoted = True
-                if nextchar == self._state: # End of quote
-                    self._state = 'a' # posix allows 'foo'bar to translate to
-                                      # foobar
-                elif self._state == '"' and nextchar == self._escape:
-                    quoted_state = self._state
-                    self._state = nextchar
-                else:
-                    self._token.append(nextchar)
-            elif self._state == self._escape:
-                if nextchar == '\\':
-                    self._token.append('\\')
-                elif nextchar == '"':
-                    self._token.append(nextchar)
-                else:
-                    self._token.append('\\' + nextchar)
-                self._state = quoted_state
-            elif self._state == 'a':
-                if self._whitespace_match(nextchar):
-                    if self._token:
-                        break # emit this token
+            if self._whitespace_match(nextchar):
+                if quoted:
+                    self._token.append(nextchar)
+                elif self._token:
+                    break
+            elif nextchar == '\\':
+                number_of_backslashes += 1
+            elif nextchar in self._quote_chars:
+                if number_of_backslashes:
+                    self._token.append('\\'*(number_of_backslashes/2))
+                    if number_of_backslashes % 2:
+                        self._token.append('"')
                     else:
-                        continue # no token to emit
-                elif nextchar in self._quote_chars:
-                    # Start a new quoted section
-                    self._state = nextchar
-                # escape?
-                elif (self._word_match(nextchar)
-                      or nextchar in self._quote_chars
-                      # or whitespace_split?
-                      ):
-                    self._token.append(nextchar)
+                        if quoted:
+                            quoted = ''
+                        else:
+                            quoted = nextchar
+                    number_of_backslashes = 0
+                elif nextchar == quoted:
+                    # end of quoted string
+                    quoted = ''
                 else:
-                    raise AssertionError('state == "a", char: %r'
-                                         % (nextchar,))
+                    quoted = nextchar
             else:
-                raise AssertionError('unknown state: %r' % (self._state,))
+                if number_of_backslashes:
+                    self._token.append('\\'*number_of_backslashes)
+                    number_of_backslashes = 0
+                self._token.append(nextchar)
+        if number_of_backslashes > 0:
+            self._token.append('\\'*number_of_backslashes)
         result = ''.join(self._token)
         self._token = []
+        quoted = quoted is not None
         if not quoted and result == '':
             result = None
         return quoted, result