[patch] encoding branch

Wouter van Heyst larstiq at larstiq.dyndns.org
Wed Jun 7 18:42:44 BST 2006


Hi,

John and I went through his encoding branch and came up with the
following patch. It's a mere 5800 lines for your reviewing pleasure.

Wouter van Heyst
-------------- next part --------------
=== added file 'bzrlib/tests/EncodingAdapter.py'
--- /dev/null	
+++ bzrlib/tests/EncodingAdapter.py	
@@ -0,0 +1,123 @@
+# Copyright (C) 2006 by Canonical Ltd
+# -*- coding: utf-8 -*-
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Adapter for running test cases against multiple encodings."""
+
+from copy import deepcopy
+
+from bzrlib.tests import TestSuite
+
+
+# prefix for micro (1/1000000)
+_mu = u'\xb5'
+
+# Swedish?
+_erik = u'Erik B\xe5gfors'
+
+# Swedish 'räksmörgås' means shrimp sandwich
+_shrimp_sandwich = u'r\xe4ksm\xf6rg\xe5s'
+
+# Arabic, probably only Unicode encodings can handle this one
+_juju = u'\u062c\u0648\u062c\u0648'
+
+# iso-8859-1 alternative for juju
+_juju_alt = u'j\xfbj\xfa'
+
+# Russian: 'Alexander' in Russian
+_alexander = u'\u0410\u043b\u0435\u043a\u0441\u0430\u043d\u0434\u0440'
+# No idea if this means anything, but we need another string
+_alex = u'\u0410\u043b\u0435\u043a'
+
+# Kanji
+# It is a kanji sequence for nihonjin, or Japanese in English.
+# 
+# '\u4eba' being person, '\u65e5' sun and '\u672c' origin. I.e.,
+# sun-origin-person, 'native from the land where the sun rises'. Note, I'm
+# not a fluent speaker, so this is just my crude breakdown.
+# 
+# Wouter van Heyst
+_nihonjin = u'\u65e5\u672c\u4eba'
+
+# Czech
+# It's what is usually used for showing how fonts look, because it contains
+# most accented characters, i.e. it is used where English speakers use 'Quick
+# brown fox jumped over a lazy dog'. The literal translation of the Czech
+# version would be something like 'Yellow horse groaned devilish codes'.
+# Originally the last word was 'ódy' (odes); the 'k' was added as a pun when
+# using the sentence to check whether one has set the encoding properly.
+_yellow_horse = (u'\u017dlu\u0165ou\u010dk\xfd k\u016f\u0148'
+                 u' \xfap\u011bl \u010f\xe1belsk\xe9 k\xf3dy')
+_yellow = u'\u017dlu\u0165ou\u010dk\xfd'
+_someone = u'Some\u016f\u0148\u011b'
+_something = u'\u0165ou\u010dk\xfd'
+
+# Hebrew
+# Shalom -> 'hello' or 'peace', used as a common greeting
+_shalom = u'\u05e9\u05dc\u05d5\u05dd'
+
+
+class EncodingTestAdapter(object):
+    """A tool to generate a suite, testing multiple encodings for a single test.
+    
+    This is similar to bzrlib.transport.TransportTestProviderAdapter.
+    It is done by copying the test once for each encoding, and injecting
+    the encoding name, and the list of valid strings for that encoding.
+    Each copy is also given a new id() to make it easy to identify.
+    """
+
+    _encodings = [
+        # Permutation 1 of utf-8
+        ('utf-8', 1, {'committer':_erik
+                  , 'message':_yellow_horse
+                  , 'filename':_shrimp_sandwich
+                  , 'directory':_nihonjin}),
+        # Permutation 2 of utf-8
+        ('utf-8', 2, {'committer':_alexander
+                  , 'message':u'Testing ' + _mu
+                  , 'filename':_shalom
+                  , 'directory':_juju}),
+        ('iso-8859-1', 0, {'committer':_erik
+                  , 'message':u'Testing ' + _mu
+                  , 'filename':_juju_alt
+                  , 'directory':_shrimp_sandwich}),
+        ('iso-8859-2', 0, {'committer':_someone
+                  , 'message':_yellow_horse
+                  , 'filename':_yellow
+                  , 'directory':_something}),
+        ('cp1251', 0, {'committer':_alexander
+                  , 'message':u'Testing ' + _mu
+                  , 'filename':_alex
+                  , 'directory':_alex + 'dir'}),
+    ]
+
+    def adapt(self, test):
+        result = TestSuite()
+        for encoding, count, info in self._encodings:
+            new_test = deepcopy(test)
+            new_test.encoding = encoding
+            new_test.info = info
+            def make_new_test_id():
+                if count:
+                    new_id = "%s(%s,%s)" % (new_test.id(), encoding, count)
+                else:
+                    new_id = "%s(%s)" % (new_test.id(), encoding)
+                return lambda: new_id
+            new_test.id = make_new_test_id()
+            result.addTest(new_test)
+        return result
+
+

=== added file 'bzrlib/tests/blackbox/test_command_encoding.py'
--- /dev/null	
+++ bzrlib/tests/blackbox/test_command_encoding.py	
@@ -0,0 +1,118 @@
+# Copyright (C) 2005 by Canonical Ltd
+# -*- coding: utf-8 -*-
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Tests for the Command.encoding_type interface."""
+
+from bzrlib.tests import TestCase
+from bzrlib.commands import Command, register_command, plugin_cmds
+
+
+class cmd_echo_exact(Command):
+    """This command just repeats what it is given.
+
+    It decodes the argument, and then writes it to stdout.
+    """
+
+    takes_args = ['text']
+    encoding_type = 'exact'
+
+    def run(self, text=None):
+        self.outf.write(text)
+
+
+class cmd_echo_strict(cmd_echo_exact):
+    """Fail on unicode characters that cannot be encoded."""
+
+    encoding_type = 'strict'
+
+
+class cmd_echo_replace(cmd_echo_exact):
+    """Replace bogus unicode characters."""
+
+    encoding_type = 'replace'
+
+
+class TestCommandEncoding(TestCase):
+    
+    def test_exact(self):
+        def bzr(*args, **kwargs):
+            return self.run_bzr(*args, **kwargs)[0]
+
+        register_command(cmd_echo_exact)
+        try:
+            self.assertEqual('foo', bzr('echo-exact', 'foo'))
+            # This is cheating a little bit, because 'foo\xb5' shouldn't
+            # get past main()
+            self.assertEqual('foo\xb5', bzr('echo-exact', 'foo\xb5'))
+            # Exact should fail to decode the string
+            bzr('echo-exact', u'foo\xb5', retcode=3)
+        finally:
+            plugin_cmds.pop('echo-exact')
+
+    def test_strict_utf8(self):
+        def bzr(*args, **kwargs):
+            kwargs['encoding'] = 'utf-8'
+            return self.run_bzr(*args, **kwargs)[0]
+
+        register_command(cmd_echo_strict)
+        try:
+            self.assertEqual('foo', bzr('echo-strict', 'foo'))
+            self.assertEqual(u'foo\xb5'.encode('utf-8'),
+                bzr('echo-strict', u'foo\xb5'))
+        finally:
+            plugin_cmds.pop('echo-strict')
+
+    def test_strict_ascii(self):
+        def bzr(*args, **kwargs):
+            kwargs['encoding'] = 'ascii'
+            return self.run_bzr(*args, **kwargs)[0]
+
+        register_command(cmd_echo_strict)
+        try:
+            self.assertEqual('foo', bzr('echo-strict', 'foo'))
+            # ascii can't encode \xb5
+            bzr('echo-strict', u'foo\xb5', retcode=3)
+        finally:
+            plugin_cmds.pop('echo-strict')
+
+    def test_replace_utf8(self):
+        def bzr(*args, **kwargs):
+            kwargs['encoding'] = 'utf-8'
+            return self.run_bzr(*args, **kwargs)[0]
+
+        register_command(cmd_echo_replace)
+        try:
+            self.assertEqual('foo', bzr('echo-replace', 'foo'))
+            self.assertEqual(u'foo\xb5'.encode('utf-8'),
+                             bzr('echo-replace', u'foo\xb5'))
+        finally:
+            plugin_cmds.pop('echo-replace')
+
+    def test_replace_ascii(self):
+        def bzr(*args, **kwargs):
+            kwargs['encoding'] = 'ascii'
+            return self.run_bzr(*args, **kwargs)[0]
+
+        register_command(cmd_echo_replace)
+        try:
+            self.assertEqual('foo', bzr('echo-replace', 'foo'))
+            # ascii can't encode \xb5
+            self.assertEqual('foo?', bzr('echo-replace', u'foo\xb5'))
+        finally:
+            plugin_cmds.pop('echo-replace')
+
+

=== added file 'bzrlib/tests/blackbox/test_non_ascii.py'
--- /dev/null	
+++ bzrlib/tests/blackbox/test_non_ascii.py	
@@ -0,0 +1,487 @@
+# Copyright (C) 2006 by Canonical Ltd
+# -*- coding: utf-8 -*-
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Black-box tests for bzr handling non-ascii characters."""
+
+import sys
+import os
+
+import bzrlib
+from bzrlib.tests import TestCaseInTempDir, TestSkipped
+from bzrlib.trace import mutter, note
+import bzrlib.urlutils as urlutils
+
+
+class TestNonAscii(TestCaseInTempDir):
+    """Test that bzr handles files/committers/etc which are non-ascii."""
+
+    def setUp(self):
+        super(TestNonAscii, self).setUp()
+        self._orig_email = os.environ.get('BZREMAIL', None)
+        self._orig_encoding = bzrlib.user_encoding
+
+        bzrlib.user_encoding = self.encoding
+        email = self.info['committer'] + ' <joe@foo.com>'
+        os.environ['BZREMAIL'] = email.encode(bzrlib.user_encoding)
+        self.create_base()
+
+    def tearDown(self):
+        if self._orig_email is not None:
+            os.environ['BZREMAIL'] = self._orig_email
+        else:
+            if os.environ.get('BZREMAIL', None) is not None:
+                del os.environ['BZREMAIL']
+        bzrlib.user_encoding = self._orig_encoding
+        super(TestNonAscii, self).tearDown()
+
+    def create_base(self):
+        bzr = self.run_bzr
+
+        fs_enc = sys.getfilesystemencoding()
+        fname = self.info['filename']
+        dir_name = self.info['directory']
+        for thing in [fname, dir_name]:
+            try:
+                thing.encode(fs_enc)
+            except UnicodeEncodeError:
+                raise TestSkipped(('Unable to represent path %r'
+                                   ' in filesystem encoding %s')
+                                    % (thing, fs_enc))
+
+        bzr('init')
+        open('a', 'wb').write('foo\n')
+        bzr('add', 'a')
+        bzr('commit', '-m', 'adding a')
+
+        open('b', 'wb').write('non-ascii \xFF\xFF\xFC\xFB\x00 in b\n')
+        bzr('add', 'b')
+        bzr('commit', '-m', self.info['message'])
+
+        open(fname, 'wb').write('unicode filename\n')
+        bzr('add', fname)
+        bzr('commit', '-m', u'And a unicode file\n')
+
+    def test_status(self):
+        bzr = self.run_bzr_decode
+
+        open(self.info['filename'], 'ab').write('added something\n')
+        txt = bzr('status')
+        self.assertEqual(u'modified:\n  %s\n' % (self.info['filename'],), txt)
+
+        txt = bzr('status', encoding='ascii')
+        expected = u'modified:\n  %s\n' % (
+                    self.info['filename'].encode('ascii', 'replace'),)
+        self.assertEqual(expected, txt)
+
+    def test_cat(self):
+        # bzr cat shouldn't change the contents
+        # using run_bzr since that doesn't decode
+        txt = self.run_bzr('cat', 'b')[0]
+        self.assertEqual('non-ascii \xFF\xFF\xFC\xFB\x00 in b\n', txt)
+
+        txt = self.run_bzr('cat', self.info['filename'])[0]
+        self.assertEqual('unicode filename\n', txt)
+
+    def test_cat_revision(self):
+        bzr = self.run_bzr_decode
+
+        committer = self.info['committer']
+        txt = bzr('cat-revision', '-r', '1')
+        self.failUnless(committer in txt,
+                        'failed to find %r in %r' % (committer, txt))
+
+        msg = self.info['message']
+        txt = bzr('cat-revision', '-r', '2')
+        self.failUnless(msg in txt, 'failed to find %r in %r' % (msg, txt))
+
+    def test_mkdir(self):
+        bzr = self.run_bzr_decode
+
+        txt = bzr('mkdir', self.info['directory'])
+        self.assertEqual(u'added %s\n' % self.info['directory'], txt)
+
+        # The text should be garbled, but the command should succeed
+        txt = bzr('mkdir', self.info['directory'] + '2', encoding='ascii')
+        expected = u'added %s2\n' % (self.info['directory'],)
+        expected = expected.encode('ascii', 'replace')
+        self.assertEqual(expected, txt)
+
+    def test_relpath(self):
+        bzr = self.run_bzr_decode
+
+        txt = bzr('relpath', self.info['filename'])
+        self.assertEqual(self.info['filename'] + '\n', txt)
+
+        bzr('relpath', self.info['filename'], encoding='ascii', retcode=3)
+
+    def test_inventory(self):
+        bzr = self.run_bzr_decode
+
+        txt = bzr('inventory')
+        self.assertEqual(['a', 'b', self.info['filename']],
+                         txt.splitlines())
+
+        # inventory should fail if unable to encode
+        bzr('inventory', encoding='ascii', retcode=3)
+
+        # We don't really care about the ids themselves,
+        # but the command shouldn't fail
+        txt = bzr('inventory', '--show-ids')
+
+    def test_revno(self):
+        # There isn't a lot to test here, since revno should always
+        # be an integer
+        bzr = self.run_bzr_decode
+
+        self.assertEqual('3\n', bzr('revno'))
+        self.assertEqual('3\n', bzr('revno', encoding='ascii'))
+
+    def test_revision_info(self):
+        bzr = self.run_bzr_decode
+
+        bzr('revision-info', '-r', '1')
+
+        # TODO: jam 20060105 If we support revisions with non-ascii characters,
+        # this should be strict and fail.
+        bzr('revision-info', '-r', '1', encoding='ascii')
+
+    def test_mv(self):
+        bzr = self.run_bzr_decode
+
+        fname1 = self.info['filename']
+        fname2 = self.info['filename'] + '2'
+        dirname = self.info['directory']
+
+        # fname1 already exists
+        bzr('mv', 'a', fname1, retcode=3)
+
+        txt = bzr('mv', 'a', fname2)
+        self.assertEqual(u'a => %s\n' % fname2, txt)
+        self.failIfExists('a')
+        self.failUnlessExists(fname2)
+
+        bzr('commit', '-m', 'renamed to non-ascii')
+
+        bzr('mkdir', dirname)
+        txt = bzr('mv', fname1, fname2, dirname)
+        self.assertEqual([u'%s => %s/%s' % (fname1, dirname, fname1),
+                          u'%s => %s/%s' % (fname2, dirname, fname2)]
+                         , txt.splitlines())
+
+        # The rename should still succeed
+        newpath = u'%s/%s' % (dirname, fname2)
+        txt = bzr('mv', newpath, 'a', encoding='ascii')
+        self.failUnlessExists('a')
+        self.assertEqual(newpath.encode('ascii', 'replace') + ' => a\n', txt)
+
+    def test_branch(self):
+        # We should be able to branch into a directory that
+        # has a unicode name, even if we can't display the name
+        bzr = self.run_bzr_decode
+        bzr('branch', u'.', self.info['directory'])
+        bzr('branch', u'.', self.info['directory'] + '2', encoding='ascii')
+
+    def test_pull(self):
+        # Make sure we can pull from paths that can't be encoded
+        bzr = self.run_bzr_decode
+
+        dirname1 = self.info['directory']
+        dirname2 = self.info['directory'] + '2'
+        bzr('branch', '.', dirname1)
+        bzr('branch', dirname1, dirname2)
+
+        os.chdir(dirname1)
+        open('a', 'ab').write('more text\n')
+        bzr('commit', '-m', 'mod a')
+
+        pwd = os.getcwdu()
+
+        os.chdir(u'../' + dirname2)
+        txt = bzr('pull')
+
+        self.assertEqual(u'Using saved location: %s/\n' % (pwd,), txt)
+
+        os.chdir('../' + dirname1)
+        open('a', 'ab').write('and yet more\n')
+        bzr('commit', '-m', 'modifying a by ' + self.info['committer'])
+
+        os.chdir('../' + dirname2)
+        # We should be able to pull, even if our encoding is bad
+        bzr('pull', '--verbose', encoding='ascii')
+
+    def test_push(self):
+        # TODO: Test push to an SFTP location
+        # Make sure we can push to paths that can't be encoded
+        bzr = self.run_bzr_decode
+
+        # TODO: jam 20060427 For drastically improving performance, we probably
+        #       could create a local repository, so it wouldn't have to copy
+        #       the files around as much.
+
+        dirname = self.info['directory']
+        bzr('push', dirname)
+
+        open('a', 'ab').write('adding more text\n')
+        bzr('commit', '-m', 'added some stuff')
+
+        # TODO: check the output text is properly encoded
+        bzr('push')
+
+        f = open('a', 'ab')
+        f.write('and a bit more: ')
+        f.write(dirname.encode('utf-8'))
+        f.write('\n')
+        f.close()
+
+        bzr('commit', '-m', u'Added some ' + dirname)
+        bzr('push', '--verbose', encoding='ascii')
+
+        bzr('push', '--verbose', dirname + '2')
+
+        bzr('push', '--verbose', dirname + '3', encoding='ascii')
+
+        bzr('push', '--verbose', '--create-prefix', dirname + '4/' + dirname + '5')
+        bzr('push', '--verbose', '--create-prefix', dirname + '6/' + dirname + '7', encoding='ascii')
+
+    def test_renames(self):
+        bzr = self.run_bzr_decode
+
+        fname = self.info['filename'] + '2'
+        bzr('mv', 'a', fname)
+        txt = bzr('renames')
+        self.assertEqual(u'a => %s\n' % fname, txt)
+
+        bzr('renames', retcode=3, encoding='ascii')
+
+    def test_remove(self):
+        bzr = self.run_bzr_decode
+
+        fname = self.info['filename']
+        txt = bzr('remove', fname, encoding='ascii')
+
+    def test_remove_verbose(self):
+        bzr = self.run_bzr_decode
+
+        fname = self.info['filename']
+        txt = bzr('remove', '--verbose', fname, encoding='ascii')
+
+    def test_file_id(self):
+        bzr = self.run_bzr_decode
+
+        fname = self.info['filename']
+        txt = bzr('file-id', fname)
+
+        # TODO: jam 20060106 We don't support non-ascii file ids yet, 
+        #       so there is nothing which would fail in ascii encoding
+        #       This *should* be retcode=3
+        txt = bzr('file-id', fname, encoding='ascii')
+
+    def test_file_path(self):
+        bzr = self.run_bzr_decode
+
+        # Create a directory structure
+        fname = self.info['filename']
+        dirname = self.info['directory']
+        bzr('mkdir', 'base')
+        bzr('mkdir', 'base/' + dirname)
+        path = '/'.join(['base', dirname, fname])
+        bzr('mv', fname, path)
+        bzr('commit', '-m', 'moving things around')
+
+        txt = bzr('file-path', path)
+
+        # TODO: jam 20060106 We don't support non-ascii file ids yet, 
+        #       so there is nothing which would fail in ascii encoding
+        #       This *should* be retcode=3
+        txt = bzr('file-path', path, encoding='ascii')
+
+    def test_revision_history(self):
+        bzr = self.run_bzr_decode
+
+        # TODO: jam 20060106 We don't support non-ascii revision ids yet, 
+        #       so there is nothing which would fail in ascii encoding
+        txt = bzr('revision-history')
+
+    def test_ancestry(self):
+        bzr = self.run_bzr_decode
+
+        # TODO: jam 20060106 We don't support non-ascii revision ids yet, 
+        #       so there is nothing which would fail in ascii encoding
+        txt = bzr('ancestry')
+
+    def test_diff(self):
+        # TODO: jam 20060106 diff is a difficult one to test, because it 
+        #       shouldn't encode the file contents, but it needs some sort
+        #       of encoding for the paths, etc which are displayed.
+        open(self.info['filename'], 'ab').write('newline\n')
+        txt = self.run_bzr('diff', retcode=1)[0]
+
+    def test_deleted(self):
+        bzr = self.run_bzr_decode
+
+        fname = self.info['filename']
+        os.remove(fname)
+        bzr('rm', fname)
+
+        txt = bzr('deleted')
+        self.assertEqual(fname+'\n', txt)
+
+        txt = bzr('deleted', '--show-ids')
+        self.failUnless(txt.startswith(fname))
+
+        # Deleted should fail if it cannot encode the paths,
+        # because it is giving the exact paths
+        # which might be used by a front end
+        bzr('deleted', encoding='ascii', retcode=3)
+
+    def test_modified(self):
+        bzr = self.run_bzr_decode
+
+        fname = self.info['filename']
+        open(fname, 'ab').write('modified\n')
+
+        txt = bzr('modified')
+        self.assertEqual(fname+'\n', txt)
+
+        bzr('modified', encoding='ascii', retcode=3)
+
+    def test_added(self):
+        bzr = self.run_bzr_decode
+
+        fname = self.info['filename'] + '2'
+        open(fname, 'wb').write('added\n')
+        bzr('add', fname)
+
+        txt = bzr('added')
+        self.assertEqual(fname+'\n', txt)
+
+        bzr('added', encoding='ascii', retcode=3)
+
+    def test_root(self):
+        bzr = self.run_bzr_decode
+
+        dirname = self.info['directory']
+        bzr('root')
+
+        bzr('branch', u'.', dirname)
+
+        os.chdir(dirname)
+
+        txt = bzr('root')
+        self.failUnless(txt.endswith(dirname+'\n'))
+
+        txt = bzr('root', encoding='ascii', retcode=3)
+
+    def test_log(self):
+        bzr = self.run_bzr_decode
+
+        fname = self.info['filename']
+
+        txt = bzr('log')
+        self.assertNotEqual(-1, txt.find(self.info['committer']))
+        self.assertNotEqual(-1, txt.find(self.info['message']))
+
+        txt = bzr('log', '--verbose')
+        self.assertNotEqual(-1, txt.find(fname))
+
+        # Make sure log doesn't fail even if we can't write out
+        txt = bzr('log', '--verbose', encoding='ascii')
+        self.assertEqual(-1, txt.find(fname))
+        self.assertNotEqual(-1, txt.find(fname.encode('ascii', 'replace')))
+
+    def test_touching_revisions(self):
+        bzr = self.run_bzr_decode
+
+        fname = self.info['filename']
+        txt = bzr('touching-revisions', fname)
+        self.assertEqual(u'     3 added %s\n' % (fname,), txt)
+
+        fname2 = self.info['filename'] + '2'
+        bzr('mv', fname, fname2)
+        bzr('commit', '-m', u'Renamed %s => %s' % (fname, fname2))
+
+        txt = bzr('touching-revisions', fname2)
+        expected_txt = (u'     3 added %s\n' 
+                        u'     4 renamed %s => %s\n'
+                        % (fname, fname, fname2))
+        self.assertEqual(expected_txt, txt)
+
+        bzr('touching-revisions', fname2, encoding='ascii', retcode=3)
+
+    def test_ls(self):
+        bzr = self.run_bzr_decode
+
+        txt = bzr('ls')
+        self.assertEqual(['a', 'b', self.info['filename']],
+                         txt.splitlines())
+        txt = bzr('ls', '--null')
+        self.assertEqual(['a', 'b', self.info['filename'], ''],
+                         txt.split('\0'))
+
+        txt = bzr('ls', encoding='ascii', retcode=3)
+        txt = bzr('ls', '--null', encoding='ascii', retcode=3)
+
+    def test_unknowns(self):
+        bzr = self.run_bzr_decode
+
+        fname = self.info['filename'] + '2'
+        open(fname, 'wb').write('unknown\n')
+
+        # TODO: jam 20060112 bzr unknowns is the only command which
+        #       quotes paths; do we really want it to?
+        txt = bzr('unknowns')
+        self.assertEqual(u'"%s"\n' % (fname,), txt)
+
+        bzr('unknowns', encoding='ascii', retcode=3)
+
+    def test_ignore(self):
+        bzr = self.run_bzr_decode
+
+        fname2 = self.info['filename'] + '2.txt'
+        open(fname2, 'wb').write('ignored\n')
+
+        txt = bzr('unknowns')
+        self.assertEqual(u'"%s"\n' % (fname2,), txt)
+
+        bzr('ignore', './' + fname2)
+        txt = bzr('unknowns')
+        self.assertEqual(u'', txt)
+
+        fname3 = self.info['filename'] + '3.txt'
+        open(fname3, 'wb').write('unknown 3\n')
+        txt = bzr('unknowns')
+        self.assertEqual(u'"%s"\n' % (fname3,), txt)
+
+        # Ignore should not care what the encoding is
+        # (right now it doesn't print anything)
+        bzr('ignore', fname3, encoding='ascii')
+        txt = bzr('unknowns')
+        self.assertEqual('', txt)
+
+        # Now try a wildcard match
+        fname4 = self.info['filename'] + '4.txt'
+        open(fname4, 'wb').write('unknown 4\n')
+        bzr('ignore', '*.txt')
+        txt = bzr('unknowns')
+        self.assertEqual('', txt)
+
+        os.remove('.bzrignore')
+        bzr('ignore', self.info['filename'] + '*')
+        txt = bzr('unknowns')
+        self.assertEqual('', txt)
+
+

=== added file 'bzrlib/tests/test_urlutils.py'
--- /dev/null	
+++ bzrlib/tests/test_urlutils.py	
@@ -0,0 +1,434 @@
+# Copyright (C) 2005 by Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Tests for the urlutils wrapper."""
+
+import os
+import sys
+
+import bzrlib
+from bzrlib.errors import InvalidURL, InvalidURLJoin
+import bzrlib.urlutils as urlutils
+from bzrlib.tests import TestCaseInTempDir, TestCase, TestSkipped
+
+
+class TestUrlToPath(TestCase):
+    
+    def test_basename(self):
+        # Test bzrlib.urlutils.basename(), which is a thin wrapper
+        # around bzrlib.urlutils.split()
+        basename = urlutils.basename
+        if sys.platform == 'win32':
+            self.assertRaises(InvalidURL, basename, 'file:///path/to/foo')
+            self.assertEqual('foo', basename('file:///C|/foo'))
+            self.assertEqual('foo', basename('file:///C:/foo'))
+            self.assertEqual('', basename('file:///C:/'))
+        else:
+            self.assertEqual('foo', basename('file:///foo'))
+            self.assertEqual('', basename('file:///'))
+
+        self.assertEqual('foo', basename('http://host/path/to/foo'))
+        self.assertEqual('foo', basename('http://host/path/to/foo/'))
+        self.assertEqual('',
+            basename('http://host/path/to/foo/', exclude_trailing_slash=False))
+        self.assertEqual('path', basename('http://host/path'))
+        self.assertEqual('', basename('http://host/'))
+        self.assertEqual('', basename('http://host'))
+        self.assertEqual('path', basename('http:///nohost/path'))
+
+        self.assertEqual('path', basename('random+scheme://user:pass@ahost:port/path'))
+        self.assertEqual('path', basename('random+scheme://user:pass@ahost:port/path/'))
+        self.assertEqual('', basename('random+scheme://user:pass@ahost:port/'))
+
+        # relative paths
+        self.assertEqual('foo', basename('path/to/foo'))
+        self.assertEqual('foo', basename('path/to/foo/'))
+        self.assertEqual('', basename('path/to/foo/',
+            exclude_trailing_slash=False))
+        self.assertEqual('foo', basename('path/../foo'))
+        self.assertEqual('foo', basename('../path/foo'))
+
+    def test_normalize_url_files(self):
+        # Test that local paths are properly normalized
+        normalize_url = urlutils.normalize_url
+
+        def norm_file(expected, path):
+            url = normalize_url(path)
+            self.assertStartsWith(url, 'file:///')
+            if sys.platform == 'win32':
+                url = url[len('file:///C:'):]
+            else:
+                url = url[len('file://'):]
+
+            self.assertEndsWith(url, expected)
+
+        norm_file('path/to/foo', 'path/to/foo')
+        norm_file('/path/to/foo', '/path/to/foo')
+        norm_file('path/to/foo', '../path/to/foo')
+
+        # Local paths are assumed to *not* be escaped at all
+        try:
+            u'uni/\xb5'.encode(bzrlib.user_encoding)
+        except UnicodeError:
+            # locale cannot handle unicode 
+            pass
+        else:
+            norm_file('uni/%C2%B5', u'uni/\xb5')
+
+        norm_file('uni/%25C2%25B5', u'uni/%C2%B5')
+        norm_file('uni/%20b', u'uni/ b')
+        # All the crazy characters get escaped in local paths => file:/// urls
+        norm_file('%27%3B/%3F%3A%40%26%3D%2B%24%2C%23%20', "';/?:@&=+$,# ")
+
+    def test_normalize_url_hybrid(self):
+        # Anything with a scheme:// should be treated as a hybrid url
+        # which changes what characters get escaped.
+        normalize_url = urlutils.normalize_url
+
+        eq = self.assertEqual
+        eq('file:///foo/', normalize_url(u'file:///foo/'))
+        eq('file:///foo/%20', normalize_url(u'file:///foo/ '))
+        eq('file:///foo/%20', normalize_url(u'file:///foo/%20'))
+        # Don't escape reserved characters
+        eq('file:///ab_c.d-e/%f:?g&h=i+j;k,L#M$',
+            normalize_url('file:///ab_c.d-e/%f:?g&h=i+j;k,L#M$'))
+        eq('http://ab_c.d-e/%f:?g&h=i+j;k,L#M$',
+            normalize_url('http://ab_c.d-e/%f:?g&h=i+j;k,L#M$'))
+
+        # Escape unicode characters, but not already escaped chars
+        eq('http://host/ab/%C2%B5/%C2%B5',
+            normalize_url(u'http://host/ab/%C2%B5/\xb5'))
+
+        # Normalize verifies URLs when they are not unicode
+        # (indicating they did not come from the user)
+        self.assertRaises(InvalidURL, normalize_url, 'http://host/\xb5')
+        self.assertRaises(InvalidURL, normalize_url, 'http://host/ ')
+
+    def test_url_scheme_re(self):
+        # Test paths that may be URLs
+        def test_one(url, scheme_and_path):
+            """Assert that _url_scheme_re correctly matches
+
+            :param scheme_and_path: The (scheme, path) that should be matched
+                can be None, to indicate it should not match
+            """
+            m = urlutils._url_scheme_re.match(url)
+            if scheme_and_path is None:
+                self.assertEqual(None, m)
+            else:
+                self.assertEqual(scheme_and_path[0], m.group('scheme'))
+                self.assertEqual(scheme_and_path[1], m.group('path'))
+
+        # Local paths
+        test_one('/path', None)
+        test_one('C:/path', None)
+        test_one('../path/to/foo', None)
+        test_one(u'../path/to/fo\xe5', None)
+
+        # Real URLS
+        test_one('http://host/path/', ('http', 'host/path/'))
+        test_one('sftp://host/path/to/foo', ('sftp', 'host/path/to/foo'))
+        test_one('file:///usr/bin', ('file', '/usr/bin'))
+        test_one('file:///C:/Windows', ('file', '/C:/Windows'))
+        test_one('file:///C|/Windows', ('file', '/C|/Windows'))
+        test_one(u'readonly+sftp://host/path/\xe5', ('readonly+sftp', u'host/path/\xe5'))
+
+        # Weird stuff
+        # Can't have slashes or colons in the scheme
+        test_one('/path/to/://foo', None)
+        test_one('path:path://foo', None)
+        # Must have more than one character for scheme
+        test_one('C://foo', None)
+        test_one('ab://foo', ('ab', 'foo'))
+
+    def test_dirname(self):
+        # Test bzrlib.urlutils.dirname()
+        dirname = urlutils.dirname
+        if sys.platform == 'win32':
+            self.assertRaises(InvalidURL, dirname, 'file:///path/to/foo')
+            self.assertEqual('file:///C|/', dirname('file:///C|/foo'))
+            self.assertEqual('file:///C|/', dirname('file:///C|/'))
+        else:
+            self.assertEqual('file:///', dirname('file:///foo'))
+            self.assertEqual('file:///', dirname('file:///'))
+
+        self.assertEqual('http://host/path/to', dirname('http://host/path/to/foo'))
+        self.assertEqual('http://host/path/to', dirname('http://host/path/to/foo/'))
+        self.assertEqual('http://host/path/to/foo',
+            dirname('http://host/path/to/foo/', exclude_trailing_slash=False))
+        self.assertEqual('http://host/', dirname('http://host/path'))
+        self.assertEqual('http://host/', dirname('http://host/'))
+        self.assertEqual('http://host', dirname('http://host'))
+        self.assertEqual('http:///nohost', dirname('http:///nohost/path'))
+
+        self.assertEqual('random+scheme://user:pass@ahost:port/',
+            dirname('random+scheme://user:pass@ahost:port/path'))
+        self.assertEqual('random+scheme://user:pass@ahost:port/',
+            dirname('random+scheme://user:pass@ahost:port/path/'))
+        self.assertEqual('random+scheme://user:pass@ahost:port/',
+            dirname('random+scheme://user:pass@ahost:port/'))
+
+        # relative paths
+        self.assertEqual('path/to', dirname('path/to/foo'))
+        self.assertEqual('path/to', dirname('path/to/foo/'))
+        self.assertEqual('path/to/foo',
+            dirname('path/to/foo/', exclude_trailing_slash=False))
+        self.assertEqual('path/..', dirname('path/../foo'))
+        self.assertEqual('../path', dirname('../path/foo'))
+
+    def test_join(self):
+        def test(expected, *args):
+            joined = urlutils.join(*args)
+            self.assertEqual(expected, joined)
+
+        # Test a single element
+        test('foo', 'foo')
+
+        # Test relative path joining
+        test('foo/bar', 'foo', 'bar')
+        test('http://foo/bar', 'http://foo', 'bar')
+        test('http://foo/bar', 'http://foo', '.', 'bar')
+        test('http://foo/baz', 'http://foo', 'bar', '../baz')
+        test('http://foo/bar/baz', 'http://foo', 'bar/baz')
+        test('http://foo/baz', 'http://foo', 'bar/../baz')
+
+        # Absolute paths
+        test('http://bar', 'http://foo', 'http://bar')
+        test('sftp://bzr/foo', 'http://foo', 'bar', 'sftp://bzr/foo')
+        test('file:///bar', 'foo', 'file:///bar')
+        
+        # Invalid joinings
+        # Cannot go above root
+        self.assertRaises(InvalidURLJoin, urlutils.join,
+                'http://foo', '../baz')
+
+    def test_function_type(self):
+        if sys.platform == 'win32':
+            self.assertEqual(urlutils._win32_local_path_to_url, urlutils.local_path_to_url)
+            self.assertEqual(urlutils._win32_local_path_from_url, urlutils.local_path_from_url)
+        else:
+            self.assertEqual(urlutils._posix_local_path_to_url, urlutils.local_path_to_url)
+            self.assertEqual(urlutils._posix_local_path_from_url, urlutils.local_path_from_url)
+
+    def test_posix_local_path_to_url(self):
+        to_url = urlutils._posix_local_path_to_url
+        self.assertEqual('file:///path/to/foo',
+            to_url('/path/to/foo'))
+
+        try:
+            result = to_url(u'/path/to/r\xe4ksm\xf6rg\xe5s')
+        except UnicodeError:
+            raise TestSkipped("local encoding cannot handle unicode")
+
+        self.assertEqual('file:///path/to/r%C3%A4ksm%C3%B6rg%C3%A5s', result)
+
+    def test_posix_local_path_from_url(self):
+        from_url = urlutils._posix_local_path_from_url
+        self.assertEqual('/path/to/foo',
+            from_url('file:///path/to/foo'))
+        self.assertEqual(u'/path/to/r\xe4ksm\xf6rg\xe5s',
+            from_url('file:///path/to/r%C3%A4ksm%C3%B6rg%C3%A5s'))
+        self.assertEqual(u'/path/to/r\xe4ksm\xf6rg\xe5s',
+            from_url('file:///path/to/r%c3%a4ksm%c3%b6rg%c3%a5s'))
+
+        self.assertRaises(InvalidURL, from_url, '/path/to/foo')
+
+    def test_win32_local_path_to_url(self):
+        to_url = urlutils._win32_local_path_to_url
+        self.assertEqual('file:///C:/path/to/foo',
+            to_url('C:/path/to/foo'))
+
+        try:
+            result = to_url(u'd:/path/to/r\xe4ksm\xf6rg\xe5s')
+        except UnicodeError:
+            raise TestSkipped("local encoding cannot handle unicode")
+
+        self.assertEqual('file:///D:/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s', result)
+
+    def test_win32_local_path_from_url(self):
+        from_url = urlutils._win32_local_path_from_url
+        self.assertEqual('C:/path/to/foo',
+            from_url('file:///C|/path/to/foo'))
+        self.assertEqual(u'D:/path/to/r\xe4ksm\xf6rg\xe5s',
+            from_url('file:///d|/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s'))
+        self.assertEqual(u'D:/path/to/r\xe4ksm\xf6rg\xe5s',
+            from_url('file:///d:/path/to/r%c3%a4ksm%c3%b6rg%c3%a5s'))
+
+        self.assertRaises(InvalidURL, from_url, '/path/to/foo')
+        # Not a valid _win32 url, no drive letter
+        self.assertRaises(InvalidURL, from_url, 'file:///path/to/foo')
+
+    def test_split(self):
+        # Test bzrlib.urlutils.split()
+        split = urlutils.split
+        if sys.platform == 'win32':
+            self.assertRaises(InvalidURL, split, 'file:///path/to/foo')
+            self.assertEqual(('file:///C|/', 'foo'), split('file:///C|/foo'))
+            self.assertEqual(('file:///C:/', ''), split('file:///C:/'))
+        else:
+            self.assertEqual(('file:///', 'foo'), split('file:///foo'))
+            self.assertEqual(('file:///', ''), split('file:///'))
+
+        self.assertEqual(('http://host/path/to', 'foo'), split('http://host/path/to/foo'))
+        self.assertEqual(('http://host/path/to', 'foo'), split('http://host/path/to/foo/'))
+        self.assertEqual(('http://host/path/to/foo', ''),
+            split('http://host/path/to/foo/', exclude_trailing_slash=False))
+        self.assertEqual(('http://host/', 'path'), split('http://host/path'))
+        self.assertEqual(('http://host/', ''), split('http://host/'))
+        self.assertEqual(('http://host', ''), split('http://host'))
+        self.assertEqual(('http:///nohost', 'path'), split('http:///nohost/path'))
+
+        self.assertEqual(('random+scheme://user:pass@ahost:port/', 'path'),
+            split('random+scheme://user:pass@ahost:port/path'))
+        self.assertEqual(('random+scheme://user:pass@ahost:port/', 'path'),
+            split('random+scheme://user:pass@ahost:port/path/'))
+        self.assertEqual(('random+scheme://user:pass@ahost:port/', ''),
+            split('random+scheme://user:pass@ahost:port/'))
+
+        # relative paths
+        self.assertEqual(('path/to', 'foo'), split('path/to/foo'))
+        self.assertEqual(('path/to', 'foo'), split('path/to/foo/'))
+        self.assertEqual(('path/to/foo', ''),
+            split('path/to/foo/', exclude_trailing_slash=False))
+        self.assertEqual(('path/..', 'foo'), split('path/../foo'))
+        self.assertEqual(('../path', 'foo'), split('../path/foo'))
+
+    def test_strip_trailing_slash(self):
+        sts = urlutils.strip_trailing_slash
+        if sys.platform == 'win32':
+            self.assertEqual('file:///C|/', sts('file:///C|/'))
+            self.assertEqual('file:///C:/foo', sts('file:///C:/foo'))
+            self.assertEqual('file:///C|/foo', sts('file:///C|/foo/'))
+        else:
+            self.assertEqual('file:///', sts('file:///'))
+            self.assertEqual('file:///foo', sts('file:///foo'))
+            self.assertEqual('file:///foo', sts('file:///foo/'))
+
+        self.assertEqual('http://host/', sts('http://host/'))
+        self.assertEqual('http://host/foo', sts('http://host/foo'))
+        self.assertEqual('http://host/foo', sts('http://host/foo/'))
+
+        # No need to fail just because the slash is missing
+        self.assertEqual('http://host', sts('http://host'))
+        # TODO: jam 20060502 Should this raise InvalidURL?
+        self.assertEqual('file://', sts('file://'))
+
+        self.assertEqual('random+scheme://user:pass@ahost:port/path',
+            sts('random+scheme://user:pass@ahost:port/path'))
+        self.assertEqual('random+scheme://user:pass@ahost:port/path',
+            sts('random+scheme://user:pass@ahost:port/path/'))
+        self.assertEqual('random+scheme://user:pass@ahost:port/',
+            sts('random+scheme://user:pass@ahost:port/'))
+
+        # Make sure relative paths work too
+        self.assertEqual('path/to/foo', sts('path/to/foo'))
+        self.assertEqual('path/to/foo', sts('path/to/foo/'))
+        self.assertEqual('../to/foo', sts('../to/foo/'))
+        self.assertEqual('path/../foo', sts('path/../foo/'))
+
+    def test_unescape_for_display_utf8(self):
+        # Test that URLs are converted to nice unicode strings for display
+        def test(expected, url, encoding='utf-8'):
+            disp_url = urlutils.unescape_for_display(url, encoding=encoding)
+            self.assertIsInstance(disp_url, unicode)
+            self.assertEqual(expected, disp_url)
+
+        test('http://foo', 'http://foo')
+        if sys.platform == 'win32':
+            test('C:/foo/path', 'file:///C|/foo/path')
+            test('C:/foo/path', 'file:///C:/foo/path')
+        else:
+            test('/foo/path', 'file:///foo/path')
+
+        test('http://foo/%2Fbaz', 'http://foo/%2Fbaz')
+        test(u'http://host/r\xe4ksm\xf6rg\xe5s',
+             'http://host/r%C3%A4ksm%C3%B6rg%C3%A5s')
+
+        # Make sure special escaped characters stay escaped
+        test(u'http://host/%3B%2F%3F%3A%40%26%3D%2B%24%2C%23',
+             'http://host/%3B%2F%3F%3A%40%26%3D%2B%24%2C%23')
+
+        # Can we handle sections that don't have utf-8 encoding?
+        test(u'http://host/%EE%EE%EE/r\xe4ksm\xf6rg\xe5s',
+             'http://host/%EE%EE%EE/r%C3%A4ksm%C3%B6rg%C3%A5s')
+
+        # Test encoding into output that can handle some characters
+        test(u'http://host/%EE%EE%EE/r\xe4ksm\xf6rg\xe5s',
+             'http://host/%EE%EE%EE/r%C3%A4ksm%C3%B6rg%C3%A5s',
+             encoding='iso-8859-1')
+
+        # This one can be encoded into utf8
+        test(u'http://host/\u062c\u0648\u062c\u0648',
+             'http://host/%d8%ac%d9%88%d8%ac%d9%88',
+             encoding='utf-8')
+
+        # This can't be put into 8859-1 and so stays as escapes
+        test(u'http://host/%d8%ac%d9%88%d8%ac%d9%88',
+             'http://host/%d8%ac%d9%88%d8%ac%d9%88',
+             encoding='iso-8859-1')
+
+    def test_escape(self):
+        self.assertEqual('%25', urlutils.escape('%'))
+        self.assertEqual('%C3%A5', urlutils.escape(u'\xe5'))
+
+    def test_unescape(self):
+        self.assertEqual('%', urlutils.unescape('%25'))
+        self.assertEqual(u'\xe5', urlutils.unescape('%C3%A5'))
+
+        self.assertRaises(InvalidURL, urlutils.unescape, u'\xe5')
+        self.assertRaises(InvalidURL, urlutils.unescape, '\xe5')
+        self.assertRaises(InvalidURL, urlutils.unescape, '%E5')
+
+    def test_escape_unescape(self):
+        self.assertEqual(u'\xe5', urlutils.unescape(urlutils.escape(u'\xe5')))
+        self.assertEqual('%', urlutils.unescape(urlutils.escape('%')))
+
+    def test_relative_url(self):
+        def test(expected, base, other):
+            result = urlutils.relative_url(base, other)
+            self.assertEqual(expected, result)
+            
+        test('a', 'http://host/', 'http://host/a')
+        test('http://entirely/different', 'sftp://host/branch',
+                    'http://entirely/different')
+        test('../person/feature', 'http://host/branch/mainline',
+                    'http://host/branch/person/feature')
+        test('..', 'http://host/branch', 'http://host/')
+        test('http://host2/branch', 'http://host1/branch', 'http://host2/branch')
+        test('.', 'http://host1/branch', 'http://host1/branch')
+        test('../../../branch/2b', 'file:///home/jelmer/foo/bar/2b',
+                    'file:///home/jelmer/branch/2b')
+        test('../../branch/2b', 'sftp://host/home/jelmer/bar/2b',
+                    'sftp://host/home/jelmer/branch/2b')
+        test('../../branch/feature/%2b', 'http://host/home/jelmer/bar/%2b',
+                    'http://host/home/jelmer/branch/feature/%2b')
+        test('../../branch/feature/2b', 'http://host/home/jelmer/bar/2b/', 
+                    'http://host/home/jelmer/branch/feature/2b')
+        # relative_url should preserve a trailing slash
+        test('../../branch/feature/2b/', 'http://host/home/jelmer/bar/2b/',
+                    'http://host/home/jelmer/branch/feature/2b/')
+        test('../../branch/feature/2b/', 'http://host/home/jelmer/bar/2b',
+                    'http://host/home/jelmer/branch/feature/2b/')
+
+        # TODO: treat http://host as http://host/
+        #       relative_url is typically called from a branch.base or
+        #       transport.base which always ends with a /
+        #test('a', 'http://host', 'http://host/a')
+        test('http://host/a', 'http://host', 'http://host/a')
+        #test('.', 'http://host', 'http://host/')
+        test('http://host/', 'http://host', 'http://host/')
+        #test('.', 'http://host/', 'http://host')
+        test('http://host', 'http://host/', 'http://host')

=== added file 'bzrlib/urlutils.py'
--- /dev/null	
+++ bzrlib/urlutils.py	
@@ -0,0 +1,476 @@
+# Bazaar-NG -- distributed version control
+#
+# Copyright (C) 2006 by Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""A collection of functions for handling URL operations."""
+
+import os
+from posixpath import split as _posix_split
+import re
+import sys
+import urllib
+
+import bzrlib.errors as errors
+import bzrlib.osutils
+
+
+def basename(url, exclude_trailing_slash=True):
+    """Return the last component of a URL.
+
+    :param url: The URL in question
+    :param exclude_trailing_slash: If the url looks like "path/to/foo/"
+        ignore the final slash and return 'foo' rather than ''
+    :return: Just the final component of the URL. This can return ''
+        if you don't exclude_trailing_slash, or if you are at the
+        root of the URL.
+    """
+    return split(url, exclude_trailing_slash=exclude_trailing_slash)[1]
+
+
+def dirname(url, exclude_trailing_slash=True):
+    """Return the parent directory of the given path.
+
+    :param url: Relative or absolute URL
+    :param exclude_trailing_slash: Remove a final slash
+        (treat http://host/foo/ as http://host/foo, but
+        http://host/ stays http://host/)
+    :return: Everything in the URL except the last path chunk
+    """
+    # TODO: jam 20060502 This was named dirname to be consistent
+    #       with the os functions, but maybe "parent" would be better
+    return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
+
+
+def escape(relpath):
+    """Escape relpath to be a valid url."""
+    if isinstance(relpath, unicode):
+        relpath = relpath.encode('utf-8')
+    # After quoting and encoding, the path should be perfectly
+    # safe as a plain ASCII string, str() just enforces this
+    return str(urllib.quote(relpath))
+
+
+def file_relpath(base, path):
+    """Compute just the relative sub-portion of a url
+    
+    This assumes that both paths are already fully specified file:// URLs.
+    """
+    assert len(base) >= MIN_ABS_FILEURL_LENGTH, ('Length of base must be equal or'
+        ' exceed the platform minimum url length (which is %d)' % 
+        MIN_ABS_FILEURL_LENGTH)
+
+    base = local_path_from_url(base)
+    path = local_path_from_url(path)
+    return escape(bzrlib.osutils.relpath(base, path))
+
+
+def _find_scheme_and_separator(url):
+    """Find the scheme separator (://) and the first path separator
+
+    This is just a helper function for other path utilities.
+    It could probably be replaced by urlparse
+    """
+    m = _url_scheme_re.match(url)
+    if not m:
+        return None, None
+
+    scheme = m.group('scheme')
+    path = m.group('path')
+
+    # Find the path separating slash
+    # (first slash after the ://)
+    first_path_slash = path.find('/')
+    if first_path_slash == -1:
+        return len(scheme), None
+    return len(scheme), first_path_slash+len(scheme)+3
+
+
+def join(base, *args):
+    """Create a URL by joining sections.
+
+    This will normalize '..', assuming that paths are absolute
+    (it assumes no symlinks in either path)
+
+    If any of *args is an absolute URL, it will be treated correctly.
+    Example:
+        join('http://foo', 'http://bar') => 'http://bar'
+        join('http://foo', 'bar') => 'http://foo/bar'
+        join('http://foo', 'bar', '../baz') => 'http://foo/baz'
+    """
+    m = _url_scheme_re.match(base)
+    scheme = None
+    if m:
+        scheme = m.group('scheme')
+        path = m.group('path').split('/')
+    else:
+        path = base.split('/')
+
+    for arg in args:
+        m = _url_scheme_re.match(arg)
+        if m:
+            # Absolute URL
+            scheme = m.group('scheme')
+            path = m.group('path').split('/')
+        else:
+            for chunk in arg.split('/'):
+                if chunk == '.':
+                    continue
+                elif chunk == '..':
+                    if len(path) >= 2:
+                        # Don't pop off the host portion
+                        path.pop()
+                    else:
+                        raise errors.InvalidURLJoin('Cannot go above root',
+                                base, args)
+                else:
+                    path.append(chunk)
+
+    if scheme is None:
+        return '/'.join(path)
+    return scheme + '://' + '/'.join(path)
+
+
+# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
+def _posix_local_path_from_url(url):
+    """Convert a url like file:///path/to/foo into /path/to/foo"""
+    if not url.startswith('file:///'):
+        raise errors.InvalidURL(url, 'local urls must start with file:///')
+    # We only strip off 2 slashes
+    return unescape(url[len('file://'):])
+
+
+def _posix_local_path_to_url(path):
+    """Convert a local path like ./foo into a URL like file:///path/to/foo
+
+    This also handles escaping unicode characters, etc.
+    """
+    # importing directly from posixpath allows us to test this 
+    # on non-posix platforms
+    from posixpath import normpath
+    return 'file://' + escape(normpath(bzrlib.osutils._posix_abspath(path)))
+
+
+def _win32_local_path_from_url(url):
+    """Convert a url like file:///C|/path/to/foo into C:/path/to/foo"""
+    if not url.startswith('file:///'):
+        raise errors.InvalidURL(url, 'local urls must start with file:///')
+    # We strip off all 3 slashes
+    win32_url = url[len('file:///'):]
+    if (win32_url[0] not in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+        or win32_url[1] not in  '|:'
+        or win32_url[2] != '/'):
+        raise errors.InvalidURL(url, 'Win32 file urls start with file:///X|/, where X is a valid drive letter')
+    # NOTE: jam 20060426, we .upper() the drive letter
+    #       for better consistency.
+    return win32_url[0].upper() + u':' + unescape(win32_url[2:])
+
+
+def _win32_local_path_to_url(path):
+    """Convert a local path like ./foo into a URL like file:///C|/path/to/foo
+
+    This also handles escaping unicode characters, etc.
+    """
+    # calling the nt/win32-specific osutils helpers directly allows us
+    # to test this on non-win32 platforms
+    win32_path = bzrlib.osutils._nt_normpath(
+        bzrlib.osutils._win32_abspath(path)).replace('\\', '/')
+    return 'file:///' + win32_path[0].upper() + ':' + escape(win32_path[2:])
+
+
+local_path_to_url = _posix_local_path_to_url
+local_path_from_url = _posix_local_path_from_url
+MIN_ABS_FILEURL_LENGTH = len('file:///')
+
+if sys.platform == 'win32':
+    local_path_to_url = _win32_local_path_to_url
+    local_path_from_url = _win32_local_path_from_url
+
+    MIN_ABS_FILEURL_LENGTH = len('file:///C|/')
+
+
+_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
+
+
+def normalize_url(url):
+    """Make sure that a path string is in fully normalized URL form.
+    
+    This handles URLs which have unicode characters, spaces, 
+    special characters, etc.
+
+    It has two basic modes of operation, depending on whether the
+    supplied string starts with a url specifier (scheme://) or not.
+    If it does not have a specifier it is considered a local path,
+    and will be converted into a file:/// url. Non-ascii characters
+    will be encoded using utf-8.
+    If it does have a url specifier, it will be treated as a "hybrid"
+    URL. Basically, a URL that should have URL special characters already
+    escaped (like +?&# etc), but may have unicode characters, etc
+    which would not be valid in a real URL.
+
+    :param url: Either a hybrid URL or a local path
+    :return: A normalized URL which only includes 7-bit ASCII characters.
+    """
+    m = _url_scheme_re.match(url)
+    if not m:
+        return local_path_to_url(url)
+    if not isinstance(url, unicode):
+        for c in url:
+            if c not in _url_safe_characters:
+                raise errors.InvalidURL(url, 'URLs can only contain specific'
+                                            ' safe characters (not %r)' % c)
+        return url
+    # We have a unicode (hybrid) url
+    scheme = m.group('scheme')
+    path = list(m.group('path'))
+
+    for i in xrange(len(path)):
+        if path[i] not in _url_safe_characters:
+            chars = path[i].encode('utf-8')
+            path[i] = ''.join(['%%%02X' % ord(c) for c in path[i].encode('utf-8')])
+    return scheme + '://' + ''.join(path)
+
+
+def relative_url(base, other):
+    """Return a path to other from base.
+
+    If other is unrelated to base, return other. Else return a relative path.
+    This assumes no symlinks as part of the url.
+    """
+    dummy, base_first_slash = _find_scheme_and_separator(base)
+    if base_first_slash is None:
+        return other
+    
+    dummy, other_first_slash = _find_scheme_and_separator(other)
+    if other_first_slash is None:
+        return other
+
+    # this takes care of differing schemes or hosts
+    base_scheme = base[:base_first_slash]
+    other_scheme = other[:other_first_slash]
+    if base_scheme != other_scheme:
+        return other
+
+    base_path = base[base_first_slash+1:]
+    other_path = other[other_first_slash+1:]
+
+    if base_path.endswith('/'):
+        base_path = base_path[:-1]
+
+    base_sections = base_path.split('/')
+    other_sections = other_path.split('/')
+
+    if base_sections == ['']:
+        base_sections = []
+    if other_sections == ['']:
+        other_sections = []
+
+    output_sections = []
+    for b, o in zip(base_sections, other_sections):
+        if b != o:
+            break
+        output_sections.append(b)
+
+    match_len = len(output_sections)
+    output_sections = ['..' for x in base_sections[match_len:]]
+    output_sections.extend(other_sections[match_len:])
+
+    return "/".join(output_sections) or "."
+
+
+def split(url, exclude_trailing_slash=True):
+    """Split a URL into its parent directory and a child directory.
+
+    :param url: A relative or absolute URL
+    :param exclude_trailing_slash: Strip off a final '/' if it is part
+        of the path (but not if it is part of the protocol specification)
+
+    :return: (parent_url, child_dir).  child_dir may be the empty string
+        if we're at the root.
+    """
+    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
+
+    if first_path_slash is None:
+        # We have either a relative path, or no separating slash
+        if scheme_loc is None:
+            # Relative path
+            if exclude_trailing_slash and url.endswith('/'):
+                url = url[:-1]
+            return _posix_split(url)
+        else:
+            # Scheme with no path
+            return url, ''
+
+    # We have a fully defined path
+    url_base = url[:first_path_slash] # http://host, file://
+    path = url[first_path_slash:] # /file/foo
+
+    if sys.platform == 'win32' and url.startswith('file:///'):
+        # The drive is the root here; path is currently /C:/foo or /C|/foo
+        if len(path) < 4 or path[2] not in ':|' or path[3] not in '\\/':
+            raise errors.InvalidURL(url,
+                'win32 file:/// paths need a drive letter')
+        url_base += path[:3] # file:///C: (the drive stays in the base)
+        path = path[3:] # /foo
+
+    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
+        path = path[:-1]
+    head, tail = _posix_split(path)
+    return url_base + head, tail
+
+
+def strip_trailing_slash(url):
+    """Strip trailing slash, except for root paths.
+
+    The definition of 'root path' is platform-dependent.
+    This assumes that all URLs are valid netloc urls, i.e. that they
+    have the form:
+        scheme://host/path
+    The '/' that separates the host from the path is never removed.
+    Relative paths (those without a scheme) are handled as well.
+    Examples:
+        path/to/foo       => path/to/foo
+        path/to/foo/      => path/to/foo
+        http://host/path/ => http://host/path
+        http://host/path  => http://host/path
+        http://host/      => http://host/
+        file:///          => file:///
+        file:///foo/      => file:///foo
+        # This is unique on win32 platforms, and is the only URL
+        # format which does it differently.
+        file:///C|/       => file:///C|/
+    """
+    if not url.endswith('/'):
+        # Nothing to do
+        return url
+    if sys.platform == 'win32' and url.startswith('file:///'):
+        # This gets handled specially, because the 'top-level'
+        # of a win32 path is actually the drive letter
+        if len(url) > MIN_ABS_FILEURL_LENGTH:
+            return url[:-1]
+        else:
+            return url
+
+    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
+    if scheme_loc is None:
+        # This is a relative path, as it has no scheme
+        # so just chop off the last character
+        return url[:-1]
+
+    if first_path_slash is None or first_path_slash == len(url)-1:
+        # Either there is no path separating slash at all, or the
+        # trailing slash is that separator (a root url): keep it
+        return url
+
+    return url[:-1]
+
+
+def unescape(url):
+    """Unescape relpath from url format.
+
+    Returns a Unicode path; raises InvalidURL if url is not plain
+    ASCII or if its unquoted bytes are not valid utf-8.
+    """
+    # jam 20060427 URLs are supposed to be ASCII only strings
+    #       If they are passed in as unicode, urllib.unquote
+    #       will return a UNICODE string, which actually contains
+    #       utf-8 bytes. So we have to ensure that they are
+    #       plain ASCII strings, or the final .decode will
+    #       try to encode the UNICODE => ASCII before decoding.
+    try:
+        url = str(url)
+    except UnicodeError, e:
+        raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
+
+    unquoted = urllib.unquote(url)
+    try:
+        unicode_path = unquoted.decode('utf-8')
+    except UnicodeError, e:
+        raise errors.InvalidURL(url, 'Unable to decode the URL as utf-8: %s' % (e,))
+    return unicode_path
+
+
+# Reserved characters: if they are escaped in a URL they must stay escaped
+_no_decode_chars = ';/?:@&=+$,#'
+_no_decode_ords = [ord(c) for c in _no_decode_chars]
+_no_decode_hex = (['%02x' % o for o in _no_decode_ords] 
+                + ['%02X' % o for o in _no_decode_ords])
+_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]
+                    + [('%02X' % o, chr(o)) for o in range(256)]))
+# Escapes of the reserved characters map back to their own '%XX' form
+_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
+
+# These characters should not be escaped
+_url_safe_characters = set('abcdefghijklmnopqrstuvwxyz'
+                        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+                        '0123456789' '_.-/'
+                        ';?:@&=+$,%#')
+
+
+def unescape_for_display(url, encoding):
+    """Decode what you can for a URL, so that we get a nice looking path.
+
+    This will turn file:// urls into local paths, and try to decode
+    any portions of a http:// style url that it can.
+
+    Any sections of the URL which can't be represented in the encoding
+    or need to stay as escapes are left alone.
+
+    :param url: A 7-bit ASCII URL
+    :param encoding: The final output encoding
+
+    :return: A unicode string which can be safely encoded into the
+         specified encoding.
+    """
+    if url.startswith('file://'):
+        try:
+            path = local_path_from_url(url)
+            path.encode(encoding)  # result unused: only checks encodability
+            return path
+        except UnicodeError:
+            return url
+
+    # Decode each '/' section on its own; section 0 (e.g. 'http:') is skipped
+    res = url.split('/')
+    for i in xrange(1, len(res)):
+        escaped_chunks = res[i].split('%')
+        for j in xrange(1, len(escaped_chunks)):
+            item = escaped_chunks[j]
+            try:
+                escaped_chunks[j] = _hex_display_map[item[:2]] + item[2:]
+            except KeyError:
+                # Not a known hex escape: put back the percent symbol
+                escaped_chunks[j] = '%' + item
+            except UnicodeDecodeError:
+                escaped_chunks[j] = unichr(int(item[:2], 16)) + item[2:]
+        unescaped = ''.join(escaped_chunks)
+        try:
+            decoded = unescaped.decode('utf-8')
+        except UnicodeDecodeError:
+            # If this path segment cannot be properly utf-8 decoded
+            # after doing unescaping we will just leave it alone
+            pass
+        else:
+            try:
+                decoded.encode(encoding)
+            except UnicodeEncodeError:
+                # If this chunk cannot be encoded in the local
+                # encoding, then we should leave it alone
+                pass
+            else:
+                # Otherwise take the url decoded one
+                res[i] = decoded
+    return u'/'.join(res)

=== added file 'foo bar'
=== modified file 'bzrlib/add.py'
--- bzrlib/add.py	
+++ bzrlib/add.py	
@@ -1,19 +1,20 @@
 # Copyright (C) 2005 Canonical Ltd
-
+#
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
-
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-
+#
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+import sys
 from os.path import dirname
 
 import bzrlib.errors as errors
@@ -22,6 +23,7 @@
 from bzrlib.errors import NotBranchError
 import bzrlib.osutils
 from bzrlib.workingtree import WorkingTree
+
 
 def glob_expand_for_win32(file_list):
     if not file_list:
@@ -42,7 +44,6 @@
 
 def _prepare_file_list(file_list):
     """Prepare a file list for use by smart_add_*."""
-    import sys
     if sys.platform == 'win32':
         file_list = glob_expand_for_win32(file_list)
     if not file_list:
@@ -51,34 +52,63 @@
     return file_list
 
 
-def add_action_null(inv, parent_ie, path, kind):
-    """Absorb add actions and do nothing."""
-    pass
-
-
-def add_action_print(inv, parent_ie, path, kind):
-    """Print a line to stdout for each file that would be added."""
-    print "added", bzrlib.osutils.quotefn(path)
-
-
-def add_action_add(inv, parent_ie, path, kind):
-    """Add each file to the given inventory. Produce no output."""
-    if parent_ie is not None:
-        entry = bzrlib.inventory.make_entry(
-            kind, bzrlib.osutils.basename(path),  parent_ie.file_id)
-        inv.add(entry)
-    else:
-        entry = inv.add_path(path, kind=kind)
-    # mutter("added %r kind %r file_id={%s}" % (path, kind, entry.file_id))
-
-
-def add_action_add_and_print(inv, parent_ie, path, kind):
-    """Add each file to the given inventory, and print a line to stdout."""
-    add_action_add(inv, parent_ie, path, kind)
-    add_action_print(inv, parent_ie, path, kind)
-
-
-def smart_add(file_list, recurse=True, action=add_action_add):
+class AddAction(object):
+    """A callable which defines what to do (add, print) when adding a file."""
+
+    def __init__(self, to_file=None, should_add=None, should_print=None):
+        self._to_file = to_file
+        if to_file is None:
+            self._to_file = sys.stdout
+        self.should_add = False
+        if should_add is not None:
+            self.should_add = should_add
+        self.should_print = False
+        if should_print is not None:
+            self.should_print = should_print
+
+    def __call__(self, inv, parent_ie, path, kind):
+        """Add path to inventory and/or print it, as configured.
+
+        The default action (neither flag set) does nothing.
+        :param inv: The inventory we are working with.
+        :param parent_ie: Entry supplying the parent file_id, or None.
+        :param path: The path being added
+        :param kind: The kind of the object being added.
+        """
+        if self.should_add:
+            self._add_to_inv(inv, parent_ie, path, kind)
+        if self.should_print:
+            self._print(inv, parent_ie, path, kind)
+
+    def _print(self, inv, parent_ie, path, kind):
+        """Write an 'added <path>' line for path to self._to_file."""
+        self._to_file.write('added ')
+        self._to_file.write(bzrlib.osutils.quotefn(path))
+        self._to_file.write('\n')
+
+    def _add_to_inv(self, inv, parent_ie, path, kind):
+        """Add path to the given inventory. Produce no output."""
+        if parent_ie is not None:
+            entry = bzrlib.inventory.make_entry(
+                kind, bzrlib.osutils.basename(path),  parent_ie.file_id)
+            inv.add(entry)
+        else:
+            entry = inv.add_path(path, kind=kind)
+        # mutter("added %r kind %r file_id={%s}", path, kind, entry.file_id)
+
+
+# TODO: jam 20050105 These could be used for compatibility
+#       however, they bind against the current stdout, not the
+#       one which exists at the time they are called, so they
+#       don't work for the test suite.
+# deprecated
+add_action_null = AddAction()
+add_action_add = AddAction(should_add=True)
+add_action_print = AddAction(should_print=True)
+add_action_add_and_print = AddAction(should_add=True, should_print=True)
+
+
+def smart_add(file_list, recurse=True, action=None):
     """Add files to version, optionally recursing into directories.
 
     This is designed more towards DWIM for humans than API simplicity.
@@ -88,10 +118,10 @@
     """
     file_list = _prepare_file_list(file_list)
     tree = WorkingTree.open_containing(file_list[0])[0]
-    return smart_add_tree(tree, file_list, recurse, action)
-
-
-def smart_add_tree(tree, file_list, recurse=True, action=add_action_add):
+    return smart_add_tree(tree, file_list, recurse, action=action)
+
+
+def smart_add_tree(tree, file_list, recurse=True, action=None):
     """Add files to version, optionally recursing into directories.
 
     This is designed more towards DWIM for humans than API simplicity.
@@ -104,6 +134,8 @@
     import os, errno
     from bzrlib.errors import BadFileKindError, ForbiddenFileError
     assert isinstance(recurse, bool)
+    if action is None:
+        action = AddAction(should_add=True)
     
     prepared_list = _prepare_file_list(file_list)
     mutter("smart add of %r, originally %r", prepared_list, file_list)

=== modified file 'bzrlib/branch.py'
--- bzrlib/branch.py	
+++ bzrlib/branch.py	
@@ -47,9 +47,6 @@
                             safe_unicode,
                             rmtree,
                             )
-from bzrlib.textui import show_status
-from bzrlib.trace import mutter, note
-from bzrlib.tree import EmptyTree, RevisionTree
 from bzrlib.repository import Repository
 from bzrlib.revision import (
                              is_ancestor,
@@ -58,10 +55,13 @@
                              )
 from bzrlib.store import copy_all
 from bzrlib.symbol_versioning import *
+from bzrlib.textui import show_status
+from bzrlib.trace import mutter, note
 import bzrlib.transactions as transactions
 from bzrlib.transport import Transport, get_transport
 from bzrlib.tree import EmptyTree, RevisionTree
 import bzrlib.ui
+import bzrlib.urlutils as urlutils
 import bzrlib.xml5
 
 
@@ -1133,9 +1133,11 @@
         """See Branch.get_parent."""
         import errno
         _locs = ['parent', 'pull', 'x-pull']
+        assert self.base[-1] == '/'
         for l in _locs:
             try:
-                return self.control_files.get_utf8(l).read().strip('\n')
+                return urlutils.join(self.base[:-1], 
+                            self.control_files.get(l).read().strip('\n'))
             except NoSuchFile:
                 pass
         return None
@@ -1162,7 +1164,16 @@
         if url is None:
             self.control_files._transport.delete('parent')
         else:
-            self.control_files.put_utf8('parent', url + '\n')
+            if isinstance(url, unicode):
+                try: 
+                    url = url.encode('ascii')
+                except UnicodeEncodeError:
+                    raise bzrlib.errors.InvalidURL(url,
+                        "Urls must be 7-bit ascii, "
+                        "use bzrlib.urlutils.escape")
+                    
+            url = urlutils.relative_url(self.base, url)
+            self.control_files.put('parent', url + '\n')
 
     def tree_config(self):
         return TreeConfig(self)

=== modified file 'bzrlib/builtins.py'
--- bzrlib/builtins.py	
+++ bzrlib/builtins.py	
@@ -17,8 +17,10 @@
 """builtin bzr commands"""
 
 
+import codecs
 import errno
 import os
+from shutil import rmtree
 import sys
 
 import bzrlib
@@ -36,6 +38,7 @@
 from bzrlib.log import show_one_log
 from bzrlib.merge import Merge3Merger
 from bzrlib.option import Option
+import bzrlib.osutils
 from bzrlib.progress import DummyProgress, ProgressPhase
 from bzrlib.revision import common_ancestor
 from bzrlib.revisionspec import RevisionSpec
@@ -43,6 +46,7 @@
 from bzrlib.trace import mutter, note, log_error, warning, is_quiet
 from bzrlib.transport.local import LocalTransport
 import bzrlib.ui
+import bzrlib.urlutils as urlutils
 from bzrlib.workingtree import WorkingTree
 
 
@@ -152,14 +156,18 @@
     takes_args = ['file*']
     takes_options = ['all', 'show-ids', 'revision']
     aliases = ['st', 'stat']
+
+    encoding_type = 'replace'
     
     @display_command
     def run(self, all=False, show_ids=False, file_list=None, revision=None):
+        from bzrlib.status import show_tree_status
+
         tree, file_list = tree_files(file_list)
             
-        from bzrlib.status import show_tree_status
         show_tree_status(tree, show_unchanged=all, show_ids=show_ids,
-                         specific_files=file_list, revision=revision)
+                         specific_files=file_list, revision=revision,
+                         to_file=self.outf)
 
 
 class cmd_cat_revision(Command):
@@ -172,6 +180,8 @@
     hidden = True
     takes_args = ['revision_id?']
     takes_options = ['revision']
+    # cat-revision is more for frontends, so encoding errors must not pass
+    encoding_type = 'strict'
     
     @display_command
     def run(self, revision_id=None, revision=None):
@@ -181,24 +191,30 @@
         if revision_id is None and revision is None:
             raise BzrCommandError('You must supply either --revision or a revision_id')
         b = WorkingTree.open_containing(u'.')[0].branch
+
+        # TODO: jam 20060112 should cat-revision always output utf-8?
         if revision_id is not None:
-            sys.stdout.write(b.repository.get_revision_xml(revision_id))
+            self.outf.write(b.repository.get_revision_xml(revision_id).decode('utf-8'))
         elif revision is not None:
             for rev in revision:
                 if rev is None:
                     raise BzrCommandError('You cannot specify a NULL revision.')
                 revno, rev_id = rev.in_history(b)
-                sys.stdout.write(b.repository.get_revision_xml(rev_id))
+                self.outf.write(b.repository.get_revision_xml(rev_id).decode('utf-8'))
     
 
 class cmd_revno(Command):
     """Show current revision number.
 
-    This is equal to the number of revisions on this branch."""
+    This is equal to the number of revisions on this branch.
+    """
+
     takes_args = ['location?']
+
     @display_command
     def run(self, location=u'.'):
-        print Branch.open_containing(location)[0].revno()
+        self.outf.write(str(Branch.open_containing(location)[0].revno()))
+        self.outf.write('\n')
 
 
 class cmd_revision_info(Command):
@@ -207,6 +223,7 @@
     hidden = True
     takes_args = ['revision_info*']
     takes_options = ['revision']
+
     @display_command
     def run(self, revision=None, revision_info_list=[]):
 
@@ -257,35 +274,29 @@
     """
     takes_args = ['file*']
     takes_options = ['no-recurse', 'dry-run', 'verbose']
+    encoding_type = 'replace'
 
     def run(self, file_list, no_recurse=False, dry_run=False, verbose=False):
         import bzrlib.add
 
-        if dry_run:
-            if is_quiet():
-                # This is pointless, but I'd rather not raise an error
-                action = bzrlib.add.add_action_null
-            else:
-                action = bzrlib.add.add_action_print
-        elif is_quiet():
-            action = bzrlib.add.add_action_add
-        else:
-            action = bzrlib.add.add_action_add_and_print
+        action = bzrlib.add.AddAction(to_file=self.outf,
+            should_add=(not dry_run), should_print=(not is_quiet()))
 
         added, ignored = bzrlib.add.smart_add(file_list, not no_recurse, 
-                                              action)
+                                              action=action)
         if len(ignored) > 0:
             if verbose:
                 for glob in sorted(ignored.keys()):
                     for path in ignored[glob]:
-                        print "ignored %s matching \"%s\"" % (path, glob)
+                        self.outf.write("ignored %s matching \"%s\"\n" 
+                                        % (path, glob))
             else:
                 match_len = 0
                 for glob, paths in ignored.items():
                     match_len += len(paths)
-                print "ignored %d file(s)." % match_len
-            print "If you wish to add some of these files, please add them"\
-                " by name."
+                self.outf.write("ignored %d file(s).\n" % match_len)
+            self.outf.write("If you wish to add some of these files,"
+                            " please add them by name.\n")
 
 
 class cmd_mkdir(Command):
@@ -293,25 +304,31 @@
 
     This is equivalent to creating the directory and then adding it.
     """
+
     takes_args = ['dir+']
+    encoding_type = 'replace'
 
     def run(self, dir_list):
         for d in dir_list:
             os.mkdir(d)
             wt, dd = WorkingTree.open_containing(d)
             wt.add([dd])
-            print 'added', d
+            self.outf.write('added %s\n' % d)
 
 
 class cmd_relpath(Command):
     """Show path of a file relative to root"""
+
     takes_args = ['filename']
     hidden = True
     
     @display_command
     def run(self, filename):
+        # TODO: jam 20050106 Can relpath return a munged path if
+        #       sys.stdout encoding cannot represent it?
         tree, relpath = WorkingTree.open_containing(filename)
-        print relpath
+        self.outf.write(relpath)
+        self.outf.write('\n')
 
 
 class cmd_inventory(Command):
@@ -320,6 +337,7 @@
     It is possible to limit the output to a particular entry
     type using the --kind option.  For example; --kind file.
     """
+
     takes_options = ['revision', 'show-ids', 'kind']
     
     @display_command
@@ -340,9 +358,10 @@
             if kind and kind != entry.kind:
                 continue
             if show_ids:
-                print '%-50s %s' % (path, entry.file_id)
-            else:
-                print path
+                self.outf.write('%-50s %s\n' % (path, entry.file_id))
+            else:
+                self.outf.write(path)
+                self.outf.write('\n')
 
 
 class cmd_mv(Command):
@@ -358,8 +377,10 @@
 
     Files cannot be moved between branches.
     """
+
     takes_args = ['names*']
     aliases = ['move', 'rename']
+    encoding_type = 'replace'
 
     def run(self, names_list):
         if len(names_list) < 2:
@@ -369,13 +390,13 @@
         if os.path.isdir(names_list[-1]):
             # move into existing directory
             for pair in tree.move(rel_names[:-1], rel_names[-1]):
-                print "%s => %s" % pair
+                self.outf.write("%s => %s\n" % pair)
         else:
             if len(names_list) != 2:
                 raise BzrCommandError('to mv multiple files the destination '
                                       'must be a versioned directory')
             tree.rename_one(rel_names[0], rel_names[1])
-            print "%s => %s" % (rel_names[0], rel_names[1])
+            self.outf.write("%s => %s\n" % (rel_names[0], rel_names[1]))
             
     
 class cmd_pull(Command):
@@ -400,8 +421,10 @@
     that, you can omit the location to use the default.  To change the
     default, use --remember.
     """
+
     takes_options = ['remember', 'overwrite', 'revision', 'verbose']
     takes_args = ['location?']
+    encoding_type = 'replace'
 
     def run(self, location=None, remember=False, overwrite=False, revision=None, verbose=False):
         # FIXME: too much stuff is in the command class
@@ -410,19 +433,21 @@
             branch_to = tree_to.branch
         except NoWorkingTree:
             tree_to = None
-            branch_to = Branch.open_containing(u'.')[0] 
+            branch_to = Branch.open_containing(u'.')[0]
         stored_loc = branch_to.get_parent()
         if location is None:
             if stored_loc is None:
                 raise BzrCommandError("No pull location known or specified.")
             else:
-                print "Using saved location: %s" % stored_loc
+                display_url = urlutils.unescape_for_display(stored_loc,
+                        self.outf.encoding)
+                self.outf.write("Using saved location: %s\n" % display_url)
                 location = stored_loc
 
+        branch_from = Branch.open(location)
+
         if branch_to.get_parent() is None or remember:
-            branch_to.set_parent(location)
-
-        branch_from = Branch.open(location)
+            branch_to.set_parent(branch_from.base)
 
         if revision is None:
             rev_id = None
@@ -443,7 +468,8 @@
             if old_rh != new_rh:
                 # Something changed
                 from bzrlib.log import show_changed_revisions
-                show_changed_revisions(branch_to, old_rh, new_rh)
+                show_changed_revisions(branch_to, old_rh, new_rh,
+                                       to_file=self.outf)
 
 
 class cmd_push(Command):
@@ -470,11 +496,13 @@
     After that, you can omit the location to use the default.  To change the
     default, use --remember.
     """
-    takes_options = ['remember', 'overwrite', 
+
+    takes_options = ['remember', 'overwrite', 'verbose',
                      Option('create-prefix', 
                             help='Create the path leading up to the branch '
                                  'if it does not already exist')]
     takes_args = ['location?']
+    encoding_type = 'replace'
 
     def run(self, location=None, remember=False, overwrite=False,
             create_prefix=False, verbose=False):
@@ -488,25 +516,34 @@
             if stored_loc is None:
                 raise BzrCommandError("No push location known or specified.")
             else:
-                print "Using saved location: %s" % stored_loc
+                display_url = urlutils.unescape_for_display(stored_loc,
+                        self.outf.encoding)
+                self.outf.write("Using saved location: %s\n" % display_url)
                 location = stored_loc
+
+        transport = get_transport(location)
+        location_url = transport.base
         if br_from.get_push_location() is None or remember:
-            br_from.set_push_location(location)
+            br_from.set_push_location(location_url)
+
+        old_rh = []
         try:
-            dir_to = bzrlib.bzrdir.BzrDir.open(location)
+            dir_to = bzrlib.bzrdir.BzrDir.open(location_url)
             br_to = dir_to.open_branch()
         except NotBranchError:
             # create a branch.
-            transport = get_transport(location).clone('..')
+            transport = transport.clone('..')
             if not create_prefix:
                 try:
-                    transport.mkdir(transport.relpath(location))
+                    relurl = transport.relpath(location_url)
+                    mutter('creating directory %s => %s', location_url, relurl)
+                    transport.mkdir(relurl)
                 except NoSuchFile:
                     raise BzrCommandError("Parent directory of %s "
                                           "does not exist." % location)
             else:
                 current = transport.base
-                needed = [(transport, transport.relpath(location))]
+                needed = [(transport, transport.relpath(location_url))]
                 while needed:
                     try:
                         transport, relpath = needed[-1]
@@ -519,7 +556,7 @@
                         if new_transport.base == transport.base:
                             raise BzrCommandError("Could not create "
                                                   "path prefix.")
-            dir_to = br_from.bzrdir.clone(location,
+            dir_to = br_from.bzrdir.clone(location_url,
                 revision_id=br_from.last_revision())
             br_to = dir_to.open_branch()
             count = len(br_to.revision_history())
@@ -546,7 +583,8 @@
             if old_rh != new_rh:
                 # Something changed
                 from bzrlib.log import show_changed_revisions
-                show_changed_revisions(br_to, old_rh, new_rh)
+                show_changed_revisions(br_to, old_rh, new_rh,
+                                       to_file=self.outf)
 
 
 class cmd_branch(Command):
@@ -567,6 +605,7 @@
     aliases = ['get', 'clone']
 
     def run(self, from_location, to_location=None, revision=None, basis=None):
+        from bzrlib.transport import get_transport
         from bzrlib.osutils import rmtree
         if revision is None:
             revision = [None]
@@ -599,23 +638,23 @@
                 name = None
             else:
                 name = os.path.basename(to_location) + '\n'
+
+            to_transport = get_transport(to_location)
             try:
-                os.mkdir(to_location)
-            except OSError, e:
-                if e.errno == errno.EEXIST:
-                    raise BzrCommandError('Target directory "%s" already'
-                                          ' exists.' % to_location)
-                if e.errno == errno.ENOENT:
-                    raise BzrCommandError('Parent of "%s" does not exist.' %
-                                          to_location)
-                else:
-                    raise
+                to_transport.mkdir('.')
+            except bzrlib.errors.FileExists:
+                raise BzrCommandError('Target directory "%s" already'
+                                      ' exists.' % to_location)
+            except bzrlib.errors.NoSuchFile:
+                raise BzrCommandError('Parent of "%s" does not exist.' %
+                                      to_location)
             try:
                 # preserve whatever source format we have.
-                dir = br_from.bzrdir.sprout(to_location, revision_id, basis_dir)
+                dir = br_from.bzrdir.sprout(to_transport.base,
+                        revision_id, basis_dir)
                 branch = dir.open_branch()
             except bzrlib.errors.NoSuchRevision:
-                rmtree(to_location)
+                to_transport.delete_tree('.')
                 msg = "The branch %s has no revision %s." % (from_location, revision[0])
                 raise BzrCommandError(msg)
             except bzrlib.errors.UnlistableBranch:
@@ -624,7 +663,6 @@
                 raise BzrCommandError(msg)
             if name:
                 branch.control_files.put_utf8('branch-name', name)
-
             note('Branched %d revision(s).' % branch.revno())
         finally:
             br_from.unlock()
@@ -735,7 +773,7 @@
         renames = list(bzrlib.tree.find_renames(old_inv, new_inv))
         renames.sort()
         for old_name, new_name in renames:
-            print "%s => %s" % (old_name, new_name)        
+            self.outf.write("%s => %s\n" % (old_name, new_name))
 
 
 class cmd_update(Command):
@@ -804,6 +842,7 @@
     takes_args = ['file*']
     takes_options = ['verbose', Option('new', help='remove newly-added files')]
     aliases = ['rm']
+    encoding_type = 'replace'
     
     def run(self, file_list, verbose=False, new=False):
         tree, file_list = tree_files(file_list)
@@ -818,7 +857,7 @@
             file_list = sorted([f[0] for f in added[0]], reverse=True)
             if len(file_list) == 0:
                 raise BzrCommandError('No matching files.')
-        tree.remove(file_list, verbose=verbose)
+        tree.remove(file_list, verbose=verbose, to_file=self.outf)
 
 
 class cmd_file_id(Command):
@@ -828,8 +867,10 @@
     same through all revisions where the file exists, even when it is
     moved or renamed.
     """
+
     hidden = True
     takes_args = ['filename']
+
     @display_command
     def run(self, filename):
         tree, relpath = WorkingTree.open_containing(filename)
@@ -837,16 +878,19 @@
         if i == None:
             raise BzrError("%r is not a versioned file" % filename)
         else:
-            print i
+            self.outf.write(i + '\n')
 
 
 class cmd_file_path(Command):
     """Print path of file_ids to a file or directory.
 
     This prints one line for each directory down to the target,
-    starting at the branch root."""
+    starting at the branch root.
+    """
+
     hidden = True
     takes_args = ['filename']
+
     @display_command
     def run(self, filename):
         tree, relpath = WorkingTree.open_containing(filename)
@@ -855,7 +899,7 @@
         if fid == None:
             raise BzrError("%r is not a versioned file" % filename)
         for fip in inv.get_idpath(fid):
-            print fip
+            self.outf.write(fip + '\n')
 
 
 class cmd_reconcile(Command):
@@ -887,16 +931,19 @@
 class cmd_revision_history(Command):
     """Display list of revision ids on this branch."""
     hidden = True
+
     @display_command
     def run(self):
         branch = WorkingTree.open_containing(u'.')[0].branch
         for patchid in branch.revision_history():
-            print patchid
+            self.outf.write(patchid)
+            self.outf.write('\n')
 
 
 class cmd_ancestry(Command):
     """List all revisions merged into this branch."""
     hidden = True
+
     @display_command
     def run(self):
         tree = WorkingTree.open_containing(u'.')[0]
@@ -906,7 +953,7 @@
         assert revision_ids[0] == None
         revision_ids.pop(0)
         for revision_id in revision_ids:
-            print revision_id
+            self.outf.write(revision_id + '\n')
 
 
 class cmd_init(Command):
@@ -1037,6 +1084,7 @@
     takes_args = ['file*']
     takes_options = ['revision', 'diff-options', 'prefix']
     aliases = ['di', 'dif']
+    encoding_type = 'exact'
 
     @display_command
     def run(self, revision=None, file_list=None, diff_options=None,
@@ -1103,16 +1151,19 @@
     # directories with readdir, rather than stating each one.  Same
     # level of effort but possibly much less IO.  (Or possibly not,
     # if the directories are very large...)
+    takes_options = ['show-ids']
+
     @display_command
     def run(self, show_ids=False):
         tree = WorkingTree.open_containing(u'.')[0]
         old = tree.basis_tree()
         for path, ie in old.inventory.iter_entries():
             if not tree.has_id(ie.file_id):
+                self.outf.write(path)
                 if show_ids:
-                    print '%-50s %s' % (path, ie.file_id)
-                else:
-                    print path
+                    self.outf.write(' ')
+                    self.outf.write(ie.file_id)
+                self.outf.write('\n')
 
 
 class cmd_modified(Command):
@@ -1126,8 +1177,7 @@
         td = compare_trees(tree.basis_tree(), tree)
 
         for path, id, kind, text_modified, meta_modified in td.modified:
-            print path
-
+            self.outf.write(path + '\n')
 
 
 class cmd_added(Command):
@@ -1144,9 +1194,8 @@
             path = inv.id2path(file_id)
             if not os.access(bzrlib.osutils.abspath(path), os.F_OK):
                 continue
-            print path
-                
-        
+            self.outf.write(path + '\n')
+
 
 class cmd_root(Command):
     """Show the tree root directory.
@@ -1158,7 +1207,7 @@
     def run(self, filename=None):
         """Print the branch root."""
         tree = WorkingTree.open_containing(filename)[0]
-        print tree.basedir
+        self.outf.write(tree.basedir + '\n')
 
 
 class cmd_log(Command):
@@ -1192,6 +1241,8 @@
                             type=str),
                      'short',
                      ]
+    encoding_type = 'replace'
+
     @display_command
     def run(self, location=None, timezone='original',
             verbose=False,
@@ -1204,7 +1255,6 @@
             short=False,
             line=False):
         from bzrlib.log import log_formatter, show_log
-        import codecs
         assert message is None or isinstance(message, basestring), \
             "invalid message argument %r" % message
         direction = (forward and 'forward') or 'reverse'
@@ -1256,19 +1306,12 @@
         if rev1 > rev2:
             (rev2, rev1) = (rev1, rev2)
 
-        mutter('encoding log as %r', bzrlib.user_encoding)
-
-        # use 'replace' so that we don't abort if trying to write out
-        # in e.g. the default C locale.
-        outf = codecs.getwriter(bzrlib.user_encoding)(sys.stdout, errors='replace')
-
         if (log_format == None):
             default = bzrlib.config.BranchConfig(b).log_format()
             log_format = get_log_format(long=long, short=short, line=line, default=default)
-
         lf = log_formatter(log_format,
                            show_ids=show_ids,
-                           to_file=outf,
+                           to_file=self.outf,
                            show_timezone=timezone)
 
         show_log(b,
@@ -1295,9 +1338,12 @@
 class cmd_touching_revisions(Command):
     """Return revision-ids which affected a particular file.
 
-    A more user-friendly interface is "bzr log FILE"."""
+    A more user-friendly interface is "bzr log FILE".
+    """
+
     hidden = True
     takes_args = ["filename"]
+
     @display_command
     def run(self, filename):
         tree, relpath = WorkingTree.open_containing(filename)
@@ -1305,7 +1351,7 @@
         inv = tree.read_working_inventory()
         file_id = inv.path2id(relpath)
         for revno, revision_id, what in bzrlib.log.find_touching_revisions(b, file_id):
-            print "%6d %s" % (revno, what)
+            self.outf.write("%6d %s\n" % (revno, what))
 
 
 class cmd_ls(Command):
@@ -1344,6 +1390,7 @@
         if revision is not None:
             tree = tree.branch.repository.revision_tree(
                 revision[0].in_history(tree.branch).rev_id)
+
         for fp, fc, kind, fid, entry in tree.list_files():
             if fp.startswith(relpath):
                 fp = fp[len(relpath):]
@@ -1353,13 +1400,12 @@
                     continue
                 if verbose:
                     kindch = entry.kind_character()
-                    print '%-8s %s%s' % (fc, fp, kindch)
+                    self.outf.write('%-8s %s%s\n' % (fc, fp, kindch))
                 elif null:
-                    sys.stdout.write(fp)
-                    sys.stdout.write('\0')
-                    sys.stdout.flush()
+                    self.outf.write(fp + '\0')
+                    self.outf.flush()
                 else:
-                    print fp
+                    self.outf.write(fp + '\n')
 
 
 class cmd_unknowns(Command):
@@ -1368,7 +1414,7 @@
     def run(self):
         from bzrlib.osutils import quotefn
         for f in WorkingTree.open_containing(u'.')[0].unknowns():
-            print quotefn(f)
+            self.outf.write(quotefn(f) + '\n')
 
 
 class cmd_ignore(Command):
@@ -1591,7 +1637,6 @@
         from bzrlib.msgeditor import edit_commit_message, \
                 make_commit_message_template
         from tempfile import TemporaryFile
-        import codecs
 
         # TODO: Need a blackbox test for invoking the external editor; may be
         # slightly problematic to run this cross-platform.
@@ -1620,7 +1665,6 @@
             raise BzrCommandError("please specify either --message or --file")
         
         if file:
-            import codecs
             message = codecs.open(file, 'rt', bzrlib.user_encoding).read()
 
         if message == "":
@@ -2006,8 +2050,8 @@
         if merge_type is None:
             merge_type = Merge3Merger
 
-
         tree = WorkingTree.open_containing(u'.')[0]
+
         try:
             if branch is not None:
                 reader = BundleReader(file(branch, 'rb'))
@@ -2027,16 +2071,7 @@
             else:
                 return 1
 
-        stored_loc = tree.branch.get_parent()
-        if branch is None:
-            if stored_loc is None:
-                raise BzrCommandError("No merge branch known or specified.")
-            else:
-                print "Using saved branch: %s" % stored_loc
-                branch = stored_loc
-
-        if tree.branch.get_parent() is None or remember:
-            tree.branch.set_parent(branch)
+        branch = self._get_remembered_parent(tree, branch, 'Merging from')
 
         if revision is None or len(revision) < 1:
             base = [None, None]
@@ -2053,10 +2088,14 @@
                 if None in revision:
                     raise BzrCommandError(
                         "Merge doesn't permit that revision specifier.")
-                b, path = Branch.open_containing(branch)
-
-                base = [branch, revision[0].in_history(b).revno]
-                other = [branch, revision[1].in_history(b).revno]
+                other_branch, path = Branch.open_containing(branch)
+
+                base = [branch, revision[0].in_history(other_branch).revno]
+                other = [branch, revision[1].in_history(other_branch).revno]
+
+        if tree.branch.get_parent() is None or remember:
+            tree.branch.set_parent(other_branch.base)
+
         if path != "":
             interesting_files = [path]
         else:
@@ -2065,9 +2104,9 @@
         try:
             try:
                 conflict_count = merge(other, base, check_clean=(not force),
-                                       merge_type=merge_type, 
+                                       merge_type=merge_type,
                                        reprocess=reprocess,
-                                       show_base=show_base, 
+                                       show_base=show_base,
                                        pb=pb, file_list=interesting_files)
             finally:
                 pb.finished()
@@ -2083,6 +2122,22 @@
                  "please specify an explicit base with -r,\n"
                  "and (if you want) report this to the bzr developers\n")
             log_error(m)
+
+    # TODO: move up to common parent; this isn't merge-specific anymore. 
+    def _get_remembered_parent(self, tree, supplied_location, verb_string):
+        """Use tree.branch's parent if none was supplied.
+
+        Report if the remembered location was used.
+        """
+        if supplied_location is not None:
+            return supplied_location
+        stored_location = tree.branch.get_parent()
+        mutter("%s", stored_location)
+        if stored_location is None:
+            raise BzrCommandError("No location specified or remembered")
+        display_url = urlutils.unescape_for_display(stored_location, self.outf.encoding)
+        self.outf.write("%s remembered location %s\n" % (verb_string, display_url))
+        return stored_location
 
 
 class cmd_remerge(Command):
@@ -2334,7 +2389,7 @@
             try:
                 # handle race conditions - a parent might be set while we run.
                 if local_branch.get_parent() is None:
-                    local_branch.set_parent(other_branch)
+                    local_branch.set_parent(remote_branch.base)
             finally:
                 local_branch.unlock()
         return status_code

=== modified file 'bzrlib/bzrdir.py'
--- bzrlib/bzrdir.py	
+++ bzrlib/bzrdir.py	
@@ -29,7 +29,6 @@
 import bzrlib.errors as errors
 from bzrlib.lockable_files import LockableFiles, TransportLock
 from bzrlib.lockdir import LockDir
-from bzrlib.osutils import safe_unicode
 from bzrlib.osutils import (
                             abspath,
                             pathjoin,
@@ -43,8 +42,9 @@
 from bzrlib.symbol_versioning import *
 from bzrlib.trace import mutter
 from bzrlib.transactions import WriteTransaction
-from bzrlib.transport import get_transport, urlunescape
+from bzrlib.transport import get_transport
 from bzrlib.transport.local import LocalTransport
+import bzrlib.urlutils as urlutils
 from bzrlib.weave import Weave
 from bzrlib.xml4 import serializer_v4
 import bzrlib.xml5
@@ -173,16 +173,18 @@
                     basis_repo = None
         return basis_repo, basis_branch, basis_tree
 
+    # TODO: This should be given a Transport, and should chdir up; otherwise
+    # this will open a new connection.
     def _make_tail(self, url):
-        segments = url.split('/')
-        if segments and segments[-1] not in ('', '.'):
-            parent = '/'.join(segments[:-1])
-            t = bzrlib.transport.get_transport(parent)
+        head, tail = urlutils.split(url)
+        if tail and tail != '.':
+            t = bzrlib.transport.get_transport(head)
             try:
-                t.mkdir(segments[-1])
+                t.mkdir(tail)
             except errors.FileExists:
                 pass
 
+    # TODO: Should take a Transport
     @classmethod
     def create(cls, base):
         """Create a new BzrDir at the url 'base'.
@@ -196,12 +198,11 @@
         if cls is not BzrDir:
             raise AssertionError("BzrDir.create always creates the default format, "
                     "not one of %r" % cls)
-        segments = base.split('/')
-        if segments and segments[-1] not in ('', '.'):
-            parent = '/'.join(segments[:-1])
-            t = bzrlib.transport.get_transport(parent)
+        head, tail = urlutils.split(base)
+        if tail and tail != '.':
+            t = bzrlib.transport.get_transport(head)
             try:
-                t.mkdir(segments[-1])
+                t.mkdir(tail)
             except errors.FileExists:
                 pass
         return BzrDirFormat.get_default_format().initialize(safe_unicode(base))
@@ -489,6 +490,9 @@
         If there is one and it is either an unrecognised format or an unsupported 
         format, UnknownFormatError or UnsupportedFormatError are raised.
         If there is one, it is returned, along with the unused portion of url.
+
+        :return: The BzrDir that contains the path, and a Unicode path 
+                for the rest of the URL.
         """
         # this gets the normalised url back. I.e. '.' -> the full path.
         url = a_transport.base
@@ -496,9 +500,10 @@
             try:
                 format = BzrDirFormat.find_format(a_transport)
                 BzrDir._check_supported(format, False)
-                return format.open(a_transport), a_transport.relpath(url)
+                return format.open(a_transport), urlutils.unescape(a_transport.relpath(url))
             except errors.NotBranchError, e:
-                mutter('not a branch in: %r %s', a_transport.base, e)
+                ## mutter('not a branch in: %r %s', a_transport.base, e)
+                pass
             new_t = a_transport.clone('..')
             if new_t.base == a_transport.base:
                 # reached the root, whatever that may be
@@ -610,6 +615,8 @@
             source_branch.sprout(result, revision_id=revision_id)
         else:
             result.create_branch()
+        # TODO: jam 20060426 we probably need a test in here in the
+        #       case that the newly sprouted branch is a remote one
         if result_repo is None or result_repo.make_working_trees():
             result.create_workingtree()
         return result
@@ -1672,7 +1679,7 @@
             store_transport = self.bzrdir.transport.clone(store_name)
             store = TransportStore(store_transport, prefixed=True)
             for urlfilename in store_transport.list_dir('.'):
-                filename = urlunescape(urlfilename)
+                filename = urlutils.unescape(urlfilename)
                 if (filename.endswith(".weave") or
                     filename.endswith(".gz") or
                     filename.endswith(".sig")):

=== modified file 'bzrlib/commands.py'
--- bzrlib/commands.py	
+++ bzrlib/commands.py	
@@ -32,6 +32,7 @@
 import os
 from warnings import warn
 import errno
+import codecs
 
 import bzrlib
 from bzrlib.errors import (BzrError,
@@ -189,10 +190,21 @@
     hidden
         If true, this command isn't advertised.  This is typically
         for commands intended for expert users.
+
+    encoding_type
+        Command objects will get an 'outf' attribute, which has been
+        set up to properly handle encoding of unicode strings.
+        encoding_type determines what will happen when characters cannot
+        be encoded:
+            strict - abort if we cannot encode
+            replace - put in a bogus character (typically '?')
+            exact - write to sys.stdout directly, without encoding
+
     """
     aliases = []
     takes_args = []
     takes_options = []
+    encoding_type = 'strict'
 
     hidden = False
     
@@ -212,6 +224,27 @@
                 o = Option.OPTIONS[o]
             r[o.name] = o
         return r
+
+    def _setup_outf(self):
+        """Set self.outf to a file linked to stdout, with proper encoding."""
+        assert self.encoding_type in ['strict', 'exact', 'replace']
+
+        # Originally I was using self.stdout, but that looks
+        # *way* too much like sys.stdout
+        if self.encoding_type == 'exact':
+            self.outf = sys.stdout
+            return
+
+        output_encoding = getattr(sys.stdout, 'encoding', None)
+        if not output_encoding:
+            output_encoding = bzrlib.user_encoding
+            mutter('encoding stdout bzrlib.user_encoding %r', output_encoding)
+        else:
+            mutter('encoding stdout log as sys.stdout encoding %r', output_encoding)
+
+        # the error handler comes from encoding_type; 'replace' means we
+        # don't abort when writing out in e.g. the default C locale.
+        self.outf = codecs.getwriter(output_encoding)(sys.stdout, errors=self.encoding_type)
 
     @deprecated_method(zero_eight)
     def run_argv(self, argv):
@@ -242,6 +275,8 @@
 
         all_cmd_args = cmdargs.copy()
         all_cmd_args.update(cmdopts)
+
+        self._setup_outf()
 
         return self.run(**all_cmd_args)
     
@@ -512,6 +547,8 @@
     
     argv
        The command-line arguments, without the program name from argv[0]
+       These should already be decoded. All library/test code calling
+       run_bzr should be passing valid strings (don't need decoding).
     
     Returns a command status or raises an exception.
 
@@ -534,7 +571,7 @@
     --lsprof
         Run under the Python lsprof profiler.
     """
-    argv = [a.decode(bzrlib.user_encoding) for a in argv]
+    argv = list(argv)
 
     opt_lsprof = opt_profile = opt_no_plugins = opt_builtin =  \
                 opt_no_aliases = False
@@ -639,7 +676,9 @@
     ## bzrlib.trace.enable_default_logging()
     bzrlib.trace.log_startup(argv)
     bzrlib.ui.ui_factory = TextUIFactory()
-    ret = run_bzr_catch_errors(argv[1:])
+
+    argv = [a.decode(bzrlib.user_encoding) for a in argv[1:]]
+    ret = run_bzr_catch_errors(argv)
     mutter("return code %d", ret)
     return ret
 

=== modified file 'bzrlib/config.py'
--- bzrlib/config.py	
+++ bzrlib/config.py	
@@ -568,6 +568,7 @@
                               "a reasonable email address" % e)
     return m.group(0)
 
+
 class TreeConfig(object):
     """Branch configuration data associated with its contents, not location"""
     def __init__(self, branch):

=== modified file 'bzrlib/doc/api/transport.txt'
--- bzrlib/doc/api/transport.txt	
+++ bzrlib/doc/api/transport.txt	
@@ -2,6 +2,7 @@
 
    >>> import os
    >>> from bzrlib.osutils import getcwd, dirname
+   >>> from bzrlib.urlutils import local_path_from_url
    >>> import bzrlib.transport as transport
    >>> root = transport.get_transport("file:///")
    >>>
@@ -9,11 +10,11 @@
 Each Transport instance represents a single logical directory.
 
    >>> dir = transport.get_transport(".")
-   >>> dir.base == getcwd() + '/'
+   >>> local_path_from_url(dir.base) == getcwd() + '/'
    True
 
 You can change directories via the clone method:
 
    >>> parent = dir.clone('..')
-   >>> parent.base == (dirname(getcwd()).rstrip('/') + '/')
+   >>> local_path_from_url(parent.base) == (dirname(getcwd()).rstrip('/') + '/')
    True

=== modified file 'bzrlib/errors.py'
--- bzrlib/errors.py	
+++ bzrlib/errors.py	
@@ -218,6 +218,19 @@
     """Permission denied: %(path)r%(extra)s"""
 
 
+class InvalidURL(PathError):
+    """Invalid url supplied to transport: %(path)r%(extra)s"""
+
+
+class InvalidURLJoin(PathError):
+    """Invalid URL join request: %(args)s%(extra)s"""
+
+    def __init__(self, msg, base, args):
+        PathError.__init__(self, base, msg)
+        self.args = [base]
+        self.args.extend(args)
+
+
 class PathNotChild(BzrNewError):
     """Path %(path)r is not a child of path %(base)r%(extra)s"""
     def __init__(self, path, base, extra=None):
@@ -230,8 +243,15 @@
             self.extra = ''
 
 
+# TODO: This is given a URL; we try to unescape it but doing that from inside
+# the exception object is a bit undesirable.
+# TODO: Probably this behavior should live in a common superclass
 class NotBranchError(PathError):
     """Not a branch: %(path)s"""
+
+    def __init__(self, path):
+       import bzrlib.urlutils as urlutils
+       self.path = urlutils.unescape_for_display(path, 'ascii')
 
 
 class AlreadyBranchError(PathError):

=== modified file 'bzrlib/info.py'
--- bzrlib/info.py	
+++ bzrlib/info.py	
@@ -1,17 +1,15 @@
-# Copyright (C) 2004, 2005 by Martin Pool
-# Copyright (C) 2005 by Canonical Ltd
-
-
+# Copyright (C) 2005, 2006 by Canonical Ltd
+# 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
-
+# 
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-
+# 
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

=== modified file 'bzrlib/lockable_files.py'
--- bzrlib/lockable_files.py	
+++ bzrlib/lockable_files.py	
@@ -26,6 +26,8 @@
 from bzrlib.symbol_versioning import *
 from bzrlib.trace import mutter, note
 import bzrlib.transactions as transactions
+import bzrlib.urlutils as urlutils
+
 
 # XXX: The tracking here of lock counts and whether the lock is held is
 # somewhat redundant with what's done in LockDir; the main difference is that
@@ -118,7 +120,7 @@
             file_or_path = '/'.join(file_or_path)
         if file_or_path == '':
             return u''
-        return bzrlib.transport.urlescape(safe_unicode(file_or_path))
+        return urlutils.escape(safe_unicode(file_or_path))
 
     def _find_modes(self):
         """Determine the appropriate modes for files and directories."""

=== modified file 'bzrlib/msgeditor.py'
--- bzrlib/msgeditor.py	
+++ bzrlib/msgeditor.py	
@@ -17,12 +17,13 @@
 
 """Commit message editor support."""
 
-
+import codecs
 import errno
 import os
 from subprocess import call
 import sys
 
+import bzrlib
 import bzrlib.config as config
 from bzrlib.errors import BzrError
 
@@ -96,7 +97,8 @@
         if infotext is not None and infotext != "":
             hasinfo = True
             msgfile = file(msgfilename, "w")
-            msgfile.write("\n%s\n\n%s" % (ignoreline, infotext))
+            msgfile.write("\n\n%s\n\n%s" % (ignoreline,
+                infotext.encode(bzrlib.user_encoding, 'replace')))
             msgfile.close()
         else:
             hasinfo = False
@@ -107,7 +109,7 @@
         started = False
         msg = []
         lastline, nlines = 0, 0
-        for line in file(msgfilename, "r"):
+        for line in codecs.open(msgfilename, 'r', bzrlib.user_encoding):
             stripped_line = line.strip()
             # strip empty line before the log message starts
             if not started:

=== modified file 'bzrlib/osutils.py'
--- bzrlib/osutils.py	
+++ bzrlib/osutils.py	
@@ -31,6 +31,12 @@
 import time
 import types
 import tempfile
+import unicodedata
+from ntpath import (abspath as _nt_abspath,
+                    join as _nt_join,
+                    normpath as _nt_normpath,
+                    realpath as _nt_realpath,
+                    )
 
 import bzrlib
 from bzrlib.errors import (BzrError,
@@ -172,10 +178,57 @@
             else:
                 rename_func(tmp_name, new)
 
+
+# In Python 2.4.2 and older, os.path.abspath and os.path.realpath
+# choke on a Unicode string containing a relative path if
+# os.getcwd() returns a non-sys.getdefaultencoding()-encoded string.
+# sys.getfilesystemencoding() may return None, so fall back to 'ascii'.
+_fs_enc = sys.getfilesystemencoding() or 'ascii'
+def _posix_abspath(path):
+    return os.path.abspath(path.encode(_fs_enc)).decode(_fs_enc)
+    # jam 20060426 This is another possibility which mimics 
+    # os.path.abspath, only uses unicode characters instead
+    # if not os.path.isabs(path):
+    #     return os.path.join(os.getcwdu(), path)
+    # return path
+
+
+def _posix_realpath(path):
+    return os.path.realpath(path.encode(_fs_enc)).decode(_fs_enc)
+
+
+def _win32_abspath(path):
+    return _nt_abspath(path.encode(_fs_enc)).decode(_fs_enc).replace('\\', '/')
+
+
+def _win32_realpath(path):
+    return _nt_realpath(path.encode(_fs_enc)).decode(_fs_enc).replace('\\', '/')
+
+
+def _win32_pathjoin(*args):
+    return _nt_join(*args).replace('\\', '/')
+
+
+def _win32_normpath(path):
+    return _nt_normpath(path).replace('\\', '/')
+
+
+def _win32_getcwd():
+    return os.getcwdu().replace('\\', '/')
+
+
+def _win32_mkdtemp(*args, **kwargs):
+    return tempfile.mkdtemp(*args, **kwargs).replace('\\', '/')
+
+
+def _win32_rename(old, new):
+    fancy_rename(old, new, rename_func=os.rename, unlink_func=os.unlink)
+
+
 # Default is to just use the python builtins, but these can be rebound on
 # particular platforms.
-abspath = os.path.abspath
-realpath = os.path.realpath
+abspath = _posix_abspath
+realpath = _posix_realpath
 pathjoin = os.path.join
 normpath = os.path.normpath
 getcwd = os.getcwdu
@@ -187,41 +240,15 @@
 
 MIN_ABS_PATHLENGTH = 1
 
-if os.name == "posix":
-    # In Python 2.4.2 and older, os.path.abspath and os.path.realpath
-    # choke on a Unicode string containing a relative path if
-    # os.getcwd() returns a non-sys.getdefaultencoding()-encoded
-    # string.
-    _fs_enc = sys.getfilesystemencoding() or 'ascii'
-    def abspath(path):
-        return os.path.abspath(path.encode(_fs_enc)).decode(_fs_enc)
-
-    def realpath(path):
-        return os.path.realpath(path.encode(_fs_enc)).decode(_fs_enc)
 
 if sys.platform == 'win32':
-    # We need to use the Unicode-aware os.path.abspath and
-    # os.path.realpath on Windows systems.
-    def abspath(path):
-        return os.path.abspath(path).replace('\\', '/')
-
-    def realpath(path):
-        return os.path.realpath(path).replace('\\', '/')
-
-    def pathjoin(*args):
-        return os.path.join(*args).replace('\\', '/')
-
-    def normpath(path):
-        return os.path.normpath(path).replace('\\', '/')
-
-    def getcwd():
-        return os.getcwdu().replace('\\', '/')
-
-    def mkdtemp(*args, **kwargs):
-        return tempfile.mkdtemp(*args, **kwargs).replace('\\', '/')
-
-    def rename(old, new):
-        fancy_rename(old, new, rename_func=os.rename, unlink_func=os.unlink)
+    abspath = _win32_abspath
+    realpath = _win32_realpath
+    pathjoin = _win32_pathjoin
+    normpath = _win32_normpath
+    getcwd = _win32_getcwd
+    mkdtemp = _win32_mkdtemp
+    rename = _win32_rename
 
     MIN_ABS_PATHLENGTH = 3
 
@@ -629,6 +656,7 @@
     assert len(base) >= MIN_ABS_PATHLENGTH, ('Length of base must be equal or'
         ' exceed the platform minimum length (which is %d)' % 
         MIN_ABS_PATHLENGTH)
+
     rp = abspath(path)
 
     s = []
@@ -640,8 +668,6 @@
         if tail:
             s.insert(0, tail)
     else:
-        # XXX This should raise a NotChildPath exception, as its not tied
-        # to branch anymore.
         raise PathNotChild(rp, base)
 
     if s:
@@ -664,6 +690,54 @@
         return unicode_or_utf8_string.decode('utf8')
     except UnicodeDecodeError:
         raise BzrBadParameterNotUnicode(unicode_or_utf8_string)
+
+
+_platform_normalizes_filenames = False
+if sys.platform == 'darwin':
+    _platform_normalizes_filenames = True
+
+
+def normalizes_filenames():
+    """Return True if this platform normalizes unicode filenames.
+
+    Mac OSX does, Windows/Linux do not.
+    """
+    return _platform_normalizes_filenames
+
+
+if _platform_normalizes_filenames:
+    def unicode_filename(path):
+        """Make sure 'path' is a properly normalized filename.
+
+        On platforms where the system normalizes filenames (Mac OSX),
+        you can access a file by any path which will normalize
+        correctly.
+        Internally, bzr only supports NFC/NFKC normalization, since
+        that is the standard for XML documents.
+        So we return a normalized path, and indicate this has been
+        properly normalized.
+
+        :return: (path, is_normalized) Return a path which can
+                access the file, and whether or not this path is
+                normalized.
+        """
+        return unicodedata.normalize('NFKC', path), True
+else:
+    def unicode_filename(path):
+        """Make sure 'path' is a properly normalized filename.
+
+        On platforms where the system does not normalize filenames 
+        (Windows, Linux), you have to access a file by its exact path.
+        Internally, bzr only supports NFC/NFKC normalization, since
+        that is the standard for XML documents.
+        So we return the original path, and indicate if this is
+        properly normalized.
+
+        :return: (path, is_normalized) Return a path which can
+                access the file, and whether or not this path is
+                normalized.
+        """
+        return path, unicodedata.normalize('NFKC', path) == path
 
 
 def terminal_width():
@@ -693,16 +767,6 @@
     return sys.platform != "win32"
 
 
-def strip_trailing_slash(path):
-    """Strip trailing slash, except for root paths.
-    The definition of 'root path' is platform-dependent.
-    """
-    if len(path) != MIN_ABS_PATHLENGTH and path[-1] == '/':
-        return path[:-1]
-    else:
-        return path
-
-
 _validWin32PathRE = re.compile(r'^([A-Za-z]:[/\\])?[^:<>*"?\|]*$')
 
 

=== modified file 'bzrlib/repository.py'
--- bzrlib/repository.py	
+++ bzrlib/repository.py	
@@ -519,17 +519,10 @@
         # use inventory as it was in that revision
         file_id = tree.inventory.path2id(file)
         if not file_id:
-            raise BzrError("%r is not present in revision %s" % (file, revno))
-            try:
-                revno = self.revision_id_to_revno(revision_id)
-            except errors.NoSuchRevision:
-                # TODO: This should not be BzrError,
-                # but NoSuchFile doesn't fit either
-                raise BzrError('%r is not present in revision %s' 
-                                % (file, revision_id))
-            else:
-                raise BzrError('%r is not present in revision %s'
-                                % (file, revno))
+            # TODO: jam 20060427 Write a test for this code path
+            #       it had a bug in it, and was raising the wrong
+            #       exception.
+            raise errors.BzrError("%r is not present in revision %s" % (file, revision_id))
         tree.print_file(file_id)
 
     def get_transaction(self):

=== modified file 'bzrlib/revision.py'
--- bzrlib/revision.py	
+++ bzrlib/revision.py	
@@ -404,3 +404,48 @@
     def unlock(self):
         for source in self._revision_sources:
             source.unlock()
+
+
+@deprecated_function(zero_eight)
+def get_intervening_revisions(ancestor_id, rev_id, rev_source,
+                              revision_history=None):
+    """Find the longest line of descent from ancestor_id to rev_id.
+    Revision history is followed where possible.
+
+    If ancestor_id == rev_id, list will be empty.
+    Otherwise, rev_id will be the last entry.  ancestor_id will never appear.
+    If ancestor_id is not an ancestor, NotAncestor will be thrown
+    """
+    root, ancestors, descendants = revision_graph(rev_id, rev_source)
+    if len(descendants) == 0:
+        raise NoSuchRevision(rev_source, rev_id)
+    if ancestor_id not in descendants:
+        rev_source.get_revision(ancestor_id)
+        raise bzrlib.errors.NotAncestor(rev_id, ancestor_id)
+    root_descendants = all_descendants(descendants, ancestor_id)
+    root_descendants.add(ancestor_id)
+    if rev_id not in root_descendants:
+        raise bzrlib.errors.NotAncestor(rev_id, ancestor_id)
+    distances = node_distances(descendants, ancestors, ancestor_id,
+                               root_descendants=root_descendants)
+
+    def best_ancestor(rev_id):
+        best = None
+        for anc_id in ancestors[rev_id]:
+            try:
+                distance = distances[anc_id]
+            except KeyError:
+                continue
+            if revision_history is not None and anc_id in revision_history:
+                return anc_id
+            elif best is None or distance > best[1]:
+                best = (anc_id, distance)
+        return best[0]
+
+    next = rev_id
+    path = []
+    while next != ancestor_id:
+        path.append(next)
+        next = best_ancestor(next)
+    path.reverse()
+    return path

=== modified file 'bzrlib/store/__init__.py'
--- bzrlib/store/__init__.py	
+++ bzrlib/store/__init__.py	
@@ -34,8 +34,9 @@
 from bzrlib.errors import BzrError, UnlistableStore, TransportNotPossible
 from bzrlib.symbol_versioning import *
 from bzrlib.trace import mutter
-from bzrlib.transport import Transport, urlescape
+from bzrlib.transport import Transport
 from bzrlib.transport.local import LocalTransport
+import bzrlib.urlutils as urlutils
 
 ######################################################################
 # stores
@@ -302,7 +303,7 @@
         fileid = self._escape_file_id(fileid)
         path = prefix + fileid
         full_path = u'.'.join([path] + suffixes)
-        return urlescape(full_path)
+        return urlutils.escape(full_path)
 
     def _escape_file_id(self, file_id):
         """Turn a file id into a filesystem safe string.

=== modified file 'bzrlib/tests/__init__.py'
--- bzrlib/tests/__init__.py	
+++ bzrlib/tests/__init__.py	
@@ -62,13 +62,14 @@
 from bzrlib.revision import common_ancestor
 import bzrlib.store
 import bzrlib.trace
-from bzrlib.transport import urlescape, get_transport
+from bzrlib.transport import get_transport
 import bzrlib.transport
 from bzrlib.transport.local import LocalRelpathServer
 from bzrlib.transport.readonly import ReadonlyServer
 from bzrlib.trace import mutter
 from bzrlib.tests.TestUtil import TestLoader, TestSuite
 from bzrlib.tests.treeshape import build_tree_contents
+import bzrlib.urlutils as urlutils
 from bzrlib.workingtree import WorkingTree, WorkingTreeFormat2
 
 default_transport = LocalRelpathServer
@@ -328,7 +329,11 @@
         test_root = TestCaseInTempDir.TEST_ROOT
         if result.wasSuccessful() or not self.keep_output:
             if test_root is not None:
-                    osutils.rmtree(test_root)
+                # If LANG=C we have probably created some bogus paths
+                # which rmtree(unicode) will fail to delete,
+                # so make sure we are using rmtree(str) to delete everything.
+                osutils.rmtree(test_root.encode(
+                    sys.getfilesystemencoding()))
         else:
             if self.pb is not None:
                 self.pb.note("Failed tests working directories are in '%s'\n",
@@ -363,6 +368,33 @@
 
 class CommandFailed(Exception):
     pass
+
+
+class StringIOWrapper(object):
+    """A wrapper around cStringIO which just adds an encoding attribute.
+    
+    Internally we can check sys.stdout to see what the output encoding
+    should be. However, cStringIO has no encoding attribute that we can
+    set. So we wrap it instead.
+    """
+    encoding='ascii'
+    _cstring = None
+
+    def __init__(self, s=None):
+        if s is not None:
+            self.__dict__['_cstring'] = StringIO(s)
+        else:
+            self.__dict__['_cstring'] = StringIO()
+
+    def __getattr__(self, name, getattr=getattr):
+        return getattr(self.__dict__['_cstring'], name)
+
+    def __setattr__(self, name, val):
+        if name == 'encoding':
+            self.__dict__['encoding'] = val
+        else:
+            return setattr(self._cstring, name, val)
+
 
 class TestCase(unittest.TestCase):
     """Base class for bzr unit tests.
@@ -602,7 +634,7 @@
         """Shortcut that splits cmd into words, runs, and returns stdout"""
         return self.run_bzr_captured(cmd.split(), retcode=retcode)[0]
 
-    def run_bzr_captured(self, argv, retcode=0, stdin=None):
+    def run_bzr_captured(self, argv, retcode=0, encoding=None, stdin=None):
         """Invoke bzr and return (stdout, stderr).
 
         Useful for code that wants to check the contents of the
@@ -619,15 +651,21 @@
         errors, and with logging set to something approximating the
         default, so that error reporting can be checked.
 
-        argv -- arguments to invoke bzr
-        retcode -- expected return code, or None for don't-care.
+        :param argv: arguments to invoke bzr
+        :param retcode: expected return code, or None for don't-care.
+        :param encoding: encoding for sys.stdout and sys.stderr
         :param stdin: A string to be used as stdin for the command.
         """
+        if encoding is None:
+            encoding = bzrlib.user_encoding
         if stdin is not None:
             stdin = StringIO(stdin)
-        stdout = StringIO()
-        stderr = StringIO()
-        self.log('run bzr: %s', ' '.join(argv))
+        stdout = StringIOWrapper()
+        stderr = StringIOWrapper()
+        stdout.encoding = encoding
+        stderr.encoding = encoding
+
+        self.log('run bzr: %r', argv)
         # FIXME: don't call into logging here
         handler = logging.StreamHandler(stderr)
         handler.setFormatter(bzrlib.trace.QuietFormatter())
@@ -646,14 +684,15 @@
         finally:
             logger.removeHandler(handler)
             bzrlib.ui.ui_factory = old_ui_factory
+
         out = stdout.getvalue()
         err = stderr.getvalue()
         if out:
-            self.log('output:\n%s', out)
+            self.log('output:\n%r', out)
         if err:
-            self.log('errors:\n%s', err)
+            self.log('errors:\n%r', err)
         if retcode is not None:
-            self.assertEquals(result, retcode)
+            self.assertEquals(retcode, result)
         return out, err
 
     def run_bzr(self, *args, **kwargs):
@@ -669,8 +708,16 @@
         :param stdin: A string to be used as stdin for the command.
         """
         retcode = kwargs.pop('retcode', 0)
+        encoding = kwargs.pop('encoding', None)
         stdin = kwargs.pop('stdin', None)
-        return self.run_bzr_captured(args, retcode, stdin)
+        return self.run_bzr_captured(args, retcode=retcode, encoding=encoding, stdin=stdin)
+
+    def run_bzr_decode(self, *args, **kwargs):
+        if kwargs.has_key('encoding'):
+            encoding = kwargs['encoding']
+        else:
+            encoding = bzrlib.user_encoding
+        return self.run_bzr(*args, **kwargs)[0].decode(encoding)
 
     def check_inventory_shape(self, inv, shape):
         """Compare an inventory to a list of expected names.
@@ -841,7 +888,7 @@
         for name in shape:
             self.assert_(isinstance(name, basestring))
             if name[-1] == '/':
-                transport.mkdir(urlescape(name[:-1]))
+                transport.mkdir(urlutils.escape(name[:-1]))
             else:
                 if line_endings == 'binary':
                     end = '\n'
@@ -849,8 +896,8 @@
                     end = os.linesep
                 else:
                     raise errors.BzrError('Invalid line ending request %r' % (line_endings,))
-                content = "contents of %s%s" % (name, end)
-                transport.put(urlescape(name), StringIO(content))
+                content = "contents of %s%s" % (name.encode('utf-8'), end)
+                transport.put(urlutils.escape(name), StringIO(content))
 
     def build_tree_contents(self, shape):
         build_tree_contents(shape)
@@ -866,6 +913,7 @@
     def assertFileEqual(self, content, path):
         """Fail if path does not contain 'content'."""
         self.failUnless(osutils.lexists(path))
+        # TODO: jam 20060427 Shouldn't this be 'rb'?
         self.assertEqualDiff(content, open(path, 'r').read())
 
 
@@ -947,7 +995,7 @@
         if relpath is not None and relpath != '.':
             if not base.endswith('/'):
                 base = base + '/'
-            base = base + relpath
+            base = base + urlutils.escape(relpath)
         return base
 
     def get_transport(self):
@@ -974,9 +1022,10 @@
     def make_bzrdir(self, relpath, format=None):
         try:
             url = self.get_url(relpath)
-            segments = relpath.split('/')
+            mutter('relpath %r => url %r', relpath, url)
+            segments = url.split('/')
             if segments and segments[-1] not in ('', '.'):
-                parent = self.get_url('/'.join(segments[:-1]))
+                parent = '/'.join(segments[:-1])
                 t = get_transport(parent)
                 try:
                     t.mkdir(segments[-1])
@@ -1180,6 +1229,7 @@
                    'bzrlib.tests.test_tuned_gzip',
                    'bzrlib.tests.test_ui',
                    'bzrlib.tests.test_upgrade',
+                   'bzrlib.tests.test_urlutils',
                    'bzrlib.tests.test_versionedfile',
                    'bzrlib.tests.test_weave',
                    'bzrlib.tests.test_whitebox',

=== modified file 'bzrlib/tests/blackbox/__init__.py'
--- bzrlib/tests/blackbox/__init__.py	
+++ bzrlib/tests/blackbox/__init__.py	
@@ -26,10 +26,13 @@
 import sys
 
 from bzrlib.tests import (
+                          adapt_modules,
                           TestCaseWithTransport,
                           TestSuite,
                           TestLoader,
+                          iter_suite_tests,
                           )
+from bzrlib.tests.EncodingAdapter import EncodingTestAdapter
 import bzrlib.ui as ui
 
 
@@ -45,6 +48,7 @@
                      'bzrlib.tests.blackbox.test_bound_branches',
                      'bzrlib.tests.blackbox.test_cat',
                      'bzrlib.tests.blackbox.test_checkout',
+                     'bzrlib.tests.blackbox.test_command_encoding',
                      'bzrlib.tests.blackbox.test_commit',
                      'bzrlib.tests.blackbox.test_conflicts',
                      'bzrlib.tests.blackbox.test_diff',
@@ -77,9 +81,17 @@
                      'bzrlib.tests.blackbox.test_upgrade',
                      'bzrlib.tests.blackbox.test_versioning',
                      ]
+    test_encodings = [
+        'bzrlib.tests.blackbox.test_non_ascii',
+    ]
 
     loader = TestLoader()
-    return loader.loadTestsFromModuleNames(testmod_names)
+    suite = loader.loadTestsFromModuleNames(testmod_names) 
+
+    adapter = EncodingTestAdapter()
+    adapt_modules(test_encodings, adapter, loader, suite)
+
+    return suite
 
 
 class ExternalBase(TestCaseWithTransport):

=== modified file 'bzrlib/tests/blackbox/test_bound_branches.py'
--- bzrlib/tests/blackbox/test_bound_branches.py	
+++ bzrlib/tests/blackbox/test_bound_branches.py	
@@ -21,10 +21,11 @@
 import os
 from cStringIO import StringIO
 
-from bzrlib.tests import TestCaseWithTransport
 from bzrlib.branch import Branch
 from bzrlib.bzrdir import (BzrDir, BzrDirFormat, BzrDirMetaFormat1)
 from bzrlib.osutils import getcwd
+from bzrlib.tests import TestCaseWithTransport
+import bzrlib.urlutils as urlutils
 from bzrlib.workingtree import WorkingTree
 
 
@@ -41,15 +42,19 @@
         # bind on a format 6 bzrdir should error
         out,err = self.run_bzr('bind', '../master', retcode=3)
         self.assertEqual('', out)
+        # TODO: jam 20060427 Probably something like this really should
+        #       print out the actual path, rather than the URL
+        cwd = urlutils.local_path_to_url(getcwd())
         self.assertEqual('bzr: ERROR: To use this feature you must '
-                         'upgrade your branch at %s/.\n' % getcwd(), err)
+                         'upgrade your branch at %s/.\n' % cwd, err)
     
     def test_unbind_format_6_bzrdir(self):
         # bind on a format 6 bzrdir should error
         out,err = self.run_bzr('unbind', retcode=3)
         self.assertEqual('', out)
+        cwd = urlutils.local_path_to_url(getcwd())
         self.assertEqual('bzr: ERROR: To use this feature you must '
-                         'upgrade your branch at %s/.\n' % getcwd(), err)
+                         'upgrade your branch at %s/.\n' % cwd, err)
 
 
 class TestBoundBranches(TestCaseWithTransport):

=== modified file 'bzrlib/tests/blackbox/test_log.py'
--- bzrlib/tests/blackbox/test_log.py	
+++ bzrlib/tests/blackbox/test_log.py	
@@ -20,7 +20,9 @@
 
 import os
 
+import bzrlib
 from bzrlib.tests.blackbox import ExternalBase
+from bzrlib.tests import TestCaseInTempDir
 
 
 class TestLog(ExternalBase):
@@ -151,3 +153,105 @@
         self.assertTrue('      branch 1' in out)
         self.assertTrue('  first post' in out)
         self.assertEqual('', err)
+
+
+class TestLogEncodings(TestCaseInTempDir):
+
+    _mu = u'\xb5'
+    _message = u'Message with \xb5'
+
+    # Encodings which can encode mu
+    good_encodings = [
+        'utf-8',
+        'latin-1',
+        'iso-8859-1',
+        'cp437', # Common windows encoding
+        'cp1251', # Alexander Belchenko's windows encoding
+        'cp1258', # Common windows encoding
+    ]
+    # Encodings which cannot encode mu
+    bad_encodings = [
+        'ascii',
+        'iso-8859-2',
+        'koi8_r',
+    ]
+
+    def setUp(self):
+        TestCaseInTempDir.setUp(self)
+        self.user_encoding = bzrlib.user_encoding
+
+    def tearDown(self):
+        bzrlib.user_encoding = self.user_encoding
+        TestCaseInTempDir.tearDown(self)
+
+    def create_branch(self):
+        bzr = self.run_bzr
+        bzr('init')
+        open('a', 'wb').write('some stuff\n')
+        bzr('add', 'a')
+        bzr('commit', '-m', self._message)
+
+    def try_encoding(self, encoding, fail=False):
+        bzr = self.run_bzr
+        if fail:
+            self.assertRaises(UnicodeEncodeError,
+                self._mu.encode, encoding)
+            encoded_msg = self._message.encode(encoding, 'replace')
+        else:
+            encoded_msg = self._message.encode(encoding)
+
+        old_encoding = bzrlib.user_encoding
+        # This test requires that 'run_bzr' uses the current
+        # bzrlib, because we override user_encoding, and expect
+        # it to be used
+        try:
+            bzrlib.user_encoding = 'ascii'
+            # We should be able to handle any encoding
+            out, err = bzr('log', encoding=encoding)
+            if not fail:
+                # Make sure mu was written out in the requested encoding
+                self.assertNotEqual(-1, out.find(encoded_msg))
+                out_unicode = out.decode(encoding)
+                self.assertNotEqual(-1, out_unicode.find(self._message))
+            else:
+                self.assertNotEqual(-1, out.find('Message with ?'))
+        finally:
+            bzrlib.user_encoding = old_encoding
+
+    def test_log_handles_encoding(self):
+        self.create_branch()
+
+        for encoding in self.good_encodings:
+            self.try_encoding(encoding)
+
+    def test_log_handles_bad_encoding(self):
+        self.create_branch()
+
+        for encoding in self.bad_encodings:
+            self.try_encoding(encoding, fail=True)
+
+    def test_stdout_encoding(self):
+        bzr = self.run_bzr
+        bzrlib.user_encoding = "cp1251"
+
+        bzr('init')
+        self.build_tree(['a'])
+        bzr('add', 'a')
+        bzr('commit', '-m', u'\u0422\u0435\u0441\u0442')
+        stdout, stderr = self.run_bzr('log', encoding='cp866')
+
+        message = stdout.splitlines()[-1]
+
+        # Explanation of the check:
+        # u'\u0422\u0435\u0441\u0442' is the word 'Test' in Russian.
+        # In cp866  encoding this is the string '\x92\xa5\xe1\xe2'
+        # In cp1251 encoding this is the string '\xd2\xe5\xf1\xf2'
+        # This test checks that the output of the log command
+        # is encoded using sys.stdout.encoding
+        test_in_cp866 = '\x92\xa5\xe1\xe2'
+        test_in_cp1251 = '\xd2\xe5\xf1\xf2'
+        # Make sure the log string is encoded in cp866
+        self.assertEquals(test_in_cp866, message[2:])
+        # Make sure the cp1251 string is not found anywhere
+        self.assertEquals(-1, stdout.find(test_in_cp1251))
+

=== modified file 'bzrlib/tests/blackbox/test_merge.py'
--- bzrlib/tests/blackbox/test_merge.py	
+++ bzrlib/tests/blackbox/test_merge.py	
@@ -25,6 +25,7 @@
 from bzrlib.bzrdir import BzrDir
 from bzrlib.osutils import abspath
 from bzrlib.tests.blackbox import ExternalBase
+import bzrlib.urlutils as urlutils
 from bzrlib.workingtree import WorkingTree
 
 
@@ -141,7 +142,7 @@
         os.chdir('branch_b')
         out = self.runbzr('merge', retcode=3)
         self.assertEquals(out,
-                ('','bzr: ERROR: No merge branch known or specified.\n'))
+                ('','bzr: ERROR: No location specified or remembered\n'))
         # test implicit --remember when no parent set, this merge conflicts
         self.build_tree(['d'])
         tree_b.add('d')
@@ -152,7 +153,9 @@
         # test implicit --remember after resolving conflict
         tree_b.commit('commit d')
         out, err = self.runbzr('merge')
-        self.assertEquals(out, 'Using saved branch: ../branch_a\n')
+        
+        base = urlutils.local_path_from_url(branch_a.base)
+        self.assertEquals(out, 'Merging from remembered location %s\n' % (base,))
         self.assertEquals(err, 'All changes applied successfully.\n')
         self.assertEquals(abspath(branch_b.get_parent()), abspath(parent))
         # re-open tree as external runbzr modified it

=== modified file 'bzrlib/tests/blackbox/test_missing.py'
--- bzrlib/tests/blackbox/test_missing.py	
+++ bzrlib/tests/blackbox/test_missing.py	
@@ -1,5 +1,22 @@
-"""Black-box tests for bzr missing.
-"""
+# Copyright (C) 2005 by Canonical Ltd
+# -*- coding: utf-8 -*-
+# vim: encoding=utf-8
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Black-box tests for bzr missing."""
 
 import os
 
@@ -10,21 +27,23 @@
 class TestMissing(TestCaseInTempDir):
 
     def test_missing(self):
+        def bzr(*args, **kwargs):
+            return self.run_bzr(*args, **kwargs)[0]
         missing = "You are missing 1 revision(s):"
 
         # create a source branch
         os.mkdir('a')
         os.chdir('a')
-        self.capture('init')
+        bzr('init')
         open('a', 'wb').write('initial\n')
-        self.capture('add a')
-        self.capture('commit -m inital')
+        bzr('add', 'a')
+        bzr('commit', '-m', 'inital')
 
         # clone and add a differing revision
-        self.capture('branch . ../b')
+        bzr('branch', '.', '../b')
         os.chdir('../b')
         open('a', 'ab').write('more\n')
-        self.capture('commit -m more')
+        bzr('commit', '-m', 'more')
 
         # run missing in a against b
         os.chdir('../a')
@@ -51,51 +70,51 @@
         branch_b.unlock()
 
         # get extra revision from b
-        self.capture('merge ../b')
-        self.capture('commit -m merge')
+        bzr('merge', '../b')
+        bzr('commit', '-m', 'merge')
 
         # compare again, but now we have the 'merge' commit extra
-        lines = self.capture('missing ../b', retcode=1).splitlines()
+        lines = bzr('missing', '../b', retcode=1).splitlines()
         self.assertEqual("You have 1 extra revision(s):", lines[0])
         self.assertEqual(8, len(lines))
-        lines2 = self.capture('missing ../b --mine-only', retcode=1)
+        lines2 = bzr('missing', '../b', '--mine-only', retcode=1)
         lines2 = lines2.splitlines()
         self.assertEqual(lines, lines2)
-        lines3 = self.capture('missing ../b --theirs-only', retcode=1)
+        lines3 = bzr('missing', '../b', '--theirs-only', retcode=1)
         lines3 = lines3.splitlines()
         self.assertEqual(0, len(lines3))
 
         # relative to a, missing the 'merge' commit 
         os.chdir('../b')
-        lines = self.capture('missing ../a', retcode=1).splitlines()
+        lines = bzr('missing', '../a', retcode=1).splitlines()
         self.assertEqual(missing, lines[0])
         self.assertEqual(8, len(lines))
-        lines2 = self.capture('missing ../a --theirs-only', retcode=1)
+        lines2 = bzr('missing', '../a', '--theirs-only', retcode=1)
         lines2 = lines2.splitlines()
         self.assertEqual(lines, lines2)
-        lines3 = self.capture('missing ../a --mine-only', retcode=1)
+        lines3 = bzr('missing', '../a', '--mine-only', retcode=1)
         lines3 = lines3.splitlines()
         self.assertEqual(0, len(lines3))
-        lines4 = self.capture('missing ../a --short', retcode=1)
+        lines4 = bzr('missing', '../a', '--short', retcode=1)
         lines4 = lines4.splitlines()
         self.assertEqual(4, len(lines4))
-        lines5 = self.capture('missing ../a --line', retcode=1)
+        lines5 = bzr('missing', '../a', '--line', retcode=1)
         lines5 = lines5.splitlines()
         self.assertEqual(2, len(lines5))
-        lines6 = self.capture('missing ../a --reverse', retcode=1)
+        lines6 = bzr('missing', '../a', '--reverse', retcode=1)
         lines6 = lines6.splitlines()
         self.assertEqual(lines6, lines)
-        lines7 = self.capture('missing ../a --show-ids', retcode=1)
+        lines7 = bzr('missing', '../a', '--show-ids', retcode=1)
         lines7 = lines7.splitlines()
         self.assertEqual(11, len(lines7))
-        lines8 = self.capture('missing ../a --verbose', retcode=1)
+        lines8 = bzr('missing', '../a', '--verbose', retcode=1)
         lines8 = lines8.splitlines()
         self.assertEqual("modified:", lines8[-2])
         self.assertEqual("  a", lines8[-1])
 
         
         # after a pull we're back on track
-        self.capture('pull')
+        bzr('pull')
         self.assertEqual("Branches are up to date.\n", 
-                         self.capture('missing ../a'))
+                         bzr('missing', '../a'))
 

=== modified file 'bzrlib/tests/blackbox/test_outside_wt.py'
--- bzrlib/tests/blackbox/test_outside_wt.py	
+++ bzrlib/tests/blackbox/test_outside_wt.py	
@@ -23,6 +23,7 @@
 
 from bzrlib.tests import ChrootedTestCase
 from bzrlib.osutils import getcwd
+import bzrlib.urlutils as urlutils
 
 
 class TestOutsideWT(ChrootedTestCase):
@@ -30,10 +31,8 @@
 
     def test_cwd_log(self):
         os.chdir(tempfile.mkdtemp())
-        cwd = getcwd()
         out, err = self.run_bzr('log', retcode=3)
-
-        self.assertEqual(u'bzr: ERROR: Not a branch: %s/\n' % (cwd,),
+        self.assertEqual(u'bzr: ERROR: Not a branch: %s/\n' % (getcwd(),),
                          err)
 
     def test_url_log(self):

=== modified file 'bzrlib/tests/blackbox/test_pull.py'
--- bzrlib/tests/blackbox/test_pull.py	
+++ bzrlib/tests/blackbox/test_pull.py	
@@ -22,7 +22,6 @@
 import sys
 
 from bzrlib.branch import Branch
-from bzrlib.osutils import abspath
 from bzrlib.tests.blackbox import ExternalBase
 from bzrlib.uncommit import uncommit
 
@@ -249,13 +248,13 @@
         out = self.runbzr('pull ../branch_a', retcode=3)
         self.assertEquals(out,
                 ('','bzr: ERROR: These branches have diverged.  Try merge.\n'))
-        self.assertEquals(abspath(branch_b.get_parent()), abspath(parent))
+        self.assertEquals(branch_b.get_parent(), parent)
         # test implicit --remember after resolving previous failure
         uncommit(branch=branch_b, tree=tree_b)
         transport.delete('branch_b/d')
         self.runbzr('pull')
-        self.assertEquals(abspath(branch_b.get_parent()), abspath(parent))
+        self.assertEquals(branch_b.get_parent(), parent)
         # test explicit --remember
         self.runbzr('pull ../branch_c --remember')
-        self.assertEquals(abspath(branch_b.get_parent()),
-                          abspath(branch_c.bzrdir.root_transport.base))
+        self.assertEquals(branch_b.get_parent(),
+                          branch_c.bzrdir.root_transport.base)

=== modified file 'bzrlib/tests/blackbox/test_selftest.py'
--- bzrlib/tests/blackbox/test_selftest.py	
+++ bzrlib/tests/blackbox/test_selftest.py	
@@ -90,7 +90,7 @@
 
 class TestRunBzr(ExternalBase):
 
-    def run_bzr_captured(self, argv, retcode=0, stdin=None):
+    def run_bzr_captured(self, argv, retcode=0, encoding=None, stdin=None):
         self.stdin = stdin
 
     def test_stdin(self):

=== modified file 'bzrlib/tests/blackbox/test_status.py'
--- bzrlib/tests/blackbox/test_status.py	
+++ bzrlib/tests/blackbox/test_status.py	
@@ -13,7 +13,6 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
 
 """Tests of status command.
 
@@ -23,11 +22,11 @@
 interface later, they will be non blackbox tests.
 """
 
-
 from cStringIO import StringIO
+import codecs
 from os import mkdir, chdir
+import sys
 from tempfile import TemporaryFile
-import codecs
 
 import bzrlib.branch
 from bzrlib.builtins import merge
@@ -36,7 +35,7 @@
 from bzrlib.osutils import pathjoin
 from bzrlib.revisionspec import RevisionSpec
 from bzrlib.status import show_tree_status
-from bzrlib.tests import TestCaseWithTransport
+from bzrlib.tests import TestCaseWithTransport, TestSkipped
 from bzrlib.workingtree import WorkingTree
 
 
@@ -219,3 +218,49 @@
         self.assertEquals(result2, result)
 
 
+class TestStatusEncodings(TestCaseWithTransport):
+    
+    def setUp(self):
+        TestCaseWithTransport.setUp(self)
+        self.user_encoding = bzrlib.user_encoding
+        self.stdout = sys.stdout
+
+    def tearDown(self):
+        bzrlib.user_encoding = self.user_encoding
+        sys.stdout = self.stdout
+        TestCaseWithTransport.tearDown(self)
+
+    def make_uncommitted_tree(self):
+        """Build a branch with uncommitted unicode named changes in the cwd."""
+        working_tree = self.make_branch_and_tree(u'.')
+        filename = u'hell\u00d8'
+        try:
+            self.build_tree_contents([(filename, 'contents of hello')])
+        except UnicodeEncodeError:
+            raise TestSkipped("can't build unicode working tree in "
+                "filesystem encoding %s" % sys.getfilesystemencoding())
+        working_tree.add(filename)
+        return working_tree
+
+    def test_stdout_ascii(self):
+        sys.stdout = StringIO()
+        bzrlib.user_encoding = 'ascii'
+        working_tree = self.make_uncommitted_tree()
+        stdout, stderr = self.run_bzr("status")
+
+        self.assertEquals(stdout, """\
+added:
+  hell?
+""")
+
+    def test_stdout_latin1(self):
+        sys.stdout = StringIO()
+        bzrlib.user_encoding = 'latin-1'
+        working_tree = self.make_uncommitted_tree()
+        stdout, stderr = self.run_bzr('status')
+
+        self.assertEquals(stdout, u"""\
+added:
+  hell\u00d8
+""".encode('latin-1'))
+

=== modified file 'bzrlib/tests/branch_implementations/test_parent.py'
--- bzrlib/tests/branch_implementations/test_parent.py	
+++ bzrlib/tests/branch_implementations/test_parent.py	
@@ -18,7 +18,9 @@
 import os
 
 from bzrlib.branch import Branch
-from bzrlib.osutils import abspath, realpath
+import bzrlib.errors
+from bzrlib.osutils import abspath, realpath, getcwd
+from bzrlib.urlutils import local_path_from_url, local_path_to_url, escape
 from bzrlib.tests import TestCaseWithTransport
 
 
@@ -38,5 +40,26 @@
         url = 'http://bazaar-vcs.org/bzr/bzr.dev'
         b.set_parent(url)
         self.assertEquals(b.get_parent(), url)
+        self.assertEqual(b.control_files.get('parent').read().strip('\n'), url)
+
         b.set_parent(None)
         self.assertEquals(b.get_parent(), None)
+
+        b.set_parent('../other_branch')
+        cwd = getcwd()
+
+        self.assertEquals(b.get_parent(), local_path_to_url('../other_branch'))
+        path = local_path_to_url('../yanb')
+        b.set_parent(path)
+        self.assertEqual(b.control_files.get('parent').read().strip('\n'), 
+            '../yanb')
+        self.assertEqual(b.get_parent(), path)
+
+
+        self.assertRaises(bzrlib.errors.InvalidURL, b.set_parent, u'\xb5')
+        b.set_parent(escape(u'\xb5'))
+        self.assertEqual(b.control_files.get('parent').read().strip('\n'), 
+            '%C2%B5')
+
+        self.assertEqual(b.get_parent(), b.base + '%C2%B5')
+

=== modified file 'bzrlib/tests/branch_implementations/test_permissions.py'
--- bzrlib/tests/branch_implementations/test_permissions.py	
+++ bzrlib/tests/branch_implementations/test_permissions.py	
@@ -36,45 +36,10 @@
 from bzrlib.bzrdir import BzrDir
 from bzrlib.lockable_files import LockableFiles
 from bzrlib.tests import TestCaseWithTransport, TestSkipped
+from bzrlib.tests.test_permissions import chmod_r, check_mode_r
 from bzrlib.tests.test_sftp_transport import TestCaseWithSFTPServer
 from bzrlib.transport import get_transport
 from bzrlib.workingtree import WorkingTree
-
-# TODO RBC consolidate the helper methods here and in tests/test_permissions.py
-
-def chmod_r(base, file_mode, dir_mode):
-    """Recursively chmod from a base directory"""
-    assert os.path.isdir(base)
-    os.chmod(base, dir_mode)
-    for root, dirs, files in os.walk(base):
-        for d in dirs:
-            p = os.path.join(root, d)
-            os.chmod(p, dir_mode)
-        for f in files:
-            p = os.path.join(root, f)
-            os.chmod(p, file_mode)
-
-
-def check_mode_r(test, base, file_mode, dir_mode, include_base=True):
-    """Check that all permissions match
-
-    :param test: The TestCase being run
-    :param base: The path to the root directory to check
-    :param file_mode: The mode for all files
-    :param dir_mode: The mode for all directories
-    :param include_base: If false, only check the subdirectories
-    """
-    assert os.path.isdir(base)
-    t = get_transport(".")
-    if include_base:
-        test.assertTransportMode(t, base, dir_mode)
-    for root, dirs, files in os.walk(base):
-        for d in dirs:
-            p = os.path.join(root, d)
-            test.assertTransportMode(t, p, dir_mode)
-        for f in files:
-            p = os.path.join(root, f)
-            test.assertTransportMode(t, p, file_mode)
 
 
 class TestPermissions(TestCaseWithTransport):
@@ -95,18 +60,18 @@
         b = self.make_branch('b')
         self.assertEqualMode(02777, b.control_files._dir_mode)
         self.assertEqualMode(00666, b.control_files._file_mode)
-        check_mode_r(self, b.control_files._transport.base, 00666, 02777)
+        check_mode_r(self, 'b/.bzr', 00666, 02777)
 
         os.mkdir('c')
         os.chmod('c', 02750)
         b = self.make_branch('c')
         self.assertEqualMode(02750, b.control_files._dir_mode)
         self.assertEqualMode(00640, b.control_files._file_mode)
-        check_mode_r(self, b.control_files._transport.base, 00640, 02750)
+        check_mode_r(self, 'c/.bzr', 00640, 02750)
 
         os.mkdir('d')
         os.chmod('d', 0700)
         b = self.make_branch('d')
         self.assertEqualMode(0700, b.control_files._dir_mode)
         self.assertEqualMode(0600, b.control_files._file_mode)
-        check_mode_r(self, b.control_files._transport.base, 00600, 00700)
+        check_mode_r(self, 'd/.bzr', 00600, 00700)

=== modified file 'bzrlib/tests/stub_sftp.py'
--- bzrlib/tests/stub_sftp.py	
+++ bzrlib/tests/stub_sftp.py	
@@ -93,6 +93,9 @@
         path = self._realpath(path)
         try:
             out = [ ]
+            # TODO: win32 incorrectly lists non-ascii paths if path is not
+            # unicode. However, on Linux the server should only deal with
+            # bytestreams, and posix.listdir does the right thing.
             flist = os.listdir(path)
             for fname in flist:
                 attr = SFTPAttributes.from_stat(os.stat(pathjoin(path, fname)))
@@ -127,6 +130,7 @@
                 fd = os.open(path, flags)
         except OSError, e:
             return SFTPServer.convert_errno(e.errno)
+
         if (flags & os.O_CREAT) and (attr is not None):
             attr._flags &= ~attr.FLAG_PERMISSIONS
             SFTPServer.set_file_attr(path, attr)

=== modified file 'bzrlib/tests/test_branch.py'
--- bzrlib/tests/test_branch.py	
+++ bzrlib/tests/test_branch.py	
@@ -48,7 +48,7 @@
         try:
             # the default branch format is used by the meta dir format
             # which is not the default bzrdir format at this point
-            dir = BzrDirMetaFormat1().initialize('memory:/')
+            dir = BzrDirMetaFormat1().initialize('memory:///')
             result = dir.create_branch()
             self.assertEqual(result, 'A branch')
         finally:

=== modified file 'bzrlib/tests/test_bzrdir.py'
--- bzrlib/tests/test_bzrdir.py	
+++ bzrlib/tests/test_bzrdir.py	
@@ -45,7 +45,7 @@
         bzrdir.BzrDirFormat.set_default_format(SampleBzrDirFormat())
         # creating a bzr dir should now create an instrumented dir.
         try:
-            result = bzrdir.BzrDir.create('memory:/')
+            result = bzrdir.BzrDir.create('memory:///')
             self.failUnless(isinstance(result, SampleBzrDir))
         finally:
             bzrdir.BzrDirFormat.set_default_format(old_format)

=== modified file 'bzrlib/tests/test_escaped_store.py'
--- bzrlib/tests/test_escaped_store.py	
+++ bzrlib/tests/test_escaped_store.py	
@@ -50,7 +50,7 @@
         # hash_prefix() is not defined for unicode characters
         # it is only defined for byte streams.
         # so hash_prefix() needs to operate on *at most* utf-8
-        # encoded. However urlescape() does both encoding to utf-8
+        # encoded. However urlutils.escape() does both encoding to utf-8
         # and urllib quoting, so we will use the escaped form
         # as the path passed to hash_prefix
 

=== modified file 'bzrlib/tests/test_knit.py'
--- bzrlib/tests/test_knit.py	
+++ bzrlib/tests/test_knit.py	
@@ -28,8 +28,7 @@
     WeaveToKnit)
 from bzrlib.osutils import split_lines
 from bzrlib.tests import TestCaseWithTransport
-from bzrlib.transport import TransportLogger
-from bzrlib.transport.local import LocalTransport
+from bzrlib.transport import TransportLogger, get_transport
 from bzrlib.transport.memory import MemoryTransport
 from bzrlib.weave import Weave
 
@@ -42,7 +41,7 @@
             factory = KnitPlainFactory()
         else:
             factory = None
-        return KnitVersionedFile('test', LocalTransport('.'), access_mode='w', factory=factory, create=True)
+        return KnitVersionedFile('test', get_transport('.'), access_mode='w', factory=factory, create=True)
 
 
 class BasicKnitTests(KnitTests):
@@ -67,7 +66,7 @@
         k = self.make_test_knit()
         k.add_lines('text-1', [], split_lines(TEXT_1))
         del k
-        k2 = KnitVersionedFile('test', LocalTransport('.'), access_mode='r', factory=KnitPlainFactory(), create=True)
+        k2 = KnitVersionedFile('test', get_transport('.'), access_mode='r', factory=KnitPlainFactory(), create=True)
         self.assertTrue(k2.has_version('text-1'))
         self.assertEqualDiff(''.join(k2.get_lines('text-1')), TEXT_1)
 
@@ -95,11 +94,11 @@
     def test_incomplete(self):
         """Test if texts without a ending line-end can be inserted and
         extracted."""
-        k = KnitVersionedFile('test', LocalTransport('.'), delta=False, create=True)
+        k = KnitVersionedFile('test', get_transport('.'), delta=False, create=True)
         k.add_lines('text-1', [], ['a\n',    'b'  ])
         k.add_lines('text-2', ['text-1'], ['a\rb\n', 'b\n'])
         # reopening ensures maximum room for confusion
-        k = KnitVersionedFile('test', LocalTransport('.'), delta=False, create=True)
+        k = KnitVersionedFile('test', get_transport('.'), delta=False, create=True)
         self.assertEquals(k.get_lines('text-1'), ['a\n',    'b'  ])
         self.assertEquals(k.get_lines('text-2'), ['a\rb\n', 'b\n'])
 
@@ -127,7 +126,7 @@
 
     def test_add_delta(self):
         """Store in knit with parents"""
-        k = KnitVersionedFile('test', LocalTransport('.'), factory=KnitPlainFactory(),
+        k = KnitVersionedFile('test', get_transport('.'), factory=KnitPlainFactory(),
             delta=True, create=True)
         self.add_stock_one_and_one_a(k)
         k.clear_cache()
@@ -135,7 +134,7 @@
 
     def test_annotate(self):
         """Annotations"""
-        k = KnitVersionedFile('knit', LocalTransport('.'), factory=KnitAnnotateFactory(),
+        k = KnitVersionedFile('knit', get_transport('.'), factory=KnitAnnotateFactory(),
             delta=True, create=True)
         self.insert_and_test_small_annotate(k)
 
@@ -150,7 +149,7 @@
 
     def test_annotate_fulltext(self):
         """Annotations"""
-        k = KnitVersionedFile('knit', LocalTransport('.'), factory=KnitAnnotateFactory(),
+        k = KnitVersionedFile('knit', get_transport('.'), factory=KnitAnnotateFactory(),
             delta=False, create=True)
         self.insert_and_test_small_annotate(k)
 
@@ -229,7 +228,7 @@
 
     def test_knit_join(self):
         """Store in knit with parents"""
-        k1 = KnitVersionedFile('test1', LocalTransport('.'), factory=KnitPlainFactory(), create=True)
+        k1 = KnitVersionedFile('test1', get_transport('.'), factory=KnitPlainFactory(), create=True)
         k1.add_lines('text-a', [], split_lines(TEXT_1))
         k1.add_lines('text-b', ['text-a'], split_lines(TEXT_1))
 
@@ -238,21 +237,21 @@
 
         k1.add_lines('text-m', ['text-b', 'text-d'], split_lines(TEXT_1))
 
-        k2 = KnitVersionedFile('test2', LocalTransport('.'), factory=KnitPlainFactory(), create=True)
+        k2 = KnitVersionedFile('test2', get_transport('.'), factory=KnitPlainFactory(), create=True)
         count = k2.join(k1, version_ids=['text-m'])
         self.assertEquals(count, 5)
         self.assertTrue(k2.has_version('text-a'))
         self.assertTrue(k2.has_version('text-c'))
 
     def test_reannotate(self):
-        k1 = KnitVersionedFile('knit1', LocalTransport('.'),
+        k1 = KnitVersionedFile('knit1', get_transport('.'),
                                factory=KnitAnnotateFactory(), create=True)
         # 0
         k1.add_lines('text-a', [], ['a\n', 'b\n'])
         # 1
         k1.add_lines('text-b', ['text-a'], ['a\n', 'c\n'])
 
-        k2 = KnitVersionedFile('test2', LocalTransport('.'),
+        k2 = KnitVersionedFile('test2', get_transport('.'),
                                factory=KnitAnnotateFactory(), create=True)
         k2.join(k1, version_ids=['text-b'])
 
@@ -363,19 +362,19 @@
             "\nrevid2 line-delta 84 82 0 :",
             'test.kndx')
         # we should be able to load this file again
-        knit = KnitVersionedFile('test', LocalTransport('.'), access_mode='r')
+        knit = KnitVersionedFile('test', get_transport('.'), access_mode='r')
         self.assertEqual(['revid', 'revid2'], knit.versions())
         # write a short write to the file and ensure that its ignored
         indexfile = file('test.kndx', 'at')
         indexfile.write('\nrevid3 line-delta 166 82 1 2 3 4 5 .phwoar:demo ')
         indexfile.close()
         # we should be able to load this file again
-        knit = KnitVersionedFile('test', LocalTransport('.'), access_mode='w')
+        knit = KnitVersionedFile('test', get_transport('.'), access_mode='w')
         self.assertEqual(['revid', 'revid2'], knit.versions())
         # and add a revision with the same id the failed write had
         knit.add_lines('revid3', ['revid2'], ['a\n'])
         # and when reading it revid3 should now appear.
-        knit = KnitVersionedFile('test', LocalTransport('.'), access_mode='r')
+        knit = KnitVersionedFile('test', get_transport('.'), access_mode='r')
         self.assertEqual(['revid', 'revid2', 'revid3'], knit.versions())
         self.assertEqual(['revid2'], knit.get_parents('revid3'))
 

=== modified file 'bzrlib/tests/test_log.py'
--- bzrlib/tests/test_log.py	
+++ bzrlib/tests/test_log.py	
@@ -1,15 +1,17 @@
 # Copyright (C) 2005 by Canonical Ltd
-
+# -*- coding: utf-8 -*-
+# vim: encoding=utf-8
+#
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
-
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-
+#
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
@@ -22,6 +24,7 @@
                         ShortLogFormatter, LineLogFormatter)
 from bzrlib.branch import Branch
 from bzrlib.errors import InvalidRevisionNumber
+
 
 class _LogEntry(object):
     # should probably move into bzrlib.log?

=== modified file 'bzrlib/tests/test_missing.py'
--- bzrlib/tests/test_missing.py	
+++ bzrlib/tests/test_missing.py	
@@ -1,15 +1,15 @@
 # Copyright (C) 2005 by Canonical Ltd
-
+#
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
-
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-
+#
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

=== modified file 'bzrlib/tests/test_msgeditor.py'
--- bzrlib/tests/test_msgeditor.py	
+++ bzrlib/tests/test_msgeditor.py	
@@ -21,8 +21,9 @@
 
 from bzrlib.branch import Branch
 from bzrlib.config import ensure_config_dir_exists, config_filename
-from bzrlib.msgeditor import make_commit_message_template, _get_editor
+import bzrlib.msgeditor 
 from bzrlib.tests import TestCaseWithTransport, TestSkipped
+from bzrlib.trace import mutter
 
 
 class MsgEditorTest(TestCaseWithTransport):
@@ -43,12 +44,90 @@
     def test_commit_template(self):
         """Test building a commit message template"""
         working_tree = self.make_uncommitted_tree()
-        template = make_commit_message_template(working_tree, None)
+        template = bzrlib.msgeditor.make_commit_message_template(working_tree, None)
         self.assertEqualDiff(template,
 u"""\
 added:
   hell\u00d8
 """)
+
+    def setUp(self):
+        super(MsgEditorTest, self).setUp()
+        self._bzr_editor = os.environ.get('BZR_EDITOR', None)
+
+    def tearDown(self):
+        if self._bzr_editor != None:
+            os.environ['BZR_EDITOR'] = self._bzr_editor
+        else:
+            if os.environ.get('BZR_EDITOR', None) != None:
+                del os.environ['BZR_EDITOR']
+        super(MsgEditorTest, self).tearDown()
+
+    def test_run_editor(self):
+        if sys.platform == "win32":
+            f = file('fed.bat', 'w')
+            f.write('@rem dummy fed')
+            f.close()
+            os.environ['BZR_EDITOR'] = 'fed.bat'
+        else:
+            f = file('fed.sh', 'wb')
+            f.write('#!/bin/sh\n')
+            f.close()
+            os.chmod('fed.sh', 0755)
+            os.environ['BZR_EDITOR'] = './fed.sh'
+
+        self.assertEqual(True, bzrlib.msgeditor._run_editor(''),
+                         'Unable to run dummy fake editor')
+
+    def test_edit_commit_message(self):
+        working_tree = self.make_uncommitted_tree()
+        # make fake editor
+        f = file('fed.py', 'wb')
+        f.write('#!%s\n' % sys.executable)
+        f.write("""\
+import sys
+if len(sys.argv) == 2:
+    fn = sys.argv[1]
+    f = file(fn, 'rb')
+    s = f.read()
+    f.close()
+    f = file(fn, 'wb')
+    f.write('test message from fed\\n')
+    f.write(s)
+    f.close()
+""")
+        f.close()
+        if sys.platform == "win32":
+            # [win32] make batch file and set BZR_EDITOR
+            f = file('fed.bat', 'w')
+            f.write("""\
+@echo off
+%s fed.py %%1
+""" % sys.executable)
+            f.close()
+            os.environ['BZR_EDITOR'] = 'fed.bat'
+        else:
+            # [non-win32] make python script executable and set BZR_EDITOR
+            os.chmod('fed.py', 0755)
+            os.environ['BZR_EDITOR'] = './fed.py'
+
+        mutter('edit_commit_message without infotext')
+        self.assertEqual('test message from fed\n',
+                         bzrlib.msgeditor.edit_commit_message(''))
+
+        mutter('edit_commit_message with unicode infotext')
+        self.assertEqual('test message from fed\n',
+                         bzrlib.msgeditor.edit_commit_message(u'\u1234'))
+
+    def test_deleted_commit_message(self):
+        working_tree = self.make_uncommitted_tree()
+
+        if sys.platform == 'win32':
+            os.environ['BZR_EDITOR'] = 'del'
+        else:
+            os.environ['BZR_EDITOR'] = 'rm'
+
+        self.assertRaises((IOError, OSError), bzrlib.msgeditor.edit_commit_message, '')
 
     def test__get_editor(self):
         # Test that _get_editor can return a decent list of items
@@ -65,7 +144,7 @@
             f.write('editor = config_editor\n')
             f.close()
 
-            editors = list(_get_editor())
+            editors = list(bzrlib.msgeditor._get_editor())
 
             self.assertEqual(['bzr_editor', 'config_editor', 'visual',
                               'editor'], editors[:4])

=== modified file 'bzrlib/tests/test_nonascii.py'
--- bzrlib/tests/test_nonascii.py	
+++ bzrlib/tests/test_nonascii.py	
@@ -17,8 +17,10 @@
 """Test that various operations work in a non-ASCII environment."""
 
 import os
+import sys
+from unicodedata import normalize
 
-from bzrlib.osutils import pathjoin
+from bzrlib.osutils import pathjoin, normalizes_filenames, unicode_filename
 from bzrlib.tests import TestCaseWithTransport, TestSkipped
 from bzrlib.workingtree import WorkingTree
 
@@ -35,3 +37,57 @@
             return
         file(pathjoin(br_dir, "a"), "w").write("hello")
         wt.add(["a"], ["a-id"])
+
+
+a_circle_c = u'\xe5'
+a_dots_c = u'\xe4'
+a_circle_d = u'a\u030a'
+a_dots_d = u'a\u0308'
+z_umlat_c = u'\u017d'
+z_umlat_d = u'Z\u030c'
+
+
+class UnicodeFilename(TestCaseWithTransport):
+    """Test that unicode_filename() returns the expected values."""
+
+    def test_a_circle(self):
+        self.assertEqual(a_circle_d, normalize('NFKD', a_circle_c))
+        self.assertEqual(a_circle_c, normalize('NFKC', a_circle_d))
+
+        self.assertEqual((a_circle_c, True), unicode_filename(a_circle_c))
+        if normalizes_filenames():
+            self.assertEqual((a_circle_c, True), unicode_filename(a_circle_d))
+        else:
+            self.assertEqual((a_circle_d, False), unicode_filename(a_circle_d))
+
+    def test_platform(self):
+        try:
+            self.build_tree([a_circle_c, a_dots_c, z_umlat_c])
+        except UnicodeError:
+            raise TestSkipped("filesystem cannot create unicode files")
+
+        if sys.platform == 'darwin':
+            expected = sorted([a_circle_d, a_dots_d, z_umlat_d])
+        else:
+            expected = sorted([a_circle_c, a_dots_c, z_umlat_c])
+
+        present = sorted(os.listdir(u'.'))
+        self.assertEqual(expected, present)
+
+    def test_access(self):
+        # We should always be able to access files by the path returned
+        # from unicode_filename
+        files = [a_circle_c, a_dots_c, z_umlat_c]
+        try:
+            self.build_tree(files)
+        except UnicodeError:
+            raise TestSkipped("filesystem cannot create unicode files")
+
+        for fname in files:
+            path = unicode_filename(fname)[0]
+            # We should get an exception if we can't open the file at
+            # this location.
+            f = open(path, 'rb')
+            f.close()
+
+

=== modified file 'bzrlib/tests/test_osutils.py'
--- bzrlib/tests/test_osutils.py	
+++ bzrlib/tests/test_osutils.py	
@@ -14,8 +14,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-"""Tests for the osutils wrapper.
-"""
+"""Tests for the osutils wrapper."""
 
 import errno
 import os
@@ -24,9 +23,9 @@
 import sys
 
 import bzrlib
-from bzrlib.errors import BzrBadParameterNotUnicode
+from bzrlib.errors import BzrBadParameterNotUnicode, InvalidURL
 import bzrlib.osutils as osutils
-from bzrlib.tests import TestCaseInTempDir, TestCase
+from bzrlib.tests import TestCaseInTempDir, TestCase, TestSkipped
 
 
 class TestOSUtils(TestCaseInTempDir):
@@ -148,6 +147,68 @@
                           '\xbb\xbb')
 
 
+class TestWin32Funcs(TestCase):
+    """Test that the _win32 versions of os utilities return appropriate paths."""
+
+    def test_abspath(self):
+        self.assertEqual('C:/foo', osutils._win32_abspath('C:\\foo'))
+        self.assertEqual('C:/foo', osutils._win32_abspath('C:/foo'))
+
+    def test_realpath(self):
+        self.assertEqual('C:/foo', osutils._win32_realpath('C:\\foo'))
+        self.assertEqual('C:/foo', osutils._win32_realpath('C:/foo'))
+
+    def test_pathjoin(self):
+        self.assertEqual('path/to/foo', osutils._win32_pathjoin('path', 'to', 'foo'))
+        self.assertEqual('C:/foo', osutils._win32_pathjoin('path\\to', 'C:\\foo'))
+        self.assertEqual('C:/foo', osutils._win32_pathjoin('path/to', 'C:/foo'))
+        self.assertEqual('path/to/foo', osutils._win32_pathjoin('path/to/', 'foo'))
+        self.assertEqual('/foo', osutils._win32_pathjoin('C:/path/to/', '/foo'))
+        self.assertEqual('/foo', osutils._win32_pathjoin('C:\\path\\to\\', '\\foo'))
+
+    def test_normpath(self):
+        self.assertEqual('path/to/foo', osutils._win32_normpath(r'path\\from\..\to\.\foo'))
+        self.assertEqual('path/to/foo', osutils._win32_normpath('path//from/../to/./foo'))
+
+    def test_getcwd(self):
+        self.assertEqual(os.getcwdu().replace('\\', '/'), osutils._win32_getcwd())
+
+
+class TestWin32FuncsDirs(TestCaseInTempDir):
+    """Test win32 functions that create files."""
+    
+    def test_getcwd(self):
+        # Make sure getcwd can handle unicode filenames
+        try:
+            os.mkdir(u'B\xe5gfors')
+        except UnicodeError:
+            raise TestSkipped("Unable to create Unicode filename")
+
+        os.chdir(u'B\xe5gfors')
+        # TODO: jam 20060427 This will probably fail on Mac OSX because
+        #       it will change the normalization of B\xe5gfors
+        #       Consider using a different unicode character, or make
+        #       osutils.getcwd() renormalize the path.
+        self.assertTrue(osutils._win32_getcwd().endswith(u'/B\xe5gfors'))
+
+    def test_mkdtemp(self):
+        tmpdir = osutils._win32_mkdtemp(dir='.')
+        self.assertFalse('\\' in tmpdir)
+
+    def test_rename(self):
+        a = open('a', 'wb')
+        a.write('foo\n')
+        a.close()
+        b = open('b', 'wb')
+        b.write('baz\n')
+        b.close()
+
+        osutils._win32_rename('b', 'a')
+        self.failUnlessExists('a')
+        self.failIfExists('b')
+        self.assertFileEqual('baz\n', 'a')
+
+
 class TestSplitLines(TestCase):
 
     def test_split_unicode(self):

=== modified file 'bzrlib/tests/test_repository.py'
--- bzrlib/tests/test_repository.py	
+++ bzrlib/tests/test_repository.py	
@@ -1,15 +1,15 @@
-# (C) 2006 Canonical Ltd
-
+# Copyright (C) 2006 Canonical Ltd
+#
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
-
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-
+#
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
@@ -50,7 +50,7 @@
         try:
             # the default branch format is used by the meta dir format
             # which is not the default bzrdir format at this point
-            dir = bzrdir.BzrDirMetaFormat1().initialize('memory:/')
+            dir = bzrdir.BzrDirMetaFormat1().initialize('memory:///')
             result = dir.create_repository()
             self.assertEqual(result, 'A bzr repository dir')
         finally:

=== modified file 'bzrlib/tests/test_revision.py'
--- bzrlib/tests/test_revision.py	
+++ bzrlib/tests/test_revision.py	
@@ -16,6 +16,7 @@
 
 
 import os
+import warnings
 
 from bzrlib.branch import Branch
 from bzrlib.errors import NoSuchRevision
@@ -27,6 +28,12 @@
 from bzrlib.tests import TestCaseWithTransport
 from bzrlib.trace import mutter
 from bzrlib.workingtree import WorkingTree
+
+# We're allowed to test deprecated interfaces
+warnings.filterwarnings('ignore',
+        '.*get_intervening_revisions was deprecated',
+        DeprecationWarning,
+        r'bzrlib\.tests\.test_revision')
 
 # XXX: Make this a method of a merge base case
 def make_branches(self):

=== modified file 'bzrlib/tests/test_smart_add.py'
--- bzrlib/tests/test_smart_add.py	
+++ bzrlib/tests/test_smart_add.py	
@@ -187,26 +187,24 @@
 class TestAddActions(TestCase):
 
     def test_null(self):
-        from bzrlib.add import add_action_null
-        self.run_action(add_action_null, "", False)
+        self.run_action("", False)
 
     def test_add(self):
-        self.entry = InventoryFile("id", "name", None)
-        from bzrlib.add import add_action_add
-        self.run_action(add_action_add, "", True)
+        self.run_action("", True)
 
     def test_add_and_print(self):
-        from bzrlib.add import add_action_add_and_print
-        self.run_action(add_action_add_and_print, "added path\n", True)
+        self.run_action("added path\n", True)
 
     def test_print(self):
-        from bzrlib.add import add_action_print
-        self.run_action(add_action_print, "added path\n", False)
-
-    def run_action(self, action, output, should_add):
-        from StringIO import StringIO
+        self.run_action("added path\n", False)
+
+    def run_action(self, output, should_add):
+        from bzrlib.add import AddAction
+        from cStringIO import StringIO
         inv = Inventory()
         stdout = StringIO()
+        action = AddAction(to_file=stdout,
+            should_print=bool(output), should_add=should_add)
 
         self.apply_redirected(None, stdout, None, action, inv, None, 'path', 'file')
         self.assertEqual(stdout.getvalue(), output)

=== modified file 'bzrlib/tests/test_store.py'
--- bzrlib/tests/test_store.py	
+++ bzrlib/tests/test_store.py	
@@ -81,7 +81,7 @@
 class TestCompressedTextStore(TestCaseInTempDir, TestStores):
 
     def get_store(self, path=u'.'):
-        t = LocalTransport(path)
+        t = transport.get_transport(path)
         return TextStore(t, compressed=True)
 
     def test_total_size(self):
@@ -138,7 +138,7 @@
 class TestTextStore(TestCaseInTempDir, TestStores):
 
     def get_store(self, path=u'.'):
-        t = LocalTransport(path)
+        t = transport.get_transport(path)
         return TextStore(t, compressed=False)
 
     def test_total_size(self):
@@ -156,7 +156,7 @@
 class TestMixedTextStore(TestCaseInTempDir, TestStores):
 
     def get_store(self, path=u'.', compressed=True):
-        t = LocalTransport(path)
+        t = transport.get_transport(path)
         return TextStore(t, compressed=compressed)
 
     def test_get_mixed(self):

=== modified file 'bzrlib/tests/test_trace.py'
--- bzrlib/tests/test_trace.py	
+++ bzrlib/tests/test_trace.py	
@@ -66,3 +66,19 @@
         self.log(u'the unicode character for benzene is \N{BENZENE RING}')
         self.assertContainsRe('the unicode character',
                 self._get_log())
+
+    def test_mutter_never_fails(self):
+        # Even if the decode/encode stage fails, mutter should not
+        # raise an exception
+        mutter(u'Writing a greek mu (\xb5) works in a unicode string')
+        mutter('But fails in an ascii string \xb5')
+        # TODO: jam 20051227 mutter() doesn't flush the log file, and
+        #       self._get_log() opens the file directly and reads it.
+        #       So we need to manually flush the log file
+        import bzrlib.trace
+        bzrlib.trace._trace_file.flush()
+        log = self._get_log()
+        self.assertContainsRe(log, 'Writing a greek mu')
+        self.assertContainsRe(log, 'UnicodeError')
+        self.assertContainsRe(log, "'But fails in an ascii string")
+

=== modified file 'bzrlib/tests/test_transform.py'
--- bzrlib/tests/test_transform.py	
+++ bzrlib/tests/test_transform.py	
@@ -28,6 +28,7 @@
 from bzrlib.transform import (TreeTransform, ROOT_PARENT, FinalPaths, 
                               resolve_conflicts, cook_conflicts, 
                               find_interesting, build_tree, get_backup_name)
+import bzrlib.urlutils as urlutils
 
 class TestTreeTransform(TestCaseInTempDir):
 
@@ -42,7 +43,8 @@
         return transform, transform.trans_id_tree_file_id(self.wt.get_root_id())
 
     def test_existing_limbo(self):
-        limbo_name = self.wt._control_files.controlfilename('limbo')
+        limbo_name = urlutils.local_path_from_url(
+            self.wt._control_files.controlfilename('limbo'))
         transform, root = self.get_transform()
         os.mkdir(pathjoin(limbo_name, 'hehe'))
         self.assertRaises(ImmortalLimbo, transform.apply)

=== modified file 'bzrlib/tests/test_transport.py'
--- bzrlib/tests/test_transport.py	
+++ bzrlib/tests/test_transport.py	
@@ -25,6 +25,7 @@
                            TransportNotPossible,
                            ConnectionError,
                            DependencyNotPresent,
+                           InvalidURL,
                            )
 from bzrlib.tests import TestCase, TestCaseInTempDir
 from bzrlib.transport import (_get_protocol_handlers,
@@ -32,7 +33,6 @@
                               get_transport,
                               register_lazy_transport,
                               _set_protocol_handlers,
-                              urlescape,
                               Transport,
                               )
 from bzrlib.transport.memory import MemoryTransport
@@ -41,9 +41,6 @@
 
 class TestTransport(TestCase):
     """Test the non transport-concrete class functionality."""
-
-    def test_urlescape(self):
-        self.assertEqual('%25', urlescape('%'))
 
     def test__get_set_protocol_handlers(self):
         handlers = _get_protocol_handlers()
@@ -74,9 +71,11 @@
         try:
             register_lazy_transport('foo', 'bzrlib.tests.test_transport',
                     'BadTransportHandler')
-            t = get_transport('foo://fooserver/foo')
-            # because we failed to load the transport
-            self.assertTrue(isinstance(t, LocalTransport))
+            # TODO: jam 20060427 Now we get InvalidURL because it looks like 
+            #       a URL but we have no support for it.
+            #       Is it better to always fall back to LocalTransport?
+            #       I think this is a better error than a future NoSuchFile
+            self.assertRaises(InvalidURL, get_transport, 'foo://fooserver/foo')
         finally:
             # restore original values
             _set_protocol_handlers(saved_handlers)
@@ -107,7 +106,7 @@
 
     def test_abspath(self):
         transport = MemoryTransport()
-        self.assertEqual("memory:/relpath", transport.abspath('relpath'))
+        self.assertEqual("memory:///relpath", transport.abspath('relpath'))
 
     def test_relpath(self):
         transport = MemoryTransport()

=== modified file 'bzrlib/tests/test_transport_implementations.py'
--- bzrlib/tests/test_transport_implementations.py	
+++ bzrlib/tests/test_transport_implementations.py	
@@ -26,12 +26,14 @@
 import sys
 
 from bzrlib.errors import (DirectoryNotEmpty, NoSuchFile, FileExists,
-                           LockError,
-                           PathError,
-                           TransportNotPossible, ConnectionError)
+                           LockError, PathError,
+                           TransportNotPossible, ConnectionError,
+                           InvalidURL)
+from bzrlib.osutils import getcwd
 from bzrlib.tests import TestCaseInTempDir, TestSkipped
-from bzrlib.transport import memory, urlescape
+from bzrlib.transport import memory
 import bzrlib.transport
+import bzrlib.urlutils as urlutils
 
 
 def _append(fn, txt):
@@ -113,11 +115,11 @@
         self.build_tree(files, transport=t)
         self.assertEqual(True, t.has('a'))
         self.assertEqual(False, t.has('c'))
-        self.assertEqual(True, t.has(urlescape('%')))
+        self.assertEqual(True, t.has(urlutils.escape('%')))
         self.assertEqual(list(t.has_multi(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])),
                 [True, True, False, False, True, False, True, False])
         self.assertEqual(True, t.has_any(['a', 'b', 'c']))
-        self.assertEqual(False, t.has_any(['c', 'd', 'f', urlescape('%%')]))
+        self.assertEqual(False, t.has_any(['c', 'd', 'f', urlutils.escape('%%')]))
         self.assertEqual(list(t.has_multi(iter(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']))),
                 [True, True, False, False, True, False, True, False])
         self.assertEqual(False, t.has_any(['c', 'c', 'c']))
@@ -279,7 +281,7 @@
                                               transport_from, f)
 
         t = self.get_transport()
-        temp_transport = MemoryTransport('memory:/')
+        temp_transport = MemoryTransport('memory:///')
         simple_copy_files(t, temp_transport)
         if not t.is_readonly():
             t.mkdir('copy_to_simple')
@@ -299,7 +301,7 @@
         t.copy_to(['e/f'], temp_transport)
 
         del temp_transport
-        temp_transport = MemoryTransport('memory:/')
+        temp_transport = MemoryTransport('memory:///')
 
         files = ['a', 'b', 'c', 'd']
         t.copy_to(iter(files), temp_transport)
@@ -309,7 +311,7 @@
         del temp_transport
 
         for mode in (0666, 0644, 0600, 0400):
-            temp_transport = MemoryTransport("memory:/")
+            temp_transport = MemoryTransport("memory:///")
             t.copy_to(files, temp_transport, mode=mode)
             for f in files:
                 self.assertTransportMode(temp_transport, f, mode)
@@ -859,6 +861,15 @@
         self.assertEqual(transport.base + 'relpath',
                          transport.abspath('relpath'))
 
+    def test_local_abspath(self):
+        transport = self.get_transport()
+        try:
+            p = transport.local_abspath('.')
+        except TransportNotPossible:
+            pass # This is not a local transport
+        else:
+            self.assertEqual(getcwd(), p)
+
     def test_abspath_at_root(self):
         t = self.get_transport()
         # clone all the way to the top
@@ -898,6 +909,32 @@
         paths = set(sub_transport.iter_files_recursive())
         self.assertEqual(set(['dir/foo', 'dir/bar', 'bar']), paths)
 
+    def test_unicode_paths(self):
+        """Test that we can read/write files with Unicode names."""
+        t = self.get_transport()
+
+        files = [u'\xe5', # a w/ circle iso-8859-1
+                 u'\xe4', # a w/ dots iso-8859-1
+                 u'\u017d', # Z with caron iso-8859-2
+                 u'\u062c', # Arabic j
+                 u'\u0410', # Russian A
+                 u'\u65e5', # Kanji sun/day
+                ]
+
+        try:
+            self.build_tree(files, transport=t)
+        except UnicodeError:
+            raise TestSkipped("cannot handle unicode paths in current encoding")
+
+        # A plain unicode string is not a valid url
+        for fname in files:
+            self.assertRaises(InvalidURL, t.get, fname)
+
+        for fname in files:
+            fname_utf8 = fname.encode('utf-8')
+            contents = 'contents of %s\n' % (fname_utf8,)
+            self.check_transport_contents(contents, t, urlutils.escape(fname))
+
     def test_connect_twice_is_same_content(self):
         # check that our server (whatever it is) is accessable reliably
         # via get_transport and multiple connections share content.

=== modified file 'bzrlib/tests/workingtree_implementations/test_workingtree.py'
--- bzrlib/tests/workingtree_implementations/test_workingtree.py	
+++ bzrlib/tests/workingtree_implementations/test_workingtree.py	
@@ -30,6 +30,7 @@
 from bzrlib.tests import TestSkipped
 from bzrlib.tests.workingtree_implementations import TestCaseWithWorkingTree
 from bzrlib.trace import mutter
+import bzrlib.urlutils as urlutils
 import bzrlib.workingtree as workingtree
 from bzrlib.workingtree import (TreeEntry, TreeDirectory, TreeFile, TreeLink,
                                 WorkingTree)
@@ -78,16 +79,16 @@
         branch = self.make_branch_and_tree('.').branch
         wt, relpath = WorkingTree.open_containing()
         self.assertEqual('', relpath)
-        self.assertEqual(wt.basedir + '/', branch.base)
+        self.assertEqual(wt.basedir + '/', urlutils.local_path_from_url(branch.base))
         wt, relpath = WorkingTree.open_containing(u'.')
         self.assertEqual('', relpath)
-        self.assertEqual(wt.basedir + '/', branch.base)
+        self.assertEqual(wt.basedir + '/', urlutils.local_path_from_url(branch.base))
         wt, relpath = WorkingTree.open_containing('./foo')
         self.assertEqual('foo', relpath)
-        self.assertEqual(wt.basedir + '/', branch.base)
+        self.assertEqual(wt.basedir + '/', urlutils.local_path_from_url(branch.base))
         wt, relpath = WorkingTree.open_containing('file://' + getcwd() + '/foo')
         self.assertEqual('foo', relpath)
-        self.assertEqual(wt.basedir + '/', branch.base)
+        self.assertEqual(wt.basedir + '/', urlutils.local_path_from_url(branch.base))
 
     def test_basic_relpath(self):
         # for comprehensive relpath tests, see whitebox.py.

=== modified file 'bzrlib/textui.py'
--- bzrlib/textui.py	
+++ bzrlib/textui.py	
@@ -1,23 +1,25 @@
 # Bazaar-NG -- distributed version control
-
-# Copyright (C) 2005 by Canonical Ltd
-
+#
+# Copyright (C) 2005, 2006 by Canonical Ltd
+#
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
-
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-
+#
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+import sys
 
-def show_status(state, kind, name):
+
+def show_status(state, kind, name, to_file=None):
     if kind == 'directory':
         # use this even on windows?
         kind_ch = '/'
@@ -29,5 +31,8 @@
 
     assert len(state) == 1
         
-    print state + '       ' + name + kind_ch
+    if to_file is None:
+        to_file = sys.stdout
+
+    to_file.write(state + '       ' + name + kind_ch + '\n')
     

=== modified file 'bzrlib/trace.py'
--- bzrlib/trace.py	
+++ bzrlib/trace.py	
@@ -100,11 +100,20 @@
     if hasattr(_trace_file, 'closed') and _trace_file.closed:
         return
     if len(args) > 0:
-        out = fmt % args
+        # It seems that if we do ascii % (unicode, ascii) we can
+        # get a unicode cannot encode ascii error, so make sure that "fmt"
+        # is a unicode string
+        out = unicode(fmt) % args
     else:
         out = fmt
     out += '\n'
-    _trace_file.write(out)
+    try:
+        _trace_file.write(out)
+    except UnicodeError, e:
+        warning('UnicodeError: %s', e)
+        _trace_file.write(repr(out))
+    # TODO: jam 20051227 Consider flushing the trace file to help debugging
+    #_trace_file.flush()
 debug = mutter
 
 

=== modified file 'bzrlib/transform.py'
--- bzrlib/transform.py	
+++ bzrlib/transform.py	
@@ -27,6 +27,7 @@
 from bzrlib.progress import DummyProgress, ProgressPhase
 from bzrlib.trace import mutter, warning
 import bzrlib.ui 
+import bzrlib.urlutils as urlutils
 
 
 ROOT_PARENT = "root-parent"
@@ -79,7 +80,8 @@
         self._tree.lock_write()
         try:
             control_files = self._tree._control_files
-            self._limbodir = control_files.controlfilename('limbo')
+            self._limbodir = urlutils.local_path_from_url(
+                control_files.controlfilename('limbo'))
             try:
                 os.mkdir(self._limbodir)
             except OSError, e:

=== modified file 'bzrlib/transport/__init__.py'
--- bzrlib/transport/__init__.py	
+++ bzrlib/transport/__init__.py	
@@ -29,16 +29,19 @@
 import errno
 from collections import deque
 from copy import deepcopy
+import re
 from stat import *
 import sys
 from unittest import TestSuite
 import urllib
 import urlparse
 
-from bzrlib.trace import mutter, warning
 import bzrlib.errors as errors
 from bzrlib.errors import DependencyNotPresent
+import bzrlib.osutils as osutils
 from bzrlib.symbol_versioning import *
+from bzrlib.trace import mutter, warning
+import bzrlib.urlutils as urlutils
 
 # {prefix: [transport_classes]}
 # Transports are inserted onto the list LIFO and tried in order; as a result
@@ -127,6 +130,7 @@
 
 
 def split_url(url):
+    # TODO: jam 20060606 urls should only be ascii, or they should raise InvalidURL
     if isinstance(url, unicode):
         url = url.encode('utf-8')
     (scheme, netloc, path, params,
@@ -283,6 +287,16 @@
             raise errors.PathNotChild(abspath, self.base)
         pl = len(self.base)
         return abspath[pl:].strip('/')
+
+    def local_abspath(self, relpath):
+        """Return the absolute path on the local filesystem.
+
+        This function will only be defined for Transports which have a
+        physical local filesystem representation.
+        """
+        # TODO: jam 20060426 Should this raise NotLocalUrl instead?
+        raise errors.TransportNotPossible('This is not a LocalTransport,'
+            ' so there is no local representation for a path')
 
     def has(self, relpath):
         """Does the file relpath exist?
@@ -662,6 +676,13 @@
         return False
 
 
+# jam 20060426 For compatibility we copy the functions here
+# TODO: These should be marked as deprecated
+urlescape = urlutils.escape
+urlunescape = urlutils.unescape
+_urlRE = re.compile(r'^(?P<proto>[^:/\\]+)://(?P<path>.*)$')
+
+
 def get_transport(base):
     """Open a transport to access a URL or directory.
 
@@ -671,14 +692,37 @@
     # handler for the scheme?
     global _protocol_handlers
     if base is None:
-        base = u'.'
-    else:
-        base = unicode(base)
+        base = '.'
+
+    def convert_path_to_url(base, error_str):
+        m = _urlRE.match(base)
+        if m:
+            # This looks like a URL, but we weren't able to 
+            # instantiate it as such, so raise an appropriate error
+            raise errors.InvalidURL(base, error_str % m.group('proto'))
+        # This doesn't look like a protocol, consider it a local path
+        new_base = urlutils.local_path_to_url(base)
+        mutter('converting os path %r => url %s' , base, new_base)
+        return new_base
+
+    # Catch any URLs which are passing Unicode rather than ASCII
+    try:
+        base = base.encode('ascii')
+    except UnicodeError:
+        # Only local paths can be Unicode
+        base = convert_path_to_url(base,
+            'URLs must be properly escaped (protocol: %s)')
+    
     for proto, factory_list in _protocol_handlers.iteritems():
         if proto is not None and base.startswith(proto):
             t = _try_transport_factories(base, factory_list)
             if t:
                 return t
+
+    # We tried all the different protocols, now try one last time
+    # as a local protocol
+    base = convert_path_to_url(base, 'Unsupported protocol: %s')
+
     # The default handler is the filesystem handler, stored as protocol None
     return _try_transport_factories(base, _protocol_handlers[None])
 
@@ -692,19 +736,6 @@
                     (factory, base, e))
             continue
     return None
-
-
-def urlescape(relpath):
-    """Escape relpath to be a valid url."""
-    if isinstance(relpath, unicode):
-        relpath = relpath.encode('utf-8')
-    return urllib.quote(relpath)
-
-
-def urlunescape(relpath):
-    """Unescape relpath from url format."""
-    return urllib.unquote(relpath)
-    # TODO de-utf8 it last. relpath = utf8relpath.decode('utf8')
 
 
 class Server(object):
@@ -831,7 +862,7 @@
 register_lazy_transport('https://', 'bzrlib.transport.http._pycurl', 'PyCurlTransport')
 register_lazy_transport('ftp://', 'bzrlib.transport.ftp', 'FtpTransport')
 register_lazy_transport('aftp://', 'bzrlib.transport.ftp', 'FtpTransport')
-register_lazy_transport('memory:/', 'bzrlib.transport.memory', 'MemoryTransport')
+register_lazy_transport('memory://', 'bzrlib.transport.memory', 'MemoryTransport')
 register_lazy_transport('readonly+', 'bzrlib.transport.readonly', 'ReadonlyTransportDecorator')
 register_lazy_transport('fakenfs+', 'bzrlib.transport.fakenfs', 'FakeNFSTransportDecorator')
 register_lazy_transport('vfat+', 

=== modified file 'bzrlib/transport/http/__init__.py'
--- bzrlib/transport/http/__init__.py	
+++ bzrlib/transport/http/__init__.py	
@@ -30,8 +30,7 @@
 
 from bzrlib.transport import Transport, register_transport, Server
 from bzrlib.errors import (TransportNotPossible, NoSuchFile,
-                           TransportError, ConnectionError)
-from bzrlib.errors import BzrError, BzrCheckError
+                           TransportError, ConnectionError, InvalidURL)
 from bzrlib.branch import Branch
 from bzrlib.trace import mutter
 # TODO: load these only when running http tests
@@ -113,6 +112,8 @@
         implementation qualifier.
         """
         assert isinstance(relpath, basestring)
+        if isinstance(relpath, unicode):
+            raise InvalidURL(relpath, 'paths must not be unicode.')
         if isinstance(relpath, basestring):
             relpath_parts = relpath.split('/')
         else:

=== modified file 'bzrlib/transport/local.py'
--- bzrlib/transport/local.py	
+++ bzrlib/transport/local.py	
@@ -23,12 +23,13 @@
 import sys
 from stat import ST_MODE, S_ISDIR, ST_SIZE
 import tempfile
-import urllib
-
+
+from bzrlib.osutils import (abspath, realpath, normpath, pathjoin, rename, 
+                            check_legal_path, rmtree)
+from bzrlib.symbol_versioning import warn
 from bzrlib.trace import mutter
 from bzrlib.transport import Transport, Server
-from bzrlib.osutils import (abspath, realpath, normpath, pathjoin, rename, 
-                            check_legal_path, rmtree)
+import bzrlib.urlutils as urlutils
 
 
 class LocalTransport(Transport):
@@ -36,14 +37,19 @@
 
     def __init__(self, base):
         """Set the base path where files will be stored."""
-        if base.startswith('file://'):
-            base = base[len('file://'):]
-        # realpath is incompatible with symlinks. When we traverse
-        # up we might be able to normpath stuff. RBC 20051003
-        base = normpath(abspath(base))
+        if not base.startswith('file://'):
+            warn("Instantiating LocalTransport with a filesystem path"
+                " is deprecated as of bzr 0.8."
+                " Please use bzrlib.transport.get_transport()"
+                " or pass in a file:// url.",
+                 DeprecationWarning,
+                 stacklevel=2
+                 )
+            base = urlutils.local_path_to_url(base)
         if base[-1] != '/':
             base = base + '/'
         super(LocalTransport, self).__init__(base)
+        self._local_base = urlutils.local_path_from_url(base)
 
     def should_cache(self):
         return False
@@ -61,23 +67,35 @@
     def abspath(self, relpath):
         """Return the full url to the given relative URL."""
         assert isinstance(relpath, basestring), (type(relpath), relpath)
-        result = normpath(pathjoin(self.base, urllib.unquote(relpath)))
-        #if result[-1] != '/':
-        #    result += '/'
-        return result
+        # jam 20060426 Using normpath on the real path, because that ensures
+        #       proper handling of stuff like '..' path segments
+        path = normpath(pathjoin(self._local_base, urlutils.unescape(relpath)))
+        return urlutils.local_path_to_url(path)
+
+    def local_abspath(self, relpath):
+        """Transform the given relative path URL into the actual path on disk
+
+        This function only exists for the LocalTransport, since it is
+        the only one that has direct local access.
+        This is mostly for stuff like WorkingTree which needs to know
+        the local working directory.
+        """
+        absurl = self.abspath(relpath)
+        # mutter(u'relpath %s => base: %s, absurl %s', relpath, self.base, absurl)
+        return urlutils.local_path_from_url(absurl)
 
     def relpath(self, abspath):
         """Return the local path portion from a given absolute path.
         """
-        from bzrlib.osutils import relpath, strip_trailing_slash
         if abspath is None:
             abspath = u'.'
 
-        return relpath(strip_trailing_slash(self.base), 
-                       strip_trailing_slash(abspath))
+        return urlutils.file_relpath(
+            urlutils.strip_trailing_slash(self.base), 
+            urlutils.strip_trailing_slash(abspath))
 
     def has(self, relpath):
-        return os.access(self.abspath(relpath), os.F_OK)
+        return os.access(self.local_abspath(relpath), os.F_OK)
 
     def get(self, relpath):
         """Get the file at the given relative path.
@@ -85,7 +103,8 @@
         :param relpath: The relative path to the file
         """
         try:
-            path = self.abspath(relpath)
+            path = self.local_abspath(relpath)
+            # mutter('LocalTransport.get(%r) => %r', relpath, path)
             return open(path, 'rb')
         except (IOError, OSError),e:
             self._translate_error(e, path)
@@ -100,7 +119,7 @@
 
         path = relpath
         try:
-            path = self.abspath(relpath)
+            path = self.local_abspath(relpath)
             check_legal_path(path)
             fp = AtomicFile(path, 'wb', new_mode=mode)
         except (IOError, OSError),e:
@@ -127,7 +146,7 @@
         """Create a directory at the given path."""
         path = relpath
         try:
-            path = self.abspath(relpath)
+            path = self.local_abspath(relpath)
             os.mkdir(path)
             if mode is not None:
                 os.chmod(path, mode)
@@ -139,9 +158,9 @@
         location.
         """
         try:
-            fp = open(self.abspath(relpath), 'ab')
+            fp = open(self.local_abspath(relpath), 'ab')
             if mode is not None:
-                os.chmod(self.abspath(relpath), mode)
+                os.chmod(self.local_abspath(relpath), mode)
         except (IOError, OSError),e:
             self._translate_error(e, relpath)
         # win32 workaround (tell on an unwritten file returns 0)
@@ -152,8 +171,8 @@
 
     def copy(self, rel_from, rel_to):
         """Copy the item at rel_from to the location at rel_to"""
-        path_from = self.abspath(rel_from)
-        path_to = self.abspath(rel_to)
+        path_from = self.local_abspath(rel_from)
+        path_to = self.local_abspath(rel_to)
         try:
             shutil.copy(path_from, path_to)
         except (IOError, OSError),e:
@@ -161,19 +180,19 @@
             self._translate_error(e, path_from)
 
     def rename(self, rel_from, rel_to):
-        path_from = self.abspath(rel_from)
+        path_from = self.local_abspath(rel_from)
         try:
             # *don't* call bzrlib.osutils.rename, because we want to 
             # detect errors on rename
-            os.rename(path_from, self.abspath(rel_to))
+            os.rename(path_from, self.local_abspath(rel_to))
         except (IOError, OSError),e:
             # TODO: What about path_to?
             self._translate_error(e, path_from)
 
     def move(self, rel_from, rel_to):
         """Move the item at rel_from to the location at rel_to"""
-        path_from = self.abspath(rel_from)
-        path_to = self.abspath(rel_to)
+        path_from = self.local_abspath(rel_from)
+        path_to = self.local_abspath(rel_to)
 
         try:
             # this version will delete the destination if necessary
@@ -186,7 +205,7 @@
         """Delete the item at relpath"""
         path = relpath
         try:
-            path = self.abspath(relpath)
+            path = self.local_abspath(relpath)
             os.remove(path)
         except (IOError, OSError),e:
             # TODO: What about path_to?
@@ -206,8 +225,8 @@
             for path in relpaths:
                 self._update_pb(pb, 'copy-to', count, total)
                 try:
-                    mypath = self.abspath(path)
-                    otherpath = other.abspath(path)
+                    mypath = self.local_abspath(path)
+                    otherpath = other.local_abspath(path)
                     shutil.copy(mypath, otherpath)
                     if mode is not None:
                         os.chmod(otherpath, mode)
@@ -227,9 +246,9 @@
         WARNING: many transports do not support this, so trying avoid using
         it if at all possible.
         """
-        path = self.abspath(relpath)
-        try:
-            return [urllib.quote(entry) for entry in os.listdir(path)]
+        path = self.local_abspath(relpath)
+        try:
+            return [urlutils.escape(entry) for entry in os.listdir(path)]
         except (IOError, OSError), e:
             self._translate_error(e, path)
 
@@ -238,7 +257,7 @@
         """
         path = relpath
         try:
-            path = self.abspath(relpath)
+            path = self.local_abspath(relpath)
             return os.stat(path)
         except (IOError, OSError),e:
             self._translate_error(e, path)
@@ -250,7 +269,7 @@
         from bzrlib.lock import ReadLock
         path = relpath
         try:
-            path = self.abspath(relpath)
+            path = self.local_abspath(relpath)
             return ReadLock(path)
         except (IOError, OSError), e:
             self._translate_error(e, path)
@@ -262,13 +281,13 @@
         :return: A lock object, which should be passed to Transport.unlock()
         """
         from bzrlib.lock import WriteLock
-        return WriteLock(self.abspath(relpath))
+        return WriteLock(self.local_abspath(relpath))
 
     def rmdir(self, relpath):
         """See Transport.rmdir."""
         path = relpath
         try:
-            path = self.abspath(relpath)
+            path = self.local_abspath(relpath)
             os.rmdir(path)
         except (IOError, OSError),e:
             self._translate_error(e, path)
@@ -319,8 +338,7 @@
 
     def get_url(self):
         """See Transport.Server.get_url."""
-        # FIXME: \ to / on windows
-        return "file://%s" % os.path.abspath("")
+        return urlutils.local_path_to_url('')
 
 
 def get_test_permutations():

=== modified file 'bzrlib/transport/memory.py'
--- bzrlib/transport/memory.py	
+++ bzrlib/transport/memory.py	
@@ -27,9 +27,11 @@
 from stat import *
 from cStringIO import StringIO
 
+from bzrlib.errors import TransportError, NoSuchFile, FileExists, LockError
 from bzrlib.trace import mutter
-from bzrlib.errors import TransportError, NoSuchFile, FileExists, LockError
-from bzrlib.transport import Transport, register_transport, Server
+from bzrlib.transport import (Transport, register_transport, Server)
+import bzrlib.urlutils as urlutils
+
 
 
 class MemoryStat(object):
@@ -52,13 +54,13 @@
     def __init__(self, url=""):
         """Set the 'base' path where files will be stored."""
         if url == "":
-            url = "memory:/"
+            url = "memory:///"
         if url[-1] != '/':
             url = url + '/'
         super(MemoryTransport, self).__init__(url)
-        self._cwd = url[url.find(':') + 1:]
+        self._cwd = url[url.find(':') + 3:]
         # dictionaries from absolute path to file mode
-        self._dirs = {}
+        self._dirs = {'/':None}
         self._files = {}
         self._locks = {}
 
@@ -77,7 +79,7 @@
                     cwdsegments.pop()
                 continue
             cwdsegments.append(segment)
-        url = self.base[:self.base.find(':') + 1] + '/'.join(cwdsegments) + '/'
+        url = self.base[:self.base.find(':') + 3] + '/'.join(cwdsegments) + '/'
         result = MemoryTransport(url)
         result._dirs = self._dirs
         result._files = self._files
@@ -90,7 +92,7 @@
         # current environment - XXX RBC 20060404 move the clone '..' handling
         # into here and call abspath from clone
         temp_t = self.clone(relpath)
-        if temp_t.base.count('/') == 1:
+        if temp_t.base.count('/') == 3:
             return temp_t.base
         else:
             return temp_t.base[:-1]
@@ -215,8 +217,6 @@
         if _abspath in self._files:
             return MemoryStat(len(self._files[_abspath][0]), False, 
                               self._files[_abspath][1])
-        elif _abspath == '':
-            return MemoryStat(0, True, None)
         elif _abspath in self._dirs:
             return MemoryStat(0, True, self._dirs[_abspath])
         else:
@@ -232,9 +232,12 @@
 
     def _abspath(self, relpath):
         """Generate an internal absolute path."""
+        relpath = urlutils.unescape(relpath)
         if relpath.find('..') != -1:
             raise AssertionError('relpath contains ..')
         if relpath == '.':
+            if (self._cwd == '/'):
+                return self._cwd
             return self._cwd[:-1]
         if relpath.endswith('/'):
             relpath = relpath[:-1]
@@ -270,10 +273,10 @@
 
     def setUp(self):
         """See bzrlib.transport.Server.setUp."""
-        self._dirs = {}
+        self._dirs = {'/':None}
         self._files = {}
         self._locks = {}
-        self._scheme = "memory+%s:" % id(self)
+        self._scheme = "memory+%s:///" % id(self)
         def memory_factory(url):
             result = MemoryTransport(url)
             result._dirs = self._dirs

=== modified file 'bzrlib/transport/sftp.py'
--- bzrlib/transport/sftp.py	
+++ bzrlib/transport/sftp.py	
@@ -46,9 +46,9 @@
     Server,
     split_url,
     Transport,
-    urlescape,
     )
 import bzrlib.ui
+import bzrlib.urlutils as urlutils
 
 try:
     import paramiko
@@ -355,7 +355,7 @@
         """
         # FIXME: share the common code across transports
         assert isinstance(relpath, basestring)
-        relpath = urllib.unquote(relpath).split('/')
+        relpath = urlutils.unescape(relpath).split('/')
         basepath = self._path.split('/')
         if len(basepath) > 0 and basepath[-1] == '':
             basepath = basepath[:-1]
@@ -956,7 +956,7 @@
         global _ssh_vendor
         self._original_vendor = _ssh_vendor
         _ssh_vendor = self._vendor
-        self._homedir = os.getcwdu()
+        self._homedir = os.getcwd()
         if self._server_homedir is None:
             self._server_homedir = self._homedir
         self._root = '/'
@@ -977,7 +977,7 @@
 
     def get_url(self):
         """See bzrlib.transport.Server.get_url."""
-        return self._get_sftp_url(urlescape(self._homedir[1:]))
+        return self._get_sftp_url(urlutils.escape(self._homedir[1:]))
 
 
 class SFTPServerWithoutSSH(SFTPServer):
@@ -1011,7 +1011,7 @@
 
     def get_url(self):
         """See bzrlib.transport.Server.get_url."""
-        return self._get_sftp_url(urlescape(self._homedir[1:]))
+        return self._get_sftp_url(urlutils.escape(self._homedir[1:]))
 
 
 class SFTPHomeDirServer(SFTPServerWithoutSSH):

=== modified file 'bzrlib/workingtree.py'
--- bzrlib/workingtree.py	
+++ bzrlib/workingtree.py	
@@ -99,6 +99,7 @@
 from bzrlib.trace import mutter, note
 from bzrlib.transport import get_transport
 from bzrlib.transport.local import LocalTransport
+import bzrlib.urlutils as urlutils
 import bzrlib.ui
 import bzrlib.xml5
 
@@ -279,7 +280,7 @@
         # if needed, or, when the cache sees a change, append it to the hash
         # cache file, and have the parser take the most recent entry for a
         # given path only.
-        cache_filename = self.bzrdir.get_workingtree_transport(None).abspath('stat-cache')
+        cache_filename = self.bzrdir.get_workingtree_transport(None).local_abspath('stat-cache')
         hc = self._hashcache = HashCache(basedir, cache_filename, self._control_files._file_mode)
         hc.read()
         # is this scan needed ? it makes things kinda slow.
@@ -349,10 +350,13 @@
         run into /.  If there isn't one, raises NotBranchError.
         TODO: give this a new exception.
         If there is one, it is returned, along with the unused portion of path.
+
+        :return: The WorkingTree that contains 'path', and the rest of path
         """
         if path is None:
             path = os.getcwdu()
         control, relpath = bzrdir.BzrDir.open_containing(path)
+
         return control.open_workingtree(), relpath
 
     @staticmethod
@@ -1081,7 +1085,8 @@
         l = bzrlib.DEFAULT_IGNORE[:]
         if self.has_filename(bzrlib.IGNORE_FILENAME):
             f = self.get_file_byname(bzrlib.IGNORE_FILENAME)
-            l.extend([line.rstrip("\n\r") for line in f.readlines()])
+            l.extend([line.rstrip("\n\r").decode('utf-8') 
+                      for line in f.readlines()])
         self._ignorelist = l
         self._ignore_regex = self._combine_ignore_rules(l)
         return l
@@ -1235,7 +1240,7 @@
         return result
 
     @needs_write_lock
-    def remove(self, files, verbose=False):
+    def remove(self, files, verbose=False, to_file=None):
         """Remove nominated files from the working inventory..
 
         This does not remove their text.  This does not run on XXX on what? RBC
@@ -1270,7 +1275,7 @@
                     new_status = 'I'
                 else:
                     new_status = '?'
-                show_status(new_status, inv[fid].kind, quotefn(f))
+                show_status(new_status, inv[fid].kind, quotefn(f), to_file=to_file)
             del inv[fid]
 
         self._write_inventory(inv)
@@ -1653,7 +1658,7 @@
                 branch.unlock()
         revision = branch.last_revision()
         inv = Inventory() 
-        wt = WorkingTree(a_bzrdir.root_transport.base,
+        wt = WorkingTree(a_bzrdir.root_transport.local_abspath('.'),
                          branch,
                          inv,
                          _internal=True,
@@ -1681,7 +1686,7 @@
             raise NotImplementedError
         if not isinstance(a_bzrdir.transport, LocalTransport):
             raise errors.NotLocalUrl(a_bzrdir.transport.base)
-        return WorkingTree(a_bzrdir.root_transport.base,
+        return WorkingTree(a_bzrdir.root_transport.local_abspath('.'),
                            _internal=True,
                            _format=self,
                            _bzrdir=a_bzrdir)
@@ -1732,7 +1737,7 @@
         if revision_id is None:
             revision_id = branch.last_revision()
         inv = Inventory() 
-        wt = WorkingTree3(a_bzrdir.root_transport.base,
+        wt = WorkingTree3(a_bzrdir.root_transport.local_abspath('.'),
                          branch,
                          inv,
                          _internal=True,
@@ -1767,7 +1772,7 @@
         if not isinstance(a_bzrdir.transport, LocalTransport):
             raise errors.NotLocalUrl(a_bzrdir.transport.base)
         control_files = self._open_control_files(a_bzrdir)
-        return WorkingTree3(a_bzrdir.root_transport.base,
+        return WorkingTree3(a_bzrdir.root_transport.local_abspath('.'),
                            _internal=True,
                            _format=self,
                            _bzrdir=a_bzrdir,



More information about the bazaar mailing list