[merge] updated store-escaping for windows

Martin Pool mbp at sourcefrog.net
Tue Apr 11 09:57:03 BST 2006


This change, based on John's previous work, changes Knit stores to avoid
characters in filenames that can't be safely stored on Windows.
(Basically everything but lowercase letters, digits, and a few
punctuation characters.)

- Add a FakeVFAT transport decorator that lets us check some of these 
  constraints.  (Eventually it might be nice to be able to run
  WorkingTree tests through this too to trap Windows-specific bugs
  even when testing on Unix.)

- Add an 'escaped' parameter to the TransportStore.


-- 
Martin
-------------- next part --------------
=== added file 'b/bzrlib/tests/test_escaped_store.py'
--- /dev/null	
+++ b/bzrlib/tests/test_escaped_store.py	
@@ -0,0 +1,109 @@
+# Copyright (C) 2005 by Canonical Development Ltd
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Test Escaped Stores."""
+
+from cStringIO import StringIO
+import os
+import gzip
+
+from bzrlib.errors import BzrError, UnlistableStore, NoSuchFile
+from bzrlib.store import copy_all
+from bzrlib.store.text import TextStore
+from bzrlib.tests import TestCaseWithTransport
+import bzrlib.transport
+
+
+class TestEscaped(TestCaseWithTransport):
+    """Mixin template class that provides some common tests for stores"""
+
+    def get_store(self, prefixed=False, escaped=True):
+        t = bzrlib.transport.get_transport(self.get_url())
+        return TextStore(t, prefixed=prefixed, escaped=escaped)
+
+    def test_paths(self):
+        text_store = self.get_store()
+
+        self.assertEqual('a', text_store._relpath('a'))
+        self.assertEqual('a', text_store._relpath(u'a'))
+        self.assertEqual('%2520', text_store._relpath(' '))
+        self.assertEqual('%40%253a%253c%253e', text_store._relpath('@:<>'))
+        self.assertEqual('%25c3%25a5', text_store._relpath(u'\xe5'))
+
+    def test_prefixed(self):
+        # Prefix should be determined by unescaped string
+        text_store = self.get_store(prefixed=True)
+
+        # hash_prefix() is not defined for unicode characters
+        # it is only defined for byte streams.
+        # so hash_prefix() needs to operate on *at most* utf-8
+        # encoded. However urlescape() does both encoding to utf-8
+        # and urllib quoting, so we will use the escaped form
+        # as the path passed to hash_prefix
+
+        self.assertEqual('62/a', text_store._relpath('a'))
+        self.assertEqual('88/%2520', text_store._relpath(' '))
+        self.assertEqual('72/%40%253a%253c%253e',
+                text_store._relpath('@:<>'))
+        self.assertEqual('77/%25c3%25a5', text_store._relpath(u'\xe5'))
+
+    def test_files(self):
+        text_store = self.get_store(prefixed=True)
+
+        text_store.add(StringIO('a'), 'a')
+        self.failUnlessExists('62/a')
+
+        text_store.add(StringIO('space'), ' ')
+        self.failUnlessExists('88/%20')
+        self.assertEquals('space', text_store.get(' ').read())
+
+        text_store.add(StringIO('surprise'), '@:<>')
+        self.failUnlessExists('72/@%3a%3c%3e')
+        self.assertEquals('surprise', text_store.get('@:<>').read())
+
+        text_store.add(StringIO('unicode'), u'\xe5')
+        self.failUnlessExists('77/%c3%a5')
+        self.assertEquals('unicode', text_store.get(u'\xe5').read())
+
+    def test_weave(self):
+        from bzrlib.store.versioned import WeaveStore
+        from bzrlib.transactions import PassThroughTransaction
+
+        trans = PassThroughTransaction()
+
+        t = bzrlib.transport.get_transport(self.get_url())
+        weave_store = WeaveStore(t, prefixed=True, escaped=True)
+        def add_text(file_id, rev_id, contents, parents, transaction):
+            vfile = weave_store.get_weave_or_empty(file_id, transaction)
+            vfile.add_lines(rev_id, parents, contents)
+
+        add_text('a', 'r', ['a'], [], trans)
+        self.failUnlessExists('62/a.weave')
+        self.assertEqual(['a'], weave_store.get_lines('a', 'r', trans))
+
+        add_text(' ', 'r', ['space'], [], trans)
+        self.failIfExists('21/ .weave')
+        self.failUnlessExists('88/%20.weave')
+        self.assertEquals(['space'], weave_store.get_lines(' ', 'r', trans))
+
+        add_text('@:<>', 'r', ['surprise'], [], trans)
+        self.failUnlessExists('72/@%3a%3c%3e.weave')
+        self.assertEquals(['surprise'], weave_store.get_lines('@:<>', 'r', trans))
+
+        add_text(u'\xe5', 'r', ['unicode'], [], trans)
+        self.failUnlessExists('77/%c3%a5.weave')
+        self.assertEquals(['unicode'], weave_store.get_lines(u'\xe5', 'r', trans))
+

=== added file 'b/bzrlib/transport/fakevfat.py'
--- /dev/null	
+++ b/bzrlib/transport/fakevfat.py	
@@ -0,0 +1,111 @@
+# Copyright (C) 2006 Canonical Ltd
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Fake transport with some restrictions of Windows VFAT filesystems.
+
+VFAT on Windows has several restrictions that are not present on unix
+filesystems, which are imposed by this transport. 
+
+VFAT is strictly 8-bit using codepages to represent non-ascii characters. 
+This implementation currently doesn't model the codepage but just insists
+on only ascii characters being written.
+
+Restrictions imposed by this transport:
+
+ * filenames are squashed to lowercase
+ * filenames containing non-ascii characters are not allowed
+ * filenames containing the characters "@<>" are not allowed
+   (there should be more?)
+
+Some other restrictions are not implemented yet, but possibly could be:
+
+ * open files can't be deleted or renamed
+ * directories containing open files can't be renamed
+ * special device names (NUL, LPT, ...) are not allowed
+
+"""
+
+import re
+
+from bzrlib.errors import TransportNotPossible
+from bzrlib.transport.decorator import TransportDecorator, DecoratorServer
+
+
+# TODO: It might be nice if these hooks were available in a more general way
+# on all paths passed in to the Transport, so that we didn't have to hook
+# every single method.
+
+# TODO: Perhaps don't inherit from TransportDecorator so that methods
+# which are not implemented here fail by default?
+    
+
+class FakeVFATTransportDecorator(TransportDecorator):
+    """A decorator that can convert any transport to be readonly.
+
+    This is requested via the 'vfat+' prefix to get_transport().
+
+    This is intended only for use in testing and doesn't implement every
+    method very well yet.
+
+    This transport is typically layered on a local or memory transport
+    which actually stored the files.
+    """
+
+    @classmethod
+    def _get_url_prefix(self):
+        """Readonly transport decorators are invoked via 'vfat+'"""
+        return 'vfat+'
+
+    def _squash_name(self, name):
+        """Return vfat-squashed filename.
+
+        The name is returned as it will be stored on disk.  This raises an
+        error if there are invalid characters in the name.
+        """
+        if re.search(r'[?*:;<>]', name):
+            raise ValueError("illegal characters for VFAT filename: %r" % name)
+        return name.lower()
+
+    def get(self, relpath):
+        return self._decorated.get(self._squash_name(relpath))
+
+    def mkdir(self, relpath, mode=None):
+        return self._decorated.mkdir(self._squash_name(relpath), 0755)
+
+    def has(self, relpath):
+        return self._decorated.has(self._squash_name(relpath))
+
+    def readv(self, relpath, offsets):
+        return self._decorated.readv(self._squash_name(relpath), offsets)
+
+    def put(self, relpath, f, mode=None):
+        return self._decorated.put(self._squash_name(relpath), f, mode)
+
+
+class FakeVFATServer(DecoratorServer):
+    """A server that suggests connections through FakeVFATTransportDecorator
+
+    For use in testing.
+    """
+
+    def get_decorator_class(self):
+        return FakeVFATTransportDecorator
+
+
+def get_test_permutations():
+    """Return the permutations to be used in testing."""
+    return [(FakeVFATTransportDecorator, FakeVFATServer),
+            ]

=== modified file 'a/bzrlib/bzrdir.py'
--- a/bzrlib/bzrdir.py	
+++ b/bzrlib/bzrdir.py	
@@ -884,27 +884,36 @@
         """
         raise NotImplementedError(self.get_converter)
 
-    def initialize(self, url):
-        """Create a bzr control dir at this url and return an opened copy."""
-        # Since we don't have a .bzr directory, inherit the
+    def initialize(self, url, _cloning=False):
+        """Create a bzr control dir at this url and return an opened copy.
+        
+        Subclasses should typically override initialize_on_transport
+        instead of this method.
+        """
+        return self.initialize_on_transport(get_transport(url),
+                _cloning=_cloning)
+
+    def initialize_on_transport(self, transport, _cloning=False):
+        """Initialize a new bzrdir in the base directory of a Transport."""
+        # Since we don'transport have a .bzr directory, inherit the
         # mode from the root directory
-        t = get_transport(url)
-        temp_control = LockableFiles(t, '', TransportLock)
+        temp_control = LockableFiles(transport, '', TransportLock)
         temp_control._transport.mkdir('.bzr',
                                       # FIXME: RBC 20060121 dont peek under
                                       # the covers
                                       mode=temp_control._dir_mode)
         file_mode = temp_control._file_mode
         del temp_control
-        mutter('created control directory in ' + t.base)
-        control = t.clone('.bzr')
+        mutter('created control directory in ' + transport.base)
+        control = transport.clone('.bzr')
         utf8_files = [('README', 
                        "This is a Bazaar-NG control directory.\n"
                        "Do not change any files in this directory.\n"),
                       ('branch-format', self.get_format_string()),
                       ]
         # NB: no need to escape relative paths that are url safe.
-        control_files = LockableFiles(control, self._lock_file_name, self._lock_class)
+        control_files = LockableFiles(control, self._lock_file_name, 
+                                      self._lock_class)
         control_files.create_lock()
         control_files.lock_write()
         try:
@@ -912,7 +921,7 @@
                 control_files.put_utf8(file, content)
         finally:
             control_files.unlock()
-        return self.open(t, _found=True)
+        return self.open(transport, _found=True)
 
     def is_supported(self):
         """Is this format supported?
@@ -982,7 +991,7 @@
         # there is one and only one upgrade path here.
         return ConvertBzrDir4To5()
         
-    def initialize(self, url):
+    def initialize_on_transport(self, transport, _cloning=False):
         """Format 4 branches cannot be created."""
         raise errors.UninitializableFormat(self)
 
@@ -1028,7 +1037,7 @@
         # there is one and only one upgrade path here.
         return ConvertBzrDir5To6()
         
-    def initialize(self, url, _cloning=False):
+    def initialize_on_transport(self, transport, _cloning=False):
         """Format 5 dirs always have working tree, branch and repository.
         
         Except when they are being cloned.
@@ -1036,7 +1045,8 @@
         from bzrlib.branch import BzrBranchFormat4
         from bzrlib.repository import RepositoryFormat5
         from bzrlib.workingtree import WorkingTreeFormat2
-        result = super(BzrDirFormat5, self).initialize(url)
+        result = (super(BzrDirFormat5, self)
+                  .initialize_on_transport(transport, _cloning))
         RepositoryFormat5().initialize(result, _internal=True)
         if not _cloning:
             BzrBranchFormat4().initialize(result)
@@ -1075,7 +1085,7 @@
         # there is one and only one upgrade path here.
         return ConvertBzrDir6ToMeta()
         
-    def initialize(self, url, _cloning=False):
+    def initialize_on_transport(self, transport, _cloning=False):
         """Format 6 dirs always have working tree, branch and repository.
         
         Except when they are being cloned.
@@ -1083,7 +1093,8 @@
         from bzrlib.branch import BzrBranchFormat4
         from bzrlib.repository import RepositoryFormat6
         from bzrlib.workingtree import WorkingTreeFormat2
-        result = super(BzrDirFormat6, self).initialize(url)
+        result = (super(BzrDirFormat6, self)
+                  .initialize_on_transport(transport, _cloning))
         RepositoryFormat6().initialize(result, _internal=True)
         if not _cloning:
             BzrBranchFormat4().initialize(result)
@@ -1545,11 +1556,12 @@
         return BzrDir.open(self.bzrdir.root_transport.base)
 
     def _convert_to_prefixed(self):
-        from bzrlib.store import hash_prefix
+        from bzrlib.store import TransportStore
         self.bzrdir.transport.delete('branch-format')
         for store_name in ["weaves", "revision-store"]:
-            self.pb.note("adding prefixes to %s" % store_name) 
+            self.pb.note("adding prefixes to %s" % store_name)
             store_transport = self.bzrdir.transport.clone(store_name)
+            store = TransportStore(store_transport, prefixed=True)
             for urlfilename in store_transport.list_dir('.'):
                 filename = urlunescape(urlfilename)
                 if (filename.endswith(".weave") or
@@ -1558,7 +1570,7 @@
                     file_id = os.path.splitext(filename)[0]
                 else:
                     file_id = filename
-                prefix_dir = hash_prefix(file_id)
+                prefix_dir = store.hash_prefix(file_id)
                 # FIXME keep track of the dirs made RBC 20060121
                 try:
                     store_transport.move(filename, prefix_dir + '/' + filename)

=== modified file 'a/bzrlib/lockable_files.py'
--- a/bzrlib/lockable_files.py	
+++ b/bzrlib/lockable_files.py	
@@ -16,6 +16,7 @@
 
 from cStringIO import StringIO
 import codecs
+#import traceback
 
 import bzrlib
 from bzrlib.decorators import *
@@ -23,7 +24,8 @@
 from bzrlib.errors import LockError, ReadOnlyError
 from bzrlib.osutils import file_iterator, safe_unicode
 from bzrlib.symbol_versioning import *
-from bzrlib.trace import mutter
+from bzrlib.symbol_versioning import deprecated_method, zero_eight
+from bzrlib.trace import mutter, note
 import bzrlib.transactions as transactions
 
 # XXX: The tracking here of lock counts and whether the lock is held is
@@ -80,7 +82,7 @@
         self._lock_mode = None
         self._lock_count = 0
         esc_name = self._escape(lock_name)
-        self._lock = lock_class(transport, esc_name, 
+        self._lock = lock_class(transport, esc_name,
                                 file_modebits=self._file_mode,
                                 dir_modebits=self._dir_mode)
 
@@ -95,6 +97,16 @@
     def __repr__(self):
         return '%s(%r)' % (self.__class__.__name__,
                            self._transport)
+    def __str__(self):
+        return 'LockableFiles(%s, %s)' % (self.lock_name, self._transport.base)
+
+    def __del__(self):
+        if self.is_locked():
+            # XXX: This should show something every time, and be suitable for
+            # headless operation and embedding
+            from warnings import warn
+            warn("file group %r was not explicitly unlocked" % self)
+            self._lock.unlock()
 
     def _escape(self, file_or_path):
         if not isinstance(file_or_path, basestring):
@@ -201,6 +213,8 @@
             self._lock_count += 1
         else:
             self._lock.lock_write()
+            #note('write locking %s', self)
+            #traceback.print_stack()
             self._lock_mode = 'w'
             self._lock_count = 1
             self._set_transaction(transactions.WriteTransaction())
@@ -213,6 +227,8 @@
             self._lock_count += 1
         else:
             self._lock.lock_read()
+            #note('read locking %s', self)
+            #traceback.print_stack()
             self._lock_mode = 'r'
             self._lock_count = 1
             self._set_transaction(transactions.ReadOnlyTransaction())
@@ -226,6 +242,8 @@
         if self._lock_count > 1:
             self._lock_count -= 1
         else:
+            #note('unlocking %s', self)
+            #traceback.print_stack()
             self._finish_transaction()
             self._lock.unlock()
             self._lock_mode = self._lock_count = None

=== modified file 'a/bzrlib/merge.py'
--- a/bzrlib/merge.py	
+++ b/bzrlib/merge.py	
@@ -282,8 +282,13 @@
         for file_id in old_entries:
             entry = old_entries[file_id]
             path = id2path(file_id)
+            if file_id in self.base_tree.inventory:
+                executable = getattr(self.base_tree.inventory[file_id], 'executable', False)
+            else:
+                executable = getattr(entry, 'executable', False)
             new_inventory[file_id] = (path, file_id, entry.parent_id, 
-                                      entry.kind)
+                                      entry.kind, executable)
+                                      
             by_path[path] = file_id
         
         deletions = 0
@@ -306,7 +311,11 @@
                 parent = by_path[os.path.dirname(path)]
             abspath = pathjoin(self.this_tree.basedir, path)
             kind = bzrlib.osutils.file_kind(abspath)
-            new_inventory[file_id] = (path, file_id, parent, kind)
+            if file_id in self.base_tree.inventory:
+                executable = getattr(self.base_tree.inventory[file_id], 'executable', False)
+            else:
+                executable = False
+            new_inventory[file_id] = (path, file_id, parent, kind, executable)
             by_path[path] = file_id 
 
         # Get a list in insertion order

=== modified file 'a/bzrlib/repository.py'
--- a/bzrlib/repository.py	
+++ b/bzrlib/repository.py	
@@ -33,7 +33,7 @@
 from bzrlib.store.versioned import VersionedFileStore, WeaveStore
 from bzrlib.store.text import TextStore
 from bzrlib.symbol_versioning import *
-from bzrlib.trace import mutter
+from bzrlib.trace import mutter, note
 from bzrlib.tree import RevisionTree
 from bzrlib.tsort import topo_sort
 from bzrlib.testament import Testament
@@ -96,7 +96,7 @@
             else:
                 # yes, this is not suitable for adding with ghosts.
                 self.add_inventory(rev_id, inv, rev.parent_ids)
-        self._revision_store.add_revision(rev, self.get_transaction())   
+        self._revision_store.add_revision(rev, self.get_transaction())
 
     @needs_read_lock
     def _all_possible_ids(self):
@@ -143,7 +143,7 @@
         getting file texts, inventories and revisions, then
         this construct will accept instances of those things.
         """
-        object.__init__(self)
+        super(Repository, self).__init__()
         self._format = _format
         # the following are part of the public API for Repository:
         self.bzrdir = a_bzrdir
@@ -156,6 +156,8 @@
         # 
         self.control_store = control_store
         self.control_weaves = control_store
+        # TODO: make sure to construct the right store classes, etc, depending
+        # on whether escaping is required.
 
     def lock_write(self):
         self.control_files.lock_write()
@@ -225,6 +227,20 @@
             result = self._format.initialize(a_bzrdir, shared=self.is_shared())
         self.copy_content_into(result, revision_id, basis)
         return result
+
+    # XXX: Is this actually called?
+    @needs_read_lock
+    def copy(self, destination):
+        raise NotImplementedError("Repository.copy() no longer required?")
+        destination.lock_write()
+        try:
+            copy_all(self.weave_store, destination.weave_store)
+            note('copying inventories')
+            destination.control_weaves.copy_multi(self.control_weaves,
+                ['inventory'])
+            copy_all(self.revision_store, destination.revision_store)
+        finally:
+            destination.unlock()
 
     @needs_read_lock
     def has_revision(self, revision_id):
@@ -905,14 +921,16 @@
                                   transport,
                                   control_files,
                                   prefixed=True,
-                                  versionedfile_class=WeaveFile):
+                                  versionedfile_class=WeaveFile,
+                                  escaped=False):
         weave_transport = control_files._transport.clone(name)
         dir_mode = control_files._dir_mode
         file_mode = control_files._file_mode
         return VersionedFileStore(weave_transport, prefixed=prefixed,
-                                dir_mode=dir_mode,
-                                file_mode=file_mode,
-                                versionedfile_class=versionedfile_class)
+                                  dir_mode=dir_mode,
+                                  file_mode=file_mode,
+                                  versionedfile_class=versionedfile_class,
+                                  escaped=escaped)
 
     def initialize(self, a_bzrdir, shared=False):
         """Initialize a repository of this format in a_bzrdir.
@@ -1259,6 +1277,8 @@
      - an optional 'shared-storage' flag
      - an optional 'no-working-trees' flag
      - a LockDir lock
+
+    This format was introduced in bzr 0.8.
     """
 
     def _get_control_store(self, repo_transport, control_files):
@@ -1280,11 +1300,13 @@
         from bzrlib.store.revision.knit import KnitRevisionStore
         versioned_file_store = VersionedFileStore(
             repo_transport,
-            file_mode = control_files._file_mode,
+            file_mode=control_files._file_mode,
             prefixed=False,
             precious=True,
             versionedfile_class=KnitVersionedFile,
-            versionedfile_kwargs={'delta':False, 'factory':KnitPlainFactory()})
+            versionedfile_kwargs={'delta':False, 'factory':KnitPlainFactory()},
+            escaped=True,
+            )
         return KnitRevisionStore(versioned_file_store)
 
     def _get_text_store(self, transport, control_files):
@@ -1292,7 +1314,8 @@
         return self._get_versioned_file_store('knits',
                                               transport,
                                               control_files,
-                                              versionedfile_class=KnitVersionedFile)
+                                              versionedfile_class=KnitVersionedFile,
+                                              escaped=True)
 
     def initialize(self, a_bzrdir, shared=False):
         """Create a knit format 1 repository.

=== modified file 'a/bzrlib/store/__init__.py'
--- a/bzrlib/store/__init__.py	
+++ b/bzrlib/store/__init__.py	
@@ -1,4 +1,4 @@
-# Copyright (C) 2005 by Canonical Development Ltd
+# Copyright (C) 2005, 2006 by Canonical Development Ltd
 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -206,7 +206,10 @@
 
     def has_id(self, fileid, suffix=None):
         """See Store.has_id."""
-        return self._transport.has_any(self._id_to_names(fileid, suffix))
+        paths = self._id_to_names(fileid, suffix)
+        if not self._transport.has_any(paths):
+            return False
+        return True
 
     def _get_name(self, fileid, suffix=None):
         """A special check, which returns the name of an existing file.
@@ -238,7 +241,8 @@
         raise KeyError(fileid)
 
     def __init__(self, a_transport, prefixed=False, compressed=False,
-                 dir_mode=None, file_mode=None):
+                 dir_mode=None, file_mode=None,
+                 escaped=False):
         assert isinstance(a_transport, transport.Transport)
         super(TransportStore, self).__init__()
         self._transport = a_transport
@@ -246,15 +250,25 @@
         # FIXME RBC 20051128 this belongs in TextStore.
         self._compressed = compressed
         self._suffixes = set()
+        self._escaped = escaped
 
         # It is okay for these to be None, it just means they
         # will just use the filesystem defaults
         self._dir_mode = dir_mode
         self._file_mode = file_mode
 
+    def _unescape(self, file_id):
+        """If filename escaping is enabled for this store, unescape and return the filename."""
+        if self._escaped:
+            return urllib.unquote(file_id)
+        else:
+            return file_id
+
     def _iter_files_recursive(self):
         """Iterate through the files in the transport."""
         for quoted_relpath in self._transport.iter_files_recursive():
+            # transport iterator always returns quoted paths, regardless of
+            # escaping
             yield urllib.unquote(quoted_relpath)
 
     def __iter__(self):
@@ -269,7 +283,7 @@
                     if name.endswith('.' + suffix):
                         skip = True
             if not skip:
-                yield name
+                yield self._unescape(name)
 
     def __len__(self):
         return len(list(self.__iter__()))
@@ -284,11 +298,39 @@
         else:
             suffixes = []
         if self._prefixed:
-            path = [hash_prefix(fileid) + fileid]
-        else:
-            path = [fileid]
-        path.extend(suffixes)
-        return transport.urlescape(u'.'.join(path))
+            # hash_prefix adds the '/' separator
+            prefix = self.hash_prefix(fileid)
+        else:
+            prefix = ''
+        fileid = self._escape_file_id(fileid)
+        path = prefix + fileid
+        full_path = u'.'.join([path] + suffixes)
+        return transport.urlescape(full_path)
+
+    def _escape_file_id(self, file_id):
+        """Turn a file id into a filesystem safe string.
+
+        This is similar to a plain urllib.quote, except
+        it uses specific safe characters, so that it doesn't
+        have to translate a lot of valid file ids.
+        """
+        if not self._escaped:
+            return file_id
+        if isinstance(file_id, unicode):
+            file_id = file_id.encode('utf-8')
+        # @ does not get escaped. This is because it is a valid
+        # filesystem character we use all the time, and it looks
+        # a lot better than seeing %40 all the time.
+        safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
+        r = [((c in safe) and c or ('%%%02x' % ord(c)))
+             for c in file_id]
+        return ''.join(r)
+
+    def hash_prefix(self, fileid):
+        # fileid should be unescaped
+        if self._escaped:
+            fileid = self._escape_file_id(fileid)
+        return "%02x/" % (adler32(fileid) & 0xff)
 
     def __repr__(self):
         if self._transport is None:
@@ -331,9 +373,3 @@
 def copy_all(store_from, store_to, pb=None):
     """Copy all ids from one store to another."""
     store_to.copy_all_ids(store_from, pb)
-
-
-def hash_prefix(fileid):
-    return "%02x/" % (adler32(fileid) & 0xff)
-
-

=== modified file 'a/bzrlib/store/text.py'
--- a/bzrlib/store/text.py	
+++ b/bzrlib/store/text.py	
@@ -14,8 +14,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-"""
-A store that keeps the full text of every version.
+"""A store that keeps the full text of every version.
 
 This store keeps uncompressed versions of the full text. It does not
 do any sort of delta compression.
@@ -23,7 +22,6 @@
 
 import os
 import bzrlib.store
-from bzrlib.store import hash_prefix
 from bzrlib.trace import mutter
 from bzrlib.errors import BzrError, NoSuchFile, FileExists
 
@@ -95,13 +93,13 @@
             raise KeyError(fileid + '-' + str(suffix))
 
         try:
-            result = other._transport.copy_to([path], self._transport, 
+            result = other._transport.copy_to([path], self._transport,
                                               mode=self._file_mode)
         except NoSuchFile:
             if not self._prefixed:
                 raise
             try:
-                self._transport.mkdir(hash_prefix(fileid)[:-1], mode=self._dir_mode)
+                self._transport.mkdir(self.hash_prefix(fileid)[:-1], mode=self._dir_mode)
             except FileExists:
                 pass
             result = other._transport.copy_to([path], self._transport,

=== modified file 'a/bzrlib/store/versioned/__init__.py'
--- a/bzrlib/store/versioned/__init__.py	
+++ b/bzrlib/store/versioned/__init__.py	
@@ -1,4 +1,4 @@
-# Copyright (C) 2005 Canonical Ltd
+# Copyright (C) 2005, 2006 Canonical Ltd
 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -24,24 +24,28 @@
 import urllib
 
 from bzrlib.weavefile import read_weave, write_weave_v5
-from bzrlib.weave import WeaveFile
-from bzrlib.store import TransportStore, hash_prefix
+from bzrlib.weave import WeaveFile, Weave
+from bzrlib.store import TransportStore
 from bzrlib.atomicfile import AtomicFile
 from bzrlib.errors import NoSuchFile, FileExists
 from bzrlib.symbol_versioning import *
 from bzrlib.trace import mutter
+import bzrlib.ui
 
 
 class VersionedFileStore(TransportStore):
     """Collection of many versioned files in a transport."""
 
+    # TODO: Rather than passing versionedfile_kwargs, perhaps pass in a
+    # transport factory callable?
     def __init__(self, transport, prefixed=False, precious=False,
                  dir_mode=None, file_mode=None,
                  versionedfile_class=WeaveFile,
-                 versionedfile_kwargs={}):
-        super(WeaveStore, self).__init__(transport,
+                 versionedfile_kwargs={},
+                 escaped=False):
+        super(VersionedFileStore, self).__init__(transport,
                 dir_mode=dir_mode, file_mode=file_mode,
-                prefixed=prefixed, compressed=False)
+                prefixed=prefixed, compressed=False, escaped=escaped)
         self._precious = precious
         self._versionedfile_class = versionedfile_class
         self._versionedfile_kwargs = versionedfile_kwargs
@@ -63,10 +67,7 @@
 
     def filename(self, file_id):
         """Return the path relative to the transport root."""
-        if self._prefixed:
-            return hash_prefix(file_id) + urllib.quote(file_id)
-        else:
-            return urllib.quote(file_id)
+        return self._relpath(file_id)
 
     def __iter__(self):
         suffixes = self._versionedfile_class.get_suffixes()
@@ -74,10 +75,13 @@
         for relpath in self._iter_files_recursive():
             for suffix in suffixes:
                 if relpath.endswith(suffix):
-                    id = os.path.basename(relpath[:-len(suffix)])
-                    if not id in ids:
-                        yield id
-                        ids.add(id)
+                    # TODO: use standard remove_suffix function
+                    escaped_id = os.path.basename(relpath[:-len(suffix)])
+                    file_id = self._unescape(escaped_id)
+                    if file_id not in ids:
+                        ids.add(file_id)
+                        yield file_id
+                    break # only one suffix can match
 
     def has_id(self, fileid):
         suffixes = self._versionedfile_class.get_suffixes()
@@ -100,6 +104,19 @@
         for suffix in suffixes:
             self._transport.delete(filename + suffix)
         self._clear_cache_id(file_id, transaction)
+
+    def _get(self, file_id):
+        return self._transport.get(self.filename(file_id))
+
+    def _put(self, file_id, f):
+        fn = self.filename(file_id)
+        try:
+            return self._transport.put(fn, f, mode=self._file_mode)
+        except NoSuchFile:
+            if not self._prefixed:
+                raise
+            self._transport.mkdir(os.path.dirname(fn), mode=self._dir_mode)
+            return self._transport.put(fn, f, mode=self._file_mode)
 
     def get_weave(self, file_id, transaction):
         weave = transaction.map.find_weave(file_id)
@@ -128,7 +145,6 @@
 
         Returned as a list of lines.
         """
-        assert 0
         w = self.get_weave(file_id, transaction)
         return w.get_lines(rev_id)
     
@@ -151,8 +167,9 @@
                 # unexpected error - NoSuchFile is raised on a missing dir only and that
                 # only occurs when we are prefixed.
                 raise
-            self._transport.mkdir(hash_prefix(file_id), mode=self._dir_mode)
-            weave = self._versionedfile_class(self.filename(file_id), self._transport, self._file_mode, create=True,
+            self._transport.mkdir(self.hash_prefix(file_id), mode=self._dir_mode)
+            weave = self._versionedfile_class(self.filename(file_id), self._transport, 
+                                              self._file_mode, create=True,
                                               **self._versionedfile_kwargs)
         return weave
 
@@ -252,17 +269,16 @@
             # we are copying single objects, and there may be open tranasactions
             # so again with the passthrough
             to_transaction = PassThroughTransaction()
+        pb = bzrlib.ui.ui_factory.nested_progress_bar()
         for count, f in enumerate(file_ids):
             mutter("copy weave {%s} into %s", f, self)
-            if pb:
-                pb.update('copy', count, len(file_ids))
+            pb.update('copy', count, len(file_ids))
             # if we have it in cache, its faster.
             # joining is fast with knits, and bearable for weaves -
             # indeed the new case can be optimised if needed.
             target = self._make_new_versionedfile(f, to_transaction)
             target.join(from_store.get_weave(f, from_transaction))
-        if pb:
-            pb.clear()
+        pb.finished()
 
     def total_size(self):
         count, bytes =  super(VersionedFileStore, self).total_size()

=== modified file 'a/bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	
+++ b/bzrlib/tests/__init__.py	
@@ -935,6 +935,7 @@
                    'bzrlib.tests.test_diff',
                    'bzrlib.tests.test_doc_generate',
                    'bzrlib.tests.test_errors',
+                   'bzrlib.tests.test_escaped_store',
                    'bzrlib.tests.test_fetch',
                    'bzrlib.tests.test_gpg',
                    'bzrlib.tests.test_graph',

=== modified file 'a/bzrlib/tests/test_branch.py'
--- a/bzrlib/tests/test_branch.py	
+++ b/bzrlib/tests/test_branch.py	
@@ -72,6 +72,43 @@
         self.assertIsDirectory('.bzr/branch/lock', t)
         branch.lock_write()
         self.assertIsDirectory('.bzr/branch/lock/held', t)
+
+
+class TestBranchEscaping(TestCaseWithTransport):
+    """Test a branch can be correctly stored and used on a vfat-like transport
+    
+    Makes sure we have proper escaping of invalid characters, etc.
+
+    It'd be better to test all operations on the FakeVFATTransportDecorator,
+    but working trees go straight to the os not through the Transport layer.
+    Therefore we build some history first in the regular way and then 
+    check it's safe to access for vfat.
+    """
+
+    FOO_ID = 'foo<:>ID'
+    REV_ID = 'revid-1'
+
+    def setUp(self):
+        super(TestBranchEscaping, self).setUp()
+        from bzrlib.repository import RepositoryFormatKnit1
+        bzrdir = BzrDirMetaFormat1().initialize(self.get_url())
+        repo = RepositoryFormatKnit1().initialize(bzrdir)
+        branch = bzrdir.create_branch()
+        wt = bzrdir.create_workingtree()
+        self.build_tree_contents([("foo", "contents of foo")])
+        # add file with id containing wierd characters
+        wt.add(['foo'], [self.FOO_ID])
+        wt.commit('this is my new commit', rev_id=self.REV_ID)
+
+    def test_branch_on_vfat(self):
+        from bzrlib.transport.fakevfat import FakeVFATTransportDecorator
+        # now access over vfat; should be safe
+        transport = FakeVFATTransportDecorator('vfat+' + self.get_url())
+        bzrdir, junk = BzrDir.open_containing_from_transport(transport)
+        branch = bzrdir.open_branch()
+        revtree = branch.repository.revision_tree(self.REV_ID)
+        contents = revtree.get_file_text(self.FOO_ID)
+        self.assertEqual(contents, 'contents of foo')
 
 
 class SampleBranchFormat(bzrlib.branch.BranchFormat):

=== modified file 'a/bzrlib/tests/test_store.py'
--- a/bzrlib/tests/test_store.py	
+++ b/bzrlib/tests/test_store.py	
@@ -399,6 +399,12 @@
         my_store = store.TransportStore(MemoryTransport())
         self.assertEqual('%25', my_store._relpath('%'))
 
+    def test_escaped_uppercase(self):
+        """Uppercase letters are escaped for safety on Windows"""
+        my_store = store.TransportStore(MemoryTransport(), escaped=True)
+        # a particularly perverse file-id! :-)
+        self.assertEquals(my_store._escape_file_id('C:<>'), '%43%3a%3c%3e')
+
 
 class TestVersionFileStore(TestCaseWithTransport):
 

=== modified file 'a/bzrlib/tests/test_transport.py'
--- a/bzrlib/tests/test_transport.py	
+++ b/bzrlib/tests/test_transport.py	
@@ -275,6 +275,31 @@
                           transport.rename, 'from', 'to')
 
 
+class FakeVFATDecoratorTests(TestCaseInTempDir):
+    """Tests for simulation of VFAT restrictions"""
+
+    def get_vfat_transport(self, url):
+        """Return vfat-backed transport for test directory"""
+        from bzrlib.transport.fakevfat import FakeVFATTransportDecorator
+        return FakeVFATTransportDecorator('vfat+' + url)
+
+    def test_transport_creation(self):
+        from bzrlib.transport.fakevfat import FakeVFATTransportDecorator
+        transport = self.get_vfat_transport('.')
+        self.assertIsInstance(transport, FakeVFATTransportDecorator)
+
+    def test_transport_mkdir(self):
+        transport = self.get_vfat_transport('.')
+        transport.mkdir('HELLO')
+        self.assertTrue(transport.has('hello'))
+        self.assertTrue(transport.has('Hello'))
+
+    def test_forbidden_chars(self):
+        transport = self.get_vfat_transport('.')
+        self.assertRaises(ValueError, transport.has, "<NU>")
+
+
+
 class BadTransportHandler(Transport):
     def __init__(self, base_url):
         raise DependencyNotPresent('some_lib', 'testing missing dependency')

=== modified file 'a/bzrlib/tests/test_transport_implementations.py'
--- a/bzrlib/tests/test_transport_implementations.py	
+++ b/bzrlib/tests/test_transport_implementations.py	
@@ -246,6 +246,9 @@
     def test_mkdir_permissions(self):
         t = self.get_transport()
         if t.is_readonly():
+            return
+        if not t._can_roundtrip_unix_modebits():
+            # no sense testing on this transport
             return
         # Test mkdir with a mode
         t.mkdir('dmode755', mode=0755)

=== modified file 'a/bzrlib/transport/__init__.py'
--- a/bzrlib/transport/__init__.py	
+++ b/bzrlib/transport/__init__.py	
@@ -616,6 +616,22 @@
         """Return true if this connection cannot be written to."""
         return False
 
+    def _can_roundtrip_unix_modebits(self):
+        """Return true if this transport can store and retrieve unix modebits.
+
+        (For example, 0700 to make a directory owner-private.)
+        
+        Note: most callers will not want to switch on this, but should rather 
+        just try and set permissions and let them be either stored or not.
+        This is intended mainly for the use of the test suite.
+        
+        Warning: this is not guaranteed to be accurate as sometimes we can't 
+        be sure: for example with vfat mounted on unix, or a windows sftp
+        server."""
+        # TODO: Perhaps return a e.g. TransportCharacteristics that can answer
+        # several questions about the transport.
+        return False
+
 
 def get_transport(base):
     """Open a transport to access a URL or directory.
@@ -651,7 +667,8 @@
 
 def urlescape(relpath):
     """Escape relpath to be a valid url."""
-    # TODO utf8 it first. utf8relpath = relpath.encode('utf8')
+    if isinstance(relpath, unicode):
+        relpath = relpath.encode('utf-8')
     return urllib.quote(relpath)
 
 
@@ -788,3 +805,6 @@
 register_lazy_transport('memory:/', 'bzrlib.transport.memory', 'MemoryTransport')
 register_lazy_transport('readonly+', 'bzrlib.transport.readonly', 'ReadonlyTransportDecorator')
 register_lazy_transport('fakenfs+', 'bzrlib.transport.fakenfs', 'FakeNFSTransportDecorator')
+register_lazy_transport('vfat+', 
+                        'bzrlib.transport.fakevfat',
+                        'FakeVFATTransportDecorator')

=== modified file 'a/bzrlib/transport/decorator.py'
--- a/bzrlib/transport/decorator.py	
+++ b/bzrlib/transport/decorator.py	
@@ -42,7 +42,9 @@
         
         _decorated is a private parameter for cloning."""
         prefix = self._get_url_prefix()
-        assert url.startswith(prefix)
+        assert url.startswith(prefix), \
+                "url %r doesn't start with decorator prefix %r" % \
+                (url, prefix)
         decorated_url = url[len(prefix):]
         if _decorated is None:
             self._decorated = get_transport(decorated_url)
@@ -109,6 +111,9 @@
     def list_dir(self, relpath):
         """See Transport.list_dir()."""
         return self._decorated.list_dir(relpath)
+
+    def rename(self, rel_from, rel_to):
+        return self._decorated.rename(rel_from, rel_to)
     
     def rmdir(self, relpath):
         """See Transport.rmdir."""

=== modified file 'a/bzrlib/transport/local.py'
--- a/bzrlib/transport/local.py	
+++ b/bzrlib/transport/local.py	
@@ -20,6 +20,7 @@
 
 import os
 import shutil
+import sys
 from stat import ST_MODE, S_ISDIR, ST_SIZE
 import tempfile
 import urllib
@@ -275,6 +276,13 @@
         except (IOError, OSError),e:
             self._translate_error(e, path)
 
+    def _can_roundtrip_unix_modebits(self):
+        if sys.platform == 'win32':
+            # anyone else?
+            return False
+        else:
+            return True
+
 
 class ScratchTransport(LocalTransport):
     """A transport that works in a temporary dir and cleans up after itself.

=== modified file 'a/bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py	
+++ b/bzrlib/workingtree.py	
@@ -93,8 +93,8 @@
 from bzrlib.symbol_versioning import *
 from bzrlib.textui import show_status
 import bzrlib.tree
-from bzrlib.trace import mutter
 from bzrlib.transform import build_tree
+from bzrlib.trace import mutter, note
 from bzrlib.transport import get_transport
 from bzrlib.transport.local import LocalTransport
 import bzrlib.ui
@@ -1162,7 +1162,7 @@
                                       InventoryFile,
                                       InventoryLink)
         inv = Inventory(self.get_root_id())
-        for path, file_id, parent, kind in new_inventory_list:
+        for path, file_id, parent, kind, executable in new_inventory_list:
             name = os.path.basename(path)
             if name == "":
                 continue
@@ -1170,7 +1170,9 @@
             if kind == 'directory':
                 inv.add(InventoryDirectory(file_id, name, parent))
             elif kind == 'file':
-                inv.add(InventoryFile(file_id, name, parent))
+                ie = InventoryFile(file_id, name, parent)
+                ie.executable = executable
+                inv.add(ie)
             elif kind == 'symlink':
                 inv.add(InventoryLink(file_id, name, parent))
             else:



More information about the bazaar mailing list