Rev 3372: * New ``versionedfile.KeyMapper`` interface to abstract out the access to in http://people.ubuntu.com/~robertc/baz2.0/versioned_files

Robert Collins robertc at robertcollins.net
Fri May 2 04:30:24 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/versioned_files

------------------------------------------------------------
revno: 3372
revision-id: robertc at robertcollins.net-20080502033007-j9tl9gdrx4yxmtm9
parent: robertc at robertcollins.net-20080430062907-oodfp972lp0m4b1s
committer: Robert Collins <robertc at robertcollins.net>
branch nick: KeyMapper
timestamp: Fri 2008-05-02 13:30:07 +1000
message:
   * New ``versionedfile.KeyMapper`` interface to abstract out the access to
     underyling .knit/.kndx etc files in repositories with partitioned
     storage. (Robert Collins)
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/bzrdir.py               bzrdir.py-20060131065624-156dfea39c4387cb
  bzrlib/fetch.py                fetch.py-20050818234941-26fea6105696365d
  bzrlib/repofmt/knitrepo.py     knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/store/__init__.py       store.py-20050309040759-164dc5173d6406c2
  bzrlib/store/text.py           text.py-20050928201105-c26468dcb5d9b18b
  bzrlib/store/versioned/__init__.py weavestore.py-20050907094258-88262e0434babab9
  bzrlib/tests/test_escaped_store.py test_escaped_store.py-20060216023929-6bcb9a067344959f
  bzrlib/tests/test_store.py     teststore.py-20050826022702-f6caadb647395769
  bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
  bzrlib/versionedfile.py        versionedfile.py-20060222045106-5039c71ee3b65490
=== modified file 'NEWS'
--- a/NEWS	2008-04-30 06:29:07 +0000
+++ b/NEWS	2008-05-02 03:30:07 +0000
@@ -62,6 +62,10 @@
 
     * Implement xml8 serializer.  (Aaron Bentley)
 
+    * New ``versionedfile.KeyMapper`` interface to abstract out the access to
+      underyling .knit/.kndx etc files in repositories with partitioned
+      storage. (Robert Collins)
+
   API BREAKS:
 
     * Repository.get_data_stream, Repository.get_data_stream_for_search(),

=== modified file 'bzrlib/bzrdir.py'
--- a/bzrlib/bzrdir.py	2008-04-14 08:03:35 +0000
+++ b/bzrlib/bzrdir.py	2008-05-02 03:30:07 +0000
@@ -43,6 +43,7 @@
     graph,
     lockable_files,
     lockdir,
+    osutils,
     registry,
     remote,
     revision as _mod_revision,
@@ -2240,16 +2241,17 @@
                 if (filename.endswith(".weave") or
                     filename.endswith(".gz") or
                     filename.endswith(".sig")):
-                    file_id = os.path.splitext(filename)[0]
+                    file_id, suffix = os.path.splitext(filename)
                 else:
                     file_id = filename
-                prefix_dir = store.hash_prefix(file_id)
+                    suffix = ''
+                new_name = store._mapper.map((file_id,)) + suffix
                 # FIXME keep track of the dirs made RBC 20060121
                 try:
-                    store_transport.move(filename, prefix_dir + '/' + filename)
+                    store_transport.move(filename, new_name)
                 except errors.NoSuchFile: # catches missing dirs strangely enough
-                    store_transport.mkdir(prefix_dir)
-                    store_transport.move(filename, prefix_dir + '/' + filename)
+                    store_transport.mkdir(osutils.dirname(new_name))
+                    store_transport.move(filename, new_name)
         self.bzrdir._control_files.put_utf8('branch-format', BzrDirFormat6().get_format_string())
 
 

=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py	2008-04-30 04:43:26 +0000
+++ b/bzrlib/fetch.py	2008-05-02 03:30:07 +0000
@@ -205,7 +205,7 @@
             return self.to_repository.search_missing_revision_ids(
                 self.from_repository, self._last_revision,
                 find_ghosts=self.find_ghosts)
-        except errors.NoSuchRevision:
+        except errors.NoSuchRevision, e:
             raise InstallFailed([self._last_revision])
 
     def _fetch_weave_text(self, file_id, required_versions):

=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py	2008-04-25 23:14:10 +0000
+++ b/bzrlib/repofmt/knitrepo.py	2008-05-02 03:30:07 +0000
@@ -374,7 +374,7 @@
             versionedfile_kwargs={'delta':False,
                                   'factory':knit.KnitPlainFactory(),
                                  },
-            escaped=True,
+            escaped=False,
             )
         return KnitRevisionStore(versioned_file_store)
 

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2008-04-04 00:06:58 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2008-05-02 03:30:07 +0000
@@ -2126,7 +2126,7 @@
             versionedfile_kwargs={'delta': False,
                                   'factory': knit.KnitPlainFactory(),
                                  },
-            escaped=True,
+            escaped=False,
             )
         return KnitRevisionStore(versioned_file_store)
 

=== modified file 'bzrlib/store/__init__.py'
--- a/bzrlib/store/__init__.py	2007-09-25 08:14:12 +0000
+++ b/bzrlib/store/__init__.py	2008-05-02 03:30:07 +0000
@@ -35,6 +35,7 @@
     osutils,
     symbol_versioning,
     urlutils,
+    versionedfile,
     )
 from bzrlib.errors import BzrError, UnlistableStore, TransportNotPossible
 from bzrlib.symbol_versioning import (
@@ -189,8 +190,9 @@
         raise NotImplementedError('children need to implement this function.')
 
     def _check_fileid(self, fileid):
-        if not isinstance(fileid, basestring):
-            raise TypeError('Fileids should be a string type: %s %r' % (type(fileid), fileid))
+        if type(fileid) != str:
+            raise TypeError('Fileids should be bytestrings: %s %r' % (
+                type(fileid), fileid))
         if '\\' in fileid or '/' in fileid:
             raise ValueError("invalid store id %r" % fileid)
 
@@ -257,13 +259,16 @@
         # will just use the filesystem defaults
         self._dir_mode = dir_mode
         self._file_mode = file_mode
-
-    def _unescape(self, file_id):
-        """If filename escaping is enabled for this store, unescape and return the filename."""
-        if self._escaped:
-            return urllib.unquote(file_id)
+        # Create a key mapper to use
+        if escaped and prefixed:
+            self._mapper = versionedfile.HashEscapedPrefixMapper()
+        elif not escaped and prefixed:
+            self._mapper = versionedfile.HashPrefixMapper()
+        elif self._escaped:
+            import pdb;pdb.set_trace()
+            raise ValueError("escaped unprefixed stores are not permitted.")
         else:
-            return file_id
+            self._mapper = versionedfile.PrefixMapper()
 
     def _iter_files_recursive(self):
         """Iterate through the files in the transport."""
@@ -284,7 +289,7 @@
                     if name.endswith('.' + suffix):
                         skip = True
             if not skip:
-                yield self._unescape(name)
+                yield self._mapper.unmap(name)[0]
 
     def __len__(self):
         return len(list(self.__iter__()))
@@ -298,41 +303,10 @@
                 self._check_fileid(suffix)
         else:
             suffixes = []
-        fileid = self._escape_file_id(fileid)
-        if self._prefixed:
-            # hash_prefix adds the '/' separator
-            prefix = self.hash_prefix(fileid, escaped=True)
-        else:
-            prefix = ''
-        path = prefix + fileid
+        path = self._mapper.map((fileid,))
         full_path = u'.'.join([path] + suffixes)
         return urlutils.escape(full_path)
 
-    def _escape_file_id(self, file_id):
-        """Turn a file id into a filesystem safe string.
-
-        This is similar to a plain urllib.quote, except
-        it uses specific safe characters, so that it doesn't
-        have to translate a lot of valid file ids.
-        """
-        if not self._escaped:
-            return file_id
-        if isinstance(file_id, unicode):
-            file_id = file_id.encode('utf-8')
-        # @ does not get escaped. This is because it is a valid
-        # filesystem character we use all the time, and it looks
-        # a lot better than seeing %40 all the time.
-        safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
-        r = [((c in safe) and c or ('%%%02x' % ord(c)))
-             for c in file_id]
-        return ''.join(r)
-
-    def hash_prefix(self, fileid, escaped=False):
-        # fileid should be unescaped
-        if not escaped and self._escaped:
-            fileid = self._escape_file_id(fileid)
-        return "%02x/" % (adler32(fileid) & 0xff)
-
     def __repr__(self):
         if self._transport is None:
             return "%s(None)" % (self.__class__.__name__)

=== modified file 'bzrlib/store/text.py'
--- a/bzrlib/store/text.py	2006-10-05 05:37:25 +0000
+++ b/bzrlib/store/text.py	2008-05-02 03:30:07 +0000
@@ -20,13 +20,15 @@
 do any sort of delta compression.
 """
 
+import gzip
 import os
+from cStringIO import StringIO
+
+from bzrlib import osutils
+from bzrlib.errors import BzrError, NoSuchFile, FileExists
 import bzrlib.store
 from bzrlib.trace import mutter
-from bzrlib.errors import BzrError, NoSuchFile, FileExists
 
-import gzip
-from cStringIO import StringIO
 
 
 class TextStore(bzrlib.store.TransportStore):
@@ -99,7 +101,7 @@
             if not self._prefixed:
                 raise
             try:
-                self._transport.mkdir(self.hash_prefix(fileid)[:-1], mode=self._dir_mode)
+                self._transport.mkdir(osutils.dirname(path), mode=self._dir_mode)
             except FileExists:
                 pass
             result = other._transport.copy_to([path], self._transport,

=== modified file 'bzrlib/store/versioned/__init__.py'
--- a/bzrlib/store/versioned/__init__.py	2008-04-28 03:52:09 +0000
+++ b/bzrlib/store/versioned/__init__.py	2008-05-02 03:30:07 +0000
@@ -69,7 +69,7 @@
                 if relpath.endswith(suffix):
                     # TODO: use standard remove_suffix function
                     escaped_id = os.path.basename(relpath[:-len(suffix)])
-                    file_id = self._unescape(escaped_id)
+                    file_id = self._mapper.unmap(escaped_id)[0]
                     if file_id not in ids:
                         ids.add(file_id)
                         yield file_id
@@ -153,7 +153,8 @@
                 # unexpected error - NoSuchFile is expected to be raised on a
                 # missing dir only and that only occurs when we are prefixed.
                 raise
-            self._transport.mkdir(self.hash_prefix(file_id), mode=self._dir_mode)
+            dirname = osutils.dirname(_filename)
+            self._transport.mkdir(dirname, mode=self._dir_mode)
             weave = self._versionedfile_class(_filename, self._transport,
                                               self._file_mode, create=True,
                                               get_scope=self.get_scope,

=== modified file 'bzrlib/tests/test_escaped_store.py'
--- a/bzrlib/tests/test_escaped_store.py	2007-09-25 08:14:12 +0000
+++ b/bzrlib/tests/test_escaped_store.py	2008-05-02 03:30:07 +0000
@@ -34,15 +34,6 @@
         t = bzrlib.transport.get_transport(self.get_url())
         return TextStore(t, prefixed=prefixed, escaped=escaped)
 
-    def test_paths(self):
-        text_store = self.get_store()
-
-        self.assertEqual('a', text_store._relpath('a'))
-        self.assertEqual('a', text_store._relpath(u'a'))
-        self.assertEqual('%2520', text_store._relpath(' '))
-        self.assertEqual('%40%253a%253c%253e', text_store._relpath('@:<>'))
-        self.assertEqual('%25c3%25a5', text_store._relpath(u'\xe5'))
-
     def test_prefixed(self):
         # Prefix should be determined by unescaped string
         text_store = self.get_store(prefixed=True)
@@ -58,7 +49,6 @@
         self.assertEqual('88/%2520', text_store._relpath(' '))
         self.assertEqual('72/%40%253a%253c%253e',
                 text_store._relpath('@:<>'))
-        self.assertEqual('77/%25c3%25a5', text_store._relpath(u'\xe5'))
 
     def test_files(self):
         text_store = self.get_store(prefixed=True)

=== modified file 'bzrlib/tests/test_store.py'
--- a/bzrlib/tests/test_store.py	2008-04-28 03:52:09 +0000
+++ b/bzrlib/tests/test_store.py	2008-05-02 03:30:07 +0000
@@ -398,9 +398,10 @@
 
     def test_escaped_uppercase(self):
         """Uppercase letters are escaped for safety on Windows"""
-        my_store = store.TransportStore(MemoryTransport(), escaped=True)
+        my_store = store.TransportStore(MemoryTransport(), prefixed=True,
+            escaped=True)
         # a particularly perverse file-id! :-)
-        self.assertEquals(my_store._escape_file_id('C:<>'), '%43%3a%3c%3e')
+        self.assertEquals(my_store._relpath('C:<>'), 'be/%2543%253a%253c%253e')
 
 
 class TestVersionFileStore(TestCaseWithTransport):
@@ -433,3 +434,14 @@
         self._transaction = transactions.ReadOnlyTransaction()
         vf = self.vfstore.get_weave_or_empty('id', self._transaction)
         self.assertRaises(errors.ReadOnlyError, vf.add_lines, 'b', [], [])
+
+    def test___iter__escaped(self):
+        self.vfstore = store.versioned.VersionedFileStore(MemoryTransport(),
+            prefixed=True, escaped=True)
+        self.vfstore.get_scope = self.get_scope
+        self._transaction = transactions.WriteTransaction()
+        vf = self.vfstore.get_weave_or_empty(' ', self._transaction)
+        vf.add_lines('a', [], [])
+        del vf
+        self._transaction.finish()
+        self.assertEqual([' '], list(self.vfstore))

=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py	2008-04-30 05:32:03 +0000
+++ b/bzrlib/tests/test_versionedfile.py	2008-05-02 03:30:07 +0000
@@ -1545,3 +1545,38 @@
         self.assertEqual('base\nleft\nright\nmerged', delta_data)
         self.assertEqual([('get_lines', 'left')], logged_vf.calls)
 
+
+class TestKeyMapper(TestCaseWithMemoryTransport):
+    """Tests for various key mapping logic."""
+
+    def test_identity_mapper(self):
+        mapper = versionedfile.ConstantMapper("inventory")
+        self.assertEqual("inventory", mapper.map(('foo at ar',)))
+        self.assertEqual("inventory", mapper.map(('quux',)))
+
+    def test_prefix_mapper(self):
+        #format5: plain
+        mapper = versionedfile.PrefixMapper()
+        self.assertEqual("file-id", mapper.map(("file-id", "revision-id")))
+        self.assertEqual("new-id", mapper.map(("new-id", "revision-id")))
+        self.assertEqual(('file-id',), mapper.unmap("file-id"))
+        self.assertEqual(('new-id',), mapper.unmap("new-id"))
+
+    def test_hash_prefix_mapper(self):
+        #format6: hash + plain
+        mapper = versionedfile.HashPrefixMapper()
+        self.assertEqual("9b/file-id", mapper.map(("file-id", "revision-id")))
+        self.assertEqual("45/new-id", mapper.map(("new-id", "revision-id")))
+        self.assertEqual(('file-id',), mapper.unmap("9b/file-id"))
+        self.assertEqual(('new-id',), mapper.unmap("45/new-id"))
+
+    def test_hash_escaped_mapper(self):
+        #knit1: hash + escaped
+        mapper = versionedfile.HashEscapedPrefixMapper()
+        self.assertEqual("88/%20", mapper.map((" ", "revision-id")))
+        self.assertEqual("ed/fil%45-%49d", mapper.map(("filE-Id",
+            "revision-id")))
+        self.assertEqual("88/ne%57-%49d", mapper.map(("neW-Id",
+            "revision-id")))
+        self.assertEqual(('filE-Id',), mapper.unmap("ed/fil%45-%49d"))
+        self.assertEqual(('neW-Id',), mapper.unmap("88/ne%57-%49d"))

=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py	2008-04-30 05:32:03 +0000
+++ b/bzrlib/versionedfile.py	2008-05-02 03:30:07 +0000
@@ -19,6 +19,10 @@
 
 """Versioned text file storage api."""
 
+from cStringIO import StringIO
+import urllib
+from zlib import adler32
+
 from bzrlib.lazy_import import lazy_import
 lazy_import(globals(), """
 
@@ -33,9 +37,6 @@
 from bzrlib.graph import Graph
 from bzrlib.transport.memory import MemoryTransport
 """)
-
-from cStringIO import StringIO
-
 from bzrlib.inter import InterObject
 from bzrlib.registry import Registry
 from bzrlib.symbol_versioning import *
@@ -799,3 +800,92 @@
                     else:
                         new_version_ids.add(version)
                 return new_version_ids
+
+
+class KeyMapper(object):
+    """KeyMappers map between keys and underlying paritioned storage."""
+
+    def map(self, key):
+        """Map key to an underlying storage identifier.
+
+        :param key: A key tuple e.g. ('file-id', 'revision-id').
+        :return: An underlying storage identifier, specific to the partitioning
+            mechanism.
+        """
+
+    def unmap(self, partition_id):
+        """Map a partitioned storage id back to a key prefix.
+        
+        :param partition_id: The underlying partition id.
+        :return: As much of a key (or prefix) as is derivable from the parition
+            id.
+        """
+
+
+class ConstantMapper(KeyMapper):
+    """A key mapper that maps to a constant result."""
+
+    def __init__(self, result):
+        """Create a ConstantMapper which will return result for all maps."""
+        self._result = result
+
+    def map(self, key):
+        """See KeyMapper.map()."""
+        return self._result
+
+
+class PrefixMapper(KeyMapper):
+    """A key mapper that extracts the first component of a key."""
+
+    def map(self, key):
+        """See KeyMapper.map()."""
+        return key[0]
+
+    def unmap(self, partition_id):
+        """See KeyMapper.unmap()."""
+        return (partition_id,)
+
+
+class HashPrefixMapper(KeyMapper):
+    """A key mapper that combines the first component of a key with a hash."""
+
+    def map(self, key):
+        """See KeyMapper.map()."""
+        prefix = self._escape(key[0])
+        return "%02x/%s" % (adler32(prefix) & 0xff, prefix)
+
+    def _escape(self, prefix):
+        """No escaping needed here."""
+        return prefix
+
+    def unmap(self, partition_id):
+        """See KeyMapper.unmap()."""
+        return (self._unescape(osutils.basename(partition_id)),)
+
+    def _unescape(self, basename):
+        """No unescaping needed for HashPrefixMapper."""
+        return basename
+
+
+class HashEscapedPrefixMapper(HashPrefixMapper):
+    """Combines the escaped first component of a key with a hash."""
+
+    _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
+
+    def _escape(self, prefix):
+        """Turn a key element into a filesystem safe string.
+
+        This is similar to a plain urllib.quote, except
+        it uses specific safe characters, so that it doesn't
+        have to translate a lot of valid file ids.
+        """
+        # @ does not get escaped. This is because it is a valid
+        # filesystem character we use all the time, and it looks
+        # a lot better than seeing %40 all the time.
+        r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
+             for c in prefix]
+        return ''.join(r)
+
+    def _unescape(self, basename):
+        """Escaped names are unescaped by urlutils."""
+        return urllib.unquote(basename)




More information about the bazaar-commits mailing list