Rev 3372: * New ``versionedfile.KeyMapper`` interface to abstract out the access to in http://people.ubuntu.com/~robertc/baz2.0/versioned_files
Robert Collins
robertc at robertcollins.net
Fri May 2 04:30:24 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/versioned_files
------------------------------------------------------------
revno: 3372
revision-id: robertc at robertcollins.net-20080502033007-j9tl9gdrx4yxmtm9
parent: robertc at robertcollins.net-20080430062907-oodfp972lp0m4b1s
committer: Robert Collins <robertc at robertcollins.net>
branch nick: KeyMapper
timestamp: Fri 2008-05-02 13:30:07 +1000
message:
* New ``versionedfile.KeyMapper`` interface to abstract out the access to
underyling .knit/.kndx etc files in repositories with partitioned
storage. (Robert Collins)
modified:
NEWS NEWS-20050323055033-4e00b5db738777ff
bzrlib/bzrdir.py bzrdir.py-20060131065624-156dfea39c4387cb
bzrlib/fetch.py fetch.py-20050818234941-26fea6105696365d
bzrlib/repofmt/knitrepo.py knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/store/__init__.py store.py-20050309040759-164dc5173d6406c2
bzrlib/store/text.py text.py-20050928201105-c26468dcb5d9b18b
bzrlib/store/versioned/__init__.py weavestore.py-20050907094258-88262e0434babab9
bzrlib/tests/test_escaped_store.py test_escaped_store.py-20060216023929-6bcb9a067344959f
bzrlib/tests/test_store.py teststore.py-20050826022702-f6caadb647395769
bzrlib/tests/test_versionedfile.py test_versionedfile.py-20060222045249-db45c9ed14a1c2e5
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
=== modified file 'NEWS'
--- a/NEWS 2008-04-30 06:29:07 +0000
+++ b/NEWS 2008-05-02 03:30:07 +0000
@@ -62,6 +62,10 @@
* Implement xml8 serializer. (Aaron Bentley)
+ * New ``versionedfile.KeyMapper`` interface to abstract out the access to
+ underyling .knit/.kndx etc files in repositories with partitioned
+ storage. (Robert Collins)
+
API BREAKS:
* Repository.get_data_stream, Repository.get_data_stream_for_search(),
=== modified file 'bzrlib/bzrdir.py'
--- a/bzrlib/bzrdir.py 2008-04-14 08:03:35 +0000
+++ b/bzrlib/bzrdir.py 2008-05-02 03:30:07 +0000
@@ -43,6 +43,7 @@
graph,
lockable_files,
lockdir,
+ osutils,
registry,
remote,
revision as _mod_revision,
@@ -2240,16 +2241,17 @@
if (filename.endswith(".weave") or
filename.endswith(".gz") or
filename.endswith(".sig")):
- file_id = os.path.splitext(filename)[0]
+ file_id, suffix = os.path.splitext(filename)
else:
file_id = filename
- prefix_dir = store.hash_prefix(file_id)
+ suffix = ''
+ new_name = store._mapper.map((file_id,)) + suffix
# FIXME keep track of the dirs made RBC 20060121
try:
- store_transport.move(filename, prefix_dir + '/' + filename)
+ store_transport.move(filename, new_name)
except errors.NoSuchFile: # catches missing dirs strangely enough
- store_transport.mkdir(prefix_dir)
- store_transport.move(filename, prefix_dir + '/' + filename)
+ store_transport.mkdir(osutils.dirname(new_name))
+ store_transport.move(filename, new_name)
self.bzrdir._control_files.put_utf8('branch-format', BzrDirFormat6().get_format_string())
=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py 2008-04-30 04:43:26 +0000
+++ b/bzrlib/fetch.py 2008-05-02 03:30:07 +0000
@@ -205,7 +205,7 @@
return self.to_repository.search_missing_revision_ids(
self.from_repository, self._last_revision,
find_ghosts=self.find_ghosts)
- except errors.NoSuchRevision:
+ except errors.NoSuchRevision, e:
raise InstallFailed([self._last_revision])
def _fetch_weave_text(self, file_id, required_versions):
=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py 2008-04-25 23:14:10 +0000
+++ b/bzrlib/repofmt/knitrepo.py 2008-05-02 03:30:07 +0000
@@ -374,7 +374,7 @@
versionedfile_kwargs={'delta':False,
'factory':knit.KnitPlainFactory(),
},
- escaped=True,
+ escaped=False,
)
return KnitRevisionStore(versioned_file_store)
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2008-04-04 00:06:58 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2008-05-02 03:30:07 +0000
@@ -2126,7 +2126,7 @@
versionedfile_kwargs={'delta': False,
'factory': knit.KnitPlainFactory(),
},
- escaped=True,
+ escaped=False,
)
return KnitRevisionStore(versioned_file_store)
=== modified file 'bzrlib/store/__init__.py'
--- a/bzrlib/store/__init__.py 2007-09-25 08:14:12 +0000
+++ b/bzrlib/store/__init__.py 2008-05-02 03:30:07 +0000
@@ -35,6 +35,7 @@
osutils,
symbol_versioning,
urlutils,
+ versionedfile,
)
from bzrlib.errors import BzrError, UnlistableStore, TransportNotPossible
from bzrlib.symbol_versioning import (
@@ -189,8 +190,9 @@
raise NotImplementedError('children need to implement this function.')
def _check_fileid(self, fileid):
- if not isinstance(fileid, basestring):
- raise TypeError('Fileids should be a string type: %s %r' % (type(fileid), fileid))
+ if type(fileid) != str:
+ raise TypeError('Fileids should be bytestrings: %s %r' % (
+ type(fileid), fileid))
if '\\' in fileid or '/' in fileid:
raise ValueError("invalid store id %r" % fileid)
@@ -257,13 +259,16 @@
# will just use the filesystem defaults
self._dir_mode = dir_mode
self._file_mode = file_mode
-
- def _unescape(self, file_id):
- """If filename escaping is enabled for this store, unescape and return the filename."""
- if self._escaped:
- return urllib.unquote(file_id)
+ # Create a key mapper to use
+ if escaped and prefixed:
+ self._mapper = versionedfile.HashEscapedPrefixMapper()
+ elif not escaped and prefixed:
+ self._mapper = versionedfile.HashPrefixMapper()
+ elif self._escaped:
+ import pdb;pdb.set_trace()
+ raise ValueError("escaped unprefixed stores are not permitted.")
else:
- return file_id
+ self._mapper = versionedfile.PrefixMapper()
def _iter_files_recursive(self):
"""Iterate through the files in the transport."""
@@ -284,7 +289,7 @@
if name.endswith('.' + suffix):
skip = True
if not skip:
- yield self._unescape(name)
+ yield self._mapper.unmap(name)[0]
def __len__(self):
return len(list(self.__iter__()))
@@ -298,41 +303,10 @@
self._check_fileid(suffix)
else:
suffixes = []
- fileid = self._escape_file_id(fileid)
- if self._prefixed:
- # hash_prefix adds the '/' separator
- prefix = self.hash_prefix(fileid, escaped=True)
- else:
- prefix = ''
- path = prefix + fileid
+ path = self._mapper.map((fileid,))
full_path = u'.'.join([path] + suffixes)
return urlutils.escape(full_path)
- def _escape_file_id(self, file_id):
- """Turn a file id into a filesystem safe string.
-
- This is similar to a plain urllib.quote, except
- it uses specific safe characters, so that it doesn't
- have to translate a lot of valid file ids.
- """
- if not self._escaped:
- return file_id
- if isinstance(file_id, unicode):
- file_id = file_id.encode('utf-8')
- # @ does not get escaped. This is because it is a valid
- # filesystem character we use all the time, and it looks
- # a lot better than seeing %40 all the time.
- safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
- r = [((c in safe) and c or ('%%%02x' % ord(c)))
- for c in file_id]
- return ''.join(r)
-
- def hash_prefix(self, fileid, escaped=False):
- # fileid should be unescaped
- if not escaped and self._escaped:
- fileid = self._escape_file_id(fileid)
- return "%02x/" % (adler32(fileid) & 0xff)
-
def __repr__(self):
if self._transport is None:
return "%s(None)" % (self.__class__.__name__)
=== modified file 'bzrlib/store/text.py'
--- a/bzrlib/store/text.py 2006-10-05 05:37:25 +0000
+++ b/bzrlib/store/text.py 2008-05-02 03:30:07 +0000
@@ -20,13 +20,15 @@
do any sort of delta compression.
"""
+import gzip
import os
+from cStringIO import StringIO
+
+from bzrlib import osutils
+from bzrlib.errors import BzrError, NoSuchFile, FileExists
import bzrlib.store
from bzrlib.trace import mutter
-from bzrlib.errors import BzrError, NoSuchFile, FileExists
-import gzip
-from cStringIO import StringIO
class TextStore(bzrlib.store.TransportStore):
@@ -99,7 +101,7 @@
if not self._prefixed:
raise
try:
- self._transport.mkdir(self.hash_prefix(fileid)[:-1], mode=self._dir_mode)
+ self._transport.mkdir(osutils.dirname(path), mode=self._dir_mode)
except FileExists:
pass
result = other._transport.copy_to([path], self._transport,
=== modified file 'bzrlib/store/versioned/__init__.py'
--- a/bzrlib/store/versioned/__init__.py 2008-04-28 03:52:09 +0000
+++ b/bzrlib/store/versioned/__init__.py 2008-05-02 03:30:07 +0000
@@ -69,7 +69,7 @@
if relpath.endswith(suffix):
# TODO: use standard remove_suffix function
escaped_id = os.path.basename(relpath[:-len(suffix)])
- file_id = self._unescape(escaped_id)
+ file_id = self._mapper.unmap(escaped_id)[0]
if file_id not in ids:
ids.add(file_id)
yield file_id
@@ -153,7 +153,8 @@
# unexpected error - NoSuchFile is expected to be raised on a
# missing dir only and that only occurs when we are prefixed.
raise
- self._transport.mkdir(self.hash_prefix(file_id), mode=self._dir_mode)
+ dirname = osutils.dirname(_filename)
+ self._transport.mkdir(dirname, mode=self._dir_mode)
weave = self._versionedfile_class(_filename, self._transport,
self._file_mode, create=True,
get_scope=self.get_scope,
=== modified file 'bzrlib/tests/test_escaped_store.py'
--- a/bzrlib/tests/test_escaped_store.py 2007-09-25 08:14:12 +0000
+++ b/bzrlib/tests/test_escaped_store.py 2008-05-02 03:30:07 +0000
@@ -34,15 +34,6 @@
t = bzrlib.transport.get_transport(self.get_url())
return TextStore(t, prefixed=prefixed, escaped=escaped)
- def test_paths(self):
- text_store = self.get_store()
-
- self.assertEqual('a', text_store._relpath('a'))
- self.assertEqual('a', text_store._relpath(u'a'))
- self.assertEqual('%2520', text_store._relpath(' '))
- self.assertEqual('%40%253a%253c%253e', text_store._relpath('@:<>'))
- self.assertEqual('%25c3%25a5', text_store._relpath(u'\xe5'))
-
def test_prefixed(self):
# Prefix should be determined by unescaped string
text_store = self.get_store(prefixed=True)
@@ -58,7 +49,6 @@
self.assertEqual('88/%2520', text_store._relpath(' '))
self.assertEqual('72/%40%253a%253c%253e',
text_store._relpath('@:<>'))
- self.assertEqual('77/%25c3%25a5', text_store._relpath(u'\xe5'))
def test_files(self):
text_store = self.get_store(prefixed=True)
=== modified file 'bzrlib/tests/test_store.py'
--- a/bzrlib/tests/test_store.py 2008-04-28 03:52:09 +0000
+++ b/bzrlib/tests/test_store.py 2008-05-02 03:30:07 +0000
@@ -398,9 +398,10 @@
def test_escaped_uppercase(self):
"""Uppercase letters are escaped for safety on Windows"""
- my_store = store.TransportStore(MemoryTransport(), escaped=True)
+ my_store = store.TransportStore(MemoryTransport(), prefixed=True,
+ escaped=True)
# a particularly perverse file-id! :-)
- self.assertEquals(my_store._escape_file_id('C:<>'), '%43%3a%3c%3e')
+ self.assertEquals(my_store._relpath('C:<>'), 'be/%2543%253a%253c%253e')
class TestVersionFileStore(TestCaseWithTransport):
@@ -433,3 +434,14 @@
self._transaction = transactions.ReadOnlyTransaction()
vf = self.vfstore.get_weave_or_empty('id', self._transaction)
self.assertRaises(errors.ReadOnlyError, vf.add_lines, 'b', [], [])
+
+ def test___iter__escaped(self):
+ self.vfstore = store.versioned.VersionedFileStore(MemoryTransport(),
+ prefixed=True, escaped=True)
+ self.vfstore.get_scope = self.get_scope
+ self._transaction = transactions.WriteTransaction()
+ vf = self.vfstore.get_weave_or_empty(' ', self._transaction)
+ vf.add_lines('a', [], [])
+ del vf
+ self._transaction.finish()
+ self.assertEqual([' '], list(self.vfstore))
=== modified file 'bzrlib/tests/test_versionedfile.py'
--- a/bzrlib/tests/test_versionedfile.py 2008-04-30 05:32:03 +0000
+++ b/bzrlib/tests/test_versionedfile.py 2008-05-02 03:30:07 +0000
@@ -1545,3 +1545,38 @@
self.assertEqual('base\nleft\nright\nmerged', delta_data)
self.assertEqual([('get_lines', 'left')], logged_vf.calls)
+
+class TestKeyMapper(TestCaseWithMemoryTransport):
+ """Tests for various key mapping logic."""
+
+ def test_identity_mapper(self):
+ mapper = versionedfile.ConstantMapper("inventory")
+ self.assertEqual("inventory", mapper.map(('foo at ar',)))
+ self.assertEqual("inventory", mapper.map(('quux',)))
+
+ def test_prefix_mapper(self):
+ #format5: plain
+ mapper = versionedfile.PrefixMapper()
+ self.assertEqual("file-id", mapper.map(("file-id", "revision-id")))
+ self.assertEqual("new-id", mapper.map(("new-id", "revision-id")))
+ self.assertEqual(('file-id',), mapper.unmap("file-id"))
+ self.assertEqual(('new-id',), mapper.unmap("new-id"))
+
+ def test_hash_prefix_mapper(self):
+ #format6: hash + plain
+ mapper = versionedfile.HashPrefixMapper()
+ self.assertEqual("9b/file-id", mapper.map(("file-id", "revision-id")))
+ self.assertEqual("45/new-id", mapper.map(("new-id", "revision-id")))
+ self.assertEqual(('file-id',), mapper.unmap("9b/file-id"))
+ self.assertEqual(('new-id',), mapper.unmap("45/new-id"))
+
+ def test_hash_escaped_mapper(self):
+ #knit1: hash + escaped
+ mapper = versionedfile.HashEscapedPrefixMapper()
+ self.assertEqual("88/%20", mapper.map((" ", "revision-id")))
+ self.assertEqual("ed/fil%45-%49d", mapper.map(("filE-Id",
+ "revision-id")))
+ self.assertEqual("88/ne%57-%49d", mapper.map(("neW-Id",
+ "revision-id")))
+ self.assertEqual(('filE-Id',), mapper.unmap("ed/fil%45-%49d"))
+ self.assertEqual(('neW-Id',), mapper.unmap("88/ne%57-%49d"))
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2008-04-30 05:32:03 +0000
+++ b/bzrlib/versionedfile.py 2008-05-02 03:30:07 +0000
@@ -19,6 +19,10 @@
"""Versioned text file storage api."""
+from cStringIO import StringIO
+import urllib
+from zlib import adler32
+
from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
@@ -33,9 +37,6 @@
from bzrlib.graph import Graph
from bzrlib.transport.memory import MemoryTransport
""")
-
-from cStringIO import StringIO
-
from bzrlib.inter import InterObject
from bzrlib.registry import Registry
from bzrlib.symbol_versioning import *
@@ -799,3 +800,92 @@
else:
new_version_ids.add(version)
return new_version_ids
+
+
+class KeyMapper(object):
+ """KeyMappers map between keys and underlying paritioned storage."""
+
+ def map(self, key):
+ """Map key to an underlying storage identifier.
+
+ :param key: A key tuple e.g. ('file-id', 'revision-id').
+ :return: An underlying storage identifier, specific to the partitioning
+ mechanism.
+ """
+
+ def unmap(self, partition_id):
+ """Map a partitioned storage id back to a key prefix.
+
+ :param partition_id: The underlying partition id.
+ :return: As much of a key (or prefix) as is derivable from the parition
+ id.
+ """
+
+
+class ConstantMapper(KeyMapper):
+ """A key mapper that maps to a constant result."""
+
+ def __init__(self, result):
+ """Create a ConstantMapper which will return result for all maps."""
+ self._result = result
+
+ def map(self, key):
+ """See KeyMapper.map()."""
+ return self._result
+
+
+class PrefixMapper(KeyMapper):
+ """A key mapper that extracts the first component of a key."""
+
+ def map(self, key):
+ """See KeyMapper.map()."""
+ return key[0]
+
+ def unmap(self, partition_id):
+ """See KeyMapper.unmap()."""
+ return (partition_id,)
+
+
+class HashPrefixMapper(KeyMapper):
+ """A key mapper that combines the first component of a key with a hash."""
+
+ def map(self, key):
+ """See KeyMapper.map()."""
+ prefix = self._escape(key[0])
+ return "%02x/%s" % (adler32(prefix) & 0xff, prefix)
+
+ def _escape(self, prefix):
+ """No escaping needed here."""
+ return prefix
+
+ def unmap(self, partition_id):
+ """See KeyMapper.unmap()."""
+ return (self._unescape(osutils.basename(partition_id)),)
+
+ def _unescape(self, basename):
+ """No unescaping needed for HashPrefixMapper."""
+ return basename
+
+
+class HashEscapedPrefixMapper(HashPrefixMapper):
+ """Combines the escaped first component of a key with a hash."""
+
+ _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
+
+ def _escape(self, prefix):
+ """Turn a key element into a filesystem safe string.
+
+ This is similar to a plain urllib.quote, except
+ it uses specific safe characters, so that it doesn't
+ have to translate a lot of valid file ids.
+ """
+ # @ does not get escaped. This is because it is a valid
+ # filesystem character we use all the time, and it looks
+ # a lot better than seeing %40 all the time.
+ r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
+ for c in prefix]
+ return ''.join(r)
+
+ def _unescape(self, basename):
+ """Escaped names are unescaped by urlutils."""
+ return urllib.unquote(basename)
More information about the bazaar-commits
mailing list