Rev 2707: Merge FileName allocation change, leading to hash based pack file names. in http://people.ubuntu.com/~robertc/baz2.0/repository
Robert Collins
robertc at robertcollins.net
Tue Aug 7 01:47:51 BST 2007
At http://people.ubuntu.com/~robertc/baz2.0/repository
------------------------------------------------------------
revno: 2707
revision-id: robertc at robertcollins.net-20070807004747-66jkui11vnyhoz56
parent: robertc at robertcollins.net-20070805095913-oir8h97dm86v5ol7
parent: robertc at robertcollins.net-20070806234918-xc9w5f86tgjphf9u
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Tue 2007-08-07 10:47:47 +1000
message:
Merge FileName allocation change, leading to hash based pack file names.
modified:
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/file_names.py file_collection.py-20070714100753-j2zz4ahtk331k5zm-1
bzrlib/repofmt/knitrepo.py knitrepo.py-20070206081537-pyy4a00xdas0j4pf-1
bzrlib/tests/test_file_names.py test_file_collection-20070714093417-5gc9d821to85zo4t-1
bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
------------------------------------------------------------
revno: 2592.1.25.2.7.1.28.1.6.1.3.1.9.2.1.2.2
revision-id: robertc at robertcollins.net-20070806234918-xc9w5f86tgjphf9u
parent: robertc at robertcollins.net-20070806054732-0w8j16bf4upvt2nu
committer: Robert Collins <robertc at robertcollins.net>
branch nick: names-collection-user-specified
timestamp: Tue 2007-08-07 09:49:18 +1000
message:
Prevent the duplicate additions of names to FileNames collections.
modified:
bzrlib/errors.py errors.py-20050309040759-20512168c4e14fbd
bzrlib/file_names.py file_collection.py-20070714100753-j2zz4ahtk331k5zm-1
bzrlib/tests/test_file_names.py test_file_collection-20070714093417-5gc9d821to85zo4t-1
------------------------------------------------------------
revno: 2592.1.25.2.7.1.28.1.6.1.3.1.9.2.1.2.1
revision-id: robertc at robertcollins.net-20070806054732-0w8j16bf4upvt2nu
parent: pqm at pqm.ubuntu.com-20070803043116-l7u1uypblmx1uxnr
committer: Robert Collins <robertc at robertcollins.net>
branch nick: names-collection-user-specified
timestamp: Mon 2007-08-06 15:47:32 +1000
message:
Change file_names allocation to be done by the user, not the FileNames class.
modified:
bzrlib/file_names.py file_collection.py-20070714100753-j2zz4ahtk331k5zm-1
bzrlib/tests/test_file_names.py test_file_collection-20070714093417-5gc9d821to85zo4t-1
=== modified file 'bzrlib/errors.py'
--- a/bzrlib/errors.py 2007-07-20 18:59:29 +0000
+++ b/bzrlib/errors.py 2007-08-06 23:49:18 +0000
@@ -1625,6 +1625,7 @@
self.text_revision = text_revision
self.file_id = file_id
+
class DuplicateFileId(BzrError):
_fmt = "File id {%(file_id)s} already exists in inventory as %(entry)s"
=== modified file 'bzrlib/file_names.py'
--- a/bzrlib/file_names.py 2007-07-20 00:57:43 +0000
+++ b/bzrlib/file_names.py 2007-08-06 23:49:18 +0000
@@ -40,7 +40,11 @@
The save method must be called to cause the state to be saved to the
transport.
- Finally, load is used to obtain a previously saved set.
+ The load method is used to obtain a previously saved set.
+
+ There is a cap in the _cap method which will error if more names are
+ added than the names collection can sensibly manage. Currently this
+ cap is 10000.
"""
def __init__(self, transport, index_name):
@@ -50,12 +54,19 @@
self._names = None
self._cap = 10000
- def allocate(self):
- for number in xrange(self._cap):
- if str(number) not in self._names:
- self._names.add(str(number))
- return str(number)
- raise errors.BzrError('too many files')
+ def allocate(self, name=None):
+ """Allocate name in the names collection.
+
+ :param name: The name to allocate.
+ :raises: bzrlib.errors.DuplicateKey if the name is already allocated.
+ """
+ if name is not None:
+ if len(self._names) >= self._cap:
+ raise errors.BzrError('too many files')
+ if name in self._names:
+ raise errors.DuplicateKey(name)
+ self._names.add(name)
+ return name
def initialise(self):
"""Initialise the collection."""
=== modified file 'bzrlib/repofmt/knitrepo.py'
--- a/bzrlib/repofmt/knitrepo.py 2007-08-05 09:59:13 +0000
+++ b/bzrlib/repofmt/knitrepo.py 2007-08-07 00:47:47 +0000
@@ -16,6 +16,8 @@
from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
+import md5
+
from bzrlib import (
file_names,
pack,
@@ -300,13 +302,12 @@
self._names = file_names.FileNames(self.transport, 'index')
self._names.load()
- def allocate(self):
- return self._names.allocate()
+ def allocate(self, name):
+ return self._names.allocate(name)
def names(self):
- """Provide order to the underlying names."""
- def _cmp(x, y): return cmp(int(x), int(y))
- return sorted(self._names.names(), cmp=_cmp, reverse=True)
+ """Provide an order to the underlying names."""
+ return sorted(self._names.names())
def reset(self):
self._names = None
@@ -759,6 +760,7 @@
self._inv_thunk.reset()
# forget what names there are
self._data_names.reset()
+ self._open_pack_hash = None
def _pack_tuple(self, name):
"""Return a tuple with the transport and file name for a pack name."""
@@ -776,7 +778,11 @@
random_name = self.control_files._lock.nonce
self._open_pack_tuple = (self._upload_transport, random_name + '.pack')
write_stream = self._upload_transport.open_file_stream(random_name + '.pack')
- self._open_pack_writer = pack.ContainerWriter(write_stream)
+ self._open_pack_hash = md5.new()
+ def write_data(bytes):
+ write_stream(bytes)
+ self._open_pack_hash.update(bytes)
+ self._open_pack_writer = pack.ContainerWriter(write_data)
self._open_pack_writer.begin()
self._data_names.setup()
self._revision_store.setup()
@@ -788,11 +794,18 @@
self.weave_store.data_inserted() or
self._inv_thunk.data_inserted())
if data_inserted:
- new_name = self._data_names.allocate()
+ self._open_pack_writer.end()
+ new_name = self._open_pack_hash.hexdigest()
+ # If this fails, its a hash collision. We should:
+ # - determine if its a collision or
+ # - the same content or
+ # - the existing name is not the actual hash - e.g.
+ # its a deliberate attack or data corruption has
+ # occuring during the write of that file.
+ self._data_names.allocate(new_name)
self.weave_store.flush(new_name)
self._inv_thunk.flush(new_name)
self._revision_store.flush(new_name)
- self._open_pack_writer.end()
self._upload_transport.close_file_stream(self._open_pack_tuple[1])
self._upload_transport.rename(self._open_pack_tuple[1],
'../packs/' + new_name + '.pack')
@@ -806,6 +819,7 @@
# forget what names there are - should just refresh and deal with the
# delta.
self._data_names.reset()
+ self._open_pack_hash = None
def get_inventory_weave(self):
return self._inv_thunk.get_weave()
@@ -850,6 +864,7 @@
self._inv_thunk.reset()
# forget what names there are
self._data_names.reset()
+ self._open_pack_hash = None
def _pack_tuple(self, name):
"""Return a tuple with the transport and file name for a pack name."""
@@ -866,8 +881,11 @@
def _start_write_group(self):
random_name = self.control_files._lock.nonce
self._open_pack_tuple = (self._upload_transport, random_name + '.pack')
+ write_stream = self._upload_transport.open_file_stream(random_name + '.pack')
+ self._open_pack_hash = md5.new()
def write_data(bytes):
- self._upload_transport.append_bytes(random_name + '.pack', bytes)
+ write_stream(bytes)
+ self._open_pack_hash.update(bytes)
self._open_pack_writer = pack.ContainerWriter(write_data)
self._open_pack_writer.begin()
self._data_names.setup()
@@ -880,10 +898,19 @@
self.weave_store.data_inserted() or
self._inv_thunk.data_inserted())
if data_inserted:
- new_name = self._data_names.allocate()
+ self._open_pack_writer.end()
+ new_name = self._open_pack_hash.hexdigest()
+ # If this fails, its a hash collision. We should:
+ # - determine if its a collision or
+ # - the same content or
+ # - the existing name is not the actual hash - e.g.
+ # its a deliberate attack or data corruption has
+ # occuring during the write of that file.
+ self._data_names.allocate(new_name)
self.weave_store.flush(new_name)
self._inv_thunk.flush(new_name)
self._revision_store.flush(new_name)
+ self._upload_transport.close_file_stream(self._open_pack_tuple[1])
self._upload_transport.rename(self._open_pack_tuple[1],
'../packs/' + new_name + '.pack')
self._data_names.save()
@@ -896,6 +923,7 @@
# forget what names there are - should just refresh and deal with the
# delta.
self._data_names.reset()
+ self._open_pack_hash = None
def get_inventory_weave(self):
return self._inv_thunk.get_weave()
=== modified file 'bzrlib/tests/test_file_names.py'
--- a/bzrlib/tests/test_file_names.py 2007-07-19 03:37:17 +0000
+++ b/bzrlib/tests/test_file_names.py 2007-08-06 23:49:18 +0000
@@ -33,32 +33,43 @@
names.save()
self.assertEqual('', t.get_bytes(name))
- def test_allocate_trivial(self):
- t = self.get_transport()
- names = FileNames(t, 'index')
- names.initialise()
- name = names.allocate()
- self.assertEqual('0', name)
- self.assertFalse(t.has('index'))
- name = names.allocate()
- self.assertEqual('1', name)
- self.assertFalse(t.has('index'))
-
- def test_allocate_overrun(self):
+ def test_allocate_duplicate_name_errors(self):
+ t = self.get_transport()
+ names = FileNames(t, 'index')
+ names.initialise()
+ names.allocate('0')
+ self.assertRaises(errors.DuplicateKey, names.allocate, '0')
+
+ def test_allocate_name_does_not_error(self):
+ t = self.get_transport()
+ names = FileNames(t, 'index')
+ names.initialise()
+ names.allocate('0')
+ self.assertFalse(t.has('index'))
+
+ def test_allocate_two_names_succeeds(self):
+ t = self.get_transport()
+ names = FileNames(t, 'index')
+ names.initialise()
+ names.allocate('0')
+ names.allocate('1')
+ self.assertFalse(t.has('index'))
+
+ def test_exceeding_the_allocation_cap_errors(self):
t = self.get_transport()
names = FileNames(t, 'index')
names.initialise()
names._cap = 5
for number in xrange(5):
- name = names.allocate()
- self.assertRaises(errors.BzrError, names.allocate)
+ name = names.allocate(str(number))
+ self.assertRaises(errors.BzrError, names.allocate, '6')
def test_load(self):
t = self.get_transport()
names = FileNames(t, 'index')
names.initialise()
- names.allocate()
- names.allocate()
+ names.allocate('0')
+ names.allocate('1')
names.save()
names = FileNames(t, 'index')
names.load()
@@ -77,16 +88,16 @@
t = self.get_transport()
names = FileNames(t, 'index')
names.initialise()
- names.allocate()
- names.allocate()
+ names.allocate('0')
+ names.allocate('1')
self.assertEqual(set(['0', '1']), names.names())
def test_names_on_unlistable_works(self):
t = self.get_transport()
names = FileNames(t, 'index')
names.initialise()
- names.allocate()
- names.allocate()
+ names.allocate('0')
+ names.allocate('1')
names.save()
names = FileNames(
get_transport('unlistable+' + self.get_url()), 'index')
@@ -97,7 +108,19 @@
t = self.get_transport()
names = FileNames(t, 'index')
names.initialise()
- name1 = names.allocate()
- name2 = names.allocate()
- names.remove(name1)
- self.assertEqual(set([name2]), names.names())
+ names.allocate('0')
+ names.allocate('1')
+ names.remove('0')
+ self.assertEqual(set(['1']), names.names())
+
+ def test_roundtrip_hash_name(self):
+ t = self.get_transport()
+ names = FileNames(t, 'index')
+ names.initialise()
+ names.allocate('0123456789abcdef0123456789abcdef')
+ names.save()
+ names = FileNames(t, 'index')
+ names.load()
+ self.assertEqual(set(['0123456789abcdef0123456789abcdef']),
+ names.names())
+
=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py 2007-07-31 07:12:33 +0000
+++ b/bzrlib/tests/test_repository.py 2007-08-07 00:47:47 +0000
@@ -517,7 +517,6 @@
self.check_format(t)
# XXX: no locks left when unlocked at the moment
# self.assertEqualDiff('', t.get('lock').read())
- self.assertTrue(S_ISDIR(t.stat('knits').st_mode))
self.check_databases(t)
def check_format(self, t):
@@ -533,23 +532,27 @@
"""Assert that knit_name has no index on t."""
self.assertFalse(t.has(knit_name + '.kndx'))
- def assertHasKnit(self, t, knit_name):
+ def assertHasNoKnit(self, t, knit_name):
"""Assert that knit_name exists on t."""
# no default content
- self.assertTrue(t.has(knit_name + '.knit'))
+ self.assertFalse(t.has(knit_name + '.knit'))
def check_databases(self, t):
"""check knit content for a repository."""
+ # check conversion worked
self.assertHasNoKndx(t, 'inventory')
- self.assertHasKnit(t, 'inventory')
+ self.assertHasNoKnit(t, 'inventory')
self.assertHasNoKndx(t, 'revisions')
- self.assertHasKnit(t, 'revisions')
+ self.assertHasNoKnit(t, 'revisions')
self.assertHasNoKndx(t, 'signatures')
- self.assertHasKnit(t, 'signatures')
+ self.assertHasNoKnit(t, 'signatures')
+ self.assertFalse(t.has('knits'))
# revision-indexes file-container directory
names = FileNames(t.clone('indices'), 'index')
names.load()
self.assertEqual(set(), names.names())
+ self.assertTrue(S_ISDIR(t.stat('packs').st_mode))
+ self.assertTrue(S_ISDIR(t.stat('upload').st_mode))
def test_shared_disk_layout(self):
format = self.get_format()
@@ -566,7 +569,6 @@
# XXX: no locks left when unlocked at the moment
# self.assertEqualDiff('', t.get('lock').read())
self.assertEqualDiff('', t.get('shared-storage').read())
- self.assertTrue(S_ISDIR(t.stat('knits').st_mode))
self.check_databases(t)
def test_shared_no_tree_disk_layout(self):
@@ -588,44 +590,63 @@
self.assertEqualDiff('', t.get('no-working-trees').read())
repo.set_make_working_trees(True)
self.assertFalse(t.has('no-working-trees'))
- self.assertTrue(S_ISDIR(t.stat('knits').st_mode))
self.check_databases(t)
- def test_add_revision_creates_zero_dot_rix(self):
+ def test_add_revision_creates_dot_rix(self):
"""Adding a revision makes a 0.rix (Revision IndeX) file."""
format = self.get_format()
tree = self.make_branch_and_tree('.', format=format)
trans = tree.branch.repository.bzrdir.get_repository_transport(None)
- self.assertFalse(trans.has('indices/0.rix'))
+ names = FileNames(trans.clone('indices'), 'index')
+ names.load()
+ self.assertEqual(0, len(names.names()))
tree.commit('foobarbaz')
- self.assertTrue(trans.has('indices/0.rix'))
+ names.load()
+ self.assertEqual(1, len(names.names()))
+ name = list(names.names())[0]
+ self.assertTrue(trans.has('indices/%s.rix' % name))
- def test_add_revision_creates_zero_dot_six(self):
+ def test_add_revision_creates_dot_six(self):
"""Adding a revision makes a 0.six (Signature IndeX) file."""
format = self.get_format()
tree = self.make_branch_and_tree('.', format=format)
trans = tree.branch.repository.bzrdir.get_repository_transport(None)
- self.assertFalse(trans.has('indices/0.six'))
+ names = FileNames(trans.clone('indices'), 'index')
+ names.load()
+ self.assertEqual(0, len(names.names()))
tree.commit('foobarbaz')
- self.assertTrue(trans.has('indices/0.six'))
+ names.load()
+ self.assertEqual(1, len(names.names()))
+ name = list(names.names())[0]
+ self.assertTrue(trans.has('indices/%s.six' % name))
- def test_add_revision_creates_zero_dot_iix(self):
+ def test_add_revision_creates_dot_iix(self):
"""Adding a revision makes a 0.iix (Inventory IndeX) file."""
format = self.get_format()
tree = self.make_branch_and_tree('.', format=format)
trans = tree.branch.repository.bzrdir.get_repository_transport(None)
- self.assertFalse(trans.has('indices/0.iix'))
+ names = FileNames(trans.clone('indices'), 'index')
+ names.load()
+ self.assertEqual(0, len(names.names()))
tree.commit('foobarbaz')
- self.assertTrue(trans.has('indices/0.iix'))
+ names.load()
+ self.assertEqual(1, len(names.names()))
+ name = list(names.names())[0]
+ self.assertTrue(trans.has('indices/%s.iix' % name))
- def test_add_revision_creates_zero_dot_tix(self):
+ def test_add_revision_creates_dot_tix(self):
"""Adding a revision makes a 0.tix (Text IndeX) file."""
format = self.get_format()
tree = self.make_branch_and_tree('.', format=format)
trans = tree.branch.repository.bzrdir.get_repository_transport(None)
- self.assertFalse(trans.has('indices/0.tix'))
+ names = FileNames(trans.clone('indices'), 'index')
+ names.load()
+ self.assertEqual(0, len(names.names()))
tree.commit('foobarbaz')
- self.assertTrue(trans.has('indices/0.tix'))
+ names.load()
+ self.assertEqual(1, len(names.names()))
+ name = list(names.names())[0]
+ self.assertTrue(trans.has('indices/%s.tix' % name))
def test_pulling_nothing_leads_to_no_new_names(self):
format = self.get_format()
@@ -633,10 +654,6 @@
tree2 = self.make_branch_and_tree('2', format=format)
tree1.branch.repository.fetch(tree2.branch.repository)
trans = tree1.branch.repository.bzrdir.get_repository_transport(None)
- self.assertFalse(trans.has('indices/0.iix'))
- self.assertFalse(trans.has('indices/0.rix'))
- self.assertFalse(trans.has('indices/0.six'))
- self.assertFalse(trans.has('indices/0.tix'))
names = FileNames(trans.clone('indices'), 'index')
names.load()
self.assertEqual(set(), names.names())
More information about the bazaar-commits
mailing list