Rev 2748: Split out common methods from pack repositories and add hash index in http://sourcefrog.net/bzr/pack-hashes

Martin Pool mbp at sourcefrog.net
Thu Aug 30 01:08:47 BST 2007


At http://sourcefrog.net/bzr/pack-hashes

------------------------------------------------------------
revno: 2748
revision-id: mbp at sourcefrog.net-20070830000846-p71qs5j6k2xkzri3
parent: mbp at sourcefrog.net-20070829235346-i8p0j38fo2flpxvn
committer: Martin Pool <mbp at sourcefrog.net>
branch nick: pack-hashes
timestamp: Thu 2007-08-30 10:08:46 +1000
message:
  Split out common methods from pack repositories and add hash index
added:
  bzrlib/tests/test_pack_repository.py test_pack_repository-20070828111851-nof5soh31tidz2dq-1
modified:
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
=== added file 'bzrlib/tests/test_pack_repository.py'
--- a/bzrlib/tests/test_pack_repository.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_pack_repository.py	2007-08-30 00:08:46 +0000
@@ -0,0 +1,56 @@
+# Copyright (C) 2006, 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+"""Tests specific to the packed repository format."""
+
+
+from bzrlib import (
+    bzrdir,
+    errors,
+    repository,
+    workingtree,
+    )
+from bzrlib.osutils import sha_string
+from bzrlib.repofmt import pack_repo
+from bzrlib.repository import RepositoryFormat
+from bzrlib.tests import TestCase, TestCaseWithTransport
+from bzrlib.transport import get_transport
+from bzrlib.util.bencode import bdecode
+
+
+
+class TestHashStorage(TestCaseWithTransport):
+    """Tests for storing bytes keyed by hash in the experimental pack format."""
+    def get_format(self):
+        # XXX: Update this when a permanent name is allocated, or make one by
+        # hand to avoid naming
+        return bzrdir.format_registry.make_bzrdir('experimental')
+
+    def test_store_get_bytes_by_hash(self):
+        repo = self.make_repository('r1', format=self.get_format())
+        repo.lock_write()
+        repo.start_write_group()
+        # store some stuff
+        stuff = 'hello repo!'
+        stuff_hash = sha_string(stuff)
+        repo._add_bytes_by_hash(stuff_hash, stuff)
+        # finish storing
+        repo.commit_write_group()
+        # try to get it back
+        return  # NOTE(review): read-back check below is disabled; presumably _get_bytes_by_hash is not implemented yet
+        result = repo._get_bytes_by_hash(stuff_hash)
+        self.assertEquals(stuff, result)

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-08-29 23:53:46 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-08-30 00:08:46 +0000
@@ -786,7 +786,7 @@
         if getattr(self.repo, '_revision_knit', None) is not None:
             return self.repo._revision_knit
         self.repo._packs.ensure_loaded()
-        pack_map, indices = self._make_rev_pack_map()
+        pack_map, indices = self.repo._make_index_map('.rix')
         if self.repo.is_in_write_group():
             # allow writing: queue writes to a new index
             indices.insert(0, self.repo._revision_write_index)
@@ -810,17 +810,6 @@
             access_method=knit_access)
         return self.repo._revision_knit
 
-    def _make_rev_pack_map(self):
-        indices = []
-        pack_map = {}
-        for name in self.repo._packs.names():
-            # TODO: maybe this should expose size to us  to allow
-            # sorting of the indices for better performance ?
-            index_name = self.name_to_revision_index_name(name)
-            indices.append(GraphIndex(self.transport, index_name))
-            pack_map[indices[-1]] = (self.repo._pack_tuple(name))
-        return pack_map, indices
-
     def get_signature_file(self, transaction):
         """Get the signature versioned file object."""
         if getattr(self.repo, '_signature_knit', None) is not None:
@@ -875,7 +864,7 @@
             # create a pack map for the autopack code - XXX finish
             # making a clear managed list of packs, indices and use
             # that in these mapping classes
-            self.repo._revision_pack_map = self._make_rev_pack_map()[0]
+            self.repo._revision_pack_map = self.repo._make_index_map('.rix')[0]
         else:
             del self.repo._revision_pack_map[self.repo._revision_write_index]
             self.repo._revision_write_index = None
@@ -1191,6 +1180,66 @@
     they simulate - this just provides common delegated implementations.
     """
 
+    def _add_bytes_by_hash(self, new_hash, new_bytes):
+        """Write new_bytes to the open pack and index it as 'offset length' under its 40-character hash."""
+        # Check the hash before writing so a malformed key never reaches the pack.
+        assert len(new_hash) == 40
+        key = (new_hash,)
+        offset, length = self._open_pack_writer.add_bytes_record(
+            new_bytes, [key])
+        value = "%d %d" % (offset, length)
+        self._hash_write_index.add_nodes([(key, value)])
+
+    def _make_index_map(self, suffix):
+        """Build a GraphIndex for every existing pack.
+
+        :param suffix: Index suffix appended to each pack name.
+
+        :returns: (pack_map, indices) where indices is a list of GraphIndex
+        objects, and pack_map maps each of those objects to the pack
+        tuple it describes.
+        """
+        self._packs.ensure_loaded()
+        pack_map = {}
+        indices = []
+        for pack_name in self._packs.names():
+            # TODO: maybe this should expose size to us  to allow
+            # sorting of the indices for better performance ?
+            index = GraphIndex(self._index_transport, pack_name + suffix)
+            indices.append(index)
+            pack_map[index] = self._pack_tuple(pack_name)
+        return pack_map, indices
+
+    def _start_hash_index(self):
+        self._hash_write_index = InMemoryGraphIndex(reference_lists=0)  # fresh in-memory index with no reference lists
+
+    def _abort_hash_index(self):
+        del self._hash_write_index  # drop the in-memory hash index without writing it out
+
+    def _commit_hash_index(self, new_pack_name):
+        new_hash_index_name = new_pack_name + _HASH_INDEX_SUFFIX  # NOTE(review): _HASH_INDEX_SUFFIX is not defined in this diff -- confirm it exists in pack_repo.py
+        self._index_transport.put_file(new_hash_index_name,
+                self._hash_write_index.finish())  # write the finished index via the index transport
+        del self._hash_write_index  # the in-memory index is dropped once it has been written
+                    
+    def _start_write_group(self):
+        random_name = self.control_files._lock.nonce  # the lock nonce is used as the name of the pack being uploaded
+        self._open_pack_tuple = (self._upload_transport, random_name + '.pack')
+        write_stream = self._upload_transport.open_write_stream(random_name + '.pack')
+        self._write_stream = write_stream
+        self._open_pack_hash = md5.new()
+        def write_data(bytes, write=write_stream.write, update=self._open_pack_hash.update):
+            write(bytes)  # send the chunk to the upload stream...
+            update(bytes)  # ...and feed the same chunk to the md5 hash
+        self._open_pack_writer = pack.ContainerWriter(write_data)
+        self._open_pack_writer.begin()
+        self._packs.setup()
+        self._revision_store.setup()
+        self.weave_store.setup()
+        self._inv_thunk.setup()
+        self._start_hash_index()  # also create the in-memory hash index for this write group
+
+
 class GraphKnitRepository1(_GraphKnitRepositoryBase, KnitRepository):
     """Experimental graph-knit using repository."""
 
@@ -1199,6 +1248,7 @@
         KnitRepository.__init__(self, _format, a_bzrdir, control_files,
                               _revision_store, control_store, text_store)
         index_transport = control_files._transport.clone('indices')
+        self._index_transport = index_transport
         self._packs = RepositoryPackCollection(self, control_files._transport)
         self._revision_store = GraphKnitRevisionStore(self, index_transport, self._revision_store)
         self.weave_store = GraphKnitTextStore(self, index_transport, self.weave_store)
@@ -1229,22 +1279,6 @@
             # forget what names there are
             self._packs.reset()
 
-    def _start_write_group(self):
-        random_name = self.control_files._lock.nonce
-        self._open_pack_tuple = (self._upload_transport, random_name + '.pack')
-        write_stream = self._upload_transport.open_write_stream(random_name + '.pack')
-        self._write_stream = write_stream
-        self._open_pack_hash = md5.new()
-        def write_data(bytes, write=write_stream.write, update=self._open_pack_hash.update):
-            write(bytes)
-            update(bytes)
-        self._open_pack_writer = pack.ContainerWriter(write_data)
-        self._open_pack_writer.begin()
-        self._packs.setup()
-        self._revision_store.setup()
-        self.weave_store.setup()
-        self._inv_thunk.setup()
-
     def _commit_write_group(self):
         data_inserted = (self._revision_store.data_inserted() or
             self.weave_store.data_inserted() or 
@@ -1328,6 +1362,7 @@
         KnitRepository3.__init__(self, _format, a_bzrdir, control_files,
                               _revision_store, control_store, text_store)
         index_transport = control_files._transport.clone('indices')
+        self._index_transport = index_transport
         self._packs = RepositoryPackCollection(self, control_files._transport)
         self._revision_store = GraphKnitRevisionStore(self, index_transport, self._revision_store)
         self.weave_store = GraphKnitTextStore(self, index_transport, self.weave_store)
@@ -1358,22 +1393,6 @@
             # forget what names there are
             self._packs.reset()
 
-    def _start_write_group(self):
-        random_name = self.control_files._lock.nonce
-        self._open_pack_tuple = (self._upload_transport, random_name + '.pack')
-        write_stream = self._upload_transport.open_write_stream(random_name + '.pack')
-        self._write_stream = write_stream
-        self._open_pack_hash = md5.new()
-        def write_data(bytes, write=write_stream.write, update=self._open_pack_hash.update):
-            write(bytes)
-            update(bytes)
-        self._open_pack_writer = pack.ContainerWriter(write_data)
-        self._open_pack_writer.begin()
-        self._packs.setup()
-        self._revision_store.setup()
-        self.weave_store.setup()
-        self._inv_thunk.setup()
-
     def _commit_write_group(self):
         data_inserted = (self._revision_store.data_inserted() or
             self.weave_store.data_inserted() or 

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2007-08-22 01:02:35 +0000
+++ b/bzrlib/tests/__init__.py	2007-08-30 00:08:46 +0000
@@ -2398,6 +2398,7 @@
                    'bzrlib.tests.test_osutils',
                    'bzrlib.tests.test_osutils_encodings',
                    'bzrlib.tests.test_pack',
+                   'bzrlib.tests.test_pack_repository',
                    'bzrlib.tests.test_patch',
                    'bzrlib.tests.test_patches',
                    'bzrlib.tests.test_permissions',




More information about the bazaar-commits mailing list