Rev 2748: Split out _make_rev_pack_map to generalize use of inventories in http://sourcefrog.net/bzr/inv-split

Martin Pool mbp at sourcefrog.net
Wed Aug 29 09:37:40 BST 2007


At http://sourcefrog.net/bzr/inv-split

------------------------------------------------------------
revno: 2748
revision-id: mbp at sourcefrog.net-20070829083739-xcjh0knlj56132rq
parent: mbp at sourcefrog.net-20070829080854-xwt7zhkap0nwgj74
committer: Martin Pool <mbp at sourcefrog.net>
branch nick: inv-split
timestamp: Wed 2007-08-29 18:37:39 +1000
message:
  Split out _make_rev_pack_map to generalize use of inventories
  
  Add index of objects by hash.
modified:
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/tests/test_pack_repository.py test_pack_repository-20070828111851-nof5soh31tidz2dq-1
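
The core of the refactoring is a suffix parameter on the pack-map helper, so the same loop can open either the revision indices ('.rix') or the new per-pack hash indices ('.hix'). A minimal standalone sketch of that pattern, using hypothetical names rather than the bzrlib API:

def make_pack_map(pack_names, suffix, open_index):
    """Return (pack_map, indices) for the given pack base names.

    :param suffix: index suffix appended to each pack name, e.g. '.rix'.
    :param open_index: callable that opens an index file by name.
    """
    indices = []
    pack_map = {}
    for name in pack_names:
        index = open_index(name + suffix)
        indices.append(index)
        # map the index object back to the pack it describes
        pack_map[index] = name
    return pack_map, indices

# Callers then differ only in the suffix they pass:
opener = lambda index_name: index_name   # stand-in for opening a GraphIndex
pack_map, indices = make_pack_map(['pack-1', 'pack-2'], '.hix', opener)
assert indices == ['pack-1.hix', 'pack-2.hix']
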
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-08-29 08:08:54 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-08-29 08:37:39 +0000
@@ -66,6 +66,8 @@
 from bzrlib.trace import mutter, note, warning
 
 
+_HASH_INDEX_SUFFIX = '.hix'
+
 class Pack(object):
     """An in memory proxy for a .pack and its indices."""
 
@@ -761,7 +763,7 @@
         if getattr(self.repo, '_revision_knit', None) is not None:
             return self.repo._revision_knit
         self.repo._packs.ensure_loaded()
-        pack_map, indices = self._make_rev_pack_map()
+        pack_map, indices = self.repo._make_rev_pack_map('.rix')
         if self.repo.is_in_write_group():
             # allow writing: queue writes to a new index
             indices.insert(0, self.repo._revision_write_index)
@@ -785,17 +787,6 @@
             access_method=knit_access)
         return self.repo._revision_knit
 
-    def _make_rev_pack_map(self):
-        indices = []
-        pack_map = {}
-        for name in self.repo._packs.names():
-            # TODO: maybe this should expose size to us  to allow
-            # sorting of the indices for better performance ?
-            index_name = self.name_to_revision_index_name(name)
-            indices.append(GraphIndex(self.transport, index_name))
-            pack_map[indices[-1]] = (self.repo._pack_tuple(name))
-        return pack_map, indices
-
     def get_signature_file(self, transaction):
         """Get the signature versioned file object."""
         if getattr(self.repo, '_signature_knit', None) is not None:
@@ -850,7 +841,8 @@
             # create a pack map for the autopack code - XXX finish
             # making a clear managed list of packs, indices and use
             # that in these mapping classes
-            self.repo._revision_pack_map = self._make_rev_pack_map()[0]
+            self.repo._revision_pack_map = self.repo._make_rev_pack_map(
+                    '.rix')[0]
         else:
             del self.repo._revision_pack_map[self.repo._revision_write_index]
             self.repo._revision_write_index = None
@@ -1201,12 +1193,12 @@
         for dir_hash, dir_bytes in split_inventory._iter_serialized_parts():
             # TODO: look in the existing index; if this text is in there then
             # don't write it again.
-            self._add_content_by_hash(dir_hash, dir_bytes)
+            self._add_bytes_by_hash(dir_hash, dir_bytes)
         # return the last hash, which should be the root - this will fail if
         # absolutely nothing was written, which is reasonable.
         return dir_hash
 
-    def _add_content_by_hash(self, new_hash, new_bytes):
+    def _add_bytes_by_hash(self, new_hash, new_bytes):
         """Add content to the repository indexed by hash.
         """
         offset, length = self._open_pack_writer.add_bytes_record(
@@ -1215,6 +1207,38 @@
         value = "%d %d" % (offset, length)
         self._hash_write_index.add_nodes([(key, value)])
 
+    def _get_bytes_by_hash(self, h):
+        """Return the byte string with the given hash."""
+        # TODO: don't load the overall index every time, just get it once and hold it
+        # during the lock duration
+        hash_all_indices = self._combined_hash_index()
+        hits = hash_all_indices.iter_entries([h])
+        
+    def _combined_hash_index(self):
+        # TODO: include the currently active index writer if there is one?
+        indices = self._make_rev_pack_map(_HASH_INDEX_SUFFIX)
+        return CombinedGraphIndex(indices)
+
+    def _make_rev_pack_map(self, suffix):
+        """Return information on existing indexes.
+
+        :param suffix: Index suffix added to pack name.
+
+        :returns: (pack_map, indices) where indices is a list of GraphIndex 
+        objects, and pack_map is a mapping from those objects to the 
+        pack tuple they describe.
+        """
+        indices = []
+        pack_map = {}
+        self._packs.ensure_loaded()
+        for name in self._packs.names():
+            # TODO: maybe this should expose size to us  to allow
+            # sorting of the indices for better performance ?
+            index_name = name + suffix
+            indices.append(GraphIndex(self._index_transport, index_name))
+            pack_map[indices[-1]] = (self._pack_tuple(name))
+        return pack_map, indices
+
     def _start_hash_index(self):
         self._hash_write_index = InMemoryGraphIndex(reference_lists=0)
 
@@ -1222,8 +1246,8 @@
         del self._hash_write_index
 
     def _commit_hash_index(self, new_pack_name):
-        new_hash_index_name = new_pack_name + '.hix'
-        self.transport.put_file(new_hash_index_name,
+        new_hash_index_name = new_pack_name + _HASH_INDEX_SUFFIX
+        self._index_transport.put_file(new_hash_index_name,
                 self._hash_write_index.finish())
         del self._hash_write_index
                     
@@ -1236,6 +1260,7 @@
         KnitRepository.__init__(self, _format, a_bzrdir, control_files,
                               _revision_store, control_store, text_store)
         index_transport = control_files._transport.clone('indices')
+        self._index_transport = index_transport
         self._packs = RepositoryPackCollection(self, control_files._transport)
         self._revision_store = GraphKnitRevisionStore(self, index_transport, self._revision_store)
         self.weave_store = GraphKnitTextStore(self, index_transport, self.weave_store)
@@ -1358,6 +1383,7 @@
         KnitRepository3.__init__(self, _format, a_bzrdir, control_files,
                               _revision_store, control_store, text_store)
         index_transport = control_files._transport.clone('indices')
+        self._index_transport = index_transport
         self._packs = RepositoryPackCollection(self, control_files._transport)
         self._revision_store = GraphKnitRevisionStore(self, index_transport, self._revision_store)
         self.weave_store = GraphKnitTextStore(self, index_transport, self.weave_store)
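
The hash index records, for each content hash, the '<offset> <length>' of a bytes record inside the pack file; _get_bytes_by_hash is still a stub in this revision (it collects the index hits but does not yet return the bytes). A toy in-memory version of the intended write/read round trip, with hypothetical names rather than the real pack and GraphIndex machinery:

import hashlib

class ToyHashStore(object):
    """Stand-in for one .pack file plus its .hix hash index."""

    def __init__(self):
        self._pack = b''    # concatenated byte records
        self._index = {}    # hash -> (offset, length)

    def add_bytes_by_hash(self, new_bytes):
        new_hash = hashlib.sha1(new_bytes).hexdigest()
        offset, length = len(self._pack), len(new_bytes)
        self._pack += new_bytes
        self._index[new_hash] = (offset, length)
        return new_hash

    def get_bytes_by_hash(self, h):
        offset, length = self._index[h]
        return self._pack[offset:offset + length]

store = ToyHashStore()
h = store.add_bytes_by_hash(b'serialized inventory directory')
assert store.get_bytes_by_hash(h) == b'serialized inventory directory'
assert len(h) == 40    # sha1 hex digest, matching the test's length check
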

=== modified file 'bzrlib/tests/test_pack_repository.py'
--- a/bzrlib/tests/test_pack_repository.py	2007-08-29 08:08:54 +0000
+++ b/bzrlib/tests/test_pack_repository.py	2007-08-29 08:37:39 +0000
@@ -25,10 +25,11 @@
     workingtree,
     )
 from bzrlib.inventory_split import SplitInventory
+from bzrlib.repofmt import pack_repo
 from bzrlib.repository import RepositoryFormat
 from bzrlib.tests import TestCase, TestCaseWithTransport
 from bzrlib.transport import get_transport
-from bzrlib.repofmt import pack_repo
+from bzrlib.util.bencode import bdecode
 
 
 class TestSplitInventory(TestCaseWithTransport):
@@ -53,4 +54,8 @@
         repo.unlock()
         # check it's a plausible hash name
         self.assertEquals(40, len(root_hash))
-        # check we can retrieve it
+        # check we can retrieve it, and that it looks like a plausible
+        # inventory
+        root_bytes = repo._get_bytes_by_hash(root_hash)
+        ## root_obs = bdecode(root_bytes)
+
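
The retrieval call added to the test goes through _combined_hash_index, which stitches the per-pack hash indices together so one lookup searches every pack. A toy equivalent of that combined lookup, again with hypothetical names (the real CombinedGraphIndex works over GraphIndex objects, not dicts):

def combined_iter_entries(indices, keys):
    """Yield (index, key, value) for each key found in any per-pack index.

    :param indices: dict-like per-pack hash indices, searched in order.
    :param keys: content hashes to look up.
    """
    remaining = set(keys)
    for index in indices:
        for key in list(remaining):
            if key in index:
                yield index, key, index[key]
                remaining.discard(key)

pack1_hix = {'aaa': '0 10'}     # hash -> 'offset length'
pack2_hix = {'bbb': '10 25'}
hits = list(combined_iter_entries([pack1_hix, pack2_hix], ['bbb']))
assert hits == [(pack2_hix, 'bbb', '10 25')]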



