Rev 2802: Various pack refactorings. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Wed Oct 10 06:48:11 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2802
revision-id: robertc at robertcollins.net-20071010054802-raogpryl9kq6t7ea
parent: robertc at robertcollins.net-20071010010748-95wmal5rrzkk2msh
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Wed 2007-10-10 15:48:02 +1000
message:
  Various pack refactorings.
modified:
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/tests/test_repository.py test_repository.py-20060131075918-65c555b881612f4d
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2007-10-09 03:43:20 +0000
+++ b/bzrlib/index.py	2007-10-10 05:48:02 +0000
@@ -266,6 +266,17 @@
         self._nodes_by_key = None
         self._size = size
 
+    def __eq__(self, other):
+        """Equal when self and otherwere created with the same parameters."""
+        return (
+            type(self) == type(other) and
+            self._transport == other._transport and
+            self._name == other._name and
+            self._size == other._size)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
     def _buffer_all(self):
         """Buffer all the index data.
 

=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-10-10 01:01:17 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-10-10 05:48:02 +0000
@@ -137,6 +137,8 @@
         self._pack_transport = pack_transport
         self._suffix_offsets = {'.rix':0, '.iix':1, '.tix':2, '.six':3}
         self.packs = []
+        # name:Pack mapping
+        self._packs = {}
 
     def add_pack_to_memory(self, pack):
         """Make a Pack object available to the repository to satisfy queries.
@@ -144,6 +146,8 @@
         :param pack: A Pack object.
         """
         self.packs.append(pack)
+        assert pack.name not in self._packs
+        self._packs[pack.name] = pack
         if self.repo._revision_all_indices is None:
             # to make this function more useful, perhaps we should make an
             # all_indices object in future?
@@ -187,7 +191,7 @@
         """
         result = []
         for name in self.names():
-            result.append(Pack(self._pack_transport, name, None, None, None))
+            result.append(self.get_pack_by_name(name))
         return result
 
     def all_pack_details(self):
@@ -252,7 +256,8 @@
                 # one revision for each to the total revision count, to get
                 # a matching distribution.
                 continue
-            existing_packs.append((revision_count, transport_and_name))
+            existing_packs.append((revision_count, transport_and_name,
+                self.get_pack_by_name(transport_and_name[1])))
         pack_operations = self.plan_autopack_combinations(
             existing_packs, pack_distribution)
         self._execute_pack_operations(pack_operations)
@@ -481,19 +486,23 @@
         :param pack_operations: A list of [revision_count, packs_to_combine].
         :return: None.
         """
-        for revision_count, pack_details in pack_operations:
+        for revision_count, pack_list in pack_operations:
             # we may have no-ops from the setup logic
-            if len(pack_details) == 0:
+            if len(pack_list) == 0:
                 continue
             # have a progress bar?
-            self._combine_packs(pack_details)
+            pack_details = [details for details,_ in pack_list]
+            packs = [pack for _, pack in pack_list]
+            assert pack_details[0].__class__ == tuple
+            self._combine_packs(pack_details, packs)
             for pack_detail in pack_details:
                 self._remove_pack_by_name(pack_detail[1])
         # record the newly available packs and stop advertising the old
         # packs
         self._save_pack_names()
         # move the old packs out of the way
-        for revision_count, pack_details in pack_operations:
+        for revision_count, pack_list in pack_operations:
+            pack_details = [details for details,_ in pack_list]
             self._obsolete_packs(pack_details)
 
     def pack(self):
@@ -520,13 +529,20 @@
                     continue
                 revision_count = index.key_count()
                 pack_operations[-1][0] += revision_count
-                pack_operations[-1][1].append(transport_and_name)
+                pack_operations[-1][1].append((transport_and_name,
+                    self.get_pack_by_name(transport_and_name[1])))
             self._execute_pack_operations(pack_operations)
         finally:
             if not self.repo.is_in_write_group():
                 self.reset()
 
     def plan_autopack_combinations(self, existing_packs, pack_distribution):
+        """Plan a pack operation.
+
+        :param existing_packs: The packs to pack.
+        :parma pack_distribution: A list with the number of revisions desired
+            in each pack.
+        """
         if len(existing_packs) <= len(pack_distribution):
             return []
         existing_packs.sort(reverse=True)
@@ -537,7 +553,7 @@
             # distribution chart we will include its contents in the new pack for
             # that position. If its larger, we remove its size from the
             # distribution chart
-            next_pack_rev_count, next_pack_details = existing_packs.pop(0)
+            next_pack_rev_count, next_pack_details, next_pack = existing_packs.pop(0)
             if next_pack_rev_count >= pack_distribution[0]:
                 # this is already packed 'better' than this, so we can
                 # not waste time packing it.
@@ -553,7 +569,7 @@
                 # add the revisions we're going to add to the next output pack
                 pack_operations[-1][0] += next_pack_rev_count
                 # allocate this pack to the next pack sub operation
-                pack_operations[-1][1].append(next_pack_details)
+                pack_operations[-1][1].append((next_pack_details, next_pack))
                 if pack_operations[-1][0] >= pack_distribution[0]:
                     # this pack is used up, shift left.
                     del pack_distribution[0]
@@ -561,7 +577,7 @@
         
         return pack_operations
 
-    def _combine_packs(self, pack_details):
+    def _combine_packs(self, pack_details, packs):
         """Combine the data from the packs listed in pack_details.
 
         This does little more than a bulk copy of data. One key difference
@@ -575,7 +591,7 @@
         :return: None
         """
         # select revision keys
-        revision_index_map = self._revision_index_map(pack_details)
+        revision_index_map = self._revision_index_map(pack_details, packs)
         # select inventory keys
         inv_index_map = self._inv_index_map(pack_details)
         # select text keys
@@ -683,6 +699,24 @@
                 sizes = [int(digits) for digits in value.split(' ')]
                 self._names[name] = sizes
 
+    def get_pack_by_name(self, name):
+        """Get a Pack object by name.
+
+        :param name: The name of the pack - e.g. '123456'
+        :return: A Pack object.
+        """
+        try:
+            return self._packs[name]
+        except KeyError:
+            rev_index = self._make_index(name, '.rix')
+            inv_index = self._make_index(name, '.iix')
+            txt_index = self._make_index(name, '.tix')
+            sig_index = self._make_index(name, '.six')
+            result = Pack(self._pack_transport, name, rev_index, inv_index,
+                txt_index, sig_index)
+            self._packs[name] = result
+            return result
+
     def allocate(self, name, revision_index_length, inventory_index_length,
         text_index_length, signature_index_length):
         """Allocate name in the list of packs.
@@ -711,20 +745,20 @@
         objects, and pack_map is a mapping from those objects to the 
         pack tuple they describe.
         """
-        size_offset = self._suffix_offsets[suffix]
-        indices = []
-        pack_map = {}
         self.ensure_loaded()
+        details = []
         for name in self.names():
             # TODO: maybe this should expose size to us  to allow
             # sorting of the indices for better performance ?
-            index_name = name + suffix
-            index_size = self._names[name][size_offset]
-            new_index = GraphIndex(
-                self._index_transport, index_name, index_size)
-            indices.append(new_index)
-            pack_map[new_index] = self._pack_tuple(name)
-        return pack_map, indices
+            details.append(self._pack_tuple(name))
+        return self._make_index_to_pack_map(details, suffix)
+
+    def _make_index(self, name, suffix):
+        size_offset = self._suffix_offsets[suffix]
+        index_name = name + suffix
+        index_size = self._names[name][size_offset]
+        return GraphIndex(
+            self._index_transport, index_name, index_size)
 
     def _max_pack_count(self, total_revisions):
         """Return the maximum number of packs to use for total revisions.
@@ -795,6 +829,7 @@
     def reset(self):
         self._names = None
         self.packs = []
+        self._packs = {}
 
     def _make_index_to_pack_map(self, pack_details, index_suffix):
         """Given a list (transport,name), return a map of (index)->(transport, name)."""
@@ -802,30 +837,31 @@
         # this should really reuse the existing index objects for these 
         # packs - this means making the way they are managed in the repo be 
         # more sane.
-        size_offset = self._suffix_offsets[index_suffix]
-        indices = {}
+        indices = []
+        pack_map = {}
+        self.ensure_loaded()
         for transport, name in pack_details:
             index_name = name[:-5] + index_suffix
-            index_size = self._names[index_name][index_size]
-            indices[GraphIndex(self._index_transport, index_name, index_size)] = \
-                (transport, name)
-        return indices
+            new_index = self._make_index(index_name, index_suffix)
+            indices.append(new_index)
+            pack_map[new_index] = (transport, name)
+        return pack_map, indices
 
     def _inv_index_map(self, pack_details):
         """Get a map of inv index -> packs for pack_details."""
-        return self._make_index_to_pack_map(pack_details, '.iix')
+        return self._make_index_to_pack_map(pack_details, '.iix')[0]
 
-    def _revision_index_map(self, pack_details):
+    def _revision_index_map(self, pack_details, packs):
         """Get a map of revision index -> packs for pack_details."""
-        return self._make_index_to_pack_map(pack_details, '.rix')
+        return self._make_index_to_pack_map(pack_details, '.rix')[0]
 
     def _signature_index_map(self, pack_details):
         """Get a map of signature index -> packs for pack_details."""
-        return self._make_index_to_pack_map(pack_details, '.six')
+        return self._make_index_to_pack_map(pack_details, '.six')[0]
 
     def _text_index_map(self, pack_details):
         """Get a map of text index -> packs for pack_details."""
-        return self._make_index_to_pack_map(pack_details, '.tix')
+        return self._make_index_to_pack_map(pack_details, '.tix')[0]
 
     def _index_contents(self, pack_map, key_filter=None):
         """Get an iterable of the index contents from a pack_map.

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2007-10-05 03:33:40 +0000
+++ b/bzrlib/repository.py	2007-10-10 05:48:02 +0000
@@ -2343,7 +2343,8 @@
             except errors.NoSuchRevision:
                 raise errors.InstallFailed([revision_id])
         packs = self.source._packs.all_pack_details()
-        revision_index_map = self.source._packs._revision_index_map(packs)
+        _packs = self.source._packs.all_packs()
+        revision_index_map = self.source._packs._revision_index_map(packs, _packs)
         inventory_index_map = self.source._packs._inv_index_map(packs)
         text_index_map = self.source._packs._text_index_map(packs)
         signature_index_map = self.source._packs._signature_index_map(packs)

=== modified file 'bzrlib/tests/test_repository.py'
--- a/bzrlib/tests/test_repository.py	2007-10-10 01:07:48 +0000
+++ b/bzrlib/tests/test_repository.py	2007-10-10 05:48:02 +0000
@@ -913,8 +913,9 @@
         self.addCleanup(tree.unlock)
         packs = tree.branch.repository._packs
         packs.ensure_loaded()
-        self.assertEqual([pack_repo.Pack(packs._pack_transport,
-            packs.names()[0], None, None, None, None)], packs.all_packs())
+        self.assertEqual([
+            packs.get_pack_by_name(packs.names()[0])],
+            packs.all_packs())
 
     def test_all_packs_two(self):
         format = self.get_format()
@@ -926,12 +927,35 @@
         packs = tree.branch.repository._packs
         packs.ensure_loaded()
         self.assertEqual([
-            pack_repo.Pack(packs._pack_transport,
-                packs.names()[0], None, None, None, None),
-            pack_repo.Pack(packs._pack_transport,
-                packs.names()[1], None, None, None, None),
+            packs.get_pack_by_name(packs.names()[0]),
+            packs.get_pack_by_name(packs.names()[1]),
             ], packs.all_packs())
 
+    def test_get_pack_by_name(self):
+        format = self.get_format()
+        tree = self.make_branch_and_tree('.', format=format)
+        tree.commit('start')
+        tree.lock_read()
+        self.addCleanup(tree.unlock)
+        packs = tree.branch.repository._packs
+        packs.ensure_loaded()
+        name = packs.names()[0]
+        pack_1 = packs.get_pack_by_name(name)
+        # the pack should be correctly initialised
+        rev_index = GraphIndex(packs._index_transport, name + '.rix',
+            packs._names[name][0])
+        inv_index = GraphIndex(packs._index_transport, name + '.iix',
+            packs._names[name][1])
+        txt_index = GraphIndex(packs._index_transport, name + '.tix',
+            packs._names[name][2])
+        sig_index = GraphIndex(packs._index_transport, name + '.six',
+            packs._names[name][3])
+        self.assertEqual(pack_repo.Pack(packs._pack_transport,
+                packs.names()[0], rev_index, inv_index, txt_index, sig_index),
+            pack_1)
+        # and the same instance should be returned on successive calls.
+        self.assertTrue(pack_1 is packs.get_pack_by_name(name))
+
 
 class TestPack(TestCaseWithTransport):
     """Tests for the Pack object."""



More information about the bazaar-commits mailing list