Rev 2822: Move some text index logic to NewPack. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Mon Oct 15 06:36:20 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2822
revision-id: robertc at robertcollins.net-20071015053607-y3r9wtrhne469282
parent: robertc at robertcollins.net-20071015045749-xld4bvkdad4t3mod
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Mon 2007-10-15 15:36:07 +1000
message:
  Move some text index logic to NewPack.
modified:
  bzrlib/repofmt/pack_repo.py    pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py	2007-10-15 04:57:49 +0000
+++ b/bzrlib/repofmt/pack_repo.py	2007-10-15 05:36:07 +0000
@@ -98,16 +98,21 @@
     ExistingPack and NewPack are used.
     """
 
-    def __init__(self, revision_index, inventory_index):
+    def __init__(self, revision_index, inventory_index, text_index):
         """Create a pack instance.
 
         :param revision_index: A GraphIndex for determining what revisions are
             present in the Pack and accessing the locations of their texts.
         :param inventory_index: A GraphIndex for determining what inventories are
-            present in the Pack and accessing the locations of their texts.
+            present in the Pack and accessing the locations of their
+            texts/deltas.
+        :param text_index: A GraphIndex for determining what file texts
+            are present in the pack and accessing the locations of their
+            texts/deltas (via (fileid, revisionid) tuples).
         """
         self.revision_index = revision_index
         self.inventory_index = inventory_index
+        self.text_index = text_index
 
     def get_revision_count(self):
         return self.revision_index.key_count()
@@ -134,7 +139,7 @@
 
     def __init__(self, transport, name, revision_index, inventory_index,
         text_index, signature_index):
-        Pack.__init__(self, revision_index, inventory_index)
+        Pack.__init__(self, revision_index, inventory_index, text_index)
         self.text_index = text_index
         self.signature_index = signature_index
         self.name = name
@@ -180,7 +185,11 @@
             # knit code hasn't been updated enough to understand that, so we
             # have a regular 2-list index giving parents and compression
             # source.
-            InMemoryGraphIndex(reference_lists=2))
+            InMemoryGraphIndex(reference_lists=2),
+            # Texts: compression and per file graph, for all fileids - so two
+            # reference lists and two elements in the key tuple.
+            InMemoryGraphIndex(reference_lists=2, key_elements=2),
+            )
         # where should the new pack be opened
         self.upload_transport = upload_transport
         # where are indices written out to
@@ -210,24 +219,30 @@
         """
         new_name = self._hash.hexdigest()
         self.index_sizes = [None, None, None, None]
-
-        revision_index_name = self.revision_index_name(new_name)
-        self.index_sizes[0] = self.index_transport.put_file(
-            revision_index_name, self.revision_index.finish())
-        if 'fetch' in debug.debug_flags:
-            # XXX: size might be interesting?
-            mutter('%s: create_pack: wrote revision index: %s%s t+%6.3fs',
-                time.ctime(), self.upload_transport.base, self.random_name,
-                time.time() - self.start_time)
-
-        inv_index_name = self.inventory_index_name(new_name)
-        self.index_sizes[1] = self.index_transport.put_file(inv_index_name,
-            self.inventory_index.finish())
-        if 'fetch' in debug.debug_flags:
-            # XXX: size might be interesting?
-            mutter('%s: create_pack: wrote inventory index: %s%s t+%6.3fs',
-                time.ctime(), self.upload_transport.base, self.random_name,
-                time.time() - self.start_time)
+        self._write_index(new_name, self.revision_index, 0,
+            self.revision_index_name, 'revision')
+        self._write_index(new_name, self.inventory_index, 1,
+            self.inventory_index_name, 'inventory')
+        self._write_index(new_name, self.text_index, 2,
+            self.text_index_name, 'file texts')
+
+    def _write_index(self, new_name, index, index_offset, name_getter, label):
+        """Write out an index.
+
+        :param new_name: The basename of the pack.
+        :param index: The index object to serialise.
+        :param index_offset: Where in self.index_sizes to remember this.
+        :param name_getter: What to use to get the name of the index on disk.
+        :param label: What label to give the index e.g. 'revision'.
+        """
+        index_name = name_getter(new_name)
+        self.index_sizes[index_offset] = self.index_transport.put_file(
+            index_name, index.finish())
+        if 'fetch' in debug.debug_flags:
+            # XXX: size might be interesting?
+            mutter('%s: create_pack: wrote %s index: %s%s t+%6.3fs',
+                time.ctime(), label, self.upload_transport.base,
+                self.random_name, time.time() - self.start_time)
 
 
 class RepositoryPackCollection(object):
@@ -293,7 +308,7 @@
         file_id_index = GraphIndexPrefixAdapter(
             self.repo._text_all_indices,
             (file_id, ), 1,
-            add_nodes_callback=self.repo._text_write_index.add_nodes)
+            add_nodes_callback=self.repo._packs._new_pack.text_index.add_nodes)
         self.repo._text_knit._index._graph_index = file_id_index
         self.repo._text_knit._index._add_callback = file_id_index.add_nodes
         return self.repo._text_knit.add_lines_with_ghosts(
@@ -422,15 +437,12 @@
             self.repo._inv_pack_map = None
         return inv_index
 
-    def flush_text_index(self, new_name):
+    def flush_text_index(self, new_name, text_index_length):
         """Write the index out to new_name."""
         # write a revision index (might be empty)
         new_index_name = self._new_pack.text_index_name(new_name)
-        text_index_length = self._index_transport.put_file(
-            new_index_name, self.repo._text_write_index.finish())
         txt_index = GraphIndex(self._index_transport, new_index_name,
             text_index_length)
-        self.repo._text_write_index = None
         self.repo.weave_store._setup_knit(False)
         if self.repo._text_all_indices is not None:
             # text 'knits' have been used, replace the mutated memory index
@@ -440,7 +452,7 @@
             # remove the write buffering index. XXX: API break
             # - clearly we need a remove_index call too.
             del self.repo._text_all_indices._indices[1]
-        return txt_index, text_index_length
+        return txt_index
 
     def create_pack_from_packs(self, packs, suffix, revision_ids=None):
         """Create a new pack by reading data from other packs.
@@ -501,7 +513,6 @@
         writer = pack.ContainerWriter(write_data)
         writer.begin()
         # open new indices
-        text_index = InMemoryGraphIndex(reference_lists=2, key_elements=2)
         signature_index = InMemoryGraphIndex(reference_lists=0)
         # select revisions
         if revision_ids:
@@ -572,11 +583,11 @@
                     a_missing_key[0])
         # copy text keys and adjust values
         list(self._copy_nodes_graph(text_nodes, text_index_map, writer,
-            text_index))
+            new_pack.text_index))
         if 'fetch' in debug.debug_flags:
             mutter('%s: create_pack: file texts copied: %s%s %d items t+%6.3fs',
                 time.ctime(), self._upload_transport.base, random_name,
-                text_index.key_count(),
+                new_pack.text_index.key_count(),
                 time.time() - new_pack.start_time)
         # select signature keys
         signature_filter = revision_keys # same keyspace
@@ -601,7 +612,7 @@
         # if nothing has been written, discard the new pack.
         if 0 == sum((new_pack.get_revision_count(),
             new_pack.inventory_index.key_count(),
-            text_index.key_count(),
+            new_pack.text_index.key_count(),
             signature_index.key_count(),
             )):
             self._upload_transport.delete(random_name)
@@ -611,15 +622,8 @@
         new_pack.finish()
         revision_index_length = new_pack.index_sizes[0]
         inventory_index_length = new_pack.index_sizes[1]
+        text_index_length = new_pack.index_sizes[2]
 
-        text_index_name = new_pack.text_index_name(new_name)
-        text_index_length = index_transport.put_file(text_index_name,
-            text_index.finish())
-        if 'fetch' in debug.debug_flags:
-            # XXX: size might be interesting?
-            mutter('%s: create_pack: wrote file texts index: %s%s t+%6.3fs',
-                time.ctime(), self._upload_transport.base, random_name,
-                time.time() - new_pack.start_time)
         signature_index_name = new_pack.signature_index_name(new_name)
         signature_index_length = index_transport.put_file(signature_index_name,
             signature_index.finish())
@@ -636,7 +640,7 @@
         write_stream.close()
         self._upload_transport.rename(random_name, '../packs/' + new_name + '.pack')
         result = ExistingPack(self._upload_transport.clone('../packs/'), new_name,
-            new_pack.revision_index, new_pack.inventory_index, text_index,
+            new_pack.revision_index, new_pack.inventory_index, new_pack.text_index,
             signature_index)
         if 'fetch' in debug.debug_flags:
             # XXX: size might be interesting?
@@ -996,8 +1000,6 @@
         self.repo._signature_write_index = None
         self.repo._signature_all_indices = None
         self.repo._signature_knit_access = None
-        # remove any accumlating index of text data
-        self.repo._text_write_index = None
         # no access object.
         self.repo._text_knit_access = None
         # no write-knit
@@ -1137,7 +1139,8 @@
             self._open_pack_writer.end()
             new_name = self._new_pack._hash.hexdigest()
             self._new_pack.finish()
-            txt_index, text_index_length = self.flush_text_index(new_name)
+            txt_index = \
+                self.flush_text_index(new_name, self._new_pack.index_sizes[2])
             inv_index = \
                 self.flush_inventory_index(new_name, self._new_pack.index_sizes[1])
             rev_index, \
@@ -1156,7 +1159,7 @@
             #   its a deliberate attack or data corruption has
             #   occuring during the write of that file.
             self.allocate(new_name, self._new_pack.index_sizes[0],
-                self._new_pack.index_sizes[1], text_index_length,
+                self._new_pack.index_sizes[1], self._new_pack.index_sizes[2],
                 signature_index_length)
             self.repo._open_pack_tuple = None
             self._new_pack = None
@@ -1313,9 +1316,8 @@
         self._transport = weavestore._transport
 
     def data_inserted(self):
-        # XXX: Should we define __len__ for indices?
-        if (getattr(self.repo, '_text_write_index', None) and
-            self.repo._text_write_index.key_count()):
+        if (self.repo._packs._new_pack is not None and
+            self.repo._packs._new_pack.text_index.key_count()):
             return True
 
     def _ensure_all_index(self, for_write=None):
@@ -1326,7 +1328,7 @@
         self.repo._text_pack_map = pack_map
         if for_write or self.repo.is_in_write_group():
             # allow writing: queue writes to a new index
-            indices.insert(0, self.repo._text_write_index)
+            indices.insert(0, self.repo._packs._new_pack.text_index)
         self._setup_knit(self.repo.is_in_write_group())
         self.repo._text_all_indices = CombinedGraphIndex(indices)
 
@@ -1337,8 +1339,8 @@
         """
         self._ensure_all_index()
         if force_write or self.repo.is_in_write_group():
-            add_callback = self.repo._text_write_index.add_nodes
-            self.repo._text_pack_map[self.repo._text_write_index] = self.repo._open_pack_tuple
+            add_callback = self.repo._packs._new_pack.text_index.add_nodes
+            self.repo._text_pack_map[self.repo._packs._new_pack.text_index] = self.repo._open_pack_tuple
         else:
             add_callback = None # no data-adding permitted.
 
@@ -1365,9 +1367,6 @@
         return iter(ids)
 
     def setup(self):
-        # setup in-memory indices to accumulate data.
-        self.repo._text_write_index = InMemoryGraphIndex(reference_lists=2,
-            key_elements=2)
         # we require that text 'knits' be accessed from within the write 
         # group to be able to be written to, simply because it makes this
         # code cleaner - we don't need to track all 'open' knits and 
@@ -1378,7 +1377,7 @@
     
     def _setup_knit(self, for_write):
         if for_write:
-            writer = (self.repo._packs._open_pack_writer, self.repo._text_write_index)
+            writer = (self.repo._packs._open_pack_writer, self.repo._packs._new_pack.text_index)
         else:
             writer = None
         self.repo._text_knit_access = _PackAccess(
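
For readers following the refactoring: the three near-identical "serialise an
index, record its size, log it" blocks in NewPack.finish() collapse into the
single _write_index helper shown above, parameterised by the index object, its
slot in index_sizes, a name-getter and a human-readable label. Below is a
minimal, runnable sketch of that shape in plain Python; FakeIndex,
FakeTransport, PackSketch and the .rix/.iix/.tix suffixes are hypothetical
stand-ins for illustration, not bzrlib APIs.

import time
from io import BytesIO


class FakeIndex(object):
    """Stand-in for an in-memory index; only finish() matters here."""

    def __init__(self, payload):
        self._payload = payload

    def finish(self):
        # The real index returns a file-like object of serialised rows.
        return BytesIO(self._payload)


class FakeTransport(object):
    """Stand-in for a bzrlib transport; put_file returns the bytes written."""

    def put_file(self, name, a_file):
        return len(a_file.read())


class PackSketch(object):

    def __init__(self, transport):
        self.index_transport = transport
        self.index_sizes = [None, None, None, None]
        self.start_time = time.time()

    def revision_index_name(self, new_name):
        return new_name + '.rix'

    def inventory_index_name(self, new_name):
        return new_name + '.iix'

    def text_index_name(self, new_name):
        return new_name + '.tix'

    def _write_index(self, new_name, index, index_offset, name_getter, label):
        """Serialise one index, recording its length at index_offset."""
        index_name = name_getter(new_name)
        self.index_sizes[index_offset] = self.index_transport.put_file(
            index_name, index.finish())
        print('wrote %s index: %s t+%6.3fs'
              % (label, index_name, time.time() - self.start_time))

    def finish(self, new_name, revision_index, inventory_index, text_index):
        self._write_index(new_name, revision_index, 0,
            self.revision_index_name, 'revision')
        self._write_index(new_name, inventory_index, 1,
            self.inventory_index_name, 'inventory')
        self._write_index(new_name, text_index, 2,
            self.text_index_name, 'file texts')


pack = PackSketch(FakeTransport())
pack.finish('0123abcd', FakeIndex(b'rev'), FakeIndex(b'inv'), FakeIndex(b'txt'))
print(pack.index_sizes)   # -> [3, 3, 3, None]

Because NewPack.finish() now records all three lengths in index_sizes, the
hand-rolled text-index write in create_pack_from_packs() goes away and the
commit phase simply reads text_index_length back from new_pack.index_sizes[2].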


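The other half of the change is that the per-pack text index now lives on
NewPack itself, created as InMemoryGraphIndex(reference_lists=2,
key_elements=2): keys are (file_id, revision_id) two-tuples and each node
carries two reference lists, the per-file graph parents and the compression
(delta basis) parents, per the comment added next to it in the diff. The
snippet below is a bzrlib-free illustration of that node shape only; the
placeholder value string and the ordering of the two reference lists are
assumptions for illustration, not a statement of the on-disk format.

def make_text_node(file_id, revision_id, parents, compression_parent):
    """Build an illustrative (key, value, references) node for the text index."""
    key = (file_id, revision_id)                      # key_elements=2
    per_file_graph = [(file_id, p) for p in parents]  # one reference list
    compression = ([] if compression_parent is None   # the other reference list
                   else [(file_id, compression_parent)])
    # In the real index the value encodes where the record lives in the .pack
    # file; a placeholder stands in for it here.
    return (key, '0 100', (per_file_graph, compression))

print(make_text_node('file-abc', 'rev-2', ['rev-1'], 'rev-1'))
# (('file-abc', 'rev-2'), '0 100',
#  ([('file-abc', 'rev-1')], [('file-abc', 'rev-1')]))

With the index owned by NewPack, flush_text_index() no longer serialises it:
the length arrives via the new text_index_length parameter (taken from
new_pack.index_sizes[2] at the call site), and the repository-level
_text_write_index attribute disappears entirely.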
