Rev 18: Refactoring: make ComponentIndexBuilder responsible for returning a component index and doing the upload, rather than being an index itself. in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

Robert Collins robertc at robertcollins.net
Wed Jun 11 04:59:41 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

------------------------------------------------------------
revno: 18
revision-id: robertc at robertcollins.net-20080611035938-hw90ua35seca1x1y
parent: robertc at robertcollins.net-20080610070818-tjhe1vvf007ayff0
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Wed 2008-06-11 13:59:38 +1000
message:
  Refactoring: make ComponentIndexBuilder responsible for returning a component index and doing the upload, rather than being an index itself.
modified:
  index.py                       index.py-20080608055509-hnimeek7q8tctkqf-2
=== modified file 'index.py'
--- a/index.py	2008-06-09 12:34:11 +0000
+++ b/index.py	2008-06-11 03:59:38 +0000
@@ -210,43 +210,21 @@
         # here: index inventory/paths
         # here: index revisions
         _ensure_regexes()
-        index = ComponentIndexBuilder()
+        builder = ComponentIndexBuilder()
         terms = self._terms_for_revs(locked_branch.repository,
             revisions_to_index)
-        self._add_terms(index, terms)
+        self._add_terms(builder, terms)
         for rev_id in revisions_to_index:
-            index.add_revision(rev_id)
-        self._add_index(index)
+            builder.add_revision(rev_id)
+        self._add_index(builder)
 
-    def _add_index(self, index):
+    def _add_index(self, builder):
         """Add a new component index to the list of indices."""
         # The index name is the md5sum of the revision index serialised form.
-        index_bytes = index.revision_index.finish().read()
-        index_name = md5.new(index_bytes).hexdigest()
+        index_name, index_value, elements = builder.upload_index(
+            self._upload_transport)
         if index_name in self._current_names:
             raise Exception("md5 collision! rad! %s" % index_name)
-        # Upload preparatory to renaming into place.
-        # write to disc.
-        self._upload_transport.put_bytes_non_atomic(index_name + ".rix",
-            index_bytes)
-        rev_length = len(index_bytes)
-        # XXX: We should be able to do:
-        # term_length = self._upload_transport.put_file_non_atomic(
-        #     index_name + '.tix', index.term_index.finish())
-        # doc_length = self._upload_transport.put_file_non_atomic(
-        #     index_name + '.dix', index.document_index.finish())
-        # but, put_file_non_atomic is not interface tested to return the byte
-        # length.
-        index_bytes = index.term_index.finish().read()
-        term_length = len(index_bytes)
-        self._upload_transport.put_bytes_non_atomic(
-            index_name + '.tix', index_bytes)
-        index_bytes = index.document_index.finish().read()
-        doc_length = len(index_bytes)
-        self._upload_transport.put_bytes_non_atomic(
-            index_name + '.dix', index_bytes)
-        del index_bytes
-        index_value = "%d %d %d" % (rev_length, term_length, doc_length)
         # The indices are uploaded, we only need to rename to activate.
         self._refresh_indices()
         if index_name in self._current_names:
@@ -261,10 +239,11 @@
                 new_names.add_node((name,), value, ())
             # Now, as the last step, rename the new index into place and update
             # the disk list of names.
-            for suffix in [".rix", ".dix", ".tix"]:
-                self._upload_transport.rename(index_name + suffix,
-                    '../indices/' + index_name + suffix)
+            for element in elements:
+                self._upload_transport.rename(element,
+                    '../indices/' + element)
             self._transport.put_file('names', new_names.finish())
+            index = builder.get_index()
             self._orig_names[index_name] = (index_value, index)
         finally:
             self._lock.unlock()
@@ -340,7 +319,8 @@
         found_documents = []
         # Use a set to remove duplicates
         term_keys = set([(term,) for term in termlist])
-        for term_index in self._term_indices:
+        for value, component in self._current_names.values():
+            term_index = component.term_index
             doc_references = {}
             for node in term_index.iter_entries(term_keys):
                 doc_references[node[1]] = node[2]
@@ -479,3 +459,44 @@
     def add_revision(self, revision_id):
         """List a revision as having been indexed by this index."""
         self.revision_index.add_node((revision_id,), '',  ())
+
+    def get_index(self):
+        """Returns the created index."""
+        return ComponentIndex(self.revision_index, self.term_index,
+            self.document_index)
+
+    def upload_index(self, upload_transport):
+        """Upload the index in preparation for insertion.
+
+        :param upload_transport: The transport to upload to.
+        :return: The index name, the value for the names list, and a list of
+            the filenames that comprise the index.
+        """
+        # Upload preparatory to renaming into place.
+        # write to disc.
+        index_bytes = self.revision_index.finish().read()
+        index_name = md5.new(index_bytes).hexdigest()
+        upload_transport.put_bytes_non_atomic(index_name + ".rix",
+            index_bytes)
+        rev_length = len(index_bytes)
+        # XXX: We should be able to do:
+        # term_length = upload_transport.put_file_non_atomic(
+        #     index_name + '.tix', self.term_index.finish())
+        # doc_length = upload_transport.put_file_non_atomic(
+        #     index_name + '.dix', self.document_index.finish())
+        # but, put_file_non_atomic is not interface tested to return the byte
+        # length.
+        index_bytes = self.term_index.finish().read()
+        term_length = len(index_bytes)
+        upload_transport.put_bytes_non_atomic(
+            index_name + '.tix', index_bytes)
+        index_bytes = self.document_index.finish().read()
+        doc_length = len(index_bytes)
+        upload_transport.put_bytes_non_atomic(
+            index_name + '.dix', index_bytes)
+        del index_bytes
+        index_value = "%d %d %d" % (rev_length, term_length, doc_length)
+        elements = []
+        for suffix in [".rix", ".dix", ".tix"]:
+            elements.append(index_name + suffix)
+        return index_name, index_value, elements




More information about the bazaar-commits mailing list