Rev 14: Store actual index lengths in the names list, allowing partial index reads, in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

Robert Collins robertc at robertcollins.net
Mon Jun 9 09:14:16 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

------------------------------------------------------------
revno: 14
revision-id: robertc at robertcollins.net-20080609081415-uu3fuq592p13j97k
parent: robertc at robertcollins.net-20080609074136-sm0apl2chj8r2alp
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Mon 2008-06-09 18:14:15 +1000
message:
  Store actual index lengths in the names list, allowing partial index reads.
modified:
  index.py                       index.py-20080608055509-hnimeek7q8tctkqf-2
=== modified file 'index.py'
--- a/index.py	2008-06-09 07:41:36 +0000
+++ b/index.py	2008-06-09 08:14:15 +0000
@@ -219,19 +219,32 @@
     def _add_index(self, index):
         """Add a new component index to the list of indices."""
         # The index name is the md5sum of the revision index serialised form.
-        rev_index_bytes = index.revision_index.finish().read()
-        index_name = md5.new(rev_index_bytes).hexdigest()
+        index_bytes = index.revision_index.finish().read()
+        index_name = md5.new(index_bytes).hexdigest()
         if index_name in self._current_names:
             raise Exception("md5 collision! rad! %s" % index_name)
         # Upload preparatory to renaming into place.
         # write to disc.
-        index_value = "%d" % len(rev_index_bytes)
         self._upload_transport.put_bytes_non_atomic(index_name + ".rix",
-            rev_index_bytes)
-        doc_length = self._upload_transport.put_file_non_atomic(
-            index_name + '.dix', index.document_index.finish())
-        term_length = self._upload_transport.put_file_non_atomic(
-            index_name + '.tix', index.term_index.finish())
+            index_bytes)
+        rev_length = len(index_bytes)
+        # XXX: We should be able to do:
+        # term_length = self._upload_transport.put_file_non_atomic(
+        #     index_name + '.tix', index.term_index.finish())
+        # doc_length = self._upload_transport.put_file_non_atomic(
+        #     index_name + '.dix', index.document_index.finish())
+        # but, put_file_non_atomic is not interface tested to return the byte
+        # length.
+        index_bytes = index.term_index.finish().read()
+        term_length = len(index_bytes)
+        self._upload_transport.put_bytes_non_atomic(
+            index_name + '.tix', index_bytes)
+        index_bytes = index.document_index.finish().read()
+        doc_length = len(index_bytes)
+        self._upload_transport.put_bytes_non_atomic(
+            index_name + '.dix', index_bytes)
+        del index_bytes
+        index_value = "%d %d %d" % (rev_length, term_length, doc_length)
         # The indices are uploaded, we only need to rename to activate.
         self._refresh_indices()
         if index_name in self._current_names:
@@ -292,11 +305,15 @@
                 # XXX perhaps cross-check the size?
         for name in added_names:
             # TODO: byte length of the indices here.
-            rev_index = GraphIndex(self._indices_transport, name + '.rix', None)
-            term_index = GraphIndex(self._indices_transport, name + '.tix', None)
-            doc_index = GraphIndex(self._indices_transport, name + '.dix', None)
+            value = new_names[name][0]
+            lengths = value.split(' ')
+            rev_index = GraphIndex(self._indices_transport, name + '.rix',
+                int(lengths[0]))
+            term_index = GraphIndex(self._indices_transport, name + '.tix',
+                int(lengths[1]))
+            doc_index = GraphIndex(self._indices_transport, name + '.dix',
+                int(lengths[2]))
             component = ComponentIndex(rev_index, term_index, doc_index)
-            value = new_names[name][0]
             self._add_index_to_memory(name, value, component)
         for name in deleted_names:
             self._remove_component_from_memory(name)




More information about the bazaar-commits mailing list