Rev 64: Get format 2 using btree indices (but suggestion will fail at this point). in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

Robert Collins robertc at robertcollins.net
Tue Dec 2 21:06:34 GMT 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

------------------------------------------------------------
revno: 64
revision-id: robertc at robertcollins.net-20081202210630-zn6zbcm7lfkqi61x
parent: robertc at robertcollins.net-20081202205604-307gpsaw7whse1b2
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Wed 2008-12-03 08:06:30 +1100
message:
  Get format 2 using btree indices (but suggestion will fail at this point).
modified:
  index.py                       index.py-20080608055509-hnimeek7q8tctkqf-2
  tests/test_index.py            test_index.py-20080608055509-hnimeek7q8tctkqf-4
=== modified file 'index.py'
--- a/index.py	2008-12-02 20:56:04 +0000
+++ b/index.py	2008-12-02 21:06:30 +0000
@@ -71,11 +71,8 @@
 
 _FORMAT_1 = 'bzr-search search folder 1\n'
 _FORMAT_2 = 'bzr-search search folder 2\n'
-_FORMATS = {
-    # format: index builder, index reader, index deletes
-    _FORMAT_1:(InMemoryGraphIndex, GraphIndex, False),
-    _FORMAT_2:(BTreeBuilder, BTreeGraphIndex, True)
-    }
+# _FORMATS definitions are at the end of the module, so that they can use
+# index subclasses.
 _tokeniser_re = None
 
 
@@ -407,7 +404,7 @@
                 raise Exception(
                     "md5 collision with concurrent writer! rad! %s" % index_name)
             # Serialise the index list
-            new_names = InMemoryGraphIndex(0, 1)
+            new_names = self._format[0](0, 1)
             new_names.add_node((index_name,), index_value, ())
             for name, (value, index) in self._current_names.items():
                 new_names.add_node((name,), value, ())
@@ -456,7 +453,8 @@
         # B) we don't want to regress infinitely; a flag to _add_index would do
         # this.
         # C) We need to remove components too.
-        combiner = ComponentCombiner(components, self._upload_transport)
+        combiner = ComponentCombiner(self._format, components,
+            self._upload_transport)
         self._add_index(combiner, to_remove=components, allow_pack=False)
         
     def _add_index_to_memory(self, name, value, index):
@@ -477,7 +475,7 @@
         :param to_remove: An optional list of components to remove from memory
             even if they are still listed on disk.
         """
-        names = GraphIndex(self._transport, 'names', None)
+        names = self._format[1](self._transport, 'names', None)
         new_names = {}
         merged_names = {}
         deleted_names = set()
@@ -578,7 +576,7 @@
             filemap = {post_name:(posting_start, posting_stop)}
             view = FileView(self._indices_transport, component.name + '.pack',
                 filemap)
-            post_index = GraphIndex(view, post_name, posting_length)
+            post_index = self._format[1](view, post_name, posting_length)
             common_doc_keys = set([node[1] for node in
                 post_index.iter_all_entries()])
             # Now we whittle down the nodes we need - still going in sorted
@@ -637,7 +635,7 @@
         filemap = {post_name:(posting_start, posting_stop)}
         view = FileView(self._indices_transport,
             component.name + '.pack', filemap)
-        post_index = GraphIndex(view, post_name, posting_length)
+        post_index = self._format[1](view, post_name, posting_length)
         return set([node[1] for node in post_index.iter_entries(key_filter)])
 
     def suggest(self, termlist):
@@ -670,7 +668,7 @@
                     filemap = {post_name:(posting_start, posting_stop)}
                     view = FileView(self._indices_transport,
                         component.name + '.pack', filemap)
-                    post_index = GraphIndex(view, post_name, posting_length)
+                    post_index = self._format[1](view, post_name, posting_length)
                     common_doc_keys = set([node[1] for node in
                         post_index.iter_entries(common_doc_keys)])
                     if len(common_doc_keys):
@@ -907,18 +905,18 @@
             "documents": (lengths[4], lengths[4] + lengths[5]),
             "terms_2": (lengths[6], lengths[6] + lengths[7]),
             }
+        self._format = format
         view = FileView(transport, name + '.pack', filemap)
-        rev_index = GraphIndex(view, "revisions", lengths[1])
-        term_index = SuggestableGraphIndex(view, "terms", lengths[3])
-        term_2_index = GraphIndex(view, "terms_2", lengths[7])
-        doc_index = GraphIndex(view, "documents", lengths[5])
+        rev_index = self._format[1](view, "revisions", lengths[1])
+        term_index = self._format[1](view, "terms", lengths[3])
+        term_2_index = self._format[1](view, "terms_2", lengths[7])
+        doc_index = self._format[1](view, "documents", lengths[5])
         self.revision_index = rev_index
         self.term_index = term_index
         self.term_2_index = term_2_index
         self.document_index = doc_index
         self.name = name
         self.transport = transport
-        self.format = format
 
     def all_terms(self):
         """As per Index, but for a single component."""
@@ -936,7 +934,7 @@
             post_name = "term_list." + term_id
             filemap = {post_name:(posting_start, posting_stop)}
             view = FileView(self.transport, self.name + '.pack', filemap)
-            post_index = GraphIndex(view, post_name, posting_length)
+            post_index = self._format[1](view, post_name, posting_length)
             doc_ids = set([node[1] for node in
                 post_index.iter_all_entries()])
             posting_list = set(self._document_ids_to_keys(doc_ids))
@@ -1003,12 +1001,12 @@
     """Creates a component index."""
 
     def __init__(self, format):
-        self.document_index = InMemoryGraphIndex(0, 1)
+        self.document_index = format[0](0, 1)
         self._document_ids = {}
         self.terms = {}
-        self.revision_index = InMemoryGraphIndex(0, 1)
+        self.revision_index = format[0](0, 1)
         self.posting_lists = {}
-        self.format = format
+        self._format = format
 
     def add_term(self, term, posting_list):
         """Add a term to the index.
@@ -1082,11 +1080,11 @@
         # generate a new term index with the length of the serialised posting
         # lists.
         term_indices = {}
-        term_indices[1] = InMemoryGraphIndex(0, 1)
-        term_indices[2] = InMemoryGraphIndex(0, 2)
+        term_indices[1] = self._format[0](0, 1)
+        term_indices[2] = self._format[0](0, 2)
         for term, term_id in self.terms.iteritems():
             posting_list = self.posting_lists[term_id]
-            post_index = InMemoryGraphIndex(0, 1)
+            post_index = self._format[0](0, 1)
             for doc_id in posting_list:
                 post_index.add_node((doc_id,), "", ())
             posting_name = "term_list." + term_id
@@ -1119,9 +1117,10 @@
 class ComponentCombiner(ComponentCreator):
     """Combines components into a new single larger component."""
 
-    def __init__(self, components, transport):
+    def __init__(self, format, components, transport):
         """Create a combiner.
 
+        :param format: The format of component to create.
         :param components: An iterable of components.
         :param transport: A transport to upload the combined component to.
         :return: A tuple - the component name, the value for the names file,
@@ -1129,6 +1128,7 @@
         """
         self.components = list(components)
         self.transport = transport
+        self._format = format
     
     def _copy_documents(self):
         """Copy the document references from components to a new component.
@@ -1137,7 +1137,7 @@
         component's document ids to the output document ids.
         """
         self._document_ids = {}
-        self.document_index = InMemoryGraphIndex(0, 1)
+        self.document_index = self._format[0](0, 1)
         self.component_docids = {}
         for component in self.components:
             component_docs = {}
@@ -1161,8 +1161,8 @@
         and self.terms to determine what to copy from.
         It populates self.term_index as it progresses.
         """
-        term_indices = {1:InMemoryGraphIndex(0, 1),
-            2:InMemoryGraphIndex(0, 2)
+        term_indices = {1:self._format[0](0, 1),
+            2:self._format[0](0, 2)
             }
         for term, posting_lists in self.terms.iteritems():
             posting_list = set()
@@ -1176,11 +1176,11 @@
                 filemap = {post_name:(posting_start, posting_stop)}
                 view = FileView(component.transport,
                     component.name + '.pack', filemap)
-                post_index = GraphIndex(view, post_name, posting_length)
+                post_index = self._format[1](view, post_name, posting_length)
                 doc_mapping = self.component_docids[component]
                 for node in post_index.iter_all_entries():
                     posting_list.add(doc_mapping[node[1]])
-            post_index = InMemoryGraphIndex(0, 1)
+            post_index = self._format[0](0, 1)
             for doc_id in posting_list:
                 post_index.add_node((doc_id,), '', ())
             term_id = str(term_indices[1].key_count() +
@@ -1207,7 +1207,7 @@
         for component in self.components:
             for node in component.revision_index.iter_all_entries():
                 revisions.add(node[1])
-        revision_index = InMemoryGraphIndex(0, 1)
+        revision_index = self._format[0](0, 1)
         for revision in revisions:
             revision_index.add_node(revision, '', ())
         index_file = revision_index.finish()
@@ -1442,3 +1442,10 @@
     if _tokeniser_re.search(regex):
         return None
     return [(regex,)]
+
+
+_FORMATS = {
+    # format: index builder, index reader, index deletes
+    _FORMAT_1:(InMemoryGraphIndex, SuggestableGraphIndex, False),
+    _FORMAT_2:(BTreeBuilder, BTreeGraphIndex, True)
+    }

=== modified file 'tests/test_index.py'
--- a/tests/test_index.py	2008-12-02 20:56:04 +0000
+++ b/tests/test_index.py	2008-12-02 21:06:30 +0000
@@ -561,7 +561,7 @@
         name, value, elements = builder.upload_index(transport)
         component2 = index.ComponentIndex(self.format, name, value, transport)
         components.append(component2)
-        combiner = index.ComponentCombiner(components, transport)
+        combiner = index.ComponentCombiner(self.format, components, transport)
         name, value, elements = combiner.combine()
         combined = index.ComponentIndex(self.format, name, value, transport)
         terms = {}
@@ -589,7 +589,7 @@
         name, value, elements = builder.upload_index(transport)
         component2 = index.ComponentIndex(self.format, name, value, transport)
         components.append(component2)
-        combiner = index.ComponentCombiner(components, transport)
+        combiner = index.ComponentCombiner(self.format, components, transport)
         name, value, elements = combiner.combine()
         combined = index.ComponentIndex(self.format, name, value, transport)
         terms = {('file', 'revid'): set([('p', '', 'file path')])}




More information about the bazaar-commits mailing list