Rev 43: Partial support for bzr-svn branches - they are indexed without full texts for now, in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

Robert Collins robertc at robertcollins.net
Sun Jun 22 11:04:39 BST 2008


At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk

------------------------------------------------------------
revno: 43
revision-id: robertc at robertcollins.net-20080622100438-raipn9bo8wq9sezf
parent: robertc at robertcollins.net-20080622054515-k4qwckj2gfgoip8j
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Sun 2008-06-22 20:04:38 +1000
message:
  Partial support for bzr-svn branches - they are indexed without full texts for now.
modified:
  DESIGN                         design-20080608072426-vjoj110dtykfyb7g-1
  index.py                       index.py-20080608055509-hnimeek7q8tctkqf-2
=== modified file 'DESIGN'
--- a/DESIGN	2008-06-22 05:45:15 +0000
+++ b/DESIGN	2008-06-22 10:04:38 +0000
@@ -90,12 +90,14 @@
 There are many possible places to store indices. Ideally they get deleted when
 a branch is deleted, can be shared between branches, update automatically etc.
 For now, I'm going to store them in .bzr/bzr-search when the bzrdir is a
-MetaDir, and just refuse to index other branches. To index an svn branch, I
-would expect to use a look-aside table and store the index in e.g.
-~/.bazaar/search/<encoded_url>. Storing in bzr-search is a bit of an
-abstraction violation, but we have a split-out structure for a reason, and by
-prefixing it with bzr-search I leave 'search' available for a 'core' version of
-this feature.
+MetaDir, and just refuse to index other branches.  Storing in bzr-search is a
+bit of an abstraction violation, but we have a split-out structure for a
+reason, and by prefixing it with bzr-search I leave 'search' available for a
+'core' version of this feature.
+
+When indexing a bzr-svn branch (actual svn branch, not a branch created *from*
+svn), the index is stored in
+$bzrconfig/bzr-search/svn-lookaside/SVN_UUID/BRANCHPATH.
 
 Search engine to use
 ++++++++++++++++++++

=== modified file 'index.py'
--- a/index.py	2008-06-22 05:45:15 +0000
+++ b/index.py	2008-06-22 10:04:38 +0000
@@ -25,6 +25,7 @@
 from bzrlib import branch as _mod_branch
 from bzrlib import ui
 from bzrlib.bzrdir import BzrDirMeta1
+import bzrlib.config
 from bzrlib.errors import NotBranchError, NoSuchFile, UnknownFormatError
 from bzrlib.index import CombinedGraphIndex, GraphIndex, InMemoryGraphIndex
 from bzrlib.lockdir import LockDir
@@ -35,6 +36,7 @@
 from bzrlib.plugins.search.transport import FileView
 from bzrlib.multiparent import NewText
 from bzrlib.revision import NULL_REVISION
+from bzrlib.transport import get_transport
 from bzrlib.tsort import topo_sort
 
 
@@ -53,12 +55,29 @@
 
 def init_index(branch):
     """Initialise an index on branch."""
-    if not isinstance(branch.bzrdir, BzrDirMeta1):
+    if isinstance(branch.bzrdir, BzrDirMeta1):
+        transport = branch.bzrdir.transport
+        transport.mkdir('bzr-search')
+        index_transport = transport.clone('bzr-search')
+    else:
         # We don't know how to handle this format.
-        raise errors.CannotIndex(branch)
-    transport = branch.bzrdir.transport
-    transport.mkdir('bzr-search')
-    index_transport = transport.clone('bzr-search')
+        try:
+            from bzrlib.plugins.svn.branch import SvnBranch
+        except ImportError:
+            SvnBranch = None
+        if type(branch) != SvnBranch:
+            raise errors.CannotIndex(branch)
+        # We can't write to the 'bzrdir' as it is virtual
+        uuid = branch.repository.uuid
+        branch_path = branch.get_branch_path()
+        config = bzrlib.config.config_dir()
+        transport = get_transport(bzrlib.config.config_dir())
+        path = 'bzr-search/svn-lookaside/' + uuid + '/' + branch_path
+        paths = path.split('/')
+        for path in paths:
+            transport = transport.clone(path)
+            transport.ensure_base()
+        index_transport = transport
     lockdir = LockDir(index_transport, 'names-lock')
     lockdir.create()
     lockdir.lock_write()
@@ -127,7 +146,23 @@
     :param branch: The branch to get an index for.
     :raises: NoSearchIndex if no index can be located.
     """
-    return Index(branch.bzrdir.transport.clone('bzr-search'), branch)
+    try:
+        from bzrlib.plugins.svn.branch import SvnBranch
+    except ImportError:
+        SvnBranch = None
+    if type(branch) == SvnBranch:
+        # We can't write to the 'bzrdir' as it is virtual
+        uuid = branch.repository.uuid
+        branch_path = branch.get_branch_path()
+        config = bzrlib.config.config_dir()
+        transport = get_transport(bzrlib.config.config_dir())
+        path = 'bzr-search/svn-lookaside/' + uuid + '/' + branch_path
+        transport = transport.clone(path)
+        commits_only = True
+    else:
+        transport = branch.bzrdir.transport.clone('bzr-search')
+        commits_only = False
+    return Index(transport, branch, commits_only=commits_only)
 
 
 # XXX: This wants to be a PackCollection subclass with RepositoryPackCollection
@@ -135,11 +170,15 @@
 class Index(object):
     """A bzr content index."""
 
-    def __init__(self, index_transport, branch):
+    def __init__(self, index_transport, branch, commits_only=False):
         """Create an index stored at index_transport.
 
         :param index_transport: The path where the index data should be stored.
         :param branch: The branch this Index is indexing.
+        :param commits_only: If True, when indexing only attempt to index
+            commits, not file texts. Useful for foreign formats (often commits
+            are the most mature part of such plugins), or for some projects
+            where file contents may not be useful to index.
         """
         self._transport = index_transport
         try:
@@ -160,6 +199,7 @@
         # CombinedGraphIndex for grouping the term indices or doc indices.
         self._lock = LockDir(index_transport, 'names-lock')
         self._branch = branch
+        self._commits_only = commits_only
 
     def _add_terms(self, index, terms):
         """Add a set of term posting lists to a in progress index.
@@ -276,13 +316,14 @@
             steps = ui.ui_factory.nested_progress_bar()
             try:
                 steps.update("Indexing texts", 0, 4)
-                terms = self._terms_for_texts(locked_branch.repository,
-                    revision_group)
-                self._add_terms(builder, terms)
-                steps.update("Indexing paths", 1, 4)
-                terms = self._terms_for_file_terms(locked_branch.repository,
-                    terms, order_dict)
-                self._add_terms(builder, terms)
+                if not self._commits_only:
+                    terms = self._terms_for_texts(locked_branch.repository,
+                        revision_group)
+                    self._add_terms(builder, terms)
+                    steps.update("Indexing paths", 1, 4)
+                    terms = self._terms_for_file_terms(
+                        locked_branch.repository, terms, order_dict)
+                    self._add_terms(builder, terms)
                 steps.update("Indexing commits", 2, 4)
                 terms = self._terms_for_revs(locked_branch.repository,
                     revision_group)




More information about the bazaar-commits mailing list