Rev 58: Support for split-inventory branches in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
Robert Collins
robertc at robertcollins.net
Sun Nov 16 23:01:06 GMT 2008
At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
------------------------------------------------------------
revno: 58
revision-id: robertc at robertcollins.net-20081116230104-6zytzth3br38bg63
parent: robertc at robertcollins.net-20081107001411-7d10mputo0on9qk7
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Mon 2008-11-17 10:01:04 +1100
message:
Support for split-inventory branches.
modified:
NEWS news-20080608052041-z5bahsl8kwl0uf4x-2
index.py index.py-20080608055509-hnimeek7q8tctkqf-2
=== modified file 'NEWS'
--- a/NEWS 2008-11-07 00:14:11 +0000
+++ b/NEWS 2008-11-16 23:01:04 +0000
@@ -27,6 +27,9 @@
* Compatibility with python 2.6 (as long as bzrlib is also compatible.)
(Matt Nordhoff, Robert Collins)
+ * Compatibility with split-inventory repositories (requires a bzrlib that
+ supports them). (Robert Collins)
+
BUGFIXES:
* Bug 293906 caused by changes in bzrlib has been fixed. This bug caused
=== modified file 'index.py'
--- a/index.py 2008-11-07 00:14:11 +0000
+++ b/index.py 2008-11-16 23:01:04 +0000
@@ -43,6 +43,32 @@
from bzrlib.plugins.search.transport import FileView
from bzrlib.multiparent import NewText
from bzrlib.revision import NULL_REVISION
+xml_serializers = []
+try:
+ from bzrlib.xml4 import _Serializer_v4
+ xml_serializers.append(_Serializer_v4)
+except ImportError:
+ pass
+try:
+ from bzrlib.xml5 import Serializer_v5
+ xml_serializers.append(Serializer_v5)
+except ImportError:
+ pass
+try:
+ from bzrlib.xml6 import Serializer_v6
+ xml_serializers.append(Serializer_v6)
+except ImportError:
+ pass
+try:
+ from bzrlib.xml7 import Serializer_v7
+ xml_serializers.append(Serializer_v7)
+except ImportError:
+ pass
+try:
+ from bzrlib.xml8 import Serializer_v8
+ xml_serializers.append(Serializer_v8)
+except ImportError:
+ pass
from bzrlib.transport import get_transport
from bzrlib.tsort import topo_sort
@@ -311,7 +337,7 @@
for pos, revid in enumerate(order):
order_dict[revid] = pos
# 5000 uses 1GB on a mysql tree.
- group_size = 2500
+ group_size = 50
groups = len(order) / group_size + 1
for offset in range(groups):
outer_bar.update("Indexing...", offset, groups)
@@ -662,26 +688,26 @@
for offset in range(groups):
bar.update("Extract revision paths", offset, groups)
inventory_group = order[offset * group_size:(offset + 1) * group_size]
- group_keys = [(revid,) for revid in inventory_group]
- stream = repository.inventories.get_record_stream(
- group_keys, 'unordered', True)
serializer = repository._serializer
- # For VersionedFiles:
- # for xml in repository._iter_inventory_xmls(inventory_group):
- # pass
- for record in stream:
- bytes = record.get_bytes_as('fulltext')
- revision_id = record.key[-1]
- path_dict = paths_from_ids(bytes, serializer,
- revision_ids[revision_id])
- for file_id, path in path_dict.iteritems():
- terms[(file_id, revision_id)] = [('p', '', path)]
- # Public api way - 5+ times slower:
- # for inventory in repository.iter_inventories(inventory_group):
- # # revision_id = inventory.revision_id
- # for file_id in revision_ids[revision_id]:
- # path = inventory.id2path(file_id)
- # terms[(file_id, revision_id)] = [('p', '', path)]
+ if type(serializer) in xml_serializers:
+ # Fast path for flat-file serializers.
+ group_keys = [(revid,) for revid in inventory_group]
+ stream = repository.inventories.get_record_stream(
+ group_keys, 'unordered', True)
+ for record in stream:
+ bytes = record.get_bytes_as('fulltext')
+ revision_id = record.key[-1]
+ path_dict = paths_from_ids(bytes, serializer,
+ revision_ids[revision_id])
+ for file_id, path in path_dict.iteritems():
+ terms[(file_id, revision_id)] = [('p', '', path)]
+ else:
+ # Public api way - 5+ times slower on xml inventories
+ for inventory in repository.iter_inventories(inventory_group):
+ revision_id = inventory.revision_id
+ for file_id in revision_ids[revision_id]:
+ path = inventory.id2path(file_id)
+ terms[(file_id, revision_id)] = [('p', '', path)]
finally:
bar.finished()
return terms.iteritems()
More information about the bazaar-commits mailing list