Rev 67: Fix/limit memory use in mpdiff generation for dealing with busy trees, in http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
Robert Collins
robertc at robertcollins.net
Wed Jan 21 04:43:08 GMT 2009
At http://people.ubuntu.com/~robertc/baz2.0/plugins/search/trunk
------------------------------------------------------------
revno: 67
revision-id: robertc at robertcollins.net-20090121044253-cm1ogklzp31qpkvj
parent: robertc at robertcollins.net-20081202223633-et3bqd5i8d3qnu94
committer: Robert Collins <robertc at robertcollins.net>
branch nick: trunk
timestamp: Wed 2009-01-21 15:42:53 +1100
message:
Fix/limit memory use in mpdiff generation for dealing with busy trees.
=== modified file 'index.py'
--- a/index.py 2008-12-02 22:36:33 +0000
+++ b/index.py 2009-01-21 04:42:53 +0000
@@ -775,18 +775,23 @@
for file_version in item[2]:
file_versions.add((item[1], file_version))
for file_id, file_keys in files.iteritems():
- for diff, key in zip(repository.texts.make_mpdiffs(file_keys),
- file_keys):
- document_key = ('f',) + key
- for hunk in diff.hunks:
- if type(hunk) == NewText:
- for line in hunk.lines:
- line_terms = _tokeniser_re.split(line)
- for term in line_terms:
- if not term:
- continue
- posting_list = terms.setdefault((term,), set())
- posting_list.add(document_key)
+ file_keys = list(file_keys)
+ group_size = 100
+ groups = len(file_keys) / group_size + 1
+ for offset in range(groups):
+ file_key_group = file_keys[offset * group_size:(offset + 1) * group_size]
+ for diff, key in zip(repository.texts.make_mpdiffs(file_key_group),
+ file_key_group):
+ document_key = ('f',) + key
+ for hunk in diff.hunks:
+ if type(hunk) == NewText:
+ for line in hunk.lines:
+ line_terms = _tokeniser_re.split(line)
+ for term in line_terms:
+ if not term:
+ continue
+ posting_list = terms.setdefault((term,), set())
+ posting_list.add(document_key)
return terms.items()
More information about the bazaar-commits mailing list