Rev 3764: Possible fix for bug #269456. in http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/lighter_iter_files_bytes
John Arbash Meinel
john at arbash-meinel.com
Fri Oct 3 17:14:45 BST 2008
At http://bzr.arbash-meinel.com/branches/bzr/1.8-dev/lighter_iter_files_bytes
------------------------------------------------------------
revno: 3764
revision-id: john at arbash-meinel.com-20081003161439-h23zdckp4z78wh3r
parent: pqm at pqm.ubuntu.com-20081002172844-d6df1l8dzpsqzyup
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: lighter_iter_files_bytes
timestamp: Fri 2008-10-03 11:14:39 -0500
message:
Possible fix for bug #269456.
During 'get_record_stream()', only unpack one file's content map
at a time, so memory use is bounded by a single file's texts rather
than the whole tree. Also, free the lines cache as texts are consumed.
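
In outline, the change swaps a single whole-tree extraction for a
sequence of per-file ones. A minimal sketch of the pattern in plain
Python (names are illustrative, not bzrlib API; extract_group stands
in for _get_content_maps()):

    def split_by_prefix(keys):
        # Group key tuples by their first element (the file-id prefix);
        # length-1 keys have no prefix and are grouped under ''.
        split = {}
        for key in keys:
            prefix = '' if len(key) == 1 else key[0]
            split.setdefault(prefix, []).append(key)
        return split

    def stream_texts(keys, extract_group):
        # Yield (key, text) pairs one prefix group at a time, so only
        # one file's content map is alive at any moment.
        for prefix, group in sorted(split_by_prefix(keys).items()):
            text_map = extract_group(group)
            for key in group:
                lines = text_map.pop(key)   # free the cached lines early
                yield key, ''.join(lines)

The diff below does the same inside get_record_stream(), splitting both
the locally-present keys and the fallback ('non-local') keys by prefix.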
-------------- next part --------------
=== modified file 'NEWS'
--- a/NEWS 2008-10-02 17:28:44 +0000
+++ b/NEWS 2008-10-03 16:14:39 +0000
@@ -83,6 +83,10 @@
       repository now preserves the repository format.
       (Andrew Bennetts, #269214)
 
+    * ``bzr co`` uses less memory. It used to unpack the entire working
+      tree into memory before writing it to disk. This was slightly
+      faster, but consumed far more memory. (John Arbash Meinel, #269456)
+
     * ``bzr log`` now accepts a ``--change`` option.
       (Vincent Ladeuil, #248427)
=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py 2008-10-01 05:40:45 +0000
+++ b/bzrlib/knit.py 2008-10-03 16:14:39 +0000
@@ -1124,6 +1124,26 @@
             record_map[key] = record, record_details, digest, next
         return record_map
 
+    def _split_by_prefix(self, keys):
+        """For the given keys, split them up based on their prefix.
+
+        To keep memory pressure somewhat under control, split the
+        requests back into per-file-id requests; otherwise "bzr co"
+        extracts the full tree into memory before writing it to disk.
+        This should be revisited if _get_content_maps() can ever cross
+        file-id boundaries.
+
+        :param keys: An iterable of key tuples
+        :return: A dict of {prefix: [key_list]}
+        """
+        split_by_prefix = {}
+        for key in keys:
+            if len(key) == 1:
+                split_by_prefix.setdefault('', []).append(key)
+            else:
+                split_by_prefix.setdefault(key[0], []).append(key)
+        return split_by_prefix
+
     def get_record_stream(self, keys, ordering, include_delta_closure):
         """Get a stream of records for keys.
@@ -1223,11 +1243,18 @@
         if include_delta_closure:
             # XXX: get_content_maps performs its own index queries; allow state
             # to be passed in.
-            text_map, _ = self._get_content_maps(present_keys,
-                needed_from_fallback - absent_keys)
-            for key in present_keys:
-                yield FulltextContentFactory(key, global_map[key], None,
-                    ''.join(text_map[key]))
+            non_local_keys = needed_from_fallback - absent_keys
+            prefix_split_keys = self._split_by_prefix(present_keys)
+            prefix_split_non_local_keys = self._split_by_prefix(non_local_keys)
+            for prefix, keys in prefix_split_keys.iteritems():
+                non_local = prefix_split_non_local_keys.get(prefix, [])
+                non_local = set(non_local)
+                text_map, _ = self._get_content_maps(keys, non_local)
+                for key in keys:
+                    lines = text_map.pop(key)
+                    text = ''.join(lines)
+                    yield FulltextContentFactory(key, global_map[key], None,
+                        text)
         else:
             for source, keys in source_keys:
                 if source is parent_maps[0]:
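
For reference, a hypothetical example of the grouping _split_by_prefix()
produces (the key tuples are invented for illustration):

    keys = [('file-a', 'rev-1'), ('file-a', 'rev-2'),
            ('file-b', 'rev-1'), ('rev-3',)]
    # _split_by_prefix(keys) returns:
    # {'file-a': [('file-a', 'rev-1'), ('file-a', 'rev-2')],
    #  'file-b': [('file-b', 'rev-1')],
    #  '': [('rev-3',)]}

Each group is then passed to _get_content_maps() on its own, and the
text_map.pop(key) above releases each text's lines as soon as its
fulltext has been yielded.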