Rev 5536: Some interesting results from using cython: profile=True, in http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem
John Arbash Meinel
john at arbash-meinel.com
Wed Dec 1 19:18:20 GMT 2010
At http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem
------------------------------------------------------------
revno: 5536
revision-id: john at arbash-meinel.com-20101201191814-laegibyb63r913f3
parent: john at arbash-meinel.com-20101130203740-twiaq89vtwrvyq1p
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-gcb-peak-mem
timestamp: Wed 2010-12-01 13:18:14 -0600
message:
Some interesting results from using cython: profile=True,
Having 'next_with_dummy()' defined as 'except? NULL' was quite a bit of
overhead. My guess is that a NULL return is quite common, and PyErr_Occurred
is a little expensive. All the callers handle it gracefully (.next()
knows to handle falling off the end, and the limiting loop just treats it
as no more entries to fill in, which is valid).
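
A minimal toy sketch (not code from the branch; the struct and function
names here are made up for illustration) of why the declaration matters:
with 'except? NULL', Cython emits a PyErr_Occurred() check at every call
site whenever NULL comes back, to tell "not found" apart from "exception
raised"; dropping the clause makes NULL an ordinary sentinel value.

    # sketch.pyx -- toy illustration only
    cdef struct rabin_entry:
        unsigned int rabin_val

    # Old style: a NULL return *might* mean an exception, so the caller
    # also has to call PyErr_Occurred() after every NULL result.
    cdef rabin_entry* find_old(rabin_entry* table, Py_ssize_t n,
                               unsigned int val) except? NULL:
        cdef Py_ssize_t i
        for i in range(n):
            if table[i].rabin_val == val:
                return &table[i]
        return NULL

    # New style: no except clause, so NULL just means 'not found' and no
    # exception check is generated in the calling code.
    cdef rabin_entry* find_new(rabin_entry* table, Py_ssize_t n,
                               unsigned int val):  # noexcept
        cdef Py_ssize_t i
        for i in range(n):
            if table[i].rabin_val == val:
                return &table[i]
        return NULL
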
This is now within spitting distance of the existing code, and much faster
with lower memory against large file content, but I still need to think
closely about some tweaks for the match rules.
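
For reference, the profiling hooks involved are just Cython directives, as
seen in the diff below. A rough sketch of how numbers like these can be
gathered (the module and function names here are placeholders, not the
branch's API):

    # cython: profile=True
    # Module-level directive near the top of the .pyx file; it makes this
    # module's functions visible to Python's profilers.

    cimport cython

    @cython.profile(False)        # opt a hot helper back out of profiling
    cdef int hot_helper(int x):
        return (x * 31) ^ (x >> 3)

    def run_workload():
        cdef int i
        cdef long long total = 0
        for i in range(1000000):
            total += hot_helper(i)
        return total

and then, from plain Python:

    import cProfile, pyximport; pyximport.install()
    import sketch_profile         # hypothetical module name for the sketch
    cProfile.run('sketch_profile.run_workload()', sort='cumulative')

The committed file flips the module directive to profile=False, so the
instrumentation is off by default.
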
-------------- next part --------------
=== modified file 'bzrlib/_delta_index_pyx.pyx'
--- a/bzrlib/_delta_index_pyx.pyx 2010-11-30 20:37:40 +0000
+++ b/bzrlib/_delta_index_pyx.pyx 2010-12-01 19:18:14 +0000
@@ -1,3 +1,5 @@
+# cython: profile=False
+
# Copyright (C) 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
@@ -57,6 +59,8 @@
unsigned char *top) nogil
+cimport cython
+
from bzrlib import trace
cdef int MAX_HASH_BUCKET_ENTRIES
@@ -148,6 +152,7 @@
self._table = rabin_hash_map._table
self._table_mask = rabin_hash_map.table_mask
+ @cython.profile(False)
cdef start(self, unsigned int rabin_val):
self.rabin_val = rabin_val
self.entry = NULL
@@ -163,7 +168,8 @@
# because RABIN isn't a super-strong hash.
self._next_hash = rabin_val
- cdef rabin_entry* next_with_dummy(self) except? NULL:
+ @cython.profile(False)
+ cdef rabin_entry* next_with_dummy(self): # noexcept
cdef rabin_entry *slot
# This uses Quadratic Probing:
# http://en.wikipedia.org/wiki/Quadratic_probing
@@ -194,11 +200,9 @@
# We found an entry that matches exactly, return it
self.match_count += 1
return slot
- raise RuntimeError("We seem to have stepped too far while searching"
- " for rabin_val = %d, we took %d steps, hash %d"
- % (self.rabin_val, self._step,
- self._next_hash))
+ return NULL
+ @cython.profile(False)
cdef next(self):
cdef rabin_entry *slot
# Don't search forever, even if we have bogus entries in the table
@@ -383,11 +387,10 @@
table_size = MIN_TABLE_SIZE
while table_size < total_entries * 2 and table_size < 2**31:
table_size = table_size * 2
- if self._table != NULL:
- trace.mutter('resizing table %08x from %d to %d'
- ' for %d new_entries, %d total'
- % (id(self), self.table_mask + 1, table_size,
- new_entries, total_entries))
+ trace.mutter('resizing table %08x from %d to %d'
+ ' for %d new_entries, %d total'
+ % (id(self), self.table_mask + 1, table_size,
+ new_entries, total_entries))
n_bytes = sizeof(rabin_entry) * table_size
new_table = <rabin_entry*>PyMem_Malloc(n_bytes)
if new_table == NULL:
@@ -692,6 +695,7 @@
cdef Py_ssize_t size
cdef rabin_entry *last
cdef unsigned int hash_offset
+ cdef int stride
cdef RabinEntrySearch search
if not PyString_CheckExact(source):
@@ -702,6 +706,25 @@
# TODO: Test that empty strings don't screw things up
# Nothing to add
return
+ stride = RABIN_WINDOW
+ if size > 10*1024*1024:
+ # For objects > 10MB, we increase the stride, so that we don't
+ # waste peak memory. The idea is that if the content is large, then
+ # either we will have very little overlap, or the overlap chunks
+ # are going to be very big. We don't need to find every possible
+ # 20-byte match.
+ # under 10MB, stride = 16
+ stride = size / 655360
+ # TODO: Consider capping the maximum stride. Rationale: we will
+ # insert up to 127 bytes before finding an 'acceptable'
+ # match. After that, we finalize the insert. So say every 200
+ # bytes has a match in a different location. We would end up
+ # inserting 127 bytes, and then matching the remaining 73
+ # bytes.
+ trace.mutter('Encountered large content %.3fMB,'
+ ' using alternate stride: %d'
+ % ((size / (1024. * 1024.)), stride))
+ self._hash_map.reserve(size / stride)
# We walk backwards through the data, so that early matches are the
# first entries in the hash buckets.
start = (<const_data>PyString_AS_STRING(source))
@@ -719,7 +742,7 @@
ptr += RABIN_WINDOW
match_pre = ptr - start
match_tail = end - ptr
- global_offset += RABIN_WINDOW
+ global_offset += stride
if (last != NULL and val == last.rabin_val):
# We have a repeated entry. We only store the first of
# consecutive entries, so we just skip this one
@@ -728,6 +751,7 @@
last = self._hash_map.add(ptr, global_offset,
val, match_pre, match_tail,
search)
+ ptr += stride - RABIN_WINDOW
def _compute_offsets_from_delta(self, delta_source, start_offset):
cdef unsigned int global_offset
@@ -753,6 +777,8 @@
# TODO: Test that empty strings don't screw things up
# Nothing to add
return
+ # This is an upper bound of the number of entries we'll find
+ self._hash_map.reserve(len(delta_source) / RABIN_WINDOW)
start = (<const_data>PyString_AS_STRING(delta_source))
end = start + size
ptr = start
@@ -821,7 +847,6 @@
start_offset = self.total_source_bytes + extra_offset
new_info = SourceInfo(source, start_offset)
self.sources.append(new_info)
- self._hash_map.reserve(len(source) / RABIN_WINDOW)
self._compute_offsets(source, start_offset)
self.total_source_bytes += len(source) + extra_offset
global add_total_time
@@ -836,8 +861,6 @@
start_offset = self.total_source_bytes + extra_offset
new_info = SourceInfo(delta_source, start_offset)
self.sources.append(new_info)
- # This is an upper bound of the number of entries we'll find
- self._hash_map.reserve(len(delta_source) / RABIN_WINDOW)
self._compute_offsets_from_delta(delta_source, start_offset)
self.total_source_bytes += len(delta_source) + extra_offset
global add_delta_total_time
@@ -848,11 +871,11 @@
cdef _DeltaCreator creator
if not self.sources:
return None
- t = _timer()
+ # t = _timer()
creator = _DeltaCreator(self, content, max_delta_size, noisy)
val = creator.create()
- global make_delta_total_time
- make_delta_total_time += _timer() - t
+ # global make_delta_total_time
+ # make_delta_total_time += _timer() - t
return val
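
A back-of-the-envelope sketch (plain Python, not from the branch; it assumes
RABIN_WINDOW is 16) of what the large-content stride hunk in _compute_offsets
above works out to: once the source passes 10MB, the stride grows with the
size, so the number of reserved hash map entries stays roughly constant.

    RABIN_WINDOW = 16

    def stride_for(size):
        if size > 10 * 1024 * 1024:
            # size // 655360 is 16 right at the 10MB boundary and grows
            # linearly, so size // stride (the reserved entry count) stays
            # near 655360 regardless of how large the content gets.
            return size // 655360
        return RABIN_WINDOW

    for mb in (1, 10, 64, 256):
        size = mb * 1024 * 1024
        stride = stride_for(size)
        print('%4dMB: stride=%4d, ~%d entries' % (mb, stride, size // stride))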