Rev 5535: More instrumentation. in http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem
John Arbash Meinel
john at arbash-meinel.com
Tue Nov 30 20:37:52 GMT 2010
At http://bazaar.launchpad.net/~jameinel/bzr/2.3-gcb-peak-mem
------------------------------------------------------------
revno: 5535
revision-id: john at arbash-meinel.com-20101130203740-twiaq89vtwrvyq1p
parent: john at arbash-meinel.com-20101123214953-9yhtoz01kxnr5cct
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.3-gcb-peak-mem
timestamp: Tue 2010-11-30 14:37:40 -0600
message:
More instrumentation.
-------------- next part --------------
=== modified file 'bzrlib/_delta_index_pyx.pyx'
--- a/bzrlib/_delta_index_pyx.pyx 2010-11-23 21:49:53 +0000
+++ b/bzrlib/_delta_index_pyx.pyx 2010-11-30 20:37:40 +0000
@@ -57,6 +57,8 @@
unsigned char *top) nogil
+from bzrlib import trace
+
cdef int MAX_HASH_BUCKET_ENTRIES
MAX_HASH_BUCKET_ENTRIES = 64
cdef int MAX_DELTA_OP_SIZE
@@ -114,6 +116,9 @@
# 256 values here
unsigned char match_pre
+ # How many times has this entry been considered a success?
+ unsigned short match_count
+
cdef class RabinHashMap
@@ -150,10 +155,27 @@
self.free_slot = NULL
self._step = 0
+ # Consider, self._next_hash & self._table_mask never see the 'upper'
+ # bits of the rabin_val. Is there a decent way to 'mix that in', to
+ # help keep chains from overlapping too much? (ATM 0x10001 and 0x20001
+ # will overlap for any table size less than 0x10000, at *all* steps) I
+ # don't know how common collisions are, but it may be more-than-normal
+ # because RABIN isn't a super-strong hash.
self._next_hash = rabin_val
cdef rabin_entry* next_with_dummy(self) except? NULL:
cdef rabin_entry *slot
+ # This uses Quadratic Probing:
+ # http://en.wikipedia.org/wiki/Quadratic_probing
+ # with c1 = c2 = 1/2
+ # This leads to probe locations at:
+ h0 = hash(k1)
+ h1 = h0 + 1
+ h2 = h0 + 3 = h1 + 2
+ h3 = h0 + 6 = h2 + 3
+ h4 = h0 + 10 = h3 + 4
+ # Note that all of these are '& mask', but that is computed *after* the
+ # offset.
while self._step < self._table_mask:
slot = self._table + (self._next_hash & self._table_mask)
self._step += 1
@@ -179,17 +201,6 @@
cdef next(self):
cdef rabin_entry *slot
- # This uses Quadratic Probing:
- # http://en.wikipedia.org/wiki/Quadratic_probing
- # with c1 = c2 = 1/2
- # This leads to probe locations at:
- # h0 = hash(k1)
- # h1 = h0 + 1
- # h2 = h0 + 3 = h1 + 1 + 1
- # h3 = h0 + 6 = h2 + 1 + 2
- # h4 = h0 + 10 = h2 + 1 + 3
- # Note that all of these are '& mask', but that is computed *after* the
- # offset.
# Don't search forever, even if we have bogus entries in the table
while self._step < self._table_mask:
slot = self.next_with_dummy()
@@ -286,6 +297,7 @@
# that point.
cdef rabin_entry *slot, *new_slot, *old_table
cdef unsigned int i, new_offset, old_mask
+ cdef unsigned long total_matches, total_steps, total_items
cdef RabinEntrySearch search
old_table = self._table
@@ -294,10 +306,31 @@
self.table_mask = new_mask
if old_table == NULL:
# Nothing to do
- self._table = new_table
- self.table_mask = new_mask
return
+ # TODO: Consider this as a possible time to re-sort the data. A few
+ # thoughts:
+ # 1) We could walk the old table, and when we find a rabin_val,
+ # we could search for all matches of that val. And then insert
+ # those together into the new table. That saves some searching
+ # on the target table (walking items you just inserted), at a
+ # cost of searching the old table.
+ # 2) However, when searching the old table, you can temp-copy
+ # everything into the temp_entries mini-table and sort them
+ before inserting into the new table. That way, all lists
+ will tend towards sorted, even if they don't end up with >64
+ # entries.
+ # 3) You could only sort if you get more than X entries.
+ # 4) If we walk the source in order, then we will preserve the
+ # ordering when inserting into the new table, rather than
+ # randomizing the ordering every time we have to resize.
+ 5) It may help compact the chains (you might get less
+ 'alternating')
+ # 6) We still have to 'search' the target table, but we don't
+ # have to walk over items we already walked. With the current
+ code w/ 64 entries in a bucket, we walk 1, 2, 3, 4, 5...
+ # 2047 steps.
+ total_steps = total_matches = total_items = 0
try:
search = RabinEntrySearch(self)
for i from 0 <= i <= old_mask:
@@ -305,12 +338,22 @@
if (slot.ptr == NULL or slot.ptr == dummy_ptr):
# An empty slot
continue
+ total_items += 1
search.start(slot.rabin_val)
while search.free_slot == NULL:
+ # Note: It should be fine to use search.next() here,
+ # because there should be no dummy pointers in the new
+ # table. Might want to validate that, though.
search.next()
search.free_slot[0] = slot[0]
+ total_steps += search._step
+ total_matches += search.match_count
finally:
PyMem_Free(old_table)
+ trace.mutter(' took %d steps, %d matches,'
+ ' for %d items'
+ % (total_steps, total_matches,
+ total_items))
def reserve(self, new_entries):
"""Ensure that we have enough open slots for N items.
@@ -340,6 +383,11 @@
table_size = MIN_TABLE_SIZE
while table_size < total_entries * 2 and table_size < 2**31:
table_size = table_size * 2
+ if self._table != NULL:
+ trace.mutter('resizing table %08x from %d to %d'
+ ' for %d new_entries, %d total'
+ % (id(self), self.table_mask + 1, table_size,
+ new_entries, total_entries))
n_bytes = sizeof(rabin_entry) * table_size
new_table = <rabin_entry*>PyMem_Malloc(n_bytes)
if new_table == NULL:
@@ -392,7 +440,11 @@
weight -= 1
# 'pre' bytes is less valuable than 'post' bytes, because of how
# the matching works, so double-weight post bytes.
- weight += (search.entry.match_tail * 2) + search.entry.match_pre
+ # Having hits is very valuable, more so than a byte or two
+ # available
+ weight += ((search.entry.match_count * 10)
+ + (search.entry.match_tail * 2)
+ + search.entry.match_pre)
all_entries.append((weight, temp_idx))
temp_entries[temp_idx] = search.entry[0]
temp_idx += 1
@@ -451,6 +503,7 @@
entry.rabin_val = val
entry.match_pre = match_pre
entry.match_tail = match_tail
+ entry.match_count = 0
entry.ptr = ptr
entry.global_offset = global_offset
self.entry_count += 1
@@ -1023,6 +1076,8 @@
cdef Py_ssize_t match_size
cdef Py_ssize_t local_offset
+ if self.match_entry.match_count < 65534:
+ self.match_entry.match_count += 1
self.match_offset = self.match_entry.global_offset
data = self._expand_match(data)
self._finalize_pending_insert()
More information about the bazaar-commits
mailing list