Rev 3823: Bring in the check_remap page cache utilization, in http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack

John Arbash Meinel john at arbash-meinel.com
Wed Dec 24 17:00:30 GMT 2008


At http://bzr.arbash-meinel.com/branches/bzr/brisbane/hack

------------------------------------------------------------
revno: 3823
revision-id: john at arbash-meinel.com-20081224170009-z175uhn3iq9h29su
parent: john at arbash-meinel.com-20081223235152-2kvb00nzjp076ls1
parent: john at arbash-meinel.com-20081224165308-mdwuyh9kxtmijy65
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: hack
timestamp: Wed 2008-12-24 11:00:09 -0600
message:
  Bring in the check_remap page cache utilization.
modified:
  bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
    ------------------------------------------------------------
    revno: 3802.2.4
    revision-id: john at arbash-meinel.com-20081224165308-mdwuyh9kxtmijy65
    parent: john at arbash-meinel.com-20081209231116-6lm0mt17pxieb18x
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: remap
    timestamp: Wed 2008-12-24 10:53:08 -0600
    message:
      Remove pdb statement.
    modified:
      bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
    ------------------------------------------------------------
    revno: 3802.2.3
    revision-id: john at arbash-meinel.com-20081209231116-6lm0mt17pxieb18x
    parent: john at arbash-meinel.com-20081209063920-y7dofjycpl6m946l
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: xml_cache
    timestamp: Tue 2008-12-09 17:11:16 -0600
    message:
      Properly remove keys that are found in the page cache. And add some debugging.
    modified:
      bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
    ------------------------------------------------------------
    revno: 3802.2.2
    revision-id: john at arbash-meinel.com-20081209063920-y7dofjycpl6m946l
    parent: john at arbash-meinel.com-20081209061005-gz20bp1fke585zll
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: xml_cache
    timestamp: Tue 2008-12-09 00:39:20 -0600
    message:
      Finish using the page cache as part of _check_remap, add debugging functions
      to give a count of what happens with _check_remap()
    modified:
      bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
    ------------------------------------------------------------
    revno: 3802.2.1
    revision-id: john at arbash-meinel.com-20081209061005-gz20bp1fke585zll
    parent: john at arbash-meinel.com-20081219230732-ri1i1tujtrh2d3sl
    committer: John Arbash Meinel <john at arbash-meinel.com>
    branch nick: xml_cache
    timestamp: Tue 2008-12-09 00:10:05 -0600
    message:
      Use the page cache as part of _check_remap()
    modified:
      bzrlib/chk_map.py              chk_map.py-20081001014447-ue6kkuhofvdecvxa-1
-------------- next part --------------
=== modified file 'bzrlib/chk_map.py'
--- a/bzrlib/chk_map.py	2008-12-23 23:51:52 +0000
+++ b/bzrlib/chk_map.py	2008-12-24 17:00:09 +0000
@@ -45,12 +45,15 @@
 lazy_import.lazy_import(globals(), """
 from bzrlib import errors, knit, trace, versionedfile
 """)
-from bzrlib import osutils
-from bzrlib.lru_cache import LRUCache
+from bzrlib import (
+    lru_cache,
+    osutils,
+    )
 
 # approx 2MB
-_PAGE_CACHE_SIZE = 2*1024*1024 / 4*1024
-_page_cache = LRUCache(_PAGE_CACHE_SIZE)
+_PAGE_CACHE_SIZE = 2*1024*1024
+# We are caching bytes so len(value) is perfectly accurate
+_page_cache = lru_cache.LRUSizeCache(_PAGE_CACHE_SIZE)
 
 
 class CHKMap(object):
@@ -548,7 +551,8 @@
             value = value[:-1]
             items[tuple(elements[:-1])] = value
         if len(items) != length:
-            raise AssertionError("item count mismatch")
+            raise AssertionError("item count (%d) mismatch for key %s,"
+                " bytes %r" % (length, key, bytes))
         result._items = items
         result._len = length
         # assert length == len(lines) - 5
@@ -1165,12 +1169,40 @@
             else:
                 if isinstance(node, InternalNode):
                     # Without looking at any leaf nodes, we are sure
+                    def child_is_internal_node(): pass
+                    child_is_internal_node()
                     return self
                 for key, value in node._items.iteritems():
                     if new_leaf._map_no_split(key, value):
                         # Adding this key would cause a split, so we know we
                         # don't need to collapse
-                        return self
+                        def child_causes_split(): pass
+                        child_causes_split()
+                        return self
+        if keys:
+            # Look in the page cache for some more bytes
+            found_keys = set()
+            for chk, prefix in keys.iteritems():
+                try:
+                    bytes = _page_cache[chk]
+                except KeyError:
+                    continue
+                else:
+                    found_keys.add(chk)
+                    node = _deserialise(bytes, chk)
+                    self._items[prefix] = node
+                    if isinstance(node, InternalNode):
+                        # We have done enough to know that we can stop
+                        def page_is_internal(): pass
+                        page_is_internal()
+                        return self
+                    for key, value in node._items.iteritems():
+                        if new_leaf._map_no_split(key, value):
+                            def page_causes_split(): pass
+                            page_causes_split()
+                            return self
+            for chk in found_keys:
+                del keys[chk]
         # So far, everything fits. Page in the rest of the nodes, and see if it
         # holds true.
         if keys:
@@ -1189,19 +1221,27 @@
             # don't need to continue. We requested the bytes, we may as well
             # use them
             for record in stream:
-                node = _deserialise(record.get_bytes_as('fulltext'), record.key)
+                bytes = record.get_bytes_as('fulltext')
+                node = _deserialise(bytes, record.key)
                 self._items[keys[record.key]] = node
+                _page_cache[record.key] = bytes
                 nodes.append(node)
             for node in nodes:
                 if isinstance(node, InternalNode):
                     # We know we won't fit
+                    def stream_is_internal(): pass
+                    stream_is_internal()
                     return self
                 for key, value in node._items.iteritems():
                     if new_leaf._map_no_split(key, value):
+                        def stream_causes_split(): pass
+                        stream_causes_split()
                         return self
 
         # We have gone to every child, and everything fits in a single leaf
         # node, we no longer need this internal node
+        def check_remap_collapsed(): pass
+        check_remap_collapsed()
         return new_leaf
 
 



More information about the bazaar-commits mailing list