Rev 2901: Bisection improvements after integrating with packs. in http://people.ubuntu.com/~robertc/baz2.0/index

Robert Collins robertc at robertcollins.net
Mon Oct 8 03:00:41 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/index

------------------------------------------------------------
revno: 2901
revision-id: robertc at robertcollins.net-20071008020031-7k73clatevakdpsb
parent: robertc at robertcollins.net-20071007233729-305al11yzo3ebxd1
committer: Robert Collins <robertc at robertcollins.net>
branch nick: index
timestamp: Mon 2007-10-08 12:00:31 +1000
message:
  Bisection improvements after integrating with packs.
modified:
  bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
=== modified file 'bzrlib/index.py'
--- a/bzrlib/index.py	2007-10-07 23:37:29 +0000
+++ b/bzrlib/index.py	2007-10-08 02:00:31 +0000
@@ -389,6 +389,20 @@
             node_refs.append(tuple([self._keys_by_offset[ref][0] for ref in ref_list]))
         return tuple(node_refs)
 
+    def _find_index(self, range_map, key):
+        """Helper for the _parsed_*_index calls.
+
+        Given a range map - [(start, end), ...], finds the index of the range
+        in the map for key if it is in the map, and if it is not there, the
+        immediately preceeding range in the map.
+        """
+        result = bisect_right(range_map, key) - 1
+        if result + 1 < len(range_map):
+            # check the border condition, it may be in result + 1
+            if range_map[result + 1][0] == key[0]:
+                return result + 1
+        return result
+
     def _parsed_byte_index(self, offset):
         """Return the index of the entry immediately before offset.
 
@@ -400,7 +414,7 @@
         asking for 12 will return 1
         """
         key = (offset, 0)
-        return bisect_right(self._parsed_byte_map, key) - 1
+        return self._find_index(self._parsed_byte_map, key)
 
     def _parsed_key_index(self, key):
         """Return the index of the entry immediately before key.
@@ -413,8 +427,8 @@
         asking for 'b' will return 1
         asking for 'e' will return 1
         """
-        search_key = (key, 0)
-        return bisect_right(self._parsed_key_map, search_key) - 1
+        search_key = (key, None)
+        return self._find_index(self._parsed_key_map, search_key)
 
     def _is_parsed(self, offset):
         """Returns True if offset has been parsed."""
@@ -567,12 +581,22 @@
         readv_ranges = []
         for location, key in location_keys:
             # can we answer from cache?
+            # - if we know the answer - yes
             index = self._parsed_key_index(key)
             if (len(self._parsed_key_map) and 
                 self._parsed_key_map[index][0] <= key and
-                self._parsed_key_map[index][1] > key):
+                (self._parsed_key_map[index][1] > key or
+                 # end of the file has been parsed
+                 self._parsed_byte_map[index][1] == self._size)):
                 # the key has been parsed, so no lookup is needed
                 continue
+            # - if we have examined this part of the file already - yes
+            index = self._parsed_byte_index(location)
+            if (len(self._parsed_byte_map) and 
+                self._parsed_byte_map[index][0] <= location and
+                self._parsed_byte_map[index][1] > location):
+                # the byte region has been parsed, so no read is needed.
+                continue
             length = 800
             if location + length > self._size:
                 length = self._size - location
@@ -774,12 +798,13 @@
             assert trim_end != 0, 'no \n was present'
             # print 'removing end', offset, trim_end, repr(data[trim_end:])
         # adjust offset and data to the parseable data.
-        data = data[trim_start:trim_end]
+        trimmed_data = data[trim_start:trim_end]
+        assert trimmed_data, 'read unneeded data'
         if trim_start:
             offset += trim_start
-        # print "parsing", repr(data)
+        # print "parsing", repr(trimmed_data)
         # splitlines mangles the \r delimiters.. don't use it.
-        lines = data.split('\n')
+        lines = trimmed_data.split('\n')
         del lines[-1]
         pos = offset
         first_key = None
@@ -813,7 +838,7 @@
                 node_value = value
             self._bisect_nodes[key] = node_value
             # print "parsed ", key
-        self._parsed_bytes(offset, first_key, offset + len(data), key)
+        self._parsed_bytes(offset, first_key, offset + len(trimmed_data), key)
 
     def _parsed_bytes(self, start, start_key, end, end_key):
         """Mark the bytes from start to end as parsed.
@@ -824,37 +849,46 @@
         :param start: The start of the parsed region.
         :param end: The end of the parsed region.
         """
-        key = (start, 0)
-        index = bisect_right(self._parsed_byte_map, key)
+        index = self._parsed_byte_index(start)
         new_value = (start, end)
         new_key = (start_key, end_key)
-        if index == 0:
+        if index == -1:
             # first range parsed is always the beginning.
             self._parsed_byte_map.insert(index, new_value)
             self._parsed_key_map.insert(index, new_key)
-        elif index == len(self._parsed_byte_map):
-            # may be adjacent to the prior entry
-            if start == self._parsed_byte_map[index - 1][1]:
-                self._parsed_byte_map[index - 1] = (
-                    self._parsed_byte_map[index - 1][0], end)
-                self._parsed_key_map[index - 1] = (
-                    self._parsed_key_map[index - 1][0], end_key)
-            else:
-                #no, new entry
-                self._parsed_byte_map.insert(index, new_value)
-                self._parsed_key_map.insert(index, new_key)
+            return
+        # four cases:
+        # new region
+        # extend lower region
+        # extend higher region
+        # combine two regions
+        if (index + 1 < len(self._parsed_byte_map) and
+            self._parsed_byte_map[index][1] == start and
+            self._parsed_byte_map[index + 1][0] == end):
+            # combine two regions
+            self._parsed_byte_map[index] = (self._parsed_byte_map[index][0],
+                self._parsed_byte_map[index + 1][1])
+            self._parsed_key_map[index] = (self._parsed_key_map[index][0],
+                self._parsed_key_map[index + 1][1])
+        elif self._parsed_byte_map[index][1] == start:
+            # extend the lower entry
+            self._parsed_byte_map[index] = (
+                self._parsed_byte_map[index][0], end)
+            self._parsed_key_map[index] = (
+                self._parsed_key_map[index][0], end_key)
+        elif (index + 1 < len(self._parsed_byte_map) and
+            self._parsed_byte_map[index + 1][0] == end):
+            # extend the higher entry
+            self._parsed_byte_map[index + 1] = (
+                start, self._parsed_byte_map[index + 1][1])
+            self._parsed_key_map[index + 1] = (
+                start_key, self._parsed_key_map[index + 1][1])
         else:
-            # may be adjacent to the next entry
-            if end == self._parsed_byte_map[index][0]:
-                # move the next entry down to this range
-                self._parsed_byte_map[index] = (
-                    start, self._parsed_byte_map[index][1])
-                self._parsed_key_map[index] = (
-                    start_key, self._parsed_key_map[index][1])
-            else:
-                # no, new entry
-                self._parsed_byte_map.insert(index, new_value)
-                self._parsed_key_map.insert(index, new_key)
+            # new entry
+            self._parsed_byte_map.insert(index + 1, new_value)
+            self._parsed_key_map.insert(index + 1, new_key)
+        assert sorted(self._parsed_byte_map) == self._parsed_byte_map
+        assert sorted(self._parsed_key_map) == self._parsed_key_map
 
     def _read_and_parse(self, readv_ranges):
         """Read the the ranges and parse the resulting data.



More information about the bazaar-commits mailing list