Rev 5076: Steal bzr-search's FileView implementation in http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

John Arbash Meinel john at arbash-meinel.com
Thu Mar 4 21:18:31 GMT 2010


At http://bzr.arbash-meinel.com/branches/bzr/lp/2.2.0b2-contained-pack

------------------------------------------------------------
revno: 5076
revision-id: john at arbash-meinel.com-20100304211802-fxoa6vtwg03vov8w
parent: john at arbash-meinel.com-20100304203435-csu20otvt3mm1g3i
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: 2.2.0b2-contained-pack
timestamp: Thu 2010-03-04 15:18:02 -0600
message:
  Steal bzr-search's FileView implementation
-------------- next part --------------
=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2010-03-04 20:34:35 +0000
+++ b/bzrlib/tests/__init__.py	2010-03-04 21:18:02 +0000
@@ -3745,6 +3745,7 @@
         'bzrlib.tests.test_transactions',
         'bzrlib.tests.test_transform',
         'bzrlib.tests.test_transport',
+        'bzrlib.tests.test_transport_file_view',
         'bzrlib.tests.test_transport_log',
         'bzrlib.tests.test_tree',
         'bzrlib.tests.test_treebuilder',

=== added file 'bzrlib/tests/test_transport_file_view.py'
--- a/bzrlib/tests/test_transport_file_view.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/tests/test_transport_file_view.py	2010-03-04 21:18:02 +0000
@@ -0,0 +1,56 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Taken from bzr-search w/ permission from Robert Collins
+
+"""Tests for the FileView transport wrapper."""
+
+from bzrlib import tests
+from bzrlib.transport import file_view
+
+
+class TestFileView(tests.TestCaseWithTransport):
+    """Exercise FileView.get and FileView.readv over a file on a transport."""
+
+    def get_bulk_and_view_data(self):
+        """Write foo.pack and return (bulk_data, view_data, file_map).
+
+        The view "Foo.1" covers bytes 400 to 1600 of the pack.
+        """
+        bulk_data = ":".join([str(count) for count in range(4096)])
+        view_data = bulk_data[400:1600]
+        file_map = {"Foo.1": (400, 1600)}
+        self.get_transport(".").put_bytes("foo.pack", bulk_data)
+        return bulk_data, view_data, file_map
+
+    def test_get(self):
+        bulk_data, view_data, file_map = self.get_bulk_and_view_data()
+        base_transport = self.get_transport(".")
+        view = file_view.FileView(base_transport, "foo.pack", file_map)
+        # Doing a get() returns a file which only contains the view_data.
+        visible_bytes = view.get("Foo.1").read()
+        self.assertEqual(view_data, visible_bytes)
+
+    def test_readv(self):
+        bulk_data, view_data, file_map = self.get_bulk_and_view_data()
+        base_transport = self.get_transport(".")
+        view = file_view.FileView(base_transport, "foo.pack", file_map)
+        # Offsets are relative to the view; hunks must stay inside the view.
+        hunks = list(view.readv('Foo.1', [(0, 10), (700, 100)], True, 800))
+        self.assertNotEqual([], hunks)
+        for offset, data in hunks:
+            self.assertTrue(0 <= offset <= len(view_data) - len(data))
+            self.assertEqual(view_data[offset:offset + len(data)], data)

=== added file 'bzrlib/transport/file_view.py'
--- a/bzrlib/transport/file_view.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/transport/file_view.py	2010-03-04 21:18:02 +0000
@@ -0,0 +1,92 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Taken from bzr-search w/ permission from Robert Collins
+
+"""Transport facilities to support the index engine.
+
+The primary class here is FileView, an adapter for exposing a number of files 
+in a pack (with identity encoding only!) such that they can be accessed via
+readv.
+"""
+
+from cStringIO import StringIO
+
+
+class FileView(object):
+    """An adapter from a pack file to multiple smaller readvable files.
+
+    A typical use for this is to embed GraphIndex objects in a pack and then
+    use this to allow the GraphIndex logic to readv while actually reading
+    from the pack.
+
+    Only get, readv and recommended_page_size are supported; the rest of the
+    transport interface will raise AttributeError - this is deliberate to
+    catch unexpected uses.
+    """
+
+    def __init__(self, backing_transport, backing_file, file_map):
+        """Create a FileView.
+
+        :param backing_transport: The transport the pack file is located on.
+        :param backing_file: The url fragment name of the pack file.
+        :param file_map: A dict from file url fragments to (start, stop) byte
+            ranges in the pack file. Pack file header and trailer overhead
+            should not be included in these ranges.
+        """
+        self._backing_transport = backing_transport
+        self._backing_file = backing_file
+        self._file_map = file_map
+
+    def get(self, relpath):
+        """See Transport.get; relpath must be a key of the file_map."""
+        start, stop = self._file_map[relpath]
+        # One requested range, so only the first yielded hunk is needed.
+        _, content = self._backing_transport.readv(self._backing_file,
+            [(start, stop - start)]).next()
+        return StringIO(content)
+
+    def readv(self, relpath, offsets, adjust_for_latency=False,
+        upper_limit=None):
+        """See Transport.readv.
+
+        Requested and yielded offsets are relative to the start of the
+        viewed file, as is upper_limit when given. Results are clipped back
+        to the range defined by the file_map.
+
+        :raises KeyError: once iterated, if relpath is not in the file_map.
+        """
+        base, view_end = self._file_map[relpath]
+        # Never let the backing read run past the end of the viewed range.
+        if upper_limit is None:
+            backing_limit = view_end
+        else:
+            backing_limit = min(view_end, base + upper_limit)
+        new_offsets = [(offset + base, length) for offset, length in offsets]
+        for offset, data in self._backing_transport.readv(self._backing_file,
+            new_offsets, adjust_for_latency=adjust_for_latency,
+            upper_limit=backing_limit):
+            # Keep only the bytes inside [base, view_end). Slice bounds index
+            # into data, so both are expressed relative to offset; clamping
+            # end to start makes a hunk wholly outside the view come out empty.
+            start = max(offset, base)
+            end = max(start, min(offset + len(data), view_end))
+            data = data[start - offset:end - offset]
+            yield start - base, data
+
+    def recommended_page_size(self):
+        """See Transport.recommended_page_size."""
+        return self._backing_transport.recommended_page_size()



More information about the bazaar-commits mailing list