Rev 2485: Add an initial function to read knit indexes in pyrex. in http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/knit_index_pyrex

Wed May 9 15:40:58 BST 2007

At http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/knit_index_pyrex

------------------------------------------------------------
revno: 2485
revision-id: john at arbash-meinel.com-20070509144042-9uyruriejyp98751
parent: pqm at pqm.ubuntu.com-20070508203256-wcxwdphd1y2psezh
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: knit_index_pyrex
timestamp: Wed 2007-05-09 09:40:42 -0500
message:
  Add an initial function to read knit indexes in pyrex.
  Update the LowLevelKnitIndex tests so that they can be run
  against both the python and pyrex implementations.
added:
  bzrlib/knit_c.pyx              knit_c.pyx-20070509143944-u42gy8w387a10m0j-1
modified:
  .bzrignore                     bzrignore-20050311232317-81f7b71efa2db11a
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/tests/test_knit.py      test_knit.py-20051212171302-95d4c00dd5f11f2b
  setup.py                       setup.py-20050314065409-02f8a0a6e3f9bc70
-------------- next part --------------
=== added file 'bzrlib/knit_c.pyx'

--- a/bzrlib/knit_c.pyx	1970-01-01 00:00:00 +0000
+++ b/bzrlib/knit_c.pyx	2007-05-09 14:40:42 +0000
@@ -0,0 +1,71 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Pyrex extensions to knit parsing."""
+
+
+def _load_data_c(kndx, fp):
+    """Load the knit index file into memory."""
+    cache = kndx._cache
+    history = kndx._history
+
+    kndx.check_header(fp)
+    # readlines reads the whole file at once:
+    # bad for transports like http, good for local disk.
+    # We save 60 ms with this one change (going
+    # from calling readline each time to calling
+    # readlines once).
+    # Probably what we want for nice behaviour on
+    # http is an incremental readlines that yields, or
+    # a check for local vs non-local indexes.
+    history_top = len(history) - 1
+    for line in fp.readlines():
+        rec = line.split()
+        if len(rec) < 5 or rec[-1] != ':':
+            # corrupt line.
+            # FIXME: in the future we should determine if it's a
+            # short write - and ignore it -
+            # or a different failure, and raise. RBC 20060407
+            continue
+
+        parents = []
+        for value in rec[4:-1]:
+            if value[0] == '.':
+                # uncompressed reference
+                parent_id = value[1:]
+            else:
+                parent_id = history[int(value)]
+            parents.append(parent_id)
+
+        version_id, options, pos, size = rec[:4]
+        version_id = version_id
+
+        # See kndx._cache_version
+        # only want the _history index to reference the 1st 
+        # index entry for version_id
+        if version_id not in cache:
+            history_top = history_top + 1
+            index = history_top
+            history.append(version_id)
+        else:
+            index = cache[version_id][5]
+        cache[version_id] = (version_id,
+                             options.split(','),
+                             int(pos),
+                             int(size),
+                             parents,
+                             index)
+        # end kndx._cache_version 

=== modified file '.bzrignore'
--- a/.bzrignore	2007-04-12 20:27:42 +0000
+++ b/.bzrignore	2007-05-09 14:40:42 +0000
@@ -34,3 +34,4 @@
 ./html_docs
 ./pretty_docs
 ./api
+bzrlib/knit_c.c

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-03-09 22:17:39 +0000
+++ b/bzrlib/knit.py	2007-05-09 14:40:42 +0000
@@ -1,4 +1,4 @@
-# Copyright (C) 2005, 2006 Canonical Ltd
+# Copyright (C) 2005, 2006, 2007 Canonical Ltd
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -1150,7 +1150,7 @@
             try:
                 # _load_data may raise NoSuchFile if the target knit is
                 # completely empty.
-                self._load_data(fp)
+                _load_data(self, fp)
             finally:
                 fp.close()
         except NoSuchFile:
@@ -1162,58 +1162,6 @@
                 self._transport.put_bytes_non_atomic(
                     self._filename, self.HEADER, mode=self._file_mode)
 
-    def _load_data(self, fp):
-        cache = self._cache
-        history = self._history
-
-        self.check_header(fp)
-        # readlines reads the whole file at once:
-        # bad for transports like http, good for local disk
-        # we save 60 ms doing this one change (
-        # from calling readline each time to calling
-        # readlines once.
-        # probably what we want for nice behaviour on
-        # http is a incremental readlines that yields, or
-        # a check for local vs non local indexes,
-        history_top = len(history) - 1
-        for line in fp.readlines():
-            rec = line.split()
-            if len(rec) < 5 or rec[-1] != ':':
-                # corrupt line.
-                # FIXME: in the future we should determine if its a
-                # short write - and ignore it 
-                # or a different failure, and raise. RBC 20060407
-                continue
-
-            parents = []
-            for value in rec[4:-1]:
-                if value[0] == '.':
-                    # uncompressed reference
-                    parent_id = value[1:]
-                else:
-                    parent_id = history[int(value)]
-                parents.append(parent_id)
-
-            version_id, options, pos, size = rec[:4]
-            version_id = version_id
-
-            # See self._cache_version
-            # only want the _history index to reference the 1st 
-            # index entry for version_id
-            if version_id not in cache:
-                history_top += 1
-                index = history_top
-                history.append(version_id)
-            else:
-                index = cache[version_id][5]
-            cache[version_id] = (version_id,
-                                 options.split(','),
-                                 int(pos),
-                                 int(size),
-                                 parents,
-                                 index)
-            # end self._cache_version 
-
     def get_graph(self):
         return [(vid, idx[4]) for vid, idx in self._cache.iteritems()]
 
@@ -1947,3 +1895,66 @@
             bestsize = bestsize + 1
 
         return besti, bestj, bestsize
+
+
+def _load_data_py(knit, fp):
+    cache = knit._cache
+    history = knit._history
+
+    knit.check_header(fp)
+    # readlines reads the whole file at once:
+    # bad for transports like http, good for local disk.
+    # We save 60 ms with this one change (going
+    # from calling readline each time to calling
+    # readlines once).
+    # Probably what we want for nice behaviour on
+    # http is an incremental readlines that yields, or
+    # a check for local vs non-local indexes.
+    history_top = len(history) - 1
+    for line in fp.readlines():
+        rec = line.split()
+        if len(rec) < 5 or rec[-1] != ':':
+            # corrupt line.
+            # FIXME: in the future we should determine if it's a
+            # short write - and ignore it -
+            # or a different failure, and raise. RBC 20060407
+            continue
+
+        parents = []
+        for value in rec[4:-1]:
+            if value[0] == '.':
+                # uncompressed reference
+                parent_id = value[1:]
+            else:
+                parent_id = history[int(value)]
+            parents.append(parent_id)
+
+        version_id, options, pos, size = rec[:4]
+        version_id = version_id
+
+        # See knit._cache_version
+        # only want the _history index to reference the 1st 
+        # index entry for version_id
+        if version_id not in cache:
+            history_top += 1
+            index = history_top
+            history.append(version_id)
+        else:
+            index = cache[version_id][5]
+        cache[version_id] = (version_id,
+                             options.split(','),
+                             int(pos),
+                             int(size),
+                             parents,
+                             index)
+        # end knit._cache_version 
+
+_load_data = _load_data_py
+
+
+try:
+    from bzrlib.knit_c import _load_data_c
+except ImportError:
+    pass
+else:
+    _load_data = _load_data_c

=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2007-03-09 22:14:55 +0000
+++ b/bzrlib/tests/test_knit.py	2007-05-09 14:40:42 +0000
@@ -23,6 +23,7 @@
 
 from bzrlib import (
     errors,
+    knit,
     )
 from bzrlib.errors import (
     RevisionAlreadyPresent,
@@ -40,12 +41,27 @@
     WeaveToKnit,
     )
 from bzrlib.osutils import split_lines
-from bzrlib.tests import TestCase, TestCaseWithTransport
+from bzrlib.tests import TestCase, TestCaseWithTransport, Feature
 from bzrlib.transport import TransportLogger, get_transport
 from bzrlib.transport.memory import MemoryTransport
 from bzrlib.weave import Weave
 
 
+class _CompiledKnitFeature(Feature):
+
+    def _probe(self):
+        try:
+            import bzrlib.knit_c
+        except ImportError:
+            return False
+        return True
+
+    def feature_name(self):
+        return 'bzrlib.knit_c'
+
+CompiledKnitFeature = _CompiledKnitFeature()
+
+
 class KnitContentTests(TestCase):
 
     def test_constructor(self):
@@ -240,17 +256,26 @@
 
 class LowLevelKnitIndexTests(TestCase):
 
+    def get_knit_index(self, *args, **kwargs):
+        orig = knit._load_data
+        def reset():
+            knit._load_data = orig
+        self.addCleanup(reset)
+        knit._load_data = knit._load_data_py
+        return _KnitIndex(*args, **kwargs)
+
     def test_no_such_file(self):
         transport = MockTransport()
 
-        self.assertRaises(NoSuchFile, _KnitIndex, transport, "filename", "r")
-        self.assertRaises(NoSuchFile, _KnitIndex, transport,
-            "filename", "w", create=False)
+        self.assertRaises(NoSuchFile, self.get_knit_index,
+                          transport, "filename", "r")
+        self.assertRaises(NoSuchFile, self.get_knit_index,
+                          transport, "filename", "w", create=False)
 
     def test_create_file(self):
         transport = MockTransport()
 
-        index = _KnitIndex(transport, "filename", "w",
+        index = self.get_knit_index(transport, "filename", "w",
             file_mode="wb", create=True)
         self.assertEqual(
                 ("put_bytes_non_atomic",
@@ -260,7 +285,7 @@
     def test_delay_create_file(self):
         transport = MockTransport()
 
-        index = _KnitIndex(transport, "filename", "w",
+        index = self.get_knit_index(transport, "filename", "w",
             create=True, file_mode="wb", create_parent_dir=True,
             delay_create=True, dir_mode=0777)
         self.assertEqual([], transport.calls)
@@ -285,7 +310,7 @@
             _KnitIndex.HEADER,
             '%s option 0 1 :' % (utf8_revision_id,)
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
         # _KnitIndex is a private class, and deals in utf8 revision_ids, not
         # Unicode revision_ids.
         self.assertTrue(index.has_version(utf8_revision_id))
@@ -298,7 +323,7 @@
             _KnitIndex.HEADER,
             "version option 0 1 .%s :" % (utf8_revision_id,)
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
         self.assertEqual([utf8_revision_id],
             index.get_parents_with_ghosts("version"))
 
@@ -309,14 +334,14 @@
             "corrupted options 0 1 .b .c ",
             "version options 0 1 :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
         self.assertEqual(1, index.num_versions())
         self.assertTrue(index.has_version("version"))
 
     def test_read_corrupted_header(self):
         transport = MockTransport(['not a bzr knit index header\n'])
         self.assertRaises(KnitHeaderError,
-            _KnitIndex, transport, "filename", "r")
+            self.get_knit_index, transport, "filename", "r")
 
     def test_read_duplicate_entries(self):
         transport = MockTransport([
@@ -326,7 +351,7 @@
             "version options2 1 2 .other :",
             "version options3 3 4 0 .other :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
         self.assertEqual(2, index.num_versions())
         self.assertEqual(1, index.lookup("version"))
         self.assertEqual((3, 4), index.get_position("version"))
@@ -341,7 +366,7 @@
             "b option 0 1 0 :",
             "c option 0 1 1 0 :",
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
         self.assertEqual(["a"], index.get_parents("b"))
         self.assertEqual(["b", "a"], index.get_parents("c"))
 
@@ -351,7 +376,7 @@
         transport = MockTransport([
             _KnitIndex.HEADER
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
         index.add_version(utf8_revision_id, ["option"], 0, 1, [])
         self.assertEqual(("append_bytes", ("filename",
             "\n%s option 0 1  :" % (utf8_revision_id,)),
@@ -364,7 +389,7 @@
         transport = MockTransport([
             _KnitIndex.HEADER
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
         index.add_version("version", ["option"], 0, 1, [utf8_revision_id])
         self.assertEqual(("append_bytes", ("filename",
             "\nversion option 0 1 .%s :" % (utf8_revision_id,)),
@@ -373,7 +398,7 @@
 
     def test_get_graph(self):
         transport = MockTransport()
-        index = _KnitIndex(transport, "filename", "w", create=True)
+        index = self.get_knit_index(transport, "filename", "w", create=True)
         self.assertEqual([], index.get_graph())
 
         index.add_version("a", ["option"], 0, 1, ["b"])
@@ -391,7 +416,7 @@
             "c option 0 1 1 0 :",
             "d option 0 1 2 .f :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual([], index.get_ancestry([]))
         self.assertEqual(["a"], index.get_ancestry(["a"]))
@@ -411,7 +436,7 @@
             "c option 0 1 0 .f .g :",
             "d option 0 1 2 .h .j .k :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual([], index.get_ancestry_with_ghosts([]))
         self.assertEqual(["a"], index.get_ancestry_with_ghosts(["a"]))
@@ -436,7 +461,7 @@
         transport = MockTransport([
             _KnitIndex.HEADER
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual(0, index.num_versions())
         self.assertEqual(0, len(index))
@@ -457,7 +482,7 @@
         transport = MockTransport([
             _KnitIndex.HEADER
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual([], index.get_versions())
 
@@ -476,7 +501,7 @@
             "a option 0 1 :",
             "b option 0 1 :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual("a", index.idx_to_name(0))
         self.assertEqual("b", index.idx_to_name(1))
@@ -489,7 +514,7 @@
             "a option 0 1 :",
             "b option 0 1 :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual(0, index.lookup("a"))
         self.assertEqual(1, index.lookup("b"))
@@ -498,7 +523,7 @@
         transport = MockTransport([
             _KnitIndex.HEADER
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         index.add_version("a", ["option"], 0, 1, ["b"])
         self.assertEqual(("append_bytes",
@@ -534,7 +559,7 @@
         transport = MockTransport([
             _KnitIndex.HEADER
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         index.add_versions([
             ("a", ["option"], 0, 1, ["b"]),
@@ -559,7 +584,7 @@
     def test_delay_create_and_add_versions(self):
         transport = MockTransport()
 
-        index = _KnitIndex(transport, "filename", "w",
+        index = self.get_knit_index(transport, "filename", "w",
             create=True, file_mode="wb", create_parent_dir=True,
             delay_create=True, dir_mode=0777)
         self.assertEqual([], transport.calls)
@@ -587,7 +612,7 @@
             _KnitIndex.HEADER,
             "a option 0 1 :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertTrue(index.has_version("a"))
         self.assertFalse(index.has_version("b"))
@@ -598,7 +623,7 @@
             "a option 0 1 :",
             "b option 1 2 :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual((0, 1), index.get_position("a"))
         self.assertEqual((1, 2), index.get_position("b"))
@@ -610,7 +635,7 @@
             "b unknown,line-delta 1 2 :",
             "c bad 3 4 :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual("fulltext", index.get_method("a"))
         self.assertEqual("line-delta", index.get_method("b"))
@@ -622,7 +647,7 @@
             "a opt1 0 1 :",
             "b opt2,opt3 1 2 :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual(["opt1"], index.get_options("a"))
         self.assertEqual(["opt2", "opt3"], index.get_options("b"))
@@ -634,7 +659,7 @@
             "b option 1 2 0 .c :",
             "c option 1 2 1 0 .e :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual([], index.get_parents("a"))
         self.assertEqual(["a", "c"], index.get_parents("b"))
@@ -647,7 +672,7 @@
             "b option 1 2 0 .c :",
             "c option 1 2 1 0 .e :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         self.assertEqual([], index.get_parents_with_ghosts("a"))
         self.assertEqual(["a", "c"], index.get_parents_with_ghosts("b"))
@@ -660,7 +685,7 @@
             "a option 0 1 :",
             "b option 0 1 :"
             ])
-        index = _KnitIndex(transport, "filename", "r")
+        index = self.get_knit_index(transport, "filename", "r")
 
         check = index.check_versions_present
 
@@ -672,6 +697,20 @@
         self.assertRaises(RevisionNotPresent, check, ["a", "b", "c"])
 
 
+class LowLevelKnitIndexTests_c(LowLevelKnitIndexTests):
+
+    _test_needs_features = [CompiledKnitFeature]
+
+    def get_knit_index(self, *args, **kwargs):
+        orig = knit._load_data
+        def reset():
+            knit._load_data = orig
+        self.addCleanup(reset)
+        knit._load_data = knit._load_data_c
+        return _KnitIndex(*args, **kwargs)
+
+
+
 class KnitTests(TestCaseWithTransport):
     """Class containing knit test helper routines."""
 

=== modified file 'setup.py'
--- a/setup.py	2007-03-11 23:34:27 +0000
+++ b/setup.py	2007-05-09 14:40:42 +0000
@@ -164,6 +164,8 @@
     from distutils.extension import Extension
     #ext_modules.append(
     #    Extension("bzrlib.modulename", ["bzrlib/foo.pyx"], libraries = []))
+    ext_modules.append(
+        Extension("bzrlib.knit_c", ["bzrlib/knit_c.pyx"]))
 command_classes['build_ext'] = build_ext
 
 if 'bdist_wininst' in sys.argv: