Rev 2531: Move code around to refactor according to our pyrex extension design. in http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/dirstate_pyrex

John Arbash Meinel john at arbash-meinel.com
Tue Jul 10 15:54:11 BST 2007


At http://bzr.arbash-meinel.com/branches/bzr/0.17-dev/dirstate_pyrex

------------------------------------------------------------
revno: 2531
revision-id: john at arbash-meinel.com-20070710145123-jv3wcj10qdvkgmt8
parent: john at arbash-meinel.com-20070509152850-spj91ozbgzpgxmw7
committer: John Arbash Meinel <john at arbash-meinel.com>
branch nick: dirstate_pyrex
timestamp: Tue 2007-07-10 09:51:23 -0500
message:
  Move code around to refactor according to our pyrex extension design.
  This creates a _dirstate_helpers_py.py next to _dirstate_helpers_c.pyx
  Rather than having a 'bzrlib.compiled.*' directory.
removed:
  bzrlib/compiled/               bzrlibcompiled-20070503195935-dmd5sxtc930u4p37-1
  bzrlib/compiled/__init__.py    __init__.py-20070503201057-u425eni465q4idwn-1
  bzrlib/tests/compiled/         bzrlibtestscompiled-20070504021933-pn9n1zh8fkzcr2z1-1
  bzrlib/tests/compiled/__init__.py __init__.py-20070504035751-jsbn00xodv0y1eve-1
added:
  bzrlib/_dirstate_helpers_py.py _dirstate_helpers_py-20070710145033-90nz6cqglsk150jy-1
renamed:
  bzrlib/compiled/dirstate_helpers.pyx => bzrlib/_dirstate_helpers_c.pyx dirstate_helpers.pyx-20070503201057-u425eni465q4idwn-3
  bzrlib/tests/compiled/test_dirstate_helpers.py => bzrlib/tests/test__dirstate_helpers.py test_dirstate_helper-20070504035751-jsbn00xodv0y1eve-2
modified:
  .bzrignore                     bzrignore-20050311232317-81f7b71efa2db11a
  bzrlib/dirstate.py             dirstate.py-20060728012006-d6mvoihjb3je9peu-1
  bzrlib/tests/__init__.py       selftest.py-20050531073622-8d0e3c8845c97a64
  bzrlib/tests/test_dirstate.py  test_dirstate.py-20060728012006-d6mvoihjb3je9peu-2
  setup.py                       setup.py-20050314065409-02f8a0a6e3f9bc70
  bzrlib/tests/test__dirstate_helpers.py test_dirstate_helper-20070504035751-jsbn00xodv0y1eve-2
-------------- next part --------------
=== removed directory 'bzrlib/compiled'
=== removed file 'bzrlib/compiled/__init__.py'
--- a/bzrlib/compiled/__init__.py	2007-05-03 20:11:37 +0000
+++ b/bzrlib/compiled/__init__.py	1970-01-01 00:00:00 +0000
@@ -1,22 +0,0 @@
-# Copyright (C) 2007 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Container directory for compiled bzr extensions.
-
-Code which is implemented in raw C or pyrex should be placed in this directory,
-to make it clear what code is compiled.
-"""
-

=== removed directory 'bzrlib/tests/compiled'
=== removed file 'bzrlib/tests/compiled/__init__.py'
--- a/bzrlib/tests/compiled/__init__.py	2007-05-04 03:58:29 +0000
+++ b/bzrlib/tests/compiled/__init__.py	1970-01-01 00:00:00 +0000
@@ -1,45 +0,0 @@
-# Copyright (C) 2007 Canonical Ltd
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Tests for the compiled extensions."""
-
-from bzrlib import tests
-
-
-# TODO: jam 20070503 This seems like a good feature to have, but right now it
-#       seems like we need to test individually compiled modules
-# class _CompiledFeature(tests.Feature):
-#     def _probe(self):
-#         try:
-#             import bzrlib.compiled.???
-#         except ImportError:
-#             return False
-#         return True
-#
-#     def feature_name(self):
-#         return 'bzrlib.compiled.???'
-#
-# CompiledFeature =_CompiledFeature()
-
-
-def test_suite():
-    testmod_names = [
-        'bzrlib.tests.compiled.test_dirstate_helpers',
-    ]
-
-    loader = tests.TestLoader()
-    suite = loader.loadTestsFromModuleNames(testmod_names)
-    return suite

=== added file 'bzrlib/_dirstate_helpers_py.py'
--- a/bzrlib/_dirstate_helpers_py.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/_dirstate_helpers_py.py	2007-07-10 14:51:23 +0000
@@ -0,0 +1,171 @@
+# Copyright (C) 2007 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+"""Python implementations of Dirstate Helper functions."""
+
+
+def bisect_dirblock_py(dirblocks, dirname, lo=0, hi=None, cache={}):
+    """Return the index where to insert dirname into the dirblocks.
+
+    The return value idx is such that all directory blocks in dirblocks[:idx]
+    have names < dirname, and all blocks in dirblocks[idx:] have names >=
+    dirname.
+
+    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
+    slice of dirblocks to be searched.
+    """
+    if hi is None:
+        hi = len(dirblocks)
+    try:
+        dirname_split = cache[dirname]
+    except KeyError:
+        dirname_split = dirname.split('/')
+        cache[dirname] = dirname_split
+    while lo < hi:
+        mid = (lo+hi)//2
+        # Grab the dirname for the current dirblock
+        cur = dirblocks[mid][0]
+        try:
+            cur_split = cache[cur]
+        except KeyError:
+            cur_split = cur.split('/')
+            cache[cur] = cur_split
+        if cur_split < dirname_split: lo = mid+1
+        else: hi = mid
+    return lo
+
+
+def _read_dirblocks_py(state):
+    """Read in the dirblocks for the given DirState object.
+
+    This is tightly bound to the DirState internal representation. It should be
+    thought of as a member function, which is only separated out so that we can
+    re-write it in pyrex.
+
+    :param state: A DirState object.
+    :return: None
+    """
+    state._state_file.seek(state._end_of_header)
+    text = state._state_file.read()
+    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
+
+    fields = text.split('\0')
+    # Remove the last blank entry
+    trailing = fields.pop()
+    assert trailing == ''
+    # consider turning fields into a tuple.
+
+    # skip the first field which is the trailing null from the header.
+    cur = 1
+    # Each line now has an extra '\n' field which is not used
+    # so we just skip over it
+    # entry size:
+    #  3 fields for the key
+    #  + number of fields per tree_data (5) * tree count
+    #  + newline
+    num_present_parents = state._num_present_parents()
+    tree_count = 1 + num_present_parents
+    entry_size = state._fields_per_entry()
+    expected_field_count = entry_size * state._num_entries
+    field_count = len(fields)
+    # this checks our adjustment, and also catches file too short.
+    assert field_count - cur == expected_field_count, \
+        'field count incorrect %s != %s, entry_size=%s, '\
+        'num_entries=%s fields=%r' % (
+            field_count - cur, expected_field_count, entry_size,
+            state._num_entries, fields)
+
+    if num_present_parents == 1:
+        # Bind external functions to local names
+        _int = int
+        # We access all fields in order, so we can just iterate over
+        # them. Grab a straight iterator over the fields. (We use an
+        # iterator because we don't want to do a lot of additions, nor
+        # do we want to do a lot of slicing)
+        next = iter(fields).next
+        # Move the iterator to the current position
+        for x in xrange(cur):
+            next()
+        # The two blocks here are deliberate: the root block and the
+        # contents-of-root block.
+        state._dirblocks = [('', []), ('', [])]
+        current_block = state._dirblocks[0][1]
+        current_dirname = ''
+        append_entry = current_block.append
+        for count in xrange(state._num_entries):
+            dirname = next()
+            name = next()
+            file_id = next()
+            if dirname != current_dirname:
+                # new block - different dirname
+                current_block = []
+                current_dirname = dirname
+                state._dirblocks.append((current_dirname, current_block))
+                append_entry = current_block.append
+            # we know current_dirname == dirname, so re-use it to avoid
+            # creating new strings
+            entry = ((current_dirname, name, file_id),
+                     [(# Current Tree
+                         next(),                # minikind
+                         next(),                # fingerprint
+                         _int(next()),          # size
+                         next() == 'y',         # executable
+                         next(),                # packed_stat or revision_id
+                     ),
+                     ( # Parent 1
+                         next(),                # minikind
+                         next(),                # fingerprint
+                         _int(next()),          # size
+                         next() == 'y',         # executable
+                         next(),                # packed_stat or revision_id
+                     ),
+                     ])
+            trailing = next()
+            assert trailing == '\n'
+            # append the entry to the current block
+            append_entry(entry)
+        state._split_root_dirblock_into_contents()
+    else:
+        fields_to_entry = state._get_fields_to_entry()
+        entries = [fields_to_entry(fields[pos:pos+entry_size])
+                   for pos in xrange(cur, field_count, entry_size)]
+        state._entries_to_current_state(entries)
+    # To convert from format 2  => format 3
+    # state._dirblocks = sorted(state._dirblocks,
+    #                          key=lambda blk:blk[0].split('/'))
+    # To convert from format 3 => format 2
+    # state._dirblocks = sorted(state._dirblocks)
+    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
+
+
+def cmp_by_dirs_py(path1, path2):
+    """Compare two paths directory by directory.
+
+    This is equivalent to doing::
+
+       cmp(path1.split('/'), path2.split('/'))
+
+    The idea is that you should compare path components separately. This
+    differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
+    ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.
+
+    :param path1: first path
+    :param path2: second path
+    :return: negative number if ``path1`` comes first,
+        0 if paths are equal,
+        and positive number if ``path2`` sorts first
+    """
+    return cmp(path1.split('/'), path2.split('/'))

=== renamed file 'bzrlib/compiled/dirstate_helpers.pyx' => 'bzrlib/_dirstate_helpers_c.pyx'
=== renamed file 'bzrlib/tests/compiled/test_dirstate_helpers.py' => 'bzrlib/tests/test__dirstate_helpers.py'
--- a/bzrlib/tests/compiled/test_dirstate_helpers.py	2007-05-07 20:38:16 +0000
+++ b/bzrlib/tests/test__dirstate_helpers.py	2007-07-10 14:51:23 +0000
@@ -16,38 +16,227 @@
 
 """Tests for the compiled dirstate helpers."""
 
+import bisect
+
 from bzrlib import (
     tests,
     )
-try:
-    from bzrlib.compiled import dirstate_helpers
-except ImportError:
-    have_dirstate_helpers = False
-else:
-    have_dirstate_helpers = True
-from bzrlib.tests import test_dirstate
 
 
 class _CompiledDirstateHelpersFeature(tests.Feature):
     def _probe(self):
-        return have_dirstate_helpers
+        try:
+            import bzrlib._dirstate_helpers_c
+        except ImportError:
+            return False
+        return True
 
     def feature_name(self):
-        return 'bzrlib.compiled.dirstate_helpers'
+        return 'bzrlib._dirstate_helpers_c'
 
 CompiledDirstateHelpersFeature = _CompiledDirstateHelpersFeature()
 
 
-class TestCCmpByDirs(test_dirstate.TestCmpByDirs):
+class TestCmpByDirs(tests.TestCase):
+
+    def get_cmp_by_dirs(self):
+        """Get a specific implementation of cmp_by_dirs."""
+        from bzrlib._dirstate_helpers_py import cmp_by_dirs_py
+        return cmp_by_dirs_py
+
+    def assertPositive(self, val):
+        """Assert that val is greater than 0."""
+        self.assertTrue(val > 0, 'expected a positive value, but got %s' % val)
+
+    def assertNegative(self, val):
+        """Assert that val is less than 0."""
+        self.assertTrue(val < 0, 'expected a negative value, but got %s' % val)
+
+    def assertCmpByDirs(self, expected, str1, str2):
+        """Compare the two strings, in both directions.
+
+        :param expected: The expected comparison value. -1 means str1 comes
+            first, 0 means they are equal, 1 means str2 comes first
+        :param str1: string to compare
+        :param str2: string to compare
+        """
+        cmp_by_dirs = self.get_cmp_by_dirs()
+        if expected == 0:
+            self.assertEqual(str1, str2)
+            self.assertEqual(0, cmp_by_dirs(str1, str2))
+            self.assertEqual(0, cmp_by_dirs(str2, str1))
+        elif expected > 0:
+            self.assertPositive(cmp_by_dirs(str1, str2))
+            self.assertNegative(cmp_by_dirs(str2, str1))
+        else:
+            self.assertNegative(cmp_by_dirs(str1, str2))
+            self.assertPositive(cmp_by_dirs(str2, str1))
+
+    def test_cmp_empty(self):
+        """Compare against the empty string."""
+        self.assertCmpByDirs(0, '', '')
+        self.assertCmpByDirs(1, 'a', '')
+        self.assertCmpByDirs(1, 'ab', '')
+        self.assertCmpByDirs(1, 'abc', '')
+        self.assertCmpByDirs(1, 'abcd', '')
+        self.assertCmpByDirs(1, 'abcde', '')
+        self.assertCmpByDirs(1, 'abcdef', '')
+        self.assertCmpByDirs(1, 'abcdefg', '')
+        self.assertCmpByDirs(1, 'abcdefgh', '')
+        self.assertCmpByDirs(1, 'abcdefghi', '')
+        self.assertCmpByDirs(1, 'test/ing/a/path/', '')
+
+    def test_cmp_same_str(self):
+        """Compare the same string"""
+        self.assertCmpByDirs(0, 'a', 'a')
+        self.assertCmpByDirs(0, 'ab', 'ab')
+        self.assertCmpByDirs(0, 'abc', 'abc')
+        self.assertCmpByDirs(0, 'abcd', 'abcd')
+        self.assertCmpByDirs(0, 'abcde', 'abcde')
+        self.assertCmpByDirs(0, 'abcdef', 'abcdef')
+        self.assertCmpByDirs(0, 'abcdefg', 'abcdefg')
+        self.assertCmpByDirs(0, 'abcdefgh', 'abcdefgh')
+        self.assertCmpByDirs(0, 'abcdefghi', 'abcdefghi')
+        self.assertCmpByDirs(0, 'testing a long string', 'testing a long string')
+        self.assertCmpByDirs(0, 'x'*10000, 'x'*10000)
+        self.assertCmpByDirs(0, 'a/b', 'a/b')
+        self.assertCmpByDirs(0, 'a/b/c', 'a/b/c')
+        self.assertCmpByDirs(0, 'a/b/c/d', 'a/b/c/d')
+        self.assertCmpByDirs(0, 'a/b/c/d/e', 'a/b/c/d/e')
+
+    def test_simple_paths(self):
+        """Compare strings that act like normal string comparison"""
+        self.assertCmpByDirs(-1, 'a', 'b')
+        self.assertCmpByDirs(-1, 'aa', 'ab')
+        self.assertCmpByDirs(-1, 'ab', 'bb')
+        self.assertCmpByDirs(-1, 'aaa', 'aab')
+        self.assertCmpByDirs(-1, 'aab', 'abb')
+        self.assertCmpByDirs(-1, 'abb', 'bbb')
+        self.assertCmpByDirs(-1, 'aaaa', 'aaab')
+        self.assertCmpByDirs(-1, 'aaab', 'aabb')
+        self.assertCmpByDirs(-1, 'aabb', 'abbb')
+        self.assertCmpByDirs(-1, 'abbb', 'bbbb')
+        self.assertCmpByDirs(-1, 'aaaaa', 'aaaab')
+        self.assertCmpByDirs(-1, 'a/a', 'a/b')
+        self.assertCmpByDirs(-1, 'a/b', 'b/b')
+        self.assertCmpByDirs(-1, 'a/a/a', 'a/a/b')
+        self.assertCmpByDirs(-1, 'a/a/b', 'a/b/b')
+        self.assertCmpByDirs(-1, 'a/b/b', 'b/b/b')
+        self.assertCmpByDirs(-1, 'a/a/a/a', 'a/a/a/b')
+        self.assertCmpByDirs(-1, 'a/a/a/b', 'a/a/b/b')
+        self.assertCmpByDirs(-1, 'a/a/b/b', 'a/b/b/b')
+        self.assertCmpByDirs(-1, 'a/b/b/b', 'b/b/b/b')
+        self.assertCmpByDirs(-1, 'a/a/a/a/a', 'a/a/a/a/b')
+
+    def test_tricky_paths(self):
+        self.assertCmpByDirs(1, 'ab/cd/ef', 'ab/cc/ef')
+        self.assertCmpByDirs(1, 'ab/cd/ef', 'ab/c/ef')
+        self.assertCmpByDirs(-1, 'ab/cd/ef', 'ab/cd-ef')
+        self.assertCmpByDirs(-1, 'ab/cd', 'ab/cd-')
+        self.assertCmpByDirs(-1, 'ab/cd', 'ab-cd')
+
+
+class TestCCmpByDirs(TestCmpByDirs):
     """Test the C implementation of cmp_by_dirs"""
 
     _test_needs_features = [CompiledDirstateHelpersFeature]
 
     def get_cmp_by_dirs(self):
-        return dirstate_helpers.cmp_by_dirs_c
-
-
-class TestCompiledBisectDirblock(test_dirstate.TestBisectDirblock):
+        from bzrlib._dirstate_helpers_c import cmp_by_dirs_c
+        return cmp_by_dirs_c
+
+
+class TestBisectDirblock(tests.TestCase):
+    """Test that bisect_dirblock() returns the expected values.
+
+    bisect_dirblock is intended to work like bisect.bisect_left() except it
+    knows it is working on dirblocks and that dirblocks are sorted by ('path',
+    'to', 'foo') chunks rather than by raw 'path/to/foo'.
+    """
+
+    def get_bisect_dirblock(self):
+        """Return an implementation of bisect_dirblock"""
+        from bzrlib._dirstate_helpers_py import bisect_dirblock_py
+        return bisect_dirblock_py
+
+    def assertBisect(self, dirblocks, split_dirblocks, path, *args, **kwargs):
+        """Assert that bisect_split works like bisect_left on the split paths.
+
+        :param dirblocks: A list of (path, [info]) pairs.
+        :param split_dirblocks: A list of ((split, path), [info]) pairs.
+        :param path: The path we are indexing.
+
+        All other arguments will be passed along.
+        """
+        bisect_dirblock = self.get_bisect_dirblock()
+        self.assertIsInstance(dirblocks, list)
+        bisect_split_idx = bisect_dirblock(dirblocks, path, *args, **kwargs)
+        split_dirblock = (path.split('/'), [])
+        bisect_left_idx = bisect.bisect_left(split_dirblocks, split_dirblock,
+                                             *args)
+        self.assertEqual(bisect_left_idx, bisect_split_idx,
+                         'bisect_split disagreed. %s != %s'
+                         ' for key %s'
+                         % (bisect_left_idx, bisect_split_idx, path)
+                         )
+
+    def paths_to_dirblocks(self, paths):
+        """Convert a list of paths into dirblock form.
+
+        Also, ensure that the paths are in proper sorted order.
+        """
+        dirblocks = [(path, []) for path in paths]
+        split_dirblocks = [(path.split('/'), []) for path in paths]
+        self.assertEqual(sorted(split_dirblocks), split_dirblocks)
+        return dirblocks, split_dirblocks
+
+    def test_simple(self):
+        """In the simple case it works just like bisect_left"""
+        paths = ['', 'a', 'b', 'c', 'd']
+        dirblocks, split_dirblocks = self.paths_to_dirblocks(paths)
+        for path in paths:
+            self.assertBisect(dirblocks, split_dirblocks, path)
+        self.assertBisect(dirblocks, split_dirblocks, '_')
+        self.assertBisect(dirblocks, split_dirblocks, 'aa')
+        self.assertBisect(dirblocks, split_dirblocks, 'bb')
+        self.assertBisect(dirblocks, split_dirblocks, 'cc')
+        self.assertBisect(dirblocks, split_dirblocks, 'dd')
+        self.assertBisect(dirblocks, split_dirblocks, 'a/a')
+        self.assertBisect(dirblocks, split_dirblocks, 'b/b')
+        self.assertBisect(dirblocks, split_dirblocks, 'c/c')
+        self.assertBisect(dirblocks, split_dirblocks, 'd/d')
+
+    def test_involved(self):
+        """This is where bisect_left diverges slightly."""
+        paths = ['', 'a',
+                 'a/a', 'a/a/a', 'a/a/z', 'a/a-a', 'a/a-z',
+                 'a/z', 'a/z/a', 'a/z/z', 'a/z-a', 'a/z-z',
+                 'a-a', 'a-z',
+                 'z', 'z/a/a', 'z/a/z', 'z/a-a', 'z/a-z',
+                 'z/z', 'z/z/a', 'z/z/z', 'z/z-a', 'z/z-z',
+                 'z-a', 'z-z',
+                ]
+        dirblocks, split_dirblocks = self.paths_to_dirblocks(paths)
+        for path in paths:
+            self.assertBisect(dirblocks, split_dirblocks, path)
+
+    def test_involved_cached(self):
+        """Like test_involved, but passing an explicit cache dict."""
+        paths = ['', 'a',
+                 'a/a', 'a/a/a', 'a/a/z', 'a/a-a', 'a/a-z',
+                 'a/z', 'a/z/a', 'a/z/z', 'a/z-a', 'a/z-z',
+                 'a-a', 'a-z',
+                 'z', 'z/a/a', 'z/a/z', 'z/a-a', 'z/a-z',
+                 'z/z', 'z/z/a', 'z/z/z', 'z/z-a', 'z/z-z',
+                 'z-a', 'z-z',
+                ]
+        cache = {}
+        dirblocks, split_dirblocks = self.paths_to_dirblocks(paths)
+        for path in paths:
+            self.assertBisect(dirblocks, split_dirblocks, path, cache=cache)
+
+
+class TestCompiledBisectDirblock(TestBisectDirblock):
     """Test that bisect_dirblock() returns the expected values.
 
     bisect_dirblock is intended to work like bisect.bisect_left() except it
@@ -61,4 +250,5 @@
     _test_needs_features = [CompiledDirstateHelpersFeature]
 
     def get_bisect_dirblock(self):
-        return dirstate_helpers.bisect_dirblock_c
+        from bzrlib._dirstate_helpers_c import bisect_dirblock_c
+        return bisect_dirblock_c

=== modified file '.bzrignore'
--- a/.bzrignore	2007-05-03 23:33:14 +0000
+++ b/.bzrignore	2007-07-10 14:51:23 +0000
@@ -34,4 +34,4 @@
 ./html_docs
 ./pretty_docs
 ./api
-bzrlib/compiled/*.c
+bzrlib/_dirstate_helpers_c.c

=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py	2007-05-07 20:38:16 +0000
+++ b/bzrlib/dirstate.py	2007-07-10 14:51:23 +0000
@@ -200,8 +200,8 @@
 """
 
 
+import bisect
 import binascii
-import bisect
 import errno
 import os
 from stat import S_IEXEC
@@ -2248,42 +2248,6 @@
             raise errors.ObjectNotLocked(self)
 
 
-def bisect_dirblock_py(dirblocks, dirname, lo=0, hi=None, cache={}):
-    """Return the index where to insert dirname into the dirblocks.
-
-    The return value idx is such that all directories blocks in dirblock[:idx]
-    have names < dirname, and all blocks in dirblock[idx:] have names >=
-    dirname.
-
-    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
-    slice of a to be searched.
-    """
-    if hi is None:
-        hi = len(dirblocks)
-    try:
-        dirname_split = cache[dirname]
-    except KeyError:
-        dirname_split = dirname.split('/')
-        cache[dirname] = dirname_split
-    while lo < hi:
-        mid = (lo+hi)//2
-        # Grab the dirname for the current dirblock
-        cur = dirblocks[mid][0]
-        try:
-            cur_split = cache[cur]
-        except KeyError:
-            cur_split = cur.split('/')
-            cache[cur] = cur_split
-        if cur_split < dirname_split: lo = mid+1
-        else: hi = mid
-    return lo
-
-# This is the function that will be used
-# But it may be overridden by the compiled version
-bisect_dirblock = bisect_dirblock_py
-
-
-
 def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
     """Convert stat values into a packed representation."""
     # jam 20060614 it isn't really worth removing more entries if we
@@ -2298,146 +2262,16 @@
         , st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
 
 
-
-def _read_dirblocks_py(state):
-    """Read in the dirblocks for the given DirState object.
-
-    This is tightly bound to the DirState internal representation. It should be
-    thought of as a member function, which is only separated out so that we can
-    re-write it in pyrex.
-
-    :param state: A DirState object.
-    :return: None
-    """
-    state._state_file.seek(state._end_of_header)
-    text = state._state_file.read()
-    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
-
-    fields = text.split('\0')
-    # Remove the last blank entry
-    trailing = fields.pop()
-    assert trailing == ''
-    # consider turning fields into a tuple.
-
-    # skip the first field which is the trailing null from the header.
-    cur = 1
-    # Each line now has an extra '\n' field which is not used
-    # so we just skip over it
-    # entry size:
-    #  3 fields for the key
-    #  + number of fields per tree_data (5) * tree count
-    #  + newline
-    num_present_parents = state._num_present_parents()
-    tree_count = 1 + num_present_parents
-    entry_size = state._fields_per_entry()
-    expected_field_count = entry_size * state._num_entries
-    field_count = len(fields)
-    # this checks our adjustment, and also catches file too short.
-    assert field_count - cur == expected_field_count, \
-        'field count incorrect %s != %s, entry_size=%s, '\
-        'num_entries=%s fields=%r' % (
-            field_count - cur, expected_field_count, entry_size,
-            state._num_entries, fields)
-
-    if num_present_parents == 1:
-        # Bind external functions to local names
-        _int = int
-        # We access all fields in order, so we can just iterate over
-        # them. Grab an straight iterator over the fields. (We use an
-        # iterator because we don't want to do a lot of additions, nor
-        # do we want to do a lot of slicing)
-        next = iter(fields).next
-        # Move the iterator to the current position
-        for x in xrange(cur):
-            next()
-        # The two blocks here are deliberate: the root block and the
-        # contents-of-root block.
-        state._dirblocks = [('', []), ('', [])]
-        current_block = state._dirblocks[0][1]
-        current_dirname = ''
-        append_entry = current_block.append
-        for count in xrange(state._num_entries):
-            dirname = next()
-            name = next()
-            file_id = next()
-            if dirname != current_dirname:
-                # new block - different dirname
-                current_block = []
-                current_dirname = dirname
-                state._dirblocks.append((current_dirname, current_block))
-                append_entry = current_block.append
-            # we know current_dirname == dirname, so re-use it to avoid
-            # creating new strings
-            entry = ((current_dirname, name, file_id),
-                     [(# Current Tree
-                         next(),                # minikind
-                         next(),                # fingerprint
-                         _int(next()),          # size
-                         next() == 'y',         # executable
-                         next(),                # packed_stat or revision_id
-                     ),
-                     ( # Parent 1
-                         next(),                # minikind
-                         next(),                # fingerprint
-                         _int(next()),          # size
-                         next() == 'y',         # executable
-                         next(),                # packed_stat or revision_id
-                     ),
-                     ])
-            trailing = next()
-            assert trailing == '\n'
-            # append the entry to the current block
-            append_entry(entry)
-        state._split_root_dirblock_into_contents()
-    else:
-        fields_to_entry = state._get_fields_to_entry()
-        entries = [fields_to_entry(fields[pos:pos+entry_size])
-                   for pos in xrange(cur, field_count, entry_size)]
-        state._entries_to_current_state(entries)
-    # To convert from format 2  => format 3
-    # state._dirblocks = sorted(state._dirblocks,
-    #                          key=lambda blk:blk[0].split('/'))
-    # To convert from format 3 => format 2
-    # state._dirblocks = sorted(state._dirblocks)
-    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
-
-_read_dirblocks = _read_dirblocks_py
-
-
-def cmp_by_dirs_py(path1, path2):
-    """Compare two paths directory by directory.
-
-    This is equivalent to doing::
-
-       cmp(path1.split('/'), path2.split('/'))
-
-    The idea is that you should compare path components separately. This
-    differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
-    ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.
-
-    :param path1: first path
-    :param path2: second path
-    :return: positive number if ``path1`` comes first,
-        0 if paths are equal,
-        and negative number if ``path2`` sorts first
-    """
-    return cmp(path1.split('/'), path2.split('/'))
-
-cmp_by_dirs = cmp_by_dirs_py
-
-
 # Try to load the compiled form if possible
-# TODO: jam 20070503 We should have a way to run tests with and without the
-#       compiled extensions.
 try:
-    from bzrlib.compiled.dirstate_helpers import (
-        _read_dirblocks_c,
-        bisect_dirblock_c,
-        cmp_by_dirs_c,
+    from bzrlib._dirstate_helpers_c import (
+        _read_dirblocks_c as _read_dirblocks,
+        bisect_dirblock_c as bisect_dirblock,
+        cmp_by_dirs_c as cmp_by_dirs,
         )
 except ImportError:
-    pass
-else:
-    _read_dirblocks = _read_dirblocks_c
-    bisect_dirblock = bisect_dirblock_c
-    cmp_by_dirs = cmp_by_dirs_c
+    from bzrlib._dirstate_helpers_py import (
+        _read_dirblocks_py as _read_dirblocks,
+        bisect_dirblock_py as bisect_dirblock,
+        cmp_by_dirs_py as cmp_by_dirs,
+        )

=== modified file 'bzrlib/tests/__init__.py'
--- a/bzrlib/tests/__init__.py	2007-05-07 18:24:49 +0000
+++ b/bzrlib/tests/__init__.py	2007-07-10 14:51:23 +0000
@@ -121,7 +121,6 @@
     import bzrlib.tests.blackbox
     import bzrlib.tests.branch_implementations
     import bzrlib.tests.bzrdir_implementations
-    import bzrlib.tests.compiled
     import bzrlib.tests.interrepository_implementations
     import bzrlib.tests.interversionedfile_implementations
     import bzrlib.tests.intertree_implementations
@@ -135,7 +134,6 @@
             bzrlib.tests.blackbox,
             bzrlib.tests.branch_implementations,
             bzrlib.tests.bzrdir_implementations,
-            bzrlib.tests.compiled,
             bzrlib.tests.interrepository_implementations,
             bzrlib.tests.interversionedfile_implementations,
             bzrlib.tests.intertree_implementations,
@@ -2264,6 +2262,7 @@
     suite on a global basis, but it is not encouraged.
     """
     testmod_names = [
+                   'bzrlib.tests.test__dirstate_helpers',
                    'bzrlib.tests.test_ancestry',
                    'bzrlib.tests.test_annotate',
                    'bzrlib.tests.test_api',

=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py	2007-05-07 20:38:16 +0000
+++ b/bzrlib/tests/test_dirstate.py	2007-07-10 14:51:23 +0000
@@ -1968,95 +1968,6 @@
                                    state, ['b'])
 
 
-class TestBisectDirblock(TestCase):
-    """Test that bisect_dirblock() returns the expected values.
-
-    bisect_dirblock is intended to work like bisect.bisect_left() except it
-    knows it is working on dirblocks and that dirblocks are sorted by ('path',
-    'to', 'foo') chunks rather than by raw 'path/to/foo'.
-    """
-
-    def get_bisect_dirblock(self):
-        """Return an implementation of bisect_dirblock"""
-        return dirstate.bisect_dirblock_py
-
-    def assertBisect(self, dirblocks, split_dirblocks, path, *args, **kwargs):
-        """Assert that bisect_split works like bisect_left on the split paths.
-
-        :param dirblocks: A list of (path, [info]) pairs.
-        :param split_dirblocks: A list of ((split, path), [info]) pairs.
-        :param path: The path we are indexing.
-
-        All other arguments will be passed along.
-        """
-        bisect_dirblock = self.get_bisect_dirblock()
-        self.assertIsInstance(dirblocks, list)
-        bisect_split_idx = bisect_dirblock(dirblocks, path, *args, **kwargs)
-        split_dirblock = (path.split('/'), [])
-        bisect_left_idx = bisect.bisect_left(split_dirblocks, split_dirblock,
-                                             *args)
-        self.assertEqual(bisect_left_idx, bisect_split_idx,
-                         'bisect_split disagreed. %s != %s'
-                         ' for key %s'
-                         % (bisect_left_idx, bisect_split_idx, path)
-                         )
-
-    def paths_to_dirblocks(self, paths):
-        """Convert a list of paths into dirblock form.
-
-        Also, ensure that the paths are in proper sorted order.
-        """
-        dirblocks = [(path, []) for path in paths]
-        split_dirblocks = [(path.split('/'), []) for path in paths]
-        self.assertEqual(sorted(split_dirblocks), split_dirblocks)
-        return dirblocks, split_dirblocks
-
-    def test_simple(self):
-        """In the simple case it works just like bisect_left"""
-        paths = ['', 'a', 'b', 'c', 'd']
-        dirblocks, split_dirblocks = self.paths_to_dirblocks(paths)
-        for path in paths:
-            self.assertBisect(dirblocks, split_dirblocks, path)
-        self.assertBisect(dirblocks, split_dirblocks, '_')
-        self.assertBisect(dirblocks, split_dirblocks, 'aa')
-        self.assertBisect(dirblocks, split_dirblocks, 'bb')
-        self.assertBisect(dirblocks, split_dirblocks, 'cc')
-        self.assertBisect(dirblocks, split_dirblocks, 'dd')
-        self.assertBisect(dirblocks, split_dirblocks, 'a/a')
-        self.assertBisect(dirblocks, split_dirblocks, 'b/b')
-        self.assertBisect(dirblocks, split_dirblocks, 'c/c')
-        self.assertBisect(dirblocks, split_dirblocks, 'd/d')
-
-    def test_involved(self):
-        """This is where bisect_left diverges slightly."""
-        paths = ['', 'a',
-                 'a/a', 'a/a/a', 'a/a/z', 'a/a-a', 'a/a-z',
-                 'a/z', 'a/z/a', 'a/z/z', 'a/z-a', 'a/z-z',
-                 'a-a', 'a-z',
-                 'z', 'z/a/a', 'z/a/z', 'z/a-a', 'z/a-z',
-                 'z/z', 'z/z/a', 'z/z/z', 'z/z-a', 'z/z-z',
-                 'z-a', 'z-z',
-                ]
-        dirblocks, split_dirblocks = self.paths_to_dirblocks(paths)
-        for path in paths:
-            self.assertBisect(dirblocks, split_dirblocks, path)
-
-    def test_involved_cached(self):
-        """This is where bisect_left diverges slightly."""
-        paths = ['', 'a',
-                 'a/a', 'a/a/a', 'a/a/z', 'a/a-a', 'a/a-z',
-                 'a/z', 'a/z/a', 'a/z/z', 'a/z-a', 'a/z-z',
-                 'a-a', 'a-z',
-                 'z', 'z/a/a', 'z/a/z', 'z/a-a', 'z/a-z',
-                 'z/z', 'z/z/a', 'z/z/z', 'z/z-a', 'z/z-z',
-                 'z-a', 'z-z',
-                ]
-        cache = {}
-        dirblocks, split_dirblocks = self.paths_to_dirblocks(paths)
-        for path in paths:
-            self.assertBisect(dirblocks, split_dirblocks, path, cache=cache)
-
-
 class TestDirstateValidation(TestCaseWithDirState):
 
     def test_validate_correct_dirstate(self):
@@ -2114,101 +2025,3 @@
             'file a-id is absent in row')
 
 
-class TestCmpByDirs(TestCase):
-
-    def get_cmp_by_dirs(self):
-        """Get a specific implementation of cmp_by_dirs."""
-        return dirstate.cmp_by_dirs_py
-
-    def assertPositive(self, val):
-        """Assert that val is greater than 0."""
-        self.assertTrue(val > 0, 'expected a positive value, but got %s' % val)
-
-    def assertNegative(self, val):
-        """Assert that val is less than 0."""
-        self.assertTrue(val < 0, 'expected a negative value, but got %s' % val)
-
-    def assertCmpByDirs(self, expected, str1, str2):
-        """Compare the two strings, in both directions.
-
-        :param expected: The expected comparison value. -1 means str1 comes
-            first, 0 means they are equal, 1 means str2 comes first
-        :param str1: string to compare
-        :param str2: string to compare
-        """
-        cmp_by_dirs = self.get_cmp_by_dirs()
-        if expected == 0:
-            self.assertEqual(str1, str2)
-            self.assertEqual(0, cmp_by_dirs(str1, str2))
-            self.assertEqual(0, cmp_by_dirs(str2, str1))
-        elif expected > 0:
-            self.assertPositive(cmp_by_dirs(str1, str2))
-            self.assertNegative(cmp_by_dirs(str2, str1))
-        else:
-            self.assertNegative(cmp_by_dirs(str1, str2))
-            self.assertPositive(cmp_by_dirs(str2, str1))
-
-    def test_cmp_empty(self):
-        """Compare against the empty string."""
-        self.assertCmpByDirs(0, '', '')
-        self.assertCmpByDirs(1, 'a', '')
-        self.assertCmpByDirs(1, 'ab', '')
-        self.assertCmpByDirs(1, 'abc', '')
-        self.assertCmpByDirs(1, 'abcd', '')
-        self.assertCmpByDirs(1, 'abcde', '')
-        self.assertCmpByDirs(1, 'abcdef', '')
-        self.assertCmpByDirs(1, 'abcdefg', '')
-        self.assertCmpByDirs(1, 'abcdefgh', '')
-        self.assertCmpByDirs(1, 'abcdefghi', '')
-        self.assertCmpByDirs(1, 'test/ing/a/path/', '')
-
-    def test_cmp_same_str(self):
-        """Compare the same string"""
-        self.assertCmpByDirs(0, 'a', 'a')
-        self.assertCmpByDirs(0, 'ab', 'ab')
-        self.assertCmpByDirs(0, 'abc', 'abc')
-        self.assertCmpByDirs(0, 'abcd', 'abcd')
-        self.assertCmpByDirs(0, 'abcde', 'abcde')
-        self.assertCmpByDirs(0, 'abcdef', 'abcdef')
-        self.assertCmpByDirs(0, 'abcdefg', 'abcdefg')
-        self.assertCmpByDirs(0, 'abcdefgh', 'abcdefgh')
-        self.assertCmpByDirs(0, 'abcdefghi', 'abcdefghi')
-        self.assertCmpByDirs(0, 'testing a long string', 'testing a long string')
-        self.assertCmpByDirs(0, 'x'*10000, 'x'*10000)
-        self.assertCmpByDirs(0, 'a/b', 'a/b')
-        self.assertCmpByDirs(0, 'a/b/c', 'a/b/c')
-        self.assertCmpByDirs(0, 'a/b/c/d', 'a/b/c/d')
-        self.assertCmpByDirs(0, 'a/b/c/d/e', 'a/b/c/d/e')
-
-    def test_simple_paths(self):
-        """Compare strings that act like normal string comparison"""
-        self.assertCmpByDirs(-1, 'a', 'b')
-        self.assertCmpByDirs(-1, 'aa', 'ab')
-        self.assertCmpByDirs(-1, 'ab', 'bb')
-        self.assertCmpByDirs(-1, 'aaa', 'aab')
-        self.assertCmpByDirs(-1, 'aab', 'abb')
-        self.assertCmpByDirs(-1, 'abb', 'bbb')
-        self.assertCmpByDirs(-1, 'aaaa', 'aaab')
-        self.assertCmpByDirs(-1, 'aaab', 'aabb')
-        self.assertCmpByDirs(-1, 'aabb', 'abbb')
-        self.assertCmpByDirs(-1, 'abbb', 'bbbb')
-        self.assertCmpByDirs(-1, 'aaaaa', 'aaaab')
-        self.assertCmpByDirs(-1, 'a/a', 'a/b')
-        self.assertCmpByDirs(-1, 'a/b', 'b/b')
-        self.assertCmpByDirs(-1, 'a/a/a', 'a/a/b')
-        self.assertCmpByDirs(-1, 'a/a/b', 'a/b/b')
-        self.assertCmpByDirs(-1, 'a/b/b', 'b/b/b')
-        self.assertCmpByDirs(-1, 'a/a/a/a', 'a/a/a/b')
-        self.assertCmpByDirs(-1, 'a/a/a/b', 'a/a/b/b')
-        self.assertCmpByDirs(-1, 'a/a/b/b', 'a/b/b/b')
-        self.assertCmpByDirs(-1, 'a/b/b/b', 'b/b/b/b')
-        self.assertCmpByDirs(-1, 'a/a/a/a/a', 'a/a/a/a/b')
-
-    def test_tricky_paths(self):
-        self.assertCmpByDirs(1, 'ab/cd/ef', 'ab/cc/ef')
-        self.assertCmpByDirs(1, 'ab/cd/ef', 'ab/c/ef')
-        self.assertCmpByDirs(-1, 'ab/cd/ef', 'ab/cd-ef')
-        self.assertCmpByDirs(-1, 'ab/cd', 'ab/cd-')
-        self.assertCmpByDirs(-1, 'ab/cd', 'ab-cd')
-
-

=== modified file 'setup.py'
--- a/setup.py	2007-05-03 20:11:37 +0000
+++ b/setup.py	2007-07-10 14:51:23 +0000
@@ -159,16 +159,16 @@
     from distutils.command.build_ext import build_ext
     from distutils.extension import Extension
     ext_modules.extend([
-        Extension("bzrlib.compiled.dirstate_helpers",
-                  ["bzrlib/compiled/dirstate_helpers.c"],
+        Extension("bzrlib._dirstate_helpers_c",
+                  ["bzrlib/_dirstate_helpers_c.c"],
                   libraries=[],
                   ),
     ])
 else:
     from distutils.extension import Extension
     ext_modules.extend([
-        Extension("bzrlib.compiled.dirstate_helpers",
-                  ["bzrlib/compiled/dirstate_helpers.pyx"],
+        Extension("bzrlib._dirstate_helpers_c",
+                  ["bzrlib/_dirstate_helpers_c.pyx"],
                   libraries=[],
                   ),
     ])



More information about the bazaar-commits mailing list