Rev 2699: Merge knit pack access branch. in http://people.ubuntu.com/~robertc/baz2.0/repository

Robert Collins robertc at robertcollins.net
Fri Aug 3 01:29:14 BST 2007


At http://people.ubuntu.com/~robertc/baz2.0/repository

------------------------------------------------------------
revno: 2699
revision-id: robertc at robertcollins.net-20070803002910-4w0bilw5cgrxvybk
parent: robertc at robertcollins.net-20070803002352-l5kgfk9bxqg32pqb
parent: robertc at robertcollins.net-20070802234357-tms20bm74u7lwzwr
committer: Robert Collins <robertc at robertcollins.net>
branch nick: repository
timestamp: Fri 2007-08-03 10:29:10 +1000
message:
  Merge knit pack access branch.
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
  bzrlib/tests/test_knit.py      test_knit.py-20051212171302-95d4c00dd5f11f2b
    ------------------------------------------------------------
    revno: 2592.1.25.2.7.1.28.1.6.1.3.1.11
    revision-id: robertc at robertcollins.net-20070802234357-tms20bm74u7lwzwr
    parent: robertc at robertcollins.net-20070802230604-pbpbsl4y1wyr1dvm
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: knits
    timestamp: Fri 2007-08-03 09:43:57 +1000
    message:
      * In ``bzrlib.knit`` the internal interface has been altered to use
        3-tuples (index, pos, length) rather than 2-tuples (pos, length) to
        describe where a record's data is stored in a knit, allowing knits to
        be split into many files. (Robert Collins)
      
      * ``bzrlib.knit._KnitData`` has been split into cache management and
        physical access, with two access classes defined - ``_PackAccess`` and
        ``_KnitAccess``. The former provides access into a .pack file, and the
        latter provides the current production repository form of .knit files.
        (Robert Collins)
    modified:
      NEWS                           NEWS-20050323055033-4e00b5db738777ff
      bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
      bzrlib/tests/test_knit.py      test_knit.py-20051212171302-95d4c00dd5f11f2b
    ------------------------------------------------------------
    revno: 2592.1.25.2.7.1.28.1.6.1.3.1.10
    revision-id: robertc at robertcollins.net-20070802230604-pbpbsl4y1wyr1dvm
    parent: pqm at pqm.ubuntu.com-20070802221338-9333q05a8caaciwo
    parent: robertc at robertcollins.net-20070801075314-2maihdqr02hah1t3
    committer: Robert Collins <robertc at robertcollins.net>
    branch nick: knits
    timestamp: Fri 2007-08-03 09:06:04 +1000
    message:
      Pull in pre-requisite index changes.
    modified:
      bzrlib/index.py                index.py-20070712131115-lolkarso50vjr64s-1
      bzrlib/knit.py                 knit.py-20051212171256-f056ac8f0fbe1bd9
      bzrlib/tests/test_index.py     test_index.py-20070712131115-lolkarso50vjr64s-2
      bzrlib/tests/test_knit.py      test_knit.py-20051212171302-95d4c00dd5f11f2b
=== modified file 'NEWS'
--- a/NEWS	2007-08-03 00:23:52 +0000
+++ b/NEWS	2007-08-03 00:29:10 +0000
@@ -195,6 +195,16 @@
     * ``bzrlib.pack.make_readv_reader`` allows readv based access to pack
       files that are stored on a transport. (Robert Collins)
 
+    * In ``bzrlib.knit`` the internal interface has been altered to use
+      3-tuples (index, pos, length) rather than two-tuples (pos, length) to
+      describe where data in a knit is, allowing knits to be split into 
+      many files. (Robert Collins)
+
+    * ``bzrlib.knit._KnitData`` split into cache management and physical access
+      with two access classes - ``_PackAccess`` and ``_KnitAccess`` defined.
+      The former provides access into a .pack file, and the latter provides the
+      current production repository form of .knit files. (Robert Collins)
+
     * New methods on Repository - ``start_write_group``,
       ``commit_write_group``, ``abort_write_group`` and ``is_in_write_group`` -
       which provide a clean hook point for transactional Repositories - ones

=== modified file 'bzrlib/knit.py'
--- a/bzrlib/knit.py	2007-08-02 11:45:09 +0000
+++ b/bzrlib/knit.py	2007-08-03 00:29:10 +0000
@@ -556,7 +556,7 @@
         assert set(current_values[4]).difference(set(new_parents)) == set()
         self._index.add_version(version_id,
                                 current_values[1],
-                                (current_values[2], current_values[3]),
+                                (None, current_values[2], current_values[3]),
                                 new_parents)
 
     def _extract_blocks(self, version_id, source, target):
@@ -1322,7 +1322,14 @@
         return version_id in self._cache
 
     def get_position(self, version_id):
-        """Return data position and size of specified version."""
+        """Return details needed to access the version.
+        
+        .kndx indices do not support split-out data, so return None for the 
+        index field.
+
+        :return: a tuple (None, data position, size) to hand to the access
+            logic to get the record.
+        """
         entry = self._cache[version_id]
         return None, entry[2], entry[3]
 
@@ -1630,11 +1637,7 @@
 
         keys = {}
         for (version_id, options, access_memo, parents) in versions:
-            # index keys are tuples:
-            try:
-                pos, size = access_memo
-            except ValueError:
-                index, pos, size = access_memo
+            index, pos, size = access_memo
             key = (version_id, )
             parents = tuple((parent, ) for parent in parents)
             if 'no-eol' in options:
@@ -1702,10 +1705,9 @@
 
         :param sizes: An iterable containing the size of each raw data segment.
         :param raw_data: A bytestring containing the data.
-        :return: A list of memos to retrieve the record later. For the .knit access
-            method these are readv pairs - offset, length. Note that this is
-            matched to a particular index engine, so can vary between
-            access methods.
+        :return: A list of memos to retrieve the record later. Each memo is a
+            tuple - (index, pos, length), where the index field is always None
+            for the .knit access method.
         """
         assert type(raw_data) == str, \
             'data must be plain bytes was %s' % type(raw_data)
@@ -1749,9 +1751,9 @@
     def get_raw_records(self, memos_for_retrieval):
         """Get the raw bytes for a records.
 
-        :param memos_for_retrieval: An iterable containing the access method
-            specific memo for retriving the bytes. For the .knit method this is
-            a readv tuple.
+        :param memos_for_retrieval: An iterable containing the (index, pos, 
+            length) memo for retrieving the bytes. The .knit method ignores
+            the index as there is always only a single file.
         :return: An iterator over the bytes of the records.
         """
         read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]
@@ -1787,11 +1789,9 @@
 
         :param sizes: An iterable containing the size of each raw data segment.
         :param raw_data: A bytestring containing the data.
-        :return: A list of memos to retrieve the record later. For the pack
-            access method these are the pack offset, lenth pairs with a 
-            pack key of the write index.
-            Note that this is matched to a particular index engine, so can vary
-            between access methods.
+        :return: A list of memos to retrieve the record later. Each memo is a
+            tuple - (index, pos, length), where the index field is the 
+            write_index object supplied to the PackAccess object.
         """
         assert type(raw_data) == str, \
             'data must be plain bytes was %s' % type(raw_data)
@@ -1810,9 +1810,10 @@
     def get_raw_records(self, memos_for_retrieval):
         """Get the raw bytes for a records.
 
-        :param memos_for_retrieval: An iterable containing the access method
-            specific memo for retriving the bytes. For the Pack access method
-            this is a tuple (index, offset, length).
+        :param memos_for_retrieval: An iterable containing the (index, pos, 
+            length) memo for retrieving the bytes. The Pack access method
+            looks up the pack to use for a given record in its index_to_pack
+            map.
         :return: An iterator over the bytes of the records.
         """
         # first pass, group into same-index requests
@@ -1847,7 +1848,11 @@
 
 
 class _KnitData(object):
-    """Manage extraction of data from a KnitAccess, caching and decompressing."""
+    """Manage extraction of data from a KnitAccess, caching and decompressing.
+    
+    The KnitData class provides the logic for parsing and using knit records,
+    making use of an access method for the low level read and write operations.
+    """
 
     def __init__(self, access):
         """Create a KnitData object.

=== modified file 'bzrlib/tests/test_knit.py'
--- a/bzrlib/tests/test_knit.py	2007-08-02 11:45:09 +0000
+++ b/bzrlib/tests/test_knit.py	2007-08-03 00:29:10 +0000
@@ -1904,25 +1904,25 @@
     def test_add_no_callback_errors(self):
         index = self.two_graph_index()
         self.assertRaises(errors.ReadOnlyError, index.add_version,
-            'new', 'fulltext,no-eol', (50, 60), ['separate'])
+            'new', 'fulltext,no-eol', (None, 50, 60), ['separate'])
 
     def test_add_version_smoke(self):
         index = self.two_graph_index(catch_adds=True)
-        index.add_version('new', 'fulltext,no-eol', (50, 60), ['separate'])
+        index.add_version('new', 'fulltext,no-eol', (None, 50, 60), ['separate'])
         self.assertEqual([[(('new', ), 'N50 60', ((('separate',),),))]],
             self.caught_entries)
 
     def test_add_version_delta_not_delta_index(self):
         index = self.two_graph_index(catch_adds=True)
         self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'new', 'no-eol,line-delta', (0, 100), ['parent'])
+            'new', 'no-eol,line-delta', (None, 0, 100), ['parent'])
         self.assertEqual([], self.caught_entries)
 
     def test_add_version_same_dup(self):
         index = self.two_graph_index(catch_adds=True)
         # options can be spelt two different ways
-        index.add_version('tip', 'fulltext,no-eol', (0, 100), ['parent'])
-        index.add_version('tip', 'no-eol,fulltext', (0, 100), ['parent'])
+        index.add_version('tip', 'fulltext,no-eol', (None, 0, 100), ['parent'])
+        index.add_version('tip', 'no-eol,fulltext', (None, 0, 100), ['parent'])
         # but neither should have added data.
         self.assertEqual([[], []], self.caught_entries)
         
@@ -1930,26 +1930,26 @@
         index = self.two_graph_index(deltas=True, catch_adds=True)
         # change options
         self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'no-eol,line-delta', (0, 100), ['parent'])
-        self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'line-delta,no-eol', (0, 100), ['parent'])
-        self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'fulltext', (0, 100), ['parent'])
+            'tip', 'no-eol,line-delta', (None, 0, 100), ['parent'])
+        self.assertRaises(errors.KnitCorrupt, index.add_version,
+            'tip', 'line-delta,no-eol', (None, 0, 100), ['parent'])
+        self.assertRaises(errors.KnitCorrupt, index.add_version,
+            'tip', 'fulltext', (None, 0, 100), ['parent'])
         # position/length
         self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'fulltext,no-eol', (50, 100), ['parent'])
+            'tip', 'fulltext,no-eol', (None, 50, 100), ['parent'])
         self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'fulltext,no-eol', (0, 1000), ['parent'])
+            'tip', 'fulltext,no-eol', (None, 0, 1000), ['parent'])
         # parents
         self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'fulltext,no-eol', (0, 100), [])
+            'tip', 'fulltext,no-eol', (None, 0, 100), [])
         self.assertEqual([], self.caught_entries)
         
     def test_add_versions_nodeltas(self):
         index = self.two_graph_index(catch_adds=True)
         index.add_versions([
-                ('new', 'fulltext,no-eol', (50, 60), ['separate']),
-                ('new2', 'fulltext', (0, 6), ['new']),
+                ('new', 'fulltext,no-eol', (None, 50, 60), ['separate']),
+                ('new2', 'fulltext', (None, 0, 6), ['new']),
                 ])
         self.assertEqual([(('new', ), 'N50 60', ((('separate',),),)),
             (('new2', ), ' 0 6', ((('new',),),))],
@@ -1959,8 +1959,8 @@
     def test_add_versions_deltas(self):
         index = self.two_graph_index(deltas=True, catch_adds=True)
         index.add_versions([
-                ('new', 'fulltext,no-eol', (50, 60), ['separate']),
-                ('new2', 'line-delta', (0, 6), ['new']),
+                ('new', 'fulltext,no-eol', (None, 50, 60), ['separate']),
+                ('new2', 'line-delta', (None, 0, 6), ['new']),
                 ])
         self.assertEqual([(('new', ), 'N50 60', ((('separate',),), ())),
             (('new2', ), ' 0 6', ((('new',),), (('new',),), ))],
@@ -1970,14 +1970,14 @@
     def test_add_versions_delta_not_delta_index(self):
         index = self.two_graph_index(catch_adds=True)
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('new', 'no-eol,line-delta', (0, 100), ['parent'])])
+            [('new', 'no-eol,line-delta', (None, 0, 100), ['parent'])])
         self.assertEqual([], self.caught_entries)
 
     def test_add_versions_same_dup(self):
         index = self.two_graph_index(catch_adds=True)
         # options can be spelt two different ways
-        index.add_versions([('tip', 'fulltext,no-eol', (0, 100), ['parent'])])
-        index.add_versions([('tip', 'no-eol,fulltext', (0, 100), ['parent'])])
+        index.add_versions([('tip', 'fulltext,no-eol', (None, 0, 100), ['parent'])])
+        index.add_versions([('tip', 'no-eol,fulltext', (None, 0, 100), ['parent'])])
         # but neither should have added data.
         self.assertEqual([[], []], self.caught_entries)
         
@@ -1985,23 +1985,23 @@
         index = self.two_graph_index(deltas=True, catch_adds=True)
         # change options
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'no-eol,line-delta', (0, 100), ['parent'])])
-        self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'line-delta,no-eol', (0, 100), ['parent'])])
-        self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'fulltext', (0, 100), ['parent'])])
+            [('tip', 'no-eol,line-delta', (None, 0, 100), ['parent'])])
+        self.assertRaises(errors.KnitCorrupt, index.add_versions,
+            [('tip', 'line-delta,no-eol', (None, 0, 100), ['parent'])])
+        self.assertRaises(errors.KnitCorrupt, index.add_versions,
+            [('tip', 'fulltext', (None, 0, 100), ['parent'])])
         # position/length
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'fulltext,no-eol', (50, 100), ['parent'])])
+            [('tip', 'fulltext,no-eol', (None, 50, 100), ['parent'])])
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'fulltext,no-eol', (0, 1000), ['parent'])])
+            [('tip', 'fulltext,no-eol', (None, 0, 1000), ['parent'])])
         # parents
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'fulltext,no-eol', (0, 100), [])])
+            [('tip', 'fulltext,no-eol', (None, 0, 100), [])])
         # change options in the second record
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'fulltext,no-eol', (0, 100), ['parent']),
-             ('tip', 'no-eol,line-delta', (0, 100), ['parent'])])
+            [('tip', 'fulltext,no-eol', (None, 0, 100), ['parent']),
+             ('tip', 'no-eol,line-delta', (None, 0, 100), ['parent'])])
         self.assertEqual([], self.caught_entries)
 
     def test_iter_parents(self):
@@ -2173,25 +2173,25 @@
     def test_add_no_callback_errors(self):
         index = self.two_graph_index()
         self.assertRaises(errors.ReadOnlyError, index.add_version,
-            'new', 'fulltext,no-eol', (50, 60), ['separate'])
+            'new', 'fulltext,no-eol', (None, 50, 60), ['separate'])
 
     def test_add_version_smoke(self):
         index = self.two_graph_index(catch_adds=True)
-        index.add_version('new', 'fulltext,no-eol', (50, 60), [])
+        index.add_version('new', 'fulltext,no-eol', (None, 50, 60), [])
         self.assertEqual([[(('new', ), 'N50 60')]],
             self.caught_entries)
 
     def test_add_version_delta_not_delta_index(self):
         index = self.two_graph_index(catch_adds=True)
         self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'new', 'no-eol,line-delta', (0, 100), [])
+            'new', 'no-eol,line-delta', (None, 0, 100), [])
         self.assertEqual([], self.caught_entries)
 
     def test_add_version_same_dup(self):
         index = self.two_graph_index(catch_adds=True)
         # options can be spelt two different ways
-        index.add_version('tip', 'fulltext,no-eol', (0, 100), [])
-        index.add_version('tip', 'no-eol,fulltext', (0, 100), [])
+        index.add_version('tip', 'fulltext,no-eol', (None, 0, 100), [])
+        index.add_version('tip', 'no-eol,fulltext', (None, 0, 100), [])
         # but neither should have added data.
         self.assertEqual([[], []], self.caught_entries)
         
@@ -2199,26 +2199,26 @@
         index = self.two_graph_index(catch_adds=True)
         # change options
         self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'no-eol,line-delta', (0, 100), [])
-        self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'line-delta,no-eol', (0, 100), [])
-        self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'fulltext', (0, 100), [])
+            'tip', 'no-eol,line-delta', (None, 0, 100), [])
+        self.assertRaises(errors.KnitCorrupt, index.add_version,
+            'tip', 'line-delta,no-eol', (None, 0, 100), [])
+        self.assertRaises(errors.KnitCorrupt, index.add_version,
+            'tip', 'fulltext', (None, 0, 100), [])
         # position/length
         self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'fulltext,no-eol', (50, 100), [])
+            'tip', 'fulltext,no-eol', (None, 50, 100), [])
         self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'fulltext,no-eol', (0, 1000), [])
+            'tip', 'fulltext,no-eol', (None, 0, 1000), [])
         # parents
         self.assertRaises(errors.KnitCorrupt, index.add_version,
-            'tip', 'fulltext,no-eol', (0, 100), ['parent'])
+            'tip', 'fulltext,no-eol', (None, 0, 100), ['parent'])
         self.assertEqual([], self.caught_entries)
         
     def test_add_versions(self):
         index = self.two_graph_index(catch_adds=True)
         index.add_versions([
-                ('new', 'fulltext,no-eol', (50, 60), []),
-                ('new2', 'fulltext', (0, 6), []),
+                ('new', 'fulltext,no-eol', (None, 50, 60), []),
+                ('new2', 'fulltext', (None, 0, 6), []),
                 ])
         self.assertEqual([(('new', ), 'N50 60'), (('new2', ), ' 0 6')],
             sorted(self.caught_entries[0]))
@@ -2227,20 +2227,20 @@
     def test_add_versions_delta_not_delta_index(self):
         index = self.two_graph_index(catch_adds=True)
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('new', 'no-eol,line-delta', (0, 100), ['parent'])])
+            [('new', 'no-eol,line-delta', (None, 0, 100), ['parent'])])
         self.assertEqual([], self.caught_entries)
 
     def test_add_versions_parents_not_parents_index(self):
         index = self.two_graph_index(catch_adds=True)
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('new', 'no-eol,fulltext', (0, 100), ['parent'])])
+            [('new', 'no-eol,fulltext', (None, 0, 100), ['parent'])])
         self.assertEqual([], self.caught_entries)
 
     def test_add_versions_same_dup(self):
         index = self.two_graph_index(catch_adds=True)
         # options can be spelt two different ways
-        index.add_versions([('tip', 'fulltext,no-eol', (0, 100), [])])
-        index.add_versions([('tip', 'no-eol,fulltext', (0, 100), [])])
+        index.add_versions([('tip', 'fulltext,no-eol', (None, 0, 100), [])])
+        index.add_versions([('tip', 'no-eol,fulltext', (None, 0, 100), [])])
         # but neither should have added data.
         self.assertEqual([[], []], self.caught_entries)
         
@@ -2248,23 +2248,23 @@
         index = self.two_graph_index(catch_adds=True)
         # change options
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'no-eol,line-delta', (0, 100), [])])
-        self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'line-delta,no-eol', (0, 100), [])])
-        self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'fulltext', (0, 100), [])])
+            [('tip', 'no-eol,line-delta', (None, 0, 100), [])])
+        self.assertRaises(errors.KnitCorrupt, index.add_versions,
+            [('tip', 'line-delta,no-eol', (None, 0, 100), [])])
+        self.assertRaises(errors.KnitCorrupt, index.add_versions,
+            [('tip', 'fulltext', (None, 0, 100), [])])
         # position/length
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'fulltext,no-eol', (50, 100), [])])
+            [('tip', 'fulltext,no-eol', (None, 50, 100), [])])
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'fulltext,no-eol', (0, 1000), [])])
+            [('tip', 'fulltext,no-eol', (None, 0, 1000), [])])
         # parents
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'fulltext,no-eol', (0, 100), ['parent'])])
+            [('tip', 'fulltext,no-eol', (None, 0, 100), ['parent'])])
         # change options in the second record
         self.assertRaises(errors.KnitCorrupt, index.add_versions,
-            [('tip', 'fulltext,no-eol', (0, 100), []),
-             ('tip', 'no-eol,line-delta', (0, 100), [])])
+            [('tip', 'fulltext,no-eol', (None, 0, 100), []),
+             ('tip', 'no-eol,line-delta', (None, 0, 100), [])])
         self.assertEqual([], self.caught_entries)
 
     def test_iter_parents(self):



More information about the bazaar-commits mailing list