Rev 3378: Review feedback, making things more clear, adding documentation on what is used where. in http://people.ubuntu.com/~robertc/baz2.0/versioned_files
Robert Collins
robertc at robertcollins.net
Tue Jun 17 06:02:44 BST 2008
At http://people.ubuntu.com/~robertc/baz2.0/versioned_files
------------------------------------------------------------
revno: 3378
revision-id: robertc at robertcollins.net-20080617050234-knib0hpz6pjx3sl1
parent: robertc at robertcollins.net-20080612032525-hp52uf3wa7546c4u
committer: Robert Collins <robertc at robertcollins.net>
branch nick: VersionedFiles.add_api
timestamp: Tue 2008-06-17 15:02:34 +1000
message:
Review feedback, making things more clear, adding documentation on what is used where.
modified:
bzrlib/bundle/serializer/v4.py v10.py-20070611062757-5ggj7k18s9dej0fr-1
bzrlib/fetch.py fetch.py-20050818234941-26fea6105696365d
bzrlib/remote.py remote.py-20060720103555-yeeg2x51vn0rbtdp-1
bzrlib/repofmt/pack_repo.py pack_repo.py-20070813041115-gjv5ma7ktfqwsjgn-1
bzrlib/repository.py rev_storage.py-20051111201905-119e9401e46257e3
bzrlib/versionedfile.py versionedfile.py-20060222045106-5039c71ee3b65490
=== modified file 'bzrlib/bundle/serializer/v4.py'
--- a/bzrlib/bundle/serializer/v4.py 2008-06-11 04:20:16 +0000
+++ b/bzrlib/bundle/serializer/v4.py 2008-06-17 05:02:34 +0000
@@ -305,10 +305,9 @@
"""Write bundle records for all revisions of all files"""
texts = self.repository.texts
text_keys = []
- for file_id, revision_ids in \
- self.repository.fileids_altered_by_revision_ids(
- self.revision_ids).iteritems():
- revision_ids = list(revision_ids)
+ altered_fileids = self.repository.fileids_altered_by_revision_ids(
+ self.revision_ids)
+ for file_id, revision_ids in altered_fileids.iteritems():
for revision_id in revision_ids:
text_keys.append((file_id, revision_id))
self.add_mp_records_keys('file', texts, text_keys)
@@ -527,7 +526,11 @@
d_func = multiparent.MultiParent.from_patch
vf_records = []
for key, meta, text in records:
- # Adapt to tuple interface:
+ # Adapt to tuple interface: A length two key is a file_id,
+ # revision_id pair, a length 1 key is a
+ # revision/signature/inventory. We need to do this because
+ # the metadata extraction from the bundle has not yet been updated
+ # to use the consistent tuple interface itself.
if len(key) == 2:
prefix = key[:1]
else:
=== modified file 'bzrlib/fetch.py'
--- a/bzrlib/fetch.py 2008-06-11 04:20:16 +0000
+++ b/bzrlib/fetch.py 2008-06-17 05:02:34 +0000
@@ -221,7 +221,6 @@
self.from_repository, self._last_revision,
find_ghosts=self.find_ghosts)
except errors.NoSuchRevision, e:
- import pdb;pdb.set_trace()
raise InstallFailed([self._last_revision])
def _fetch_inventory_weave(self, revs, pb):
@@ -387,8 +386,8 @@
root_id_order.sort(key=operator.itemgetter(0))
# Create a record stream containing the roots to create.
def yield_roots():
- for root_id, rev_id in root_id_order:
- key = (root_id, rev_id)
+ for key in root_id_order:
+ root_id, rev_id = key
rev_parents = parent_map[rev_id]
# We drop revision parents with different file-ids, because
# that represents a rename of the root to a different location
=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py 2008-06-11 07:22:00 +0000
+++ b/bzrlib/remote.py 2008-06-17 05:02:34 +0000
@@ -782,11 +782,6 @@
self._ensure_real()
return self._real_repository.get_ancestry(revision_id, topo_sorted)
- @property
- def inventories(self):
- self._ensure_real()
- return self._real_repository.inventories
-
def fileids_altered_by_revision_ids(self, revision_ids):
self._ensure_real()
return self._real_repository.fileids_altered_by_revision_ids(revision_ids)
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- a/bzrlib/repofmt/pack_repo.py 2008-06-11 07:22:00 +0000
+++ b/bzrlib/repofmt/pack_repo.py 2008-06-17 05:02:34 +0000
@@ -1656,7 +1656,29 @@
class KnitPackRepository(KnitRepository):
- """Repository with knit objects stored inside pack containers."""
+ """Repository with knit objects stored inside pack containers.
+
+ The layering for a KnitPackRepository is:
+
+ Graph | HPSS | Repository public layer |
+ ===================================================
+ Tuple based apis below; string based and key based apis above
+ ---------------------------------------------------
+ KnitVersionedFiles
+ Provides .texts, .revisions etc
+ This adapts the N-tuple keys to physical knit records which only have a
+ single string identifier (for historical reasons), which in older formats
+ was always the revision_id, and in the mapped code for packs is always
+ the last element of key tuples.
+ ---------------------------------------------------
+ GraphIndex
+ A separate GraphIndex is used for each of the
+ texts/inventories/revisions/signatures contained within each individual
+ pack file. The GraphIndex layer works in N-tuples and is unaware of any
+ semantic value.
+ ===================================================
+
+ """
def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
_serializer):
=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py 2008-06-11 07:22:00 +0000
+++ b/bzrlib/repository.py 2008-06-17 05:02:34 +0000
@@ -446,16 +446,41 @@
revisions and file history. It's normally accessed only by the Branch,
which views a particular line of development through that history.
- The Repository builds on top of Stores and a Transport, which respectively
- describe the disk data format and the way of accessing the (possibly
+ The Repository builds on top of some byte storage facilities (the revisions,
+ signatures, inventories and texts attributes) and a Transport, which
+ respectively provide byte storage and a means to access the (possibly
remote) disk.
+ The byte storage facilities are addressed via tuples, which we refer to
+ as 'keys' throughout the code base. Revision_keys, inventory_keys and
+ signature_keys are all 1-tuples: (revision_id,). text_keys are two-tuples:
+ (file_id, revision_id). We use this interface because it allows low
+ friction with the underlying code that implements disk indices, network
+ encoding and other parts of bzrlib.
+
:ivar revisions: A bzrlib.versionedfile.VersionedFiles instance containing
the serialised revisions for the repository. This can be used to obtain
revision graph information or to access raw serialised revisions.
The result of trying to insert data into the repository via this store
is undefined: it should be considered read-only except for implementors
of repositories.
+ :ivar signatures: A bzrlib.versionedfile.VersionedFiles instance containing
+ the serialised signatures for the repository. This can be used to
+ obtain access to raw serialised signatures. The result of trying to
+ insert data into the repository via this store is undefined: it should
+ be considered read-only except for implementors of repositories.
+ :ivar inventories: A bzrlib.versionedfile.VersionedFiles instance containing
+ the serialised inventories for the repository. This can be used to
+ obtain unserialised inventories. The result of trying to insert data
+ into the repository via this store is undefined: it should be
+ considered read-only except for implementors of repositories.
+ :ivar texts: A bzrlib.versionedfile.VersionedFiles instance containing the
+ texts of files and directories for the repository. This can be used to
+ obtain file texts or file graphs. Note that Repository.iter_file_bytes
+ is usually a better interface for accessing file texts.
+ The result of trying to insert data into the repository via this store
+ is undefined: it should be considered read-only except for implementors
+ of repositories.
:ivar _transport: Transport for file access to repository, typically
pointing to .bzr/repository.
"""
=== modified file 'bzrlib/versionedfile.py'
--- a/bzrlib/versionedfile.py 2008-06-12 03:25:25 +0000
+++ b/bzrlib/versionedfile.py 2008-06-17 05:02:34 +0000
@@ -678,6 +678,15 @@
Currently no implementation allows the graph of different key prefixes to
intersect, but the API does allow such implementations in the future.
+
+ The keyspace is expressed via simple tuples. Different instances of
+ VersionedFiles may use different key lengths, but the key length is constant
+ for all texts added to or retrieved from a given instance. For example, bzrlib uses
+ instances with a key-size of 2 for storing user files in a repository, with
+ the first element the fileid, and the second the version of that file.
+
+ The use of tuples allows a single code base to support several different
+ uses with only the mapping logic changing from instance to instance.
"""
def add_lines(self, key, parents, lines, parent_texts=None,
More information about the bazaar-commits
mailing list