Rev 4719: Create a concrete IndirectDirState class. in http://bazaar.launchpad.net/~lifeless/bzr/dirstate2
Robert Collins
robertc at robertcollins.net
Tue Sep 29 00:23:26 BST 2009
At http://bazaar.launchpad.net/~lifeless/bzr/dirstate2
------------------------------------------------------------
revno: 4719
revision-id: robertc at robertcollins.net-20090928232256-qclqkb7y0su13qjl
parent: robertc at robertcollins.net-20090928223300-wgeoel86utggcmu6
committer: Robert Collins <robertc at robertcollins.net>
branch nick: dirstate2
timestamp: Tue 2009-09-29 09:22:56 +1000
message:
Create a concrete IndirectDirState class.
=== modified file 'bzrlib/dirstate.py'
--- a/bzrlib/dirstate.py 2009-09-14 01:48:28 +0000
+++ b/bzrlib/dirstate.py 2009-09-28 23:22:56 +0000
@@ -305,20 +305,19 @@
return statvalue, sha1
-class DirState(object):
+class AbstractDirState(object):
"""Record directory and metadata state for fast access.
A dirstate is a specialised data structure for managing local working
tree state information. Its not yet well defined whether it is platform
specific, and if it is how we detect/parameterize that.
- Dirstates use the usual lock_write, lock_read and unlock mechanisms.
- Unlike most bzr disk formats, DirStates must be locked for reading, using
- lock_read. (This is an os file lock internally.) This is necessary
- because the file can be rewritten in place.
-
DirStates must be explicitly written with save() to commit changes; just
unlocking them does not write the changes to disk.
+
+ AbstractDirState does not specify locking behaviour, though it is aware
+ of whether it is locked or not. Child classes such as DirState provide
+ locking.
"""
_kind_to_minikind = {
@@ -3074,6 +3073,75 @@
raise errors.ObjectNotLocked(self)
+class DirState(AbstractDirState):
+ """DirState implemented using a single file and OSLocks.
+
+ Unlike most bzr disk formats, DirStates must be locked for reading, using
+ lock_read. (This is an os file lock internally.) This is necessary
+ because the file can be rewritten in place.
+
+ Because a single file is used, updates that are interrupted can leave
+ a corrupt data structure on disk.
+ """
+
+
+class IndirectedDirState(AbstractDirState):
+ """DirState implemented using an indirection pointer and hash-named files.
+
+ Unlike DirState, IndirectedDirState is nearly atomic on the file system.
+ The base name of the dirstate specifies a directory containing a 'current'
+ file, and many content files named with their hash. Updates to
+ IndirectedDirState are very robust and do not lock out other readers.
+
+ Disk layout
+ -----------
+ basepath/
+ format = "bzr IndirectDirstate 1"
+ current = "MD5hash...\n". If missing then there will be one HASH.old
+ file which has the value current had if the read had started earlier,
+ and may be used.
+ MD5HASH.old = temp file used while replacing current. May only ever be
+ one - it is created by renaming current.
+ MD5HASH = a dirstate state file
+ MD5HASH.current = a candidate replacement for current, about to be
+ moved into place.
+
+ The 'current' file contains a hash which is the current actual dirstate
+ to read. Updates have two forms - stat cache updates (when a logical
+ read lock is in place), and semantic updates - when a write lock is in
+ place. Updates write a new current file '$hash.current', move the 'current'
+ file to '$hash.old', and then move '$hash.current' to 'current', finally
+ removing '$hash.old'. If the Update is a stat cache update, a check is
+ done after moving 'current' to '$hash.old' - if the old hash is not the
+ hash that was meant to be replaced, then some other task has updated the
+ dirstate (and may have done more than a stat cache update). When this is
+ detected, '$hash.old' is moved back to 'current', and the stat cache
+ update discarded. Updaters are not permitted to write directly to 'current'
+ or to rename a '$hash.current' file to 'current' unless they currently
+ have '$hash.old'.
+ After successfully updating 'current', the old state contained in the
+ content file named in '$hash.old' should be removed. If the removal
+ errors due to other processes holding the file open, the errors can be
+ ignored.
+ Finally, when closing a dirstate state file, 'current' should be checked,
+ and if it is different to the dirstate file that was read initially, an
+ attempt to remove that dirstate file should be made - last one out close
+ the doors.
+
+ When reading an IndirectedDirState, check for a format file to determine
+ if the dirstate is a known format. If there is no current file, it is
+ either in-progress or an interrupted update has occurred. Readers should
+ look for *.old to get the last-written dirstate, and read that instead.
+ Concurrency concerns here mean that by the time .old is processed current
+ may have been updated and the old state file removed, so a short loop is
+ recommended.
+
+ If a write is fatally interrupted, recovery *must* roll back, not forward,
+ as a later client cannot tell if the update would have rolled back when
+ checking the '$hash.old' file.
+ """
+
+
def py_update_entry(state, entry, abspath, stat_value,
_stat_to_minikind=DirState._stat_to_minikind,
_pack_stat=pack_stat):
=== modified file 'bzrlib/tests/test_dirstate.py'
--- a/bzrlib/tests/test_dirstate.py 2009-09-28 22:33:00 +0000
+++ b/bzrlib/tests/test_dirstate.py 2009-09-28 23:22:56 +0000
@@ -52,6 +52,7 @@
test_osutils.dir_reader_scenarios(), suite)
dirstate_implementation_scenarios = [
('dirstate1', {'_dirstate_class':dirstate.DirState}),
+ ('dirstate2', {'_dirstate_class':dirstate.IndirectedDirState}),
]
tests.multiply_tests(suite, dirstate_implementation_scenarios,
remaining_tests)
More information about the bazaar-commits
mailing list