Rev 5241: (parthm) Estimated records to be fetched are now shown for fetch (2a only). in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Wed May 19 16:12:24 BST 2010


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 5241 [merge]
revision-id: pqm at pqm.ubuntu.com-20100519151219-b1agjbkntjbirf92
parent: pqm at pqm.ubuntu.com-20100519022050-bskgk8zjo9p1whu3
parent: parth.malwankar at gmail.com-20100519133009-n29eitc526s4tfd4
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Wed 2010-05-19 16:12:19 +0100
message:
  (parthm) Estimated records to be fetched are now shown for fetch (2a only).
added:
  bzrlib/recordcounter.py        recordcounter.py-20100512152727-q0kn8gah0tre0uqs-1
modified:
  NEWS                           NEWS-20050323055033-4e00b5db738777ff
  bzrlib/builtins.py             builtins.py-20050830033751-fc01482b9ca23183
  bzrlib/remote.py               remote.py-20060720103555-yeeg2x51vn0rbtdp-1
  bzrlib/repofmt/groupcompress_repo.py repofmt.py-20080715094215-wp1qfvoo7093c8qr-1
  bzrlib/repository.py           rev_storage.py-20051111201905-119e9401e46257e3
  bzrlib/smart/repository.py     repository.py-20061128022038-vr5wy5bubyb8xttk-1
  bzrlib/tests/blackbox/test_checkout.py test_checkout.py-20060211231752-a5cde67cf70af854
=== modified file 'NEWS'
--- a/NEWS	2010-05-19 02:20:50 +0000
+++ b/NEWS	2010-05-19 02:58:05 +0000
@@ -101,9 +101,9 @@
   versions before 1.6.
   (Andrew Bennetts, #528041)
 
-* Heavyweight checkout operation now shows a message to the user indicating
-  history is being copied.
-  (Parth Malwankar, #538868)
+* Improved progress bar for fetch (2a format only). Bazaar now shows an
+  estimate of the number of records to be fetched vs actually fetched.
+  (Parth Malwankar, #374740, #538868)
 
 * Reduce peak memory by one copy of compressed text.
   (John Arbash Meinel, #566940)

=== modified file 'bzrlib/builtins.py'
--- a/bzrlib/builtins.py	2010-05-14 13:39:47 +0000
+++ b/bzrlib/builtins.py	2010-05-19 02:58:05 +0000
@@ -1336,11 +1336,6 @@
             except errors.NoWorkingTree:
                 source.bzrdir.create_workingtree(revision_id)
                 return
-
-        if not lightweight:
-            message = ('Copying history to "%s". '
-                'To checkout without local history use --lightweight.' % to_location)
-            ui.ui_factory.show_message(message)
         source.create_checkout(to_location, revision_id, lightweight,
                                accelerator_tree, hardlink)
 

=== added file 'bzrlib/recordcounter.py'
--- a/bzrlib/recordcounter.py	1970-01-01 00:00:00 +0000
+++ b/bzrlib/recordcounter.py	2010-05-19 13:30:09 +0000
@@ -0,0 +1,86 @@
+# Copyright (C) 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+"""Record counting support for showing progress of revision fetch."""
+
+
+class RecordCounter(object):
+    """Container that maintains estimates of the work required for a fetch.
+
+    An instance of this class is used along with a progress bar to provide
+    the user with an estimate of the amount of work pending for a fetch
+    (push, pull, branch, checkout) operation.
+    """
+    def __init__(self):
+        self.initialized = False
+        self.current = 0
+        self.key_count = 0
+        self.max = 0
+
+        # Users of a RecordCounter instance update the progress bar every
+        # _STEP_ records. We choose a reasonably high number to keep
+        # display updates from being too frequent. This is an odd number
+        # to ensure that the last digit of the records fetched in
+        # the fetched vs estimate ratio changes periodically.
+        self.STEP = 71
+
+    def is_initialized(self):
+        return self.initialized
+
+    def _estimate_max(self, key_count):
+        """Estimate the maximum amount of 'inserting stream' work.
+
+        This is just an estimate.
+        """
+        # Note: The magic number below is based on empirical data
+        # from 3 separate projects. Estimation can probably
+        # be improved but this should work well for most cases.
+        # The projects used for the estimate (with approx. numbers) were:
+        # lp:bzr with records_fetched = 7 * revs_required
+        # lp:emacs with records_fetched = 8 * revs_required
+        # bzr-svn checkout of lp:parrot = 10.63 * revs_required
+        # Hence, 10.3 was chosen for a realistic progress bar, as:
+        # 1. If records fetched is lower than 10.3x then we simply complete
+        #    with 10.3x. Under promise, over deliver.
+        # 2. In case of remote fetch, when we start the count fetch vs estimate
+        #    display with revs_required/estimate, having a multiplier with a
+        #    decimal point produces a realistic looking _estimate_ number rather
+        #    than using something like 3125/31250 (for 10x)
+        # 3. Based on the above data, the possibility of overshooting this
+        #    factor is minimal, and in case of an overshoot the estimate value
+        #    should not need to be corrected too many times.
+        return int(key_count * 10.3)
+
+    def setup(self, key_count, current=0):
+        """Setup RecordCounter with basic estimate of work pending.
+
+        Setup self.max and self.current to reflect the amount of work
+        pending for a fetch.
+        """
+        self.current = current
+        self.key_count = key_count
+        self.max = self._estimate_max(key_count)
+        self.initialized = True
+
+    def increment(self, count):
+        """Increment self.current by count.
+
+        Apart from incrementing self.current by count, also ensure
+        that self.max > self.current.
+        """
+        self.current += count
+        if self.current > self.max:
+            self.max += self.key_count
+

=== modified file 'bzrlib/remote.py'
--- a/bzrlib/remote.py	2010-05-13 16:17:54 +0000
+++ b/bzrlib/remote.py	2010-05-14 12:40:03 +0000
@@ -1980,7 +1980,8 @@
         if response_tuple[0] != 'ok':
             raise errors.UnexpectedSmartServerResponse(response_tuple)
         byte_stream = response_handler.read_streamed_body()
-        src_format, stream = smart_repo._byte_stream_to_stream(byte_stream)
+        src_format, stream = smart_repo._byte_stream_to_stream(byte_stream,
+            self._record_counter)
         if src_format.network_name() != repo._format.network_name():
             raise AssertionError(
                 "Mismatched RemoteRepository and stream src %r, %r" % (

=== modified file 'bzrlib/repofmt/groupcompress_repo.py'
--- a/bzrlib/repofmt/groupcompress_repo.py	2010-05-13 18:52:58 +0000
+++ b/bzrlib/repofmt/groupcompress_repo.py	2010-05-14 13:25:05 +0000
@@ -1108,13 +1108,29 @@
         yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
 
     def get_stream(self, search):
+        def wrap_and_count(pb, rc, stream):
+            """Yield records from stream while showing progress."""
+            count = 0
+            for record in stream:
+                if count == rc.STEP:
+                    rc.increment(count)
+                    pb.update('Estimate', rc.current, rc.max)
+                    count = 0
+                count += 1
+                yield record
+
         revision_ids = search.get_keys()
+        pb = ui.ui_factory.nested_progress_bar()
+        rc = self._record_counter
+        self._record_counter.setup(len(revision_ids))
         for stream_info in self._fetch_revision_texts(revision_ids):
-            yield stream_info
+            yield (stream_info[0],
+                wrap_and_count(pb, rc, stream_info[1]))
         self._revision_keys = [(rev_id,) for rev_id in revision_ids]
         self.from_repository.revisions.clear_cache()
         self.from_repository.signatures.clear_cache()
-        yield self._get_inventory_stream(self._revision_keys)
+        s = self._get_inventory_stream(self._revision_keys)
+        yield (s[0], wrap_and_count(pb, rc, s[1]))
         self.from_repository.inventories.clear_cache()
         # TODO: The keys to exclude might be part of the search recipe
         # For now, exclude all parents that are at the edge of ancestry, for
@@ -1123,10 +1139,13 @@
         parent_keys = from_repo._find_parent_keys_of_revisions(
                         self._revision_keys)
         for stream_info in self._get_filtered_chk_streams(parent_keys):
-            yield stream_info
+            yield (stream_info[0], wrap_and_count(pb, rc, stream_info[1]))
         self.from_repository.chk_bytes.clear_cache()
-        yield self._get_text_stream()
+        s = self._get_text_stream()
+        yield (s[0], wrap_and_count(pb, rc, s[1]))
         self.from_repository.texts.clear_cache()
+        pb.update('Done', rc.max, rc.max)
+        pb.finished()
 
     def get_stream_for_missing_keys(self, missing_keys):
         # missing keys can only occur when we are byte copying and not

=== modified file 'bzrlib/repository.py'
--- a/bzrlib/repository.py	2010-05-13 18:52:58 +0000
+++ b/bzrlib/repository.py	2010-05-14 13:32:48 +0000
@@ -43,7 +43,6 @@
     symbol_versioning,
     trace,
     tsort,
-    ui,
     versionedfile,
     )
 from bzrlib.bundle import serializer
@@ -55,6 +54,7 @@
 from bzrlib import (
     errors,
     registry,
+    ui,
     )
 from bzrlib.decorators import needs_read_lock, needs_write_lock, only_raises
 from bzrlib.inter import InterObject
@@ -64,6 +64,7 @@
     ROOT_ID,
     entry_factory,
     )
+from bzrlib.recordcounter import RecordCounter
 from bzrlib.lock import _RelockDebugMixin, LogicalLockResult
 from bzrlib.trace import (
     log_exception_quietly, note, mutter, mutter_callsite, warning)
@@ -4283,7 +4284,8 @@
                 is_resume = False
             try:
                 # locked_insert_stream performs a commit|suspend.
-                return self._locked_insert_stream(stream, src_format, is_resume)
+                return self._locked_insert_stream(stream, src_format,
+                    is_resume)
             except:
                 self.target_repo.abort_write_group(suppress_errors=True)
                 raise
@@ -4336,8 +4338,7 @@
                 # required if the serializers are different only in terms of
                 # the inventory.
                 if src_serializer == to_serializer:
-                    self.target_repo.revisions.insert_record_stream(
-                        substream)
+                    self.target_repo.revisions.insert_record_stream(substream)
                 else:
                     self._extract_and_insert_revisions(substream,
                         src_serializer)
@@ -4451,6 +4452,7 @@
         """Create a StreamSource streaming from from_repository."""
         self.from_repository = from_repository
         self.to_format = to_format
+        self._record_counter = RecordCounter()
 
     def delta_on_metadata(self):
         """Return True if delta's are permitted on metadata streams.

=== modified file 'bzrlib/smart/repository.py'
--- a/bzrlib/smart/repository.py	2010-05-06 23:41:35 +0000
+++ b/bzrlib/smart/repository.py	2010-05-14 13:36:34 +0000
@@ -39,6 +39,7 @@
     SuccessfulSmartServerResponse,
     )
 from bzrlib.repository import _strip_NULL_ghosts, network_format_registry
+from bzrlib.recordcounter import RecordCounter
 from bzrlib import revision as _mod_revision
 from bzrlib.versionedfile import (
     NetworkRecordStream,
@@ -544,12 +545,14 @@
     :ivar first_bytes: The first bytes to give the next NetworkRecordStream.
     """
 
-    def __init__(self, byte_stream):
+    def __init__(self, byte_stream, record_counter):
         """Create a _ByteStreamDecoder."""
         self.stream_decoder = pack.ContainerPushParser()
         self.current_type = None
         self.first_bytes = None
         self.byte_stream = byte_stream
+        self._record_counter = record_counter
+        self.key_count = 0
 
     def iter_stream_decoder(self):
         """Iterate the contents of the pack from stream_decoder."""
@@ -580,13 +583,46 @@
 
     def record_stream(self):
         """Yield substream_type, substream from the byte stream."""
+        def wrap_and_count(pb, rc, substream):
+            """Yield records from stream while showing progress."""
+            counter = 0
+            if rc:
+                if self.current_type != 'revisions' and self.key_count != 0:
+                    # As we know the number of revisions now (in self.key_count)
+                    # we can setup and use record_counter (rc).
+                    if not rc.is_initialized():
+                        rc.setup(self.key_count, self.key_count)
+            for record in substream.read():
+                if rc:
+                    if rc.is_initialized() and counter == rc.STEP:
+                        rc.increment(counter)
+                        pb.update('Estimate', rc.current, rc.max)
+                        counter = 0
+                    if self.current_type == 'revisions':
+                        # Total records is proportional to number of revs
+                        # to fetch. With remote, we used self.key_count to
+                        # track the number of revs. Once we have the revs
+                        # counts in self.key_count, the progress bar changes
+                        # from 'Estimating..' to 'Estimate' above.
+                        self.key_count += 1
+                        if counter == rc.STEP:
+                            pb.update('Estimating..', self.key_count)
+                            counter = 0
+                counter += 1
+                yield record
+
         self.seed_state()
+        pb = ui.ui_factory.nested_progress_bar()
+        rc = self._record_counter
         # Make and consume sub generators, one per substream type:
         while self.first_bytes is not None:
             substream = NetworkRecordStream(self.iter_substream_bytes())
             # after substream is fully consumed, self.current_type is set to
             # the next type, and self.first_bytes is set to the matching bytes.
-            yield self.current_type, substream.read()
+            yield self.current_type, wrap_and_count(pb, rc, substream)
+        if rc:
+            pb.update('Done', rc.max, rc.max)
+        pb.finished()
 
     def seed_state(self):
         """Prepare the _ByteStreamDecoder to decode from the pack stream."""
@@ -597,13 +633,13 @@
         list(self.iter_substream_bytes())
 
 
-def _byte_stream_to_stream(byte_stream):
+def _byte_stream_to_stream(byte_stream, record_counter=None):
     """Convert a byte stream into a format and a stream.
 
     :param byte_stream: A bytes iterator, as output by _stream_to_byte_stream.
     :return: (RepositoryFormat, stream_generator)
     """
-    decoder = _ByteStreamDecoder(byte_stream)
+    decoder = _ByteStreamDecoder(byte_stream, record_counter)
     for bytes in byte_stream:
         decoder.stream_decoder.accept_bytes(bytes)
         for record in decoder.stream_decoder.read_pending_records(max=1):

=== modified file 'bzrlib/tests/blackbox/test_checkout.py'
--- a/bzrlib/tests/blackbox/test_checkout.py	2010-04-30 09:52:08 +0000
+++ b/bzrlib/tests/blackbox/test_checkout.py	2010-05-12 12:55:04 +0000
@@ -65,7 +65,6 @@
 
     def test_checkout_dash_r(self):
         out, err = self.run_bzr(['checkout', '-r', '-2', 'branch', 'checkout'])
-        self.assertContainsRe(out, 'Copying history to "checkout".')
         # the working tree should now be at revision '1' with the content
         # from 1.
         result = bzrdir.BzrDir.open('checkout')
@@ -75,7 +74,6 @@
     def test_checkout_light_dash_r(self):
         out, err = self.run_bzr(['checkout','--lightweight', '-r', '-2',
             'branch', 'checkout'])
-        self.assertNotContainsRe(out, 'Copying history')
         # the working tree should now be at revision '1' with the content
         # from 1.
         result = bzrdir.BzrDir.open('checkout')




More information about the bazaar-commits mailing list