Rev 6086: (jr) bzr add now skips large files in recursive mode. The default "large" in file:///home/pqm/archives/thelove/bzr/%2Btrunk/

Canonical.com Patch Queue Manager pqm at pqm.ubuntu.com
Sat Aug 20 05:22:53 UTC 2011


At file:///home/pqm/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 6086 [merge]
revision-id: pqm at pqm.ubuntu.com-20110820052249-41vfmtn5g0i7ngqb
parent: pqm at pqm.ubuntu.com-20110820031245-5e0bpa72b333273n
parent: weyrick at mozek.us-20110819222603-625cb10d0yhtoh1q
committer: Canonical.com Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Sat 2011-08-20 05:22:49 +0000
message:
  (jr) bzr add now skips large files in recursive mode. The default "large"
   size is 20MB, and is configurable via the add.maximum_file_size
   option. A value of 0 disables skipping. Named items passed to add are
   never skipped. (Shannon Weyrick, #54624) (Shannon Weyrick)
modified:
  bzrlib/add.py                  add.py-20050323030017-3a77d63feda58e33
  bzrlib/builtins.py             builtins.py-20050830033751-fc01482b9ca23183
  bzrlib/config.py               config.py-20051011043216-070c74f4e9e338e8
  bzrlib/help_topics/en/configuration.txt configuration.txt-20060314161707-868350809502af01
  bzrlib/mutabletree.py          mutabletree.py-20060906023413-4wlkalbdpsxi2r4y-2
  bzrlib/osutils.py              osutils.py-20050309040759-eeaff12fbf77ac86
  bzrlib/tests/blackbox/test_add.py test_add.py-20060518072250-857e4f86f54a30b2
  bzrlib/tests/test_config.py    testconfig.py-20051011041908-742d0c15d8d8c8eb
  doc/en/release-notes/bzr-2.5.txt bzr2.5.txt-20110708125756-587p0hpw7oke4h05-1
=== modified file 'bzrlib/add.py'
--- a/bzrlib/add.py	2011-06-14 02:21:41 +0000
+++ b/bzrlib/add.py	2011-08-19 21:01:49 +0000
@@ -17,9 +17,11 @@
 """Helper functions for adding files to working trees."""
 
 import sys
+import os
 
 from bzrlib import (
     osutils,
+    ui, 
     )
 
 
@@ -53,6 +55,48 @@
             self._to_file.write('adding %s\n' % _quote(path))
         return None
 
+    def skip_file(self, tree, path, kind, stat_value = None):
+        """Test whether the given file should be skipped or not.
+        
+        The default action never skips. Note this is only called during
+        recursive adds
+        
+        :param tree: The tree we are working in
+        :param path: The path being added
+        :param kind: The kind of object being added.
+        :param stat_value: Stat result for this file, if available already
+        :return bool. True if the file should be skipped (not added)
+        """
+        return False
+
+
+class AddWithSkipLargeAction(AddAction):
+    """A class that can decide to skip a file if it's considered too large"""
+
+    # default 20 MB
+    _DEFAULT_MAX_FILE_SIZE = 20000000
+    _optionName = 'add.maximum_file_size'
+    _maxSize = None
+
+    def skip_file(self, tree, path, kind, stat_value = None):
+        if kind != 'file':
+            return False            
+        if self._maxSize is None:
+            config = tree.branch.get_config()
+            self._maxSize = config.get_user_option_as_int_from_SI(
+                self._optionName,  
+                self._DEFAULT_MAX_FILE_SIZE)
+        if stat_value is None:
+            file_size = os.path.getsize(path);
+        else:
+            file_size = stat_value.st_size;
+        if self._maxSize > 0 and file_size > self._maxSize:
+            ui.ui_factory.show_warning(
+                "skipping %s (larger than %s of %d bytes)" % 
+                (path, self._optionName,  self._maxSize))
+            return True
+        return False
+
 
 class AddFromBaseAction(AddAction):
     """This class will try to extract file ids from another tree."""

=== modified file 'bzrlib/builtins.py'
--- a/bzrlib/builtins.py	2011-08-18 04:23:06 +0000
+++ b/bzrlib/builtins.py	2011-08-19 22:26:03 +0000
@@ -674,6 +674,10 @@
     
     Any files matching patterns in the ignore list will not be added
     unless they are explicitly mentioned.
+    
+    In recursive mode, files larger than the configuration option 
+    add.maximum_file_size will be skipped. Named items are never skipped due
+    to file size.
     """
     takes_args = ['file*']
     takes_options = [
@@ -706,7 +710,7 @@
             action = bzrlib.add.AddFromBaseAction(base_tree, base_path,
                           to_file=self.outf, should_print=(not is_quiet()))
         else:
-            action = bzrlib.add.AddAction(to_file=self.outf,
+            action = bzrlib.add.AddWithSkipLargeAction(to_file=self.outf,
                 should_print=(not is_quiet()))
 
         if base_tree:

=== modified file 'bzrlib/config.py'
--- a/bzrlib/config.py	2011-08-19 12:54:18 +0000
+++ b/bzrlib/config.py	2011-08-20 05:22:49 +0000
@@ -75,6 +75,7 @@
 import os
 import string
 import sys
+import re
 
 
 from bzrlib.decorators import needs_write_lock
@@ -413,6 +414,45 @@
             # add) the final ','
             l = [l]
         return l
+        
+    def get_user_option_as_int_from_SI(self,  option_name,  default=None):
+        """Get a generic option from a human readable size in SI units, e.g 10MB
+        
+        Accepted suffixes are K,M,G. It is case-insensitive and may be followed
+        by a trailing b (i.e. Kb, MB). This is intended to be practical and not
+        pedantic.
+        
+        :return Integer, expanded to its base-10 value if a proper SI unit is 
+            found. If the option doesn't exist, or isn't a value in 
+            SI units, return default (which defaults to None)
+        """
+        val = self.get_user_option(option_name)
+        if isinstance(val, list):
+            val = val[0]
+        if val is None:
+            val = default
+        else:
+            p = re.compile("^(\d+)([kmg])*b*$", re.IGNORECASE)
+            try:
+                m = p.match(val)
+                if m is not None:
+                    val = int(m.group(1))
+                    if m.group(2) is not None:
+                        if m.group(2).lower() == 'k':
+                            val *= 10**3
+                        elif m.group(2).lower() == 'm':
+                            val *= 10**6
+                        elif m.group(2).lower() == 'g':
+                            val *= 10**9
+                else:
+                    ui.ui_factory.show_warning('Invalid config value for "%s" '
+                                               ' value %r is not an SI unit.'
+                                                % (option_name, val))
+                    val = default
+            except TypeError:
+                val = default
+        return val
+        
 
     def gpg_signing_command(self):
         """What program should be used to sign signatures?"""

=== modified file 'bzrlib/help_topics/en/configuration.txt'
--- a/bzrlib/help_topics/en/configuration.txt	2011-08-16 15:12:39 +0000
+++ b/bzrlib/help_topics/en/configuration.txt	2011-08-19 22:26:03 +0000
@@ -628,6 +628,14 @@
 If present, defines the ``--strict`` option default value for checking
 uncommitted changes before sending a merge directive.
 
+add.maximum_file_size
+~~~~~~~~~~~~~~~~~~~~~
+
+Defines the maximum file size the command line "add" operation will allow
+in recursive mode, with files larger than this value being skipped. You may 
+specify this value as an integer (in which case it is interpreted as bytes), 
+or you may specify the value using SI units, e.g. 10KB, 20MB, 1G. A value of 0 
+will disable skipping.
 
 External Merge Tools
 --------------------

=== modified file 'bzrlib/mutabletree.py'
--- a/bzrlib/mutabletree.py	2011-07-23 16:33:38 +0000
+++ b/bzrlib/mutabletree.py	2011-08-15 20:10:03 +0000
@@ -582,8 +582,9 @@
         :param parent_ie: Parent inventory entry if known, or None.  If
             None, the parent is looked up by name and used if present, otherwise it
             is recursively added.
+        :param path: 
         :param kind: Kind of new entry (file, directory, etc)
-        :param action: callback(tree, parent_ie, path, kind); can return file_id
+        :param inv_path:
         :return: Inventory entry for path and a list of paths which have been added.
         """
         # Nothing to do if path is already versioned.
@@ -628,7 +629,7 @@
             if (prev_dir is None or not is_inside([prev_dir], path)):
                 yield (path, inv_path, this_ie, None)
             prev_dir = path
-
+        
     def __init__(self, tree, action, conflicts_related=None):
         self.tree = tree
         if action is None:
@@ -695,12 +696,18 @@
 
             # get the contents of this directory.
 
-            # find the kind of the path being added.
+            # find the kind of the path being added, and save stat_value
+            # for reuse
+            stat_value = None
             if this_ie is None:
-                kind = osutils.file_kind(abspath)
+                stat_value = osutils.file_stat(abspath)
+                kind = osutils.file_kind_from_stat_mode(stat_value.st_mode)
             else:
                 kind = this_ie.kind
-
+            
+            # allow AddAction to skip this file
+            if self.action.skip_file(self.tree,  abspath,  kind,  stat_value):
+                continue
             if not InventoryEntry.versionable_kind(kind):
                 trace.warning("skipping %s (can't add file of kind '%s')",
                               abspath, kind)
@@ -769,7 +776,7 @@
                         # recurse into this already versioned subdir.
                         things_to_add.append((subp, sub_invp, sub_ie, this_ie))
                     else:
-                        # user selection overrides ignoes
+                        # user selection overrides ignores
                         # ignore while selecting files - if we globbed in the
                         # outer loop we would ignore user files.
                         ignore_glob = self.tree.is_ignored(subp)

=== modified file 'bzrlib/osutils.py'
--- a/bzrlib/osutils.py	2011-08-12 12:18:34 +0000
+++ b/bzrlib/osutils.py	2011-08-19 22:26:03 +0000
@@ -2178,15 +2178,18 @@
     return file_kind_from_stat_mode(mode)
 file_kind_from_stat_mode = file_kind_from_stat_mode_thunk
 
-
-def file_kind(f, _lstat=os.lstat):
+def file_stat(f, _lstat=os.lstat):
     try:
-        return file_kind_from_stat_mode(_lstat(f).st_mode)
+        # XXX cache?
+        return _lstat(f)
     except OSError, e:
         if getattr(e, 'errno', None) in (errno.ENOENT, errno.ENOTDIR):
             raise errors.NoSuchFile(f)
         raise
 
+def file_kind(f, _lstat=os.lstat):
+    stat_value = file_stat(f, _lstat)
+    return file_kind_from_stat_mode(stat_value.st_mode)
 
 def until_no_eintr(f, *a, **kw):
     """Run f(*a, **kw), retrying if an EINTR error occurs.

=== modified file 'bzrlib/tests/blackbox/test_add.py'
--- a/bzrlib/tests/blackbox/test_add.py	2011-07-11 06:47:32 +0000
+++ b/bzrlib/tests/blackbox/test_add.py	2011-08-19 21:01:49 +0000
@@ -239,3 +239,27 @@
         out, err = self.run_bzr(["add", "a", "b"], working_dir=u"\xA7")
         self.assertEquals(out, "adding a\n" "adding b\n")
         self.assertEquals(err, "")
+        
+    def test_add_skip_large_files(self):
+        """Test skipping files larger than add.maximum_file_size"""
+        tree = self.make_branch_and_tree('.')
+        self.build_tree(['small.txt', 'big.txt', 'big2.txt'])
+        self.build_tree_contents([('small.txt', '0\n')])
+        self.build_tree_contents([('big.txt', '01234567890123456789\n')])
+        self.build_tree_contents([('big2.txt', '01234567890123456789\n')])
+        tree.branch.get_config().set_user_option('add.maximum_file_size', 5)
+        out = self.run_bzr('add')[0]
+        results = sorted(out.rstrip('\n').split('\n'))
+        self.assertEquals(['adding small.txt'], 
+                          results)
+        # named items never skipped, even if over max
+        out, err = self.run_bzr(["add", "big2.txt"])
+        results = sorted(out.rstrip('\n').split('\n'))
+        self.assertEquals(['adding big2.txt'], 
+                          results)
+        self.assertEquals(err, "")
+        tree.branch.get_config().set_user_option('add.maximum_file_size', 30)
+        out = self.run_bzr('add')[0]
+        results = sorted(out.rstrip('\n').split('\n'))
+        self.assertEquals(['adding big.txt'], 
+                          results)

=== modified file 'bzrlib/tests/test_config.py'
--- a/bzrlib/tests/test_config.py	2011-08-19 12:54:18 +0000
+++ b/bzrlib/tests/test_config.py	2011-08-20 05:22:49 +0000
@@ -1035,6 +1035,26 @@
         # automatically cast to list
         self.assertEqual(['x'], get_list('one_item'))
 
+    def test_get_user_option_as_int_from_SI(self):
+        conf, parser = self.make_config_parser("""
+plain = 100
+si_k = 5k,
+si_kb = 5kb,
+si_m = 5M,
+si_mb = 5MB,
+si_g = 5g,
+si_gb = 5gB,
+""")
+        get_si = conf.get_user_option_as_int_from_SI
+        self.assertEqual(100, get_si('plain'))
+        self.assertEqual(5000, get_si('si_k'))
+        self.assertEqual(5000, get_si('si_kb'))
+        self.assertEqual(5000000, get_si('si_m'))
+        self.assertEqual(5000000, get_si('si_mb'))
+        self.assertEqual(5000000000, get_si('si_g'))
+        self.assertEqual(5000000000, get_si('si_gb'))
+        self.assertEqual(None, get_si('non-exist'))
+        self.assertEqual(42, get_si('non-exist-with-default',  42))
 
 class TestSupressWarning(TestIniConfig):
 

=== modified file 'doc/en/release-notes/bzr-2.5.txt'
--- a/doc/en/release-notes/bzr-2.5.txt	2011-08-20 03:12:45 +0000
+++ b/doc/en/release-notes/bzr-2.5.txt	2011-08-20 05:22:49 +0000
@@ -81,6 +81,11 @@
 * Relative local paths can now be specified in URL syntax by using the
   "file:" prefix.  (Jelmer Vernooij)
 
+* bzr add now skips large files in recursive mode. The default "large"
+  size is 20MB, and is configurable via the add.maximum_file_size
+  option. A value of 0 disables skipping. Named items passed to add are
+  never skipped. (Shannon Weyrick, #54624)
+
 Improvements
 ************
 
@@ -175,6 +180,10 @@
   no longer support it.
   (Martin Pool)
 
+* New method ``Config.get_user_option_as_int_from_SI`` added for expanding a
+  value in SI format (e.g. "20MB", "1GB") into its integer equivalent. 
+  (Shannon Weyrick)
+
 * ``Transport`` now has a ``_parsed_url`` attribute instead of
   separate ``_user``, ``_password``, ``_port``, ``_scheme``, ``_host``
   and ``_path`` attributes. Proxies are provided for the moment but




More information about the bazaar-commits mailing list