Rev 6303: (gz) Add _ModuleContext to track location in source and other refactorings in file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/

Fri Nov 25 16:53:39 UTC 2011

At file:///srv/pqm.bazaar-vcs.org/archives/thelove/bzr/%2Btrunk/

------------------------------------------------------------
revno: 6303 [merge]
revision-id: pqm at pqm.ubuntu.com-20111125165338-tzw8uqnq48801bu4
parent: pqm at pqm.ubuntu.com-20111125162842-2g618aviwm32lie4
parent: martin.packman at canonical.com-20111122184816-srfa6yrcctscy0pq
committer: Patch Queue Manager <pqm at pqm.ubuntu.com>
branch nick: +trunk
timestamp: Fri 2011-11-25 16:53:38 +0000
message:
  (gz) Add _ModuleContext to track location in source and other refactorings
   in export_pot (Martin Packman)
modified:
  bzrlib/export_pot.py           bzrgettext-20110429104643-3wjy38532whc21yj-2
  bzrlib/tests/test_export_pot.py test_export_pot.py-20110509102137-efovgz233s9uk2b2-1
=== modified file 'bzrlib/export_pot.py'

--- a/bzrlib/export_pot.py	2011-11-21 13:00:48 +0000
+++ b/bzrlib/export_pot.py	2011-11-22 18:48:16 +0000
@@ -33,6 +33,7 @@
     commands as _mod_commands,
     errors,
     help_topics,
+    option,
     plugin,
     help,
     )
@@ -68,12 +69,71 @@
     return s
 
 
+def _parse_source(source_text):
+    """Get object to lineno mappings from given source_text"""
+    import ast
+    cls_to_lineno = {}
+    str_to_lineno = {}
+    for node in ast.walk(ast.parse(source_text)):
+        # TODO: worry about duplicates?
+        if isinstance(node, ast.ClassDef):
+            # TODO: worry about nesting?
+            cls_to_lineno[node.name] = node.lineno
+        elif isinstance(node, ast.Str):
+            # Python AST gives location of string literal as the line the
+            # string terminates on. It's more useful to have the line the
+            # string begins on. Unfortunately, counting back newlines is
+            # only an approximation as the AST is ignorant of escaping.
+            str_to_lineno[node.s] = node.lineno - node.s.count('\n')
+    return cls_to_lineno, str_to_lineno
+
+
+class _ModuleContext(object):
+    """Record of the location within a source tree"""
+
+    def __init__(self, path, lineno=1, _source_info=None):
+        self.path = path
+        self.lineno = lineno
+        if _source_info is not None:
+            self._cls_to_lineno, self._str_to_lineno = _source_info
+
+    @classmethod
+    def from_module(cls, module):
+        """Get new context from module object and parse source for linenos"""
+        sourcepath = inspect.getsourcefile(module)
+        # TODO: fix this to do the right thing rather than rely on cwd
+        relpath = os.path.relpath(sourcepath)
+        return cls(relpath,
+            _source_info=_parse_source("".join(inspect.findsource(module)[0])))
+
+    def from_class(self, cls):
+        """Get new context with same details but lineno of class in source"""
+        try:
+            lineno = self._cls_to_lineno[cls.__name__]
+        except (AttributeError, KeyError):
+            mutter("Definition of %r not found in %r", cls, self.path)
+            return self
+        return self.__class__(self.path, lineno,
+            (self._cls_to_lineno, self._str_to_lineno))
+
+    def from_string(self, string):
+        """Get new context with same details but lineno of string in source"""
+        try:
+            lineno = self._str_to_lineno[string]
+        except (AttributeError, KeyError):
+            mutter("String %r not found in %r", string[:20], self.path)
+            return self
+        return self.__class__(self.path, lineno,
+            (self._cls_to_lineno, self._str_to_lineno))
+
+
 class _PotExporter(object):
     """Write message details to output stream in .pot file format"""
 
     def __init__(self, outf):
         self.outf = outf
         self._msgids = set()
+        self._module_contexts = {}
 
     def poentry(self, path, lineno, s, comment=None):
         if s in self._msgids:
@@ -92,6 +152,10 @@
             "\n".format(
                 path=path, lineno=lineno, comment=comment, msg=_normalize(s)))
 
+    def poentry_in_context(self, context, string, comment=None):
+        context = context.from_string(string)
+        self.poentry(context.path, context.lineno, string, comment)
+
     def poentry_per_paragraph(self, path, lineno, msgid, include=None):
         # TODO: How to split long help?
         paragraphs = msgid.split('\n\n')
@@ -101,78 +165,50 @@
             self.poentry(path, lineno, p)
             lineno += p.count('\n') + 2
 
-
-_LAST_CACHE = _LAST_CACHED_SRC = None
-
-def _offsets_of_literal(src):
-    global _LAST_CACHE, _LAST_CACHED_SRC
-    if src == _LAST_CACHED_SRC:
-        return _LAST_CACHE.copy()
-
-    import ast
-    root = ast.parse(src)
-    offsets = {}
-    for node in ast.walk(root):
-        if not isinstance(node, ast.Str):
-            continue
-        offsets[node.s] = node.lineno - node.s.count('\n')
-
-    _LAST_CACHED_SRC = src
-    _LAST_CACHE = offsets.copy()
-    return offsets
+    def get_context(self, obj):
+        module = inspect.getmodule(obj)
+        try:
+            context = self._module_contexts[module.__name__]
+        except KeyError:
+            context = _ModuleContext.from_module(module)
+            self._module_contexts[module.__name__] = context
+        if inspect.isclass(obj):
+            context = context.from_class(obj)
+        return context
+
+
+def _write_option(exporter, context, opt, note):
+    if getattr(opt, 'hidden', False):
+        return   
+    if getattr(opt, 'title', None):
+        exporter.poentry_in_context(context, opt.title,
+            "title of {name!r} {what}".format(name=opt.name, what=note))
+    if getattr(opt, 'help', None):
+        exporter.poentry_in_context(context, opt.help,
+            "help of {name!r} {what}".format(name=opt.name, what=note))
+
 
 def _standard_options(exporter):
-    from bzrlib.option import Option
-    src = inspect.findsource(Option)[0]
-    src = ''.join(src)
-    path = 'bzrlib/option.py'
-    offsets = _offsets_of_literal(src)
-
-    for name in sorted(Option.OPTIONS.keys()):
-        opt = Option.OPTIONS[name]
-        if getattr(opt, 'hidden', False):
-            continue
-        if getattr(opt, 'title', None):
-            lineno = offsets.get(opt.title, 9999)
-            if lineno == 9999:
-                note(gettext("%r is not found in bzrlib/option.py") % opt.title)
-            exporter.poentry(path, lineno, opt.title,
-                     'title of %r option' % name)
-        if getattr(opt, 'help', None):
-            lineno = offsets.get(opt.help, 9999)
-            if lineno == 9999:
-                note(gettext("%r is not found in bzrlib/option.py") % opt.help)
-            exporter.poentry(path, lineno, opt.help,
-                     'help of %r option' % name)
-
-def _command_options(exporter, path, cmd):
-    src, default_lineno = inspect.findsource(cmd.__class__)
-    offsets = _offsets_of_literal(''.join(src))
+    OPTIONS = option.Option.OPTIONS
+    context = exporter.get_context(option)
+    for name in sorted(OPTIONS.keys()):
+        opt = OPTIONS[name]
+        _write_option(exporter, context.from_string(name), opt, "option")
+
+
+def _command_options(exporter, context, cmd):
+    note = "option of {0!r} command".format(cmd.name())
     for opt in cmd.takes_options:
-        if isinstance(opt, str):
-            continue
-        if getattr(opt, 'hidden', False):
-            continue
-        name = opt.name
-        if getattr(opt, 'title', None):
-            lineno = offsets.get(opt.title, default_lineno)
-            exporter.poentry(path, lineno, opt.title,
-                     'title of %r option of %r command' % (name, cmd.name()))
-        if getattr(opt, 'help', None):
-            lineno = offsets.get(opt.help, default_lineno)
-            exporter.poentry(path, lineno, opt.help,
-                     'help of %r option of %r command' % (name, cmd.name()))
+        # String values in Command option lists are for global options
+        if not isinstance(opt, str):
+            _write_option(exporter, context, opt, note)
 
 
 def _write_command_help(exporter, cmd):
-    path = inspect.getfile(cmd.__class__)
-    if path.endswith('.pyc'):
-        path = path[:-1]
-    path = os.path.relpath(path)
-    src, lineno = inspect.findsource(cmd.__class__)
-    offsets = _offsets_of_literal(''.join(src))
-    lineno = offsets[cmd.__doc__]
-    doc = inspect.getdoc(cmd)
+    context = exporter.get_context(cmd.__class__)
+    rawdoc = cmd.__doc__
+    dcontext = context.from_string(rawdoc)
+    doc = inspect.cleandoc(rawdoc)
 
     def exclude_usage(p):
         # ':Usage:' has special meaning in help topics.
@@ -180,8 +216,9 @@
         if p.splitlines()[0] != ':Usage:':
             return True
 
-    exporter.poentry_per_paragraph(path, lineno, doc, exclude_usage)
-    _command_options(exporter, path, cmd)
+    exporter.poentry_per_paragraph(dcontext.path, dcontext.lineno, doc,
+        exclude_usage)
+    _command_options(exporter, context, cmd)
 
 
 def _command_helps(exporter, plugin_name=None):
@@ -226,11 +263,7 @@
 
 def _error_messages(exporter):
     """Extract fmt string from bzrlib.errors."""
-    path = errors.__file__
-    if path.endswith('.pyc'):
-        path = path[:-1]
-    offsets = _offsets_of_literal(open(path).read())
-
+    context = exporter.get_context(errors)
     base_klass = errors.BzrError
     for name in dir(errors):
         klass = getattr(errors, name)
@@ -245,8 +278,8 @@
         fmt = getattr(klass, "_fmt", None)
         if fmt:
             note(gettext("Exporting message from error: %s"), name)
-            exporter.poentry('bzrlib/errors.py',
-                     offsets.get(fmt, 9999), fmt)
+            exporter.poentry_in_context(context, fmt)
+
 
 def _help_topics(exporter):
     topic_registry = help_topics.topic_registry
@@ -265,6 +298,7 @@
             exporter.poentry('dummy/help_topics/'+key+'/summary.txt',
                      1, summary)
 
+
 def export_pot(outf, plugin=None):
     exporter = _PotExporter(outf)
     if plugin is None:

=== modified file 'bzrlib/tests/test_export_pot.py'
--- a/bzrlib/tests/test_export_pot.py	2011-11-21 13:00:48 +0000
+++ b/bzrlib/tests/test_export_pot.py	2011-11-22 18:48:16 +0000
@@ -67,6 +67,160 @@
         self.assertEqual(export_pot._normalize(s), e)
 
 
+class TestParseSource(tests.TestCase):
+    """Check mappings to line numbers generated from python source"""
+
+    def test_classes(self):
+        src = '''
+class Ancient:
+    """Old style class"""
+
+class Modern(object):
+    """New style class"""
+'''
+        cls_lines, _ = export_pot._parse_source(src)
+        self.assertEqual(cls_lines,
+            {"Ancient": 2, "Modern": 5})
+
+    def test_classes_nested(self):
+        src = '''
+class Matroska(object):
+    class Smaller(object):
+        class Smallest(object):
+            pass
+'''
+        cls_lines, _ = export_pot._parse_source(src)
+        self.assertEqual(cls_lines,
+            {"Matroska": 2, "Smaller": 3, "Smallest":4})
+
+    def test_strings_docstrings(self):
+        src = '''\
+"""Module"""
+
+def function():
+    """Function"""
+
+class Class(object):
+    """Class"""
+
+    def method(self):
+        """Method"""
+'''
+        _, str_lines = export_pot._parse_source(src)
+        self.assertEqual(str_lines,
+            {"Module": 1, "Function": 4, "Class": 7, "Method": 10})
+
+    def test_strings_literals(self):
+        src = '''\
+s = "One"
+t = (2, "Two")
+f = dict(key="Three")
+'''
+        _, str_lines = export_pot._parse_source(src)
+        self.assertEqual(str_lines,
+            {"One": 1, "Two": 2, "Three": 3})
+
+    def test_strings_multiline(self):
+        src = '''\
+"""Start
+
+End
+"""
+t = (
+    "A"
+    "B"
+    "C"
+    )
+'''
+        _, str_lines = export_pot._parse_source(src)
+        self.assertEqual(str_lines,
+            {"Start\n\nEnd\n": 1, "ABC": 6})
+
+    def test_strings_multiline_escapes(self):
+        src = '''\
+s = "Escaped\\n"
+r = r"Raw\\n"
+t = (
+    "A\\n\\n"
+    "B\\n\\n"
+    "C\\n\\n"
+    )
+'''
+        _, str_lines = export_pot._parse_source(src)
+        self.expectFailure("Escaped newlines confuses the multiline handling",
+            self.assertNotEqual, str_lines,
+            {"Escaped\n": 0, "Raw\\n": 2, "A\n\nB\n\nC\n\n": -2})
+        self.assertEqual(str_lines,
+            {"Escaped\n": 1, "Raw\\n": 2, "A\n\nB\n\nC\n\n": 4})
+
+
+class TestModuleContext(tests.TestCase):
+    """Checks for source context tracking objects"""
+
+    def check_context(self, context, path, lineno):
+        self.assertEquals((context.path, context.lineno), (path, lineno))
+
+    def test___init__(self):
+        context = export_pot._ModuleContext("one.py")
+        self.check_context(context, "one.py", 1)
+        context = export_pot._ModuleContext("two.py", 5)
+        self.check_context(context, "two.py", 5)
+
+    def test_from_class(self):
+        """New context returned with lineno updated from class"""
+        path = "cls.py"
+        class A(object): pass
+        class B(object): pass
+        cls_lines = {"A": 5, "B": 7}
+        context = export_pot._ModuleContext(path, _source_info=(cls_lines, {}))
+        contextA = context.from_class(A)
+        self.check_context(contextA, path, 5)
+        contextB1 = context.from_class(B)
+        self.check_context(contextB1, path, 7)
+        contextB2 = contextA.from_class(B)
+        self.check_context(contextB2, path, 7)
+        self.check_context(context, path, 1)
+        self.assertEquals("", self.get_log())
+
+    def test_from_class_missing(self):
+        """When class has no lineno the old context details are returned"""
+        path = "cls_missing.py"
+        class A(object): pass
+        class M(object): pass
+        context = export_pot._ModuleContext(path, 3, ({"A": 15}, {}))
+        contextA = context.from_class(A)
+        contextM1 = context.from_class(M)
+        self.check_context(contextM1, path, 3)
+        contextM2 = contextA.from_class(M)
+        self.check_context(contextM2, path, 15)
+        self.assertContainsRe(self.get_log(), "Definition of <.*M'> not found")
+
+    def test_from_string(self):
+        """New context returned with lineno updated from string"""
+        path = "str.py"
+        str_lines = {"one": 14, "two": 42}
+        context = export_pot._ModuleContext(path, _source_info=({}, str_lines))
+        context1 = context.from_string("one")
+        self.check_context(context1, path, 14)
+        context2A = context.from_string("two")
+        self.check_context(context2A, path, 42)
+        context2B = context1.from_string("two")
+        self.check_context(context2B, path, 42)
+        self.check_context(context, path, 1)
+        self.assertEquals("", self.get_log())
+
+    def test_from_string_missing(self):
+        """When string has no lineno the old context details are returned"""
+        path = "str_missing.py"
+        context = export_pot._ModuleContext(path, 4, ({}, {"line\n": 21}))
+        context1 = context.from_string("line\n")
+        context2A = context.from_string("not there")
+        self.check_context(context2A, path, 4)
+        context2B = context1.from_string("not there")
+        self.check_context(context2B, path, 21)
+        self.assertContainsRe(self.get_log(), "String 'not there' not found")
+
+
 class PoEntryTestCase(tests.TestCase):
 
     def setUp(self):
@@ -79,6 +233,7 @@
                 textwrap.dedent(expected)
                 )
 
+
 class TestPoEntry(PoEntryTestCase):
 
     def test_simple(self):