| 1 | # -*- coding: utf-8 -*- |
---|
| 2 | # |
---|
| 3 | # Copyright (C) 1998 Dinu C. Gherman <gherman@europemail.com> |
---|
| 4 | # Copyright (C) 2005-2007 Christopher Lenz <cmlenz@gmx.de> |
---|
| 5 | # Copyright (C) 2007-2010 Edgewall Software |
---|
| 6 | # |
---|
| 7 | # This software is licensed as described in the file COPYING, which |
---|
| 8 | # you should have received as part of this distribution. The terms |
---|
| 9 | # are also available at http://bitten.edgewall.org/wiki/License. |
---|
| 10 | # |
---|
| 11 | # This module is based on the pycount.py script written by Dinu C. |
---|
| 12 | # Gherman, and is used here under the following license: |
---|
| 13 | # |
---|
| 14 | # Permission to use, copy, modify, and distribute this software |
---|
| 15 | # and its documentation without fee and for any purpose, except |
---|
| 16 | # direct commercial advantage, is hereby granted, provided that |
---|
| 17 | # the above copyright notice appear in all copies and that both |
---|
| 18 | # that copyright notice and this permission notice appear in |
---|
| 19 | # supporting documentation. |
---|
| 20 | |
---|
1 | 21 | """Support for counting the lines of code in Python programs.""" |
---|
| 22 | |
---|
1 | 23 | import re |
---|
| 24 | |
---|
1 | 25 | __all__ = ['BLANK', 'CODE', 'COMMENT', 'DOC', 'count'] |
---|
1 | 26 | __docformat__ = 'restructuredtext en' |
---|
| 27 | |
---|
# Patterns that find the END of a triple-quoted string, given that the scan
# position is already inside one.  Use the ``match`` method; ``.end()`` of
# the result is the index of the character following the closing quotes.
#
# Each alternative consumes one ordinary character or one backslash escape,
# optionally preceded by one or two quote characters that do not form a
# closing triple.  The character classes deliberately exclude the backslash
# so that an escaped quote inside the string is consumed as an escape
# sequence and is never mistaken for (part of) the closing delimiter.
_squote3_finder = re.compile(
    r"([^\\']|"
    r"\\.|"
    r"'[^\\']|"
    r"'\\.|"
    r"''[^\\']|"
    r"''\\.)*'''")

_dquote3_finder = re.compile(
    r'([^\\"]|'
    r'\\.|'
    r'"[^\\"]|'
    r'"\\.|'
    r'""[^\\"]|'
    r'""\\.)*"""')

# Patterns that find the leftmost complete one-quoted string; use the
# ``search`` method.  As above, a backslash always consumes the character
# that follows it, so escaped quotes do not terminate the string.
_dquote1_finder = re.compile(r'"([^\\"]|\\.)*"')
_squote1_finder = re.compile(r"'([^\\']|\\.)*'")

# _is_comment matches a pure comment line (optional indentation, then '#').
_is_comment = re.compile(r"^[ \t]*#").match

# _is_blank matches an empty line (nothing but spaces and tabs).
_is_blank = re.compile(r"^[ \t]*$").match

# _has_nightmare finds the leftmost splat ('#') or quote character.
_has_nightmare = re.compile(r"""[\"'#]""").search

# _is_doc_candidate matches lines that start with a triple quote.
_is_doc_candidate = re.compile(r"^[ \t]*('''|\"\"\")")

# Line classifications yielded by `count`.
BLANK, CODE, COMMENT, DOC = 0, 1, 2, 3
| 66 | |
---|
def count(source):
    """Parse the given file-like object as Python source code.

    For every line in the code, this function yields a ``(lineno, type, line)``
    tuple, where ``lineno`` is the line number (starting at 0), ``type`` is
    one of `BLANK`, `CODE`, `COMMENT` or `DOC`, and ``line`` is the actual
    content of the line.

    Every input line is yielded exactly once, with its unmodified content.
    Lines inside a multi-line triple-quoted string are reported as `DOC`
    when the string started at the beginning of a line, and as `CODE`
    otherwise.

    :param source: a file-like object containing Python code
    """
    quote3_finder = {'"': _dquote3_finder, "'": _squote3_finder}
    quote1_finder = {'"': _dquote1_finder, "'": _squote1_finder}

    # State carried from one line to the next:
    #   in_triple_quote -- the pattern that finds the end of the currently
    #                      open triple-quoted string, or None if none is open
    #   in_doc          -- whether that open string started at the beginning
    #                      of a line (a docstring candidate)
    in_doc = False
    in_triple_quote = None

    for lineno, line in enumerate(source):
        classified = False

        if in_triple_quote is not None:
            # Still inside a multi-line triple-quoted string.
            yield lineno, (DOC if in_doc else CODE), line
            classified = True
            m = in_triple_quote.match(line)
            if m is None:
                # The string does not end on this line either.
                continue
            # Get rid of everything through the end of the triple.
            line = line[m.end():]
            in_doc = False
            in_triple_quote = None

        if _is_blank(line):
            if not classified:
                yield lineno, BLANK, line
            continue

        if _is_comment(line):
            if not classified:
                yield lineno, COMMENT, line
            continue

        # Now we have a code line, a doc start line, or whatever is left
        # over following the close of a multi-line triple quote; in
        # (and only in) the last case, ``classified`` is already true.
        if not classified:
            if _is_doc_candidate.match(line):
                yield lineno, DOC, line
                in_doc = True
            else:
                yield lineno, CODE, line

        # The only reason to continue parsing is to make sure the
        # start of a multi-line triple quote isn't missed.
        while True:
            m = _has_nightmare(line)
            if m is None:
                break
            i = m.start()
            ch = line[i]  # splat or quote

            if ch == '#':
                # Rest of the line is a comment; and there are no quotes
                # remaining because the splat was leftmost.
                break

            # A quote is leftmost.
            if line[i:i + 3] == ch * 3:
                # At the start of a triple quote.
                in_triple_quote = quote3_finder[ch]
                m = in_triple_quote.match(line, i + 3)
                if m is None:
                    # Triple quote doesn't end on this line; keep the
                    # finder so the following lines are scanned for it.
                    break
                # Remove the string & continue.
                line = line[:i] + line[m.end():]
                in_doc = False
                in_triple_quote = None
            else:
                # At a single quote; remove the string & continue.
                line, replaced = quote1_finder[ch].subn(' ', line, count=1)
                if not replaced:
                    # No complete string found (e.g. it is continued on
                    # the next line); be quiet, stop scanning this line and
                    # hope only one line is affected.
                    line = ''