1 | # -*- coding: utf-8 -*- |
---|
2 | # |
---|
3 | # Copyright (C) 1998 Dinu C. Gherman <gherman@europemail.com> |
---|
4 | # Copyright (C) 2005-2007 Christopher Lenz <cmlenz@gmx.de> |
---|
5 | # Copyright (C) 2007-2010 Edgewall Software |
---|
6 | # |
---|
7 | # This software is licensed as described in the file COPYING, which |
---|
8 | # you should have received as part of this distribution. The terms |
---|
9 | # are also available at http://bitten.edgewall.org/wiki/License. |
---|
10 | # |
---|
11 | # This module is based on the pycount.py script written by Dinu C. |
---|
12 | # Gherman, and is used here under the following license: |
---|
13 | # |
---|
14 | # Permission to use, copy, modify, and distribute this software |
---|
15 | # and its documentation without fee and for any purpose, except |
---|
16 | # direct commerial advantage, is hereby granted, provided that |
---|
17 | # the above copyright notice appear in all copies and that both |
---|
18 | # that copyright notice and this permission notice appear in |
---|
19 | # supporting documentation. |
---|
20 | |
---|
21 | """Support for counting the lines of code in Python programs.""" |
---|
22 | |
---|
23 | import re |
---|
24 | |
---|
25 | __all__ = ['BLANK', 'CODE', 'COMMENT', 'DOC', 'count'] |
---|
26 | __docformat__ = 'restructuredtext en' |
---|
27 | |
---|
# Regular expressions to find the end of a triple-quoted string, given
# that we know we are inside one; use the "match" method, after which
# m.end() is the index of the character following the closing quotes.
# A backslash escapes whatever character follows it, so an escaped quote
# never counts towards the closing triple.
_squote3_finder = re.compile(
    r"([^\\']|"
    r"\\.|"
    r"'[^\\']|"
    r"'\\.|"
    r"''[^\\']|"
    r"''\\.)*'''")

_dquote3_finder = re.compile(
    r'([^\\"]|'
    r'\\.|'
    r'"[^\\"]|'
    r'"\\.|'
    r'""[^\\"]|'
    r'""\\.)*"""')

# Regular expressions matching one complete one-quoted string, including
# backslash-escaped characters (e.g. "a\"b"); use the "match" method with
# the index of the opening quote as start position.
_dquote1_finder = re.compile(r'"([^"\\]|\\.)*"')
_squote1_finder = re.compile(r"'([^'\\]|\\.)*'")

# _is_comment matches pure comment line.
_is_comment = re.compile(r"^[ \t]*#").match

# _is_blank matches a line holding nothing but whitespace; this includes
# the "\r" of a CRLF line ending and form feeds.
_is_blank = re.compile(r"^\s*$").match

# find leftmost splat or quote.
_has_nightmare = re.compile(r"""[\"'#]""").search

# _is_doc_candidate matches lines that start with a triple quote.
_is_doc_candidate = re.compile(r"^[ \t]*('''|\"\"\")")

BLANK, CODE, COMMENT, DOC = 0, 1, 2, 3


def count(source):
    """Parse the given file-like object as Python source code.

    For every line in the code, this function yields a ``(lineno, type, line)``
    tuple, where ``lineno`` is the line number (starting at 0), ``type`` is
    one of `BLANK`, `CODE`, `COMMENT` or `DOC`, and ``line`` is the actual
    content of the line.

    A line that starts with a triple quote (after optional indentation) is
    classified as `DOC`, and so is every following line up to and including
    the one closing that string. Lines inside any other multi-line
    triple-quoted string are classified as `CODE`.

    :param source: a file-like object (or any other iterable of lines)
                   containing Python code
    :return: a generator of ``(lineno, type, line)`` tuples, one per line
    """
    quote3_finder = {'"': _dquote3_finder, "'": _squote3_finder}
    quote1_finder = {'"': _dquote1_finder, "'": _squote1_finder}

    # True while the currently open triple-quoted string started as a
    # doc candidate.
    in_doc = False
    # Regex that finds the end of the currently open triple-quoted string,
    # or None when no such string is open.
    in_triple_quote = None

    for lineno, line in enumerate(source):
        classified = False

        if in_triple_quote is not None:
            # The line belongs to a string opened on an earlier line.
            if in_doc:
                yield lineno, DOC, line
            else:
                yield lineno, CODE, line
            classified = True
            m = in_triple_quote.match(line)
            if m is None:
                # The string does not end on this line either.
                continue
            # Get rid of everything through the end of the triple.
            line = line[m.end():]
            in_doc = False
            in_triple_quote = None

        if _is_blank(line):
            if not classified:
                yield lineno, BLANK, line
            continue

        if _is_comment(line):
            if not classified:
                yield lineno, COMMENT, line
            continue

        # Now we have a code line, a doc start line, or whatever is left
        # over following the close of a multi-line triple quote; in (and
        # only in) the last case the line has already been classified.
        if not classified:
            if _is_doc_candidate.match(line):
                yield lineno, DOC, line
                in_doc = True
            else:
                yield lineno, CODE, line

        # The only reason to continue parsing is to make sure the
        # start of a multi-line triple quote isn't missed.
        while True:
            m = _has_nightmare(line)
            if m is None:
                break
            i = m.start()
            ch = line[i]  # splat or quote
            if ch == '#':
                # The rest of the line is a comment; there are no quotes
                # before it because the splat was leftmost.
                break

            # A quote is leftmost.
            if line[i:i + 3] == ch * 3:
                # At the start of a triple quote.
                finder = quote3_finder[ch]
                m = finder.match(line, i + 3)
                if m is None:
                    # Triple quote doesn't end on this line; remember how
                    # to find its end on the following lines.
                    in_triple_quote = finder
                    break
                # Remove the string & continue.
                line = line[:i] + line[m.end():]
                in_doc = False
            else:
                # At a single quote; remove the string & continue. No
                # quote precedes index i, so anchoring the match there
                # finds the same string a leftmost search would.
                m = quote1_finder[ch].match(line, i)
                if m is None:
                    # Unterminated string: be quiet and hope only this
                    # one line is affected.
                    break
                line = line[:i] + ' ' + line[m.end():]
---|