Edgewall Software

source: trunk/bitten/util/loc.py @ 1001

Last change on this file since 1001 was 910, checked in by osimons, 13 years ago

Updated copyright to 2010.

  • Property svn:eol-style set to native
File size: 5.2 KB
CovLine 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 1998 Dinu C. Gherman <gherman@europemail.com>
4# Copyright (C) 2005-2007 Christopher Lenz <cmlenz@gmx.de>
5# Copyright (C) 2007-2010 Edgewall Software
6#
7# This software is licensed as described in the file COPYING, which
8# you should have received as part of this distribution. The terms
9# are also available at http://bitten.edgewall.org/wiki/License.
10#
11# This module is based on the pycount.py script written by Dinu C.
12# Gherman, and is used here under the following license:
13#
14#     Permission to use, copy, modify, and distribute this software
15#     and its documentation without fee and for any purpose, except
16#     direct commerial advantage, is hereby granted, provided that
17#     the above copyright notice appear in all copies and that both
18#     that copyright notice and this  permission notice appear in
19#     supporting documentation.
20
121"""Support for counting the lines of code in Python programs."""
22
123import re
24
__all__ = ['BLANK', 'CODE', 'COMMENT', 'DOC', 'count']
__docformat__ = 'restructuredtext en'

# Regular expressions that find the end of a triple-quoted string, given
# that we know we are already inside one.  Use the ``match`` method;
# ``.span()[1]`` (or ``.end()``) is the index of the character following the
# closing quotes.
#
# A backslash always consumes the character that follows it, so an escaped
# quote never terminates the string.  Every alternative starts with a
# distinct prefix, which keeps matching linear: the earlier form let a plain
# ``.`` match two different alternatives and could backtrack exponentially on
# a long run of dots with no closing quote.
_squote3_finder = re.compile(
    r"(?:[^\\']"
    r"|\\."
    r"|'[^\\']"
    r"|'\\."
    r"|''[^\\']"
    r"|''\\.)*'''")

_dquote3_finder = re.compile(
    r'(?:[^\\"]'
    r'|\\.'
    r'|"[^\\"]'
    r'|"\\.'
    r'|""[^\\"]'
    r'|""\\.)*"""')

# Regular expressions that find the leftmost single-line quoted string; use
# the ``search``/``sub`` methods.  Backslash escapes are skipped as a unit so
# that ``"a\"b"`` is recognised as one string.
_dquote1_finder = re.compile(r'"(?:[^"\\]|\\.)*"')
_squote1_finder = re.compile(r"'(?:[^'\\]|\\.)*'")

# _is_comment matches a pure comment line.
_is_comment = re.compile(r"^[ \t]*#").match

# _is_blank matches an empty (whitespace-only) line.
_is_blank = re.compile(r"^[ \t]*$").match

# _has_nightmare finds the leftmost hash or quote character.
_has_nightmare = re.compile(r"""[\"'#]""").search

# _is_doc_candidate matches lines that start with a triple quote.
_is_doc_candidate = re.compile(r"^[ \t]*('''|\"\"\")")

# Line classifications yielded by `count`.
BLANK, CODE, COMMENT, DOC = 0, 1, 2, 3


def count(source):
    """Parse the given file-like object as Python source code.

    For every line in the code, this function yields a ``(lineno, type, line)``
    tuple, where ``lineno`` is the line number (starting at 0), ``type`` is
    one of `BLANK`, `CODE`, `COMMENT` or `DOC`, and ``line`` is the actual
    content of the line, exactly as read from ``source``.

    A line is `DOC` when it starts with a triple quote, or when it lies
    inside a multi-line triple-quoted string that was opened by such a line.
    Lines inside any other multi-line triple-quoted string are `CODE`.

    :param source: a file-like object (or any other iterable of lines)
                   containing Python code
    """
    quote3_finder = {'"': _dquote3_finder, "'": _squote3_finder}
    quote1_finder = {'"': _dquote1_finder, "'": _squote1_finder}

    in_doc = False            # inside a triple-quoted string opened as DOC
    in_triple_quote = None    # end-finder of the currently open string

    for lineno, line in enumerate(source):
        classified = False

        if in_triple_quote:
            # Continuation of a multi-line string: the line inherits the
            # classification of the line that opened the string.
            if in_doc:
                yield lineno, DOC, line
            else:
                yield lineno, CODE, line
            classified = True
            m = in_triple_quote.match(line)
            if m is None:
                continue
            # Get rid of everything through the end of the triple quote and
            # keep scanning whatever follows it on the same line.
            line = line[m.end():]
            in_doc = False
            in_triple_quote = None

        if _is_blank(line):
            if not classified:
                yield lineno, BLANK, line
            continue

        if _is_comment(line):
            if not classified:
                yield lineno, COMMENT, line
            continue

        # Now we have a code line, a doc start line, or text left over
        # following the close of a multi-line triple quote; in (and only in)
        # the last case, ``classified`` is already true.
        if not classified:
            if _is_doc_candidate.match(line):
                yield lineno, DOC, line
                in_doc = True
            else:
                yield lineno, CODE, line

        # The only reason to continue parsing is to make sure the start of
        # a multi-line triple quote isn't missed.
        while True:
            m = _has_nightmare(line)
            if m is None:
                break
            i = m.start()
            ch = line[i]    # hash or quote

            if ch == '#':
                # A comment starts here, and no quotes precede it because
                # the hash was leftmost: nothing more to find on this line.
                break

            if line.startswith(ch * 3, i):
                # At the start of a triple quote.
                in_triple_quote = quote3_finder[ch]
                m = in_triple_quote.match(line, i + 3)
                if m is None:
                    # Triple quote doesn't end on this line; remember the
                    # finder so the following lines are treated as string.
                    break
                # Remove the string and continue scanning.
                line = line[:i] + line[m.end():]
                in_doc = False
                in_triple_quote = None
            else:
                # At a single-line quote; remove the string and continue.
                line, replaced = quote1_finder[ch].subn(' ', line, count=1)
                if not replaced:
                    # Unterminated string: give up quietly on this line and
                    # hope that only one line is affected.
                    break
Note: See TracBrowser for help on using the repository browser.