#!/usr/bin/env python

# This file is Copyright 2003, 2006, 2007, 2009, 2010 Dean Hall.
#
# This file is part of the Python-on-a-Chip program.
# Python-on-a-Chip is free software: you can redistribute it and/or modify
# it under the terms of the GNU LESSER GENERAL PUBLIC LICENSE Version 2.1.
# 
# Python-on-a-Chip is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# A copy of the GNU LESSER GENERAL PUBLIC LICENSE Version 2.1
# is seen in the file COPYING up one directory from this.

"""
PyCscope

PyCscope creates a Cscope-like index file for a tree of Python source.
"""

## @file
#  @copybrief pycscope

## @package pycscope
#  @brief PyCscope creates a Cscope-like index file for a tree of Python source.
#
# 2007/12/25:
#   Improvements contributed by K. Rader of Google:
#   - Added the `-i` argument to specify a file-list file
#   - Fixups to the header and footer to make a valid file that cscope can read
#


__author__ = "Dean Hall"
__copyright__ = "Copyright 2003, 2006, 2007, 2009, 2010 Dean Hall.  See LICENSE for details."
__date__ = "2007/12/25"
__version__ = "0.3"
__usage__ = """Usage: pycscope.py [-R] [-f reffile] [-i srclistfile] [files ...]

-R              Recurse directories for files.
-f reffile      Use reffile as cross-ref file name instead of cscope.out.
-i srclistfile  Use a file that contains a list of source files to scan."""


import getopt, sys, os, os.path, string, types
import keyword, parser, symbol, token

# Marks as defined by Cscope.  Every mark starts with a tab and is emitted
# into the index buffer ahead of the item it tags.
MARK_FILE = "\t@"  # written before each source file path and once more to end the index
MARK_FUNC_DEF = "\t$"  # selected for the name that follows a "def" keyword
MARK_FUNC_CALL = "\t`"  # prefixed to a name that walkAst decides is being called
MARK_FUNC_END = "\t}"  # written when a dedent returns to the level of the "def"
MARK_INCLUDE = "\t~<"   # selected by "import"; TODO: assume all includes are global for now
MARK_ASGN = "\t="  # NOTE(review): not referenced by the code visible in this file
MARK_CLASS = "\tc"  # selected for the name that follows a "class" keyword
MARK_GLOBAL = "\tg"  # NOTE(review): not referenced by the code visible in this file
MARK_FUNC_PARM = "\tp"  # NOTE(review): not referenced by the code visible in this file

# Reverse the key,value pairs in the token dict, giving a name -> id mapping.
# (dict.iteritems() exists only on Python 2.)
tok_name_lookup = dict((v,k) for k,v in token.tok_name.iteritems())
# Token ids that walkAst compares against the first element of terminal nodes
TOK_NEWLINE = tok_name_lookup["NEWLINE"]
TOK_NAME = tok_name_lookup["NAME"]
TOK_LPAR = tok_name_lookup["LPAR"]
TOK_ENDMARKER = tok_name_lookup["ENDMARKER"]
TOK_INDENT = tok_name_lookup["INDENT"]
TOK_DEDENT = tok_name_lookup["DEDENT"]

# Reverse the key,value pairs in the symbol dict, giving a name -> id mapping
sym_name_lookup = dict((v,k) for k,v in symbol.sym_name.iteritems())
# Grammar symbol ids used by walkAst's function-call heuristic
SYM_TRAILER = sym_name_lookup["trailer"]
SYM_VARARGSLIST = sym_name_lookup["varargslist"]

# Python keywords plus a few common builtins.  Names found in this list are
# written to the index as plain text instead of being indexed as symbols.
# A new list is built on purpose: extending keyword.kwlist in place would
# change the standard library's own list for everything else in the process.
kwlist = list(keyword.kwlist) + ["True", "False", "None", "object"]

# Globals for the recursive walkAst function.  All of them are initialised
# here so that walkAst never meets an undefined global; parseSource resets
# sourcelinehassymbol and currentlinenum before each file is walked.
latestnewline = 1  # index-buffer position from which a symbol-less source line is deleted
latestsymbol = ""  # id of the most recently visited grammar symbol node
latesttoken = ""  # id of the most recently visited token
prevtoken = ""  # id of the token visited before latesttoken
mark = ""  # mark to prefix the next indexed name ("" for none)
infuncdef = False  # True from a "def" keyword until its function-end mark is written
indentcount = 0  # INDENT tokens minus DEDENT tokens seen since the latest "def"
sourcelinehassymbol = False  # True once the current source line produced an indexed name
currentlinenum = 0  # line number of the most recently visited token


def main():
    """Parse command line args and act accordingly.

    Recognised options (see __usage__): -R recurses into directories,
    -f names the cross-reference file to write (default "cscope.out") and
    -i names a file holding one source path per line.  Without any file
    arguments the current directory is scanned.  An invalid option prints
    the usage text and exits with status 2.
    """
    # Parse the command line arguments
    try:
        opts, args = getopt.getopt(sys.argv[1:], "Rf:i:")
    except getopt.GetoptError:
        print(__usage__)
        sys.exit(2)
    recurse = False
    indexfn = "cscope.out"
    for o, a in opts:
        if o == "-R":
            recurse = True
        elif o == "-f":
            indexfn = a
        elif o == "-i":
            # One path per line, trailing whitespace removed.  Blank lines
            # are skipped: an empty name joins to the base directory itself
            # and would cause the whole working directory to be scanned.
            with open(a, 'r') as srclist:
                for line in srclist:
                    entry = line.rstrip()
                    if entry:
                        args.append(entry)

    # Create the buffer to store the output (list of strings)
    indexbuff = []
    fnamesbuff = []

    # Search current dir by default
    if not args:
        args = ["."]

    # Parse the given list of files/dirs
    basepath = os.getcwd()
    for name in args:
        if os.path.isdir(os.path.join(basepath, name)):
            parseDir(basepath, name, indexbuff, recurse, fnamesbuff)
        else:
            try:
                parseFile(basepath, name, indexbuff, fnamesbuff)
            except SyntaxError as se:
                # Best effort: keep indexing the remaining files, but report
                # what could not be parsed instead of dropping it silently
                sys.stderr.write("pycscope: skipping %s: %s\n" % (name, se))

    # Symbol data for the last file ends with a file mark
    indexbuff.append("\n" + MARK_FILE)
    writeIndex(basepath, indexfn, indexbuff, fnamesbuff)


def parseDir(basepath, relpath, indexbuff, recurse, fnamesbuff):
    """Parses all files in the directory and
    recurses into subdirectories if requested.

    basepath   -- directory that relpath is relative to
    relpath    -- directory to scan, relative to basepath
    indexbuff  -- list of index strings, extended in place
    recurse    -- when true, subdirectories are scanned as well
    fnamesbuff -- list of indexed file paths, extended in place

    Files that fail to parse are reported on stderr and skipped.
    """
    dirpath = os.path.join(basepath, relpath)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # generated index reproducible from run to run
    for name in sorted(os.listdir(dirpath)):
        childrel = os.path.join(relpath, name)

        if os.path.isdir(os.path.join(dirpath, name)):
            # A directory is never handed to parseFile: one whose name ends
            # in ".py" would otherwise be opened as if it were a source file
            if recurse:
                parseDir(basepath, childrel, indexbuff, recurse, fnamesbuff)
            continue

        try:
            parseFile(basepath, childrel, indexbuff, fnamesbuff)
        except SyntaxError as se:
            # Best effort: keep indexing the remaining files, but report
            # what could not be parsed instead of dropping it silently
            sys.stderr.write("pycscope: skipping %s: %s\n" % (childrel, se))

def parseFile(basepath, relpath, indexbuff, fnamesbuff):
    """Parses a source file and puts the resulting index into the buffer.

    basepath   -- directory that relpath is relative to
    relpath    -- path of the file, relative to basepath; anything not
                  ending in ".py" is ignored
    indexbuff  -- list of index strings, extended in place
    fnamesbuff -- list of indexed file paths, extended in place

    Raises SyntaxError (with its filename set to the full path) when the
    source cannot be parsed.  The file mark and file name have already been
    recorded in the buffers at that point.
    """
    global latestnewline

    # Don't parse if it's not python source
    if not relpath.endswith(".py"):
        return

    # Get the contents; the handle is closed even if reading fails
    fullpath = os.path.join(basepath, relpath)
    with open(fullpath, 'r') as f:
        filecontents = f.read()

    # Add the file mark to the index
    fnamesbuff.append(relpath)
    indexbuff.append("\n%s%s" % (MARK_FILE, relpath))
    # Entries for this file's first source line start after the file mark
    latestnewline = len(indexbuff)

    # Add path info to any syntax errors in the source files
    try:
        parseSource(filecontents, indexbuff)
    except SyntaxError as se:
        se.filename = fullpath
        # Bare raise keeps the traceback that points at the parse failure
        raise


def parseSource(sourcecode, indexbuff):
    """Index one module's source text.

    The text is parsed with the parser module, converted to nested lists
    that carry line numbers, and handed to walkAst, which appends the index
    entries to indexbuff.
    """
    global sourcelinehassymbol, currentlinenum

    # Build the syntax tree as nested lists, line numbers included
    tree = parser.ast2list(parser.suite(sourcecode), True)

    # Per-file walker state must be fresh before each tree is walked
    sourcelinehassymbol = False
    currentlinenum = 0

    # Everything else in the file is indexed by the recursive walk
    walkAst(tree, indexbuff)


def walkAst(astlist, indexbuff):
    """Scan the AST for tokens, write out index lines.

    astlist   -- one node of the nested-list tree built by parseSource.
                 Element 0 is the node id.  For token nodes element 1 is
                 the token text and element 2 the source line number; any
                 list-typed elements after element 0 are child nodes.
    indexbuff -- list of index strings; modified in place (entries are
                 appended, rewritten, inserted and deleted).

    The walk is recursive and keeps its state in module globals, so the
    result depends on the order in which nodes are visited.
    """
    global latestnewline
    global latestsymbol
    global latesttoken
    global prevtoken
    global mark
    global sourcelinehassymbol
    global infuncdef
    global indentcount
    global currentlinenum

    # Remember the latest symbol
    # (node ids above 256 are treated as grammar symbols, the rest as tokens)
    if astlist[0] > 256:
        latestsymbol = astlist[0]

    # Handle the tokens
    else:
        # Save the previous token and get the latest one
        prevtoken = latesttoken
        latesttoken = astlist[0]

        # If this code is on a new line number
        if astlist[2] != currentlinenum:
            currentlinenum = astlist[2]

            # If there was a symbol of interest,
            # remember this location in the index
            if sourcelinehassymbol:
                latestnewline = len(indexbuff)
                sourcelinehassymbol = False

            # If there was no symbol of interest between this and the previous
            # newline, remove all entries added since the previous newline
            else:
                del indexbuff[latestnewline:]

            # Write the new line number
            indexbuff.append("\n\n%d " % astlist[2])

            # Clear an include mark when a newline token is reached
            # This is what ends a comma-separated list of modules after import
            if mark == MARK_INCLUDE:
                mark = ""

        if latesttoken == TOK_NAME:
            # If a name is not a python keyword, it is a symbol of interest
            if astlist[1] not in kwlist:

                # Remember that there is a symbol of interest
                sourcelinehassymbol = True

                # Write the mark and the symbol
                indexbuff.append("\n%s%s\n" % (mark, astlist[1]))

                # Clear the mark unless it's an include mark
                # This is what allows a comma-separated list of modules after import
                if mark != MARK_INCLUDE:
                    mark = ""

            # If the name is a python keyword
            else:
                # Some keywords determine what mark should prefix the next name
                kw = astlist[1]
                if kw == "def":
                    mark = MARK_FUNC_DEF

                    # Remember that we're in a function definition
                    # (the indent count restarts, so a nested "def" replaces
                    # the bookkeeping of the enclosing one)
                    infuncdef = True
                    indentcount = 0
                elif kw == "import":
                    mark = MARK_INCLUDE
                elif kw == "class":
                    mark = MARK_CLASS

                # Write out the keyword
                indexbuff.append("%s " % kw)

        # This set of tokens and symbols indicates a function call (not perfect)
        elif (latesttoken == TOK_LPAR) and (prevtoken == TOK_NAME) and (
            (latestsymbol == SYM_TRAILER) or (latestsymbol == SYM_VARARGSLIST)):

            # Insert a function-call mark before the previous name
            # (the last buffer entry is rewritten: its first character is
            # dropped, the call mark is prefixed and "( " is appended)
            indexbuff[-1] = "\n%s%s( " % (MARK_FUNC_CALL, indexbuff[-1][1:])

        # Count the number of indents; to be used by dedent
        elif latesttoken == TOK_INDENT:
            if infuncdef:
                indentcount += 1

        # When dedent reaches the level of the function def,
        # write the function-end mark
        elif latesttoken == TOK_DEDENT:
            if infuncdef:
                indentcount -= 1
                if indentcount == 0:
                    # The mark goes in front of the last buffer entry
                    # (presumably the line-number entry appended above), and
                    # latestnewline moves by one to allow for the new entry
                    indexbuff.insert(-1, "\n\n%d \n%s\n" % (astlist[2], MARK_FUNC_END))
                    latestnewline += 1
                    infuncdef = False

        # Replace the last line number placeholder with a newline
        # when at the end of a file
        elif latesttoken == TOK_ENDMARKER:
            if len(indexbuff) > 0:
                indexbuff[-1] = "\n"

        # For uninteresting tokens, just write the accompanying string
        # (newlines inside the token text are written as a literal "\n")
        else:
            if len(astlist[1]) > 0:
                nonsymboltext = astlist[1].replace("\n","\\n") + ' '
            else:
                nonsymboltext = ''
            indexbuff.append(nonsymboltext)

    # Recurse into all nodes
    # (only list-typed elements after element 0 are child nodes)
    for i in range(1, len(astlist)):
        if type(astlist[i]) == types.ListType:
            walkAst(astlist[i], indexbuff)


def writeIndex(basepath, indexfn, indexbuff, fnamesbuff):
    """Write the index buffer to the output file.

    basepath   -- directory recorded in the header; indexfn is joined to it
    indexfn    -- name of the cross-reference file to create or overwrite
    indexbuff  -- list of strings that are concatenated to form the index
    fnamesbuff -- list of indexed file paths, listed in the trailer

    The file is closed even when one of the writes fails.
    """
    index = ''.join(indexbuff)
    fnames = '\n'.join(fnamesbuff) + '\n'

    # The header is "cscope 15 <basepath> -c <10-digit offset>": 24 fixed
    # characters plus the base path.  The extra 1 steps over the newline that
    # opens the trailer, so the offset written is that of the trailer's "1".
    hdr_len = len(basepath) + 25

    with open(os.path.join(basepath, indexfn), 'w') as fout:
        # Write the header and index
        fout.write("cscope 15 %s -c %010d" % (basepath, hdr_len + len(index)))
        fout.write(index)

        # Write trailer info: fixed fields, then the number of source files,
        # the length of the file-name list and the list itself
        fout.write("\n1\n.\n0\n")
        fout.write("%d\n" % len(fnamesbuff))
        fout.write("%d\n" % len(fnames))
        fout.write(fnames)


# Run the indexer only when this file is executed as a script
if __name__ == "__main__":
    main()