#!/usr/bin/env python
"""\
wmlindent - re-indent WML in a uniform way.

By Eric S. Raymond, June 2007.

Call with no arguments to filter WML on stdin to reindented WML on
stdout.  If arguments are specified, they are taken to be files to be
re-indented in place; interrupting will be safe as each reindenting
will be done to a copy that is atomically renamed when it's done. This
code never modifies anything but leading whitespace on lines.

The indent unit is four spaces.  Absence of an option to change this is
deliberate; the purpose of this tool is to *prevent* style wars, not encourage
them.

Note: This does not include a parser.  It will produce bad results on WML
that is syntactically unbalanced.  Unbalanced double quotes that aren't part
of a multiline literal will also confuse it.  You will receive warnings
if there's an indent open at end of file or if a closer occurs with
indent already zero; these two conditions strongly suggest unbalanced WML.
"""

import sys, os, getopt

def is_directive(str):
    """Tell whether a line opens with a preprocessor directive.

    Such lines are the ones the reindenter must leave at column zero.
    The match is a plain prefix test against the stripped line text, so
    leading whitespace has to be removed by the caller beforehand.
    """
    directive_prefixes = ("#ifdef", "#else", "#endif", "#define", "#enddef")
    # str.startswith accepts a tuple and succeeds if any member matches.
    return str.startswith(directive_prefixes)

def reindent(name, infp, outfp):
    """Reindent WML read from infp, writing the result to outfp.

    name is used only to label warnings written to standard error.
    infp is any iterable of lines; outfp is any object with a write()
    method.  Only leading whitespace is ever changed.

    Lines inside a multiline string literal (one whose opening double
    quote is not closed on the same line) are copied verbatim and are
    not examined for tags.  Preprocessor directives stay at column
    zero, and blank lines are emitted without any indentation so that
    no trailing whitespace is introduced.

    Warnings are issued for a close tag seen at indent zero and for a
    nonzero indent at end of input; both suggest unbalanced WML.
    """
    baseindent = "    "
    # dostrip is True while we are in ordinary WML, False while we are
    # inside a multiline string literal.
    dostrip = True
    indent = ""
    for line in infp:
        # Remember the state this line started in; dostrip itself is
        # updated at the bottom of the loop for the *next* line.
        in_code = dostrip
        if in_code:
            transformed = line.lstrip()
        else:
            transformed = line
        if transformed == "":
            # lstrip() ate the newline of a whitespace-only line.
            transformed = "\n"
        # In the close case, we must compute the new indent *before*
        # emitting the line so the close tag will be at the same level
        # as the one that started the block.
        if in_code and transformed.startswith("[/"):
            if indent == "":
                sys.stderr.write("wmlindent: from %s, close tag with indent already zero.\n" % name)
            else:
                indent = indent[:-len(baseindent)]
        # Blank lines, directives and string-literal contents are
        # written back without any added indentation.
        if in_code and transformed != "\n" and not is_directive(transformed):
            outfp.write(indent + transformed)
        else:
            outfp.write(transformed)
        # An opening tag deepens the indent for the lines that follow.
        if in_code and transformed.startswith("[") and not transformed.startswith("[/"):
            indent += baseindent
        # Work out whether the next line is inside a string literal.
        # Outside a literal, anything after '#' is a comment and its
        # quotes do not count; inside one, '#' is just text.
        if in_code:
            syntax = transformed.split("#")[0]
        else:
            syntax = transformed
        # An odd number of quotes means this line opened or closed a
        # multiline literal, so the state flips.
        if syntax.count('"') % 2 == 1:
            dostrip = not dostrip
    if indent != "":
        sys.stderr.write("wmlindent: from %s, end of file with indent nonzero.\n" % name)

def convertor(linefilter, filelist):
    """Apply a filter to standard input or to each named file in place.

    linefilter is called as linefilter(name, infp, outfp).  With an
    empty filelist it is run once from sys.stdin to sys.stdout.
    Otherwise each file is filtered into a sibling "<filename>.out"
    copy which then replaces the original, so an interrupted run never
    leaves a half-written original behind.

    A KeyboardInterrupt during filtering discards the partial copy and
    stops processing the remaining files without propagating.  Other
    exceptions propagate after both files have been closed.
    """
    if not filelist:
        linefilter("standard input", sys.stdin, sys.stdout)
        return
    for filename in filelist:
        tmpname = filename + ".out"
        try:
            infp = open(filename, "r")
            try:
                outfp = open(tmpname, "w")
                try:
                    linefilter(filename, infp, outfp)
                finally:
                    outfp.close()
            finally:
                infp.close()
        except KeyboardInterrupt:
            # Both files are closed by now; drop the partial copy if it
            # was created and leave the original untouched.
            if os.path.exists(tmpname):
                os.remove(tmpname)
            return
        # Replace the original as soon as its copy is complete, so every
        # file in the list is updated, not just the last one.
        try:
            # Atomic where rename() may overwrite an existing target.
            os.rename(tmpname, filename)
        except OSError:
            # Windows refuses to rename onto an existing file, so the
            # target has to be removed first.  There's a tiny window
            # here; it is unavoidable on that platform.
            os.remove(filename)
            os.rename(tmpname, filename)

if __name__ == '__main__':
    # Script entry point.  -h or -? prints the module docstring and
    # exits; any remaining arguments are files to reindent in place,
    # and with none the script filters stdin to stdout.
    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "h?")
    except getopt.GetoptError:
        # Unknown option: show usage on stderr and fail.
        sys.stderr.write(__doc__)
        sys.exit(2)
    for (opt, val) in options:
        if opt in ("-h", "-?"):
            sys.stdout.write(__doc__)
            # Stop here rather than falling through to filter stdin.
            sys.exit(0)
    convertor(reindent, arguments)
