#!/usr/bin/env python

####
#   bbLaTeX.py 1.0
#   LaTeX to bbCode Translator
#   Copyright (c) Andrew J. Bennieston, 2006
#   Released under the BSD License
####

####
#   This code is rather messy at the moment, functions defined
#   just before they're used, a class defined somewhere about
#   a third of the way down the file, and nothing tied together
#   or made easy to extend. Over time I plan on rewriting chunks
#   of this into classes and functions which should tidy the code
#   up a little. For now, though, it does what I need it to!
####

####
#   Procedure:
#   (0) Replace \input with file contents
#   (1) Enumerate sections and subsections
#   (2) Generate TOC and section numbering map
#   (3) Replace LaTeX with bbCode
####

import sys, string, re

# usage() prints information about how to use bbLaTeX
def usage():
    """Print a short usage message for bbLaTeX to standard output.

    The program name shown in the message is taken from sys.argv[0].
    """
    message = "".join([
        "\nUsage: " + sys.argv[0] + " [input-file]\n\n",
        "Convert LaTeX input to bbCode output.\n",
        "With no input-file, or when input-file is -, read\n",
        "standard input. Writes to {inputfile%.tex}.bbc,\n",
        "or to standard output when reading from\n",
        "standard input.\n",
    ])
    # A parenthesised single argument prints the same text whether print
    # is a statement (Python 2) or a function (Python 3).
    print(message)

# Startup: work out where the LaTeX source comes from and read it all in
inputfile = None
filename = ""
if len(sys.argv) not in (1, 2):
    # Anything other than zero or one argument is a usage error
    usage()
    sys.exit(1)
if len(sys.argv) == 2 and sys.argv[1] != '-':
    # A real path was given on the command line
    inputfile = open(sys.argv[1], 'r')
    filename = sys.argv[1]
else:
    # No argument, or an explicit '-': read standard input
    inputfile = sys.stdin
    filename = '-'
# Pull the whole document into one string, then release the handle
latex = inputfile.read()
inputfile.close()

# filecontents(matchobj) returns the contents of the file
def filecontents(matchobj):
    """Return the contents of the file named by group 1 of *matchobj*.

    Intended as an re.sub() replacement callback. If the file cannot be
    opened or read, a message quoting the whole matched text is written
    to standard error and the empty string is returned, so the matched
    command is removed from the output instead of aborting the run.
    """
    try:
        # The with-block closes the handle even if read() fails part-way
        with open(matchobj.group(1), 'r') as f:
            return f.read()
    except (IOError, OSError, UnicodeError):
        # Only I/O and decoding failures are treated as "unreadable file";
        # KeyboardInterrupt and programming errors are no longer swallowed.
        sys.stderr.write('Error parsing ' + matchobj.group(0) + '\n')
        return ''

# Expand \input{foo}: every occurrence is replaced by the contents of
# foo, with filecontents acting as the substitution callback.
inputre = re.compile(r'\\input\{(.+?)\}', re.IGNORECASE)
latex = re.sub(inputre, filecontents, latex)

# Manage section and subsection numbering, and generate TOC
class SectionManager(object):
    r"""Number \section / \subsection headings and collect a table of contents.

    An instance is used as the callback of a regular-expression
    substitution: matched() is called once per heading, in document
    order, and returns the numbered replacement text. Afterwards TOC()
    renders the collected titles as a nested bbCode list.

    sectpre / sectpost are the strings placed before and after a numbered
    section heading; subpre / subpost do the same for subsections.
    """

    def __init__(self, sectpre, sectpost, subpre, subpost):
        # (section title, [subsection titles]) pairs in document order
        self.sections = []
        # Number of the most recent section (0 before the first one)
        self.sectionnum = 0
        # Number of the most recent subsection within the current section
        self.subsectionnum = 0
        self.sectpre = sectpre
        self.sectpost = sectpost
        self.subpre = subpre
        self.subpost = subpost

    def matched(self, matchobj):
        r"""Return the numbered heading text for one matched heading.

        *matchobj* must come from a pattern whose group 1 captures a
        \section title and whose group 2 captures a \subsection title.
        The heading kind is decided by which group matched, not by
        searching the matched text. A case-insensitive pattern (e.g.
        matching \Section) and titles that themselves mention \section
        are therefore handled correctly.

        Returns '' when neither group matched.
        """
        # Pad so patterns with fewer than two groups do not raise
        section_title, subsection_title = (matchobj.groups() + (None, None))[:2]

        if section_title is not None:
            self.sectionnum += 1
            self.subsectionnum = 0  # subsections restart in each section
            self.sections.append((section_title, []))
            return (self.sectpre + str(self.sectionnum) + ' '
                    + section_title + self.sectpost)

        if subsection_title is not None:
            self.subsectionnum += 1
            snum = str(self.sectionnum) + '.' + str(self.subsectionnum)
            if self.sections:
                self.sections[-1][1].append(subsection_title)
            # A subsection that appears before any section has no parent
            # entry to be listed under, so it is left out of the TOC, but
            # its heading is still emitted (numbered 0.N) rather than
            # being silently deleted from the document.
            return self.subpre + snum + ' ' + subsection_title + self.subpost

        return ''

    def TOC(self):
        """Return the table of contents as a nested bbCode [list=1] block.

        Each section is one list item; a section with subsections is
        followed by an inner [list=1] ... [/list] holding one item per
        subsection.
        """
        parts = ["[list=1]\n"]
        for sect, subs in self.sections:
            parts.append('[*]' + sect + "\n")
            if subs:
                parts.append("[list=1]")
                parts.extend(['[*]' + sub + "\n" for sub in subs])
                parts.append("[/list]")
        parts.append("[/list]\n")
        return "".join(parts)
        
# Create a SectionManager object using bbCode pre and post strings
smgr = SectionManager('[size=16]','[/size]','[size=14]','[/size]')
# Group 1 captures a \section title, group 2 a \subsection title
sectre = re.compile(r'(?:\\section\{(.+?)\})|(?:\\subsection\{(.+?)\})', re.I)
latex = sectre.sub(smgr.matched, latex)
# Substitute table of contents.
# The TOC is built from section titles, which can still contain
# backslashes at this point (e.g. \_ or \textbf). Passing it to re.sub
# as the replacement would treat those as escapes or group references,
# so do a literal string replacement instead.
latex = latex.replace('\\tableofcontents', smgr.TOC())
       
# Inline markup and front matter: each LaTeX command pattern on the left
# is rewritten to the bbCode template on the right, in the order listed.
for latexpattern, bbtemplate in (
        (r'\\url\{(.+?)\}', r'[url]\1[/url]'),                      # URLs
        (r'\\textit\{(.+?)\}', r'[i]\1[/i]'),                       # Italic
        (r'\\textbf\{(.+?)\}', r'[b]\1[/b]'),                       # Boldface
        (r'\\texttt\{(.+?)\}', r'[color=blue]\1[/color]'),          # Monospace
        (r'\\begin\{quote\}', '[quote]'),                           # Quote start
        (r'\\end\{quote\}', '[/quote]'),                            # Quote end
        (r'\\title\{(.+?)\}', r'[size=18]\1[/size]'),               # Title
        (r'\\author\{(.+?)\}', r'[size=14][b]\1[/b][/size]'),       # Author
        (r'\\date\{(.+?)\}', r'[size=14][b]\1[/b][/size]'),         # Date
        (r'\\copyright', '(c)'),                                    # \copyright
):
    latex = re.sub(latexpattern, bbtemplate, latex)

# Strip LaTeX that we don't care about
#   \documentclass[...]{...}
#   \usepackage[...]{...}
#   \begin{document}
#   \end{document}
#   \maketitle
#   \newpage
#   \pagestyle{...}
#   \thispagestyle{...}
regexps = [ r'\\documentclass(?:\[(.+?)\])?\{(.+?)\}',
            # The optional [options] argument must be accepted too,
            # otherwise e.g. \usepackage[utf8]{inputenc} is left in the
            # output untouched.
            r'\\usepackage(?:\[(.+?)\])?\{(.+?)\}',
            r'\\begin\{document\}',
            r'\\end\{document\}',
            r'\\maketitle',
            r'\\newpage',
            r'\\pagestyle\{(.+?)\}',
            r'\\thispagestyle\{(.+?)\}' ]
# Each matched command is simply deleted from the text
for rx in regexps:
    latex = re.sub(rx, '', latex)

# Finally change escaped LaTeX special characters back to normal characters
latex = re.sub(r'\\([_$#%&~^{}><\\])', r'\1', latex)

# Remove leading and trailing whitespace (str method rather than the
# Python-2-only string.strip function)
latex = latex.strip()
# Collapse every run of consecutive newlines into a single newline.
# One regex pass handles runs of any length; two passes of replacing
# "\n\n" with "\n" still left doubled newlines for runs of five or more.
latex = re.sub(r'\n{2,}', '\n', latex)

# verbatim and listing environments both turn into [code] ... [/code]
for envpattern, bbtag in (
        (r'\\begin\{verbatim\}', '[code]'),
        (r'\\end\{verbatim\}', '[/code]'),
        (r'\\begin\{listing\}', '[code]'),
        (r'\\end\{listing\}', '[/code]'),
):
    latex = re.sub(envpattern, bbtag, latex)
# \listinginput becomes [code] <contents of file> [/code]
def listingInput(matchobj):
    """Return the file named by group 1 of *matchobj* wrapped in a code block.

    The file is read through filecontents(), so an unreadable file yields
    an empty [code] block.
    """
    return "[code]\n" + filecontents(matchobj) + "[/code]\n"
# The optional [digits] and the first {digits} argument are matched but
# ignored; only the final {file} argument is captured.
latex = re.compile(
    r'\\listinginput(?:\[(?:\d+?)\])?\{(?:\d+?)\}\{(.+?)\}'
).sub(listingInput, latex)

# Output stage
outfile = None
if filename == '-':
    # Input came from standard input, so the result goes to standard output
    outfile = sys.stdout
else:
    # Output name is {inputfile%.tex}.bbc: strip a trailing '.tex' if
    # there is one, then append '.bbc'. Anchoring the pattern keeps a
    # '.tex' elsewhere in the path intact. Always appending '.bbc' means
    # an input without a .tex suffix can never be overwritten by its own
    # output.
    outfile = open(re.sub(r'\.tex$', '', filename) + '.bbc', 'w')
outfile.write(latex)
outfile.close()