#!/usr/bin/env python
####
# bbLaTeX.py 1.0
# LaTeX to bbCode Translator
# Copyright (c) Andrew J. Bennieston, 2006
# Released under the BSD License
####

####
# Procedure:
#  (0) Replace \input with file contents
#  (1) Enumerate sections and subsections
#  (2) Generate TOC and section numbering map
#  (3) Replace LaTeX with bbCode
#
# The translation is purely regex based: command arguments are matched
# non-greedily on a single line, so nested braces and arguments that span
# several lines are not handled.
####

import re
import string  # no longer used below; import kept so nothing relying on it breaks
import sys


# usage() prints information about how to use bbLaTeX
def usage():
    """Write the command-line usage message to standard output."""
    s = "\nUsage: " + sys.argv[0] + " [input-file]\n\n"
    s += "Convert LaTeX input to bbCode output.\n"
    s += "With no input-file, or when input-file is -, read\n"
    s += "standard input. Writes to {inputfile%.tex}.bbc,\n"
    s += "or to standard output when reading from\n"
    s += "standard input.\n"
    # sys.stdout.write instead of the print statement so the script runs
    # unchanged on both Python 2 and Python 3.
    sys.stdout.write(s + "\n")


# filecontents(matchobj) returns the contents of the file
def filecontents(matchobj):
    """Return the contents of the file named by group 1 of *matchobj*.

    Used as a replacement callback for re.sub. If the file cannot be opened
    or read, a message quoting the whole matched command is written to
    standard error and an empty string is returned, so one bad include does
    not abort the conversion.
    """
    try:
        # 'with' closes the handle even when read() fails part way through.
        with open(matchobj.group(1), 'r') as f:
            return f.read()
    except (IOError, OSError, UnicodeError):
        sys.stderr.write('Error parsing ' + matchobj.group(0) + '\n')
        return ''


# Change \listinginput to [code] [/code]
def listingInput(matchobj):
    """Return the file named by group 1 of *matchobj* wrapped in a [code] block."""
    return "[code]\n" + filecontents(matchobj) + "[/code]\n"


# Manage section and subsection numbering, and generate TOC
class SectionManager(object):
    """Number section headings as they are matched and collect them for a TOC.

    sectpre/sectpost and subpre/subpost are the strings placed before and
    after a rendered section and subsection heading respectively.
    """

    def __init__(self, sectpre, sectpost, subpre, subpost):
        # List of (section title, [subsection titles]) in document order.
        self.sections = []
        self.sectionnum = 0
        self.subsectionnum = 0
        self.sectpre = sectpre
        self.sectpost = sectpost
        self.subpre = subpre
        self.subpost = subpost

    def matched(self, matchobj):
        """Replacement callback: return the numbered heading for *matchobj*.

        Group 1 of the match holds a section title, group 2 a subsection
        title (only one of them is set for any given match). Returns an
        empty string when neither group matched.
        """
        # Decide on the capture groups rather than re-scanning the matched
        # text: the section pattern is case-insensitive, so a textual check
        # for a lower-case command name would miss some of its matches.
        groups = matchobj.groups()
        section = groups[0] if len(groups) > 0 else None
        subsection = groups[1] if len(groups) > 1 else None

        if section is not None:
            # Found a new section: bump its number, restart subsections.
            self.sectionnum += 1
            self.subsectionnum = 0
            self.sections.append((section, []))
            return (self.sectpre + str(self.sectionnum) + ' '
                    + section + self.sectpost)

        if subsection is not None:
            # Found a new subsection of the current section.
            self.subsectionnum += 1
            snum = str(self.sectionnum) + '.' + str(self.subsectionnum)
            # A subsection that appears before any section has no parent
            # entry to hang off in the TOC; it is still rendered (as 0.N)
            # instead of being dropped from the document.
            if self.sections:
                self.sections[-1][1].append(subsection)
            return self.subpre + snum + ' ' + subsection + self.subpost

        return ''

    def TOC(self):
        """Return the collected headings as a nested bbCode numbered list."""
        parts = ["[list=1]\n"]
        for sect, subs in self.sections:
            parts.append('[*]' + sect + "\n")
            if subs:
                parts.append("[list=1]")
                for sub in subs:
                    parts.append('[*]' + sub + "\n")
                parts.append("[/list]")
        parts.append("[/list]\n")
        return ''.join(parts)


# Matches \input{file}; group 1 is the file name
inputre = re.compile(r'\\input\{(.+?)\}', re.I)

# Matches \section{title} (group 1) or \subsection{title} (group 2)
sectre = re.compile(r'(?:\\section\{(.+?)\})|(?:\\subsection\{(.+?)\})', re.I)

# (pattern, replacement) pairs applied in order by convert()
replacements = [
    (r'\\url\{(.+?)\}', r'[url]\1[/url]'),                     # URLs
    (r'\\textit\{(.+?)\}', r'[i]\1[/i]'),                      # Italic
    (r'\\textbf\{(.+?)\}', r'[b]\1[/b]'),                      # Boldface
    (r'\\texttt\{(.+?)\}', r'[color=blue]\1[/color]'),         # Monospace
    (r'\\begin\{quote\}', '[quote]'),
    (r'\\end\{quote\}', '[/quote]'),
    (r'\\title\{(.+?)\}', r'[size=18]\1[/size]'),              # Title
    (r'\\author\{(.+?)\}', r'[size=14][b]\1[/b][/size]'),      # Author
    (r'\\date\{(.+?)\}', r'[size=14][b]\1[/b][/size]'),        # Date
    (r'\\copyright', '(c)'),
]

# Strip LaTeX that we don't care about
regexps = [
    r'\\documentclass(?:\[(.+?)\])?\{(.+?)\}',
    # Optional [options] accepted, in line with \documentclass above.
    r'\\usepackage(?:\[(.+?)\])?\{(.+?)\}',
    r'\\begin\{document\}',
    r'\\end\{document\}',
    r'\\maketitle',
    r'\\newpage',
    r'\\pagestyle\{(.+?)\}',
    r'\\thispagestyle\{(.+?)\}',
]


def convert(latex):
    """Translate the LaTeX source string *latex* to bbCode and return it.

    \\input and \\listinginput commands are resolved by reading the named
    files relative to the current working directory (see filecontents).
    """
    # (0) Replace each occurrence of \input{foo} with the file contents
    latex = inputre.sub(filecontents, latex)

    # (1)/(2) Number the headings, using bbCode pre and post strings
    smgr = SectionManager('[size=16]', '[/size]', '[size=14]', '[/size]')
    latex = sectre.sub(smgr.matched, latex)

    # Substitute table of contents. The TOC is passed through a callable so
    # that backslashes in section titles are inserted literally instead of
    # being interpreted as escapes of a replacement template.
    toc = smgr.TOC()
    latex = re.sub(r'\\tableofcontents', lambda _match: toc, latex)

    # (3) Replace LaTeX with bbCode, then drop the commands with no equivalent
    for pattern, repl in replacements:
        latex = re.sub(pattern, repl, latex)
    for rx in regexps:
        latex = re.sub(rx, '', latex)

    # Finally change escaped LaTeX special characters back to normal characters
    latex = re.sub(r'\\([_$#%&~^{}><\\])', r'\1', latex)

    # Remove leading and trailing whitespace
    latex = latex.strip()

    # Remove repeated newlines; one regex pass handles runs of any length
    latex = re.sub(r'\n{2,}', '\n', latex)

    # Now replace verbatim and listing environments with [code] ... [/code]
    latex = re.sub(r'\\begin\{verbatim\}', '[code]', latex)
    latex = re.sub(r'\\end\{verbatim\}', '[/code]', latex)
    latex = re.sub(r'\\begin\{listing\}', '[code]', latex)
    latex = re.sub(r'\\end\{listing\}', '[/code]', latex)

    # Listing files are pulled in last, so their contents are inserted as-is
    # and skip every clean-up step above.
    latex = re.sub(r'\\listinginput(?:\[(?:\d+?)\])?\{(?:\d+?)\}\{(.+?)\}',
                   listingInput, latex)
    return latex


def output_name(filename):
    """Return the output path for *filename*: a trailing '.tex' becomes '.bbc'.

    A name without the '.tex' suffix simply gets '.bbc' appended, so the
    output path can never be the same as the input path.
    """
    suffix = '.tex'
    if filename.endswith(suffix):
        filename = filename[:-len(suffix)]
    return filename + '.bbc'


def main(argv=None):
    """Run the translator; *argv* defaults to sys.argv. Returns the exit status."""
    if argv is None:
        argv = sys.argv

    if len(argv) not in (1, 2):
        # Undefined, print usage and exit
        usage()
        return 1

    # Filename from argv[1]; '-' or no argument means standard input
    filename = argv[1] if len(argv) == 2 else '-'

    # Read the input into a string
    if filename == '-':
        latex = sys.stdin.read()
    else:
        with open(filename, 'r') as inputfile:
            latex = inputfile.read()

    bbcode = convert(latex)

    # Output stage; the standard streams are left open for the interpreter
    if filename == '-':
        sys.stdout.write(bbcode)
    else:
        with open(output_name(filename), 'w') as outfile:
            outfile.write(bbcode)
    return 0


if __name__ == '__main__':
    sys.exit(main())