#!/usr/bin/python
#
# This is the most horrible of hacks.  Pretend you're not looking.
"""Rough LaTeX-to-DocBook converter.

For every ``*.tex`` file named on the command line, write a sibling
``*.xml`` file in which sectioning commands, a fixed set of environments
and a fixed set of inline macros are rewritten as XML elements.  The
conversion is line-oriented and purely textual; the output is expected to
need manual clean-up.
"""

try:
    import cStringIO as StringIO  # Python 2
except ImportError:
    import io as StringIO  # Python 3: io.StringIO offers the same interface
import re
import sys

# LaTeX sectioning command -> XML element.  process() compares the element
# names as plain strings to decide which open sections to close, which
# works only because 'chapter' < 'sect1' < 'sect2' < 'sect3' sorts in
# nesting order.
sections = {
    'chapter': 'chapter',
    'section': 'sect1',
    'subsection': 'sect2',
    'subsubsection': 'sect3',
    }

# LaTeX environment -> XML element.
envs = {
    'codesample2': 'programlisting',
    'codesample4': 'programlisting',
    'enumerate': 'orderedlist',
    'figure': 'informalfigure',
    'itemize': 'itemizedlist',
    'note': 'note',
    'quote': 'blockquote',
    }

# Elements whose children are wrapped in <listitem>.
_LIST_ENVS = ('itemizedlist', 'orderedlist')

# Literal substitutions, applied in order before any regex rule.  Order
# matters: '&' must be escaped before entities such as '&emdash;' are
# introduced, and '<' / '>' before any markup is inserted.
_CHAR_REPLACEMENTS = (
    ('~', ' '),
    ('&', '&amp;'),
    ('---', '&emdash;'),
    ('\\_', '_'),
    ('\\{', '{'),
    ('\\}', '}'),
    ('\\$', '$'),
    ('\\%', '%'),
    ('\\#', '#'),
    ('<', '&lt;'),
    ('>', '&gt;'),
    ('``', '<quote>'),
    ("''", '</quote>'),
    )

# Inline macro rewrites, applied in order to every line.
#
# NOTE(review): the XML markup in the replacement templates below (element
# names, role/class attributes, the bug-tracker URL) and the regex group
# names were reconstructed; only the surrounding plain text ('hg ',
# 'issue ', 'XXX add text', '~/', ' /.hgrc') was available verbatim.
# Confirm each template against the DocBook conventions the output must
# follow before relying on it.
_INLINE_RULES = [(re.compile(_pattern), _template) for _pattern, _template in (
    (r'\s*\\(?:centering|small)\b\s*', ''),
    (r'\\(?:hgrc\\|hgrc)\b',
     r'<filename role="special"> /.hgrc</filename>'),
    # The backslash is doubled so the template emits a literal '\item';
    # a bare '\i' is rejected as a bad escape by Python 3.7+.
    (r'\\item\[(?P<key>[^]]+)\]', r'\\item \g<key>:'),
    (r'\\bug{(?P<id>\d+)}',
     r'<ulink role="hg-bug" '
     r'url="http://www.selenic.com/mercurial/bts/issue\g<id>">'
     r'issue \g<id></ulink>'),
    (r'\\cite{([^}]+)}', r'<citation>\1</citation>'),
    (r'\\hggopt{(?P<opt>[^}]+)}',
     r'<option role="hg-opt-global">\g<opt></option>'),
    (r'\\hgxopt{(?P<ext>[^}]+)}{(?P<cmd>[^}]+)}{(?P<opt>[^}]+)}',
     r'<option role="hg-ext-\g<ext>-cmd-\g<cmd>-opt">\g<opt></option>'),
    (r'\\hgxcmd{(?P<ext>[^}]+)}{(?P<cmd>[^}]+)}',
     r'<command role="hg-ext-\g<ext>">\g<cmd></command>'),
    (r'\\hgext{(?P<ext>[^}]+)}',
     r'<literal role="hg-ext">\g<ext></literal>'),
    (r'\\hgopt{(?P<cmd>[^}]+)}{(?P<opt>[^}]+)}',
     r'<option role="hg-opt-\g<cmd>">\g<opt></option>'),
    (r'\\cmdopt{(?P<cmd>[^}]+)}{(?P<opt>[^}]+)}',
     r'<option role="cmd-opt-\g<cmd>">\g<opt></option>'),
    (r'\\hgcmd{(?P<cmd>[^}]+)}',
     r'<command role="hg-cmd">hg \g<cmd></command>'),
    (r'\\caption{(?P<text>[^}]+?)}',
     r'<caption><para>\g<text></para></caption>'),
    (r'\\grafix{(?P<name>[^}]+)}',
     r'<mediaobject><imageobject><imagedata fileref="\g<name>"/>'
     r'</imageobject><textobject><phrase>XXX add text</phrase>'
     r'</textobject></mediaobject>'),
    (r'\\envar{(?P<name>[^}]+)}', r'<envar>\g<name></envar>'),
    (r'\\rcsection{(?P<sect>[^}]+)}',
     r'<literal role="rc-\g<sect>">\g<sect></literal>'),
    (r'\\rcitem{(?P<sect>[^}]+)}{(?P<name>[^}]+)}',
     r'<envar role="rc-item-\g<sect>">\g<name></envar>'),
    (r'\\dirname{(?P<dir>[^}]+?)}',
     r'<filename class="directory">\g<dir></filename>'),
    (r'\\filename{(?P<file>[^}]+?)}', r'<filename>\g<file></filename>'),
    (r'\\tildefile{(?P<file>[^}]+)}',
     r'<filename role="home">~/\g<file></filename>'),
    (r'\\sfilename{(?P<file>[^}]+)}',
     r'<filename role="special">\g<file></filename>'),
    (r'\\sdirname{(?P<dir>[^}]+)}',
     r'<filename role="special" class="directory">\g<dir></filename>'),
    (r'\\interaction{(?P<id>[^}]+)}', r'<!-- &interaction.\g<id>; -->'),
    (r'\\excode{(?P<id>[^}]+)}', r'<!-- &example.\g<id>; -->'),
    (r'\\pymod{(?P<mod>[^}]+)}',
     r'<literal role="py-mod">\g<mod></literal>'),
    (r'\\pymodclass{(?P<mod>[^}]+)}{(?P<cls>[^}]+)}',
     r'<literal role="py-mod-\g<mod>">\g<cls></literal>'),
    (r'\\url{(?P<url>[^}]+)}', r'<ulink url="\g<url>">\g<url></ulink>'),
    (r'\\href{(?P<url>[^}]+)}{(?P<text>[^}]+)}',
     r'<ulink url="\g<url>">\g<text></ulink>'),
    (r'\\command{(?P<cmd>[^}]+)}', r'<command>\g<cmd></command>'),
    (r'\\option{(?P<opt>[^}]+)}', r'<option>\g<opt></option>'),
    (r'\\ref{(?P<id>[^}]+)}', r'<xref linkend="\g<id>"/>'),
    (r'\\emph{(?P<txt>[^}]+)}', r'<emphasis>\g<txt></emphasis>'),
    (r'\\texttt{(?P<txt>[^}]+)}', r'<literal>\g<txt></literal>'),
    (r'\\textbf{(?P<txt>[^}]+)}',
     r'<emphasis role="bold">\g<txt></emphasis>'),
    (r'\\hook{(?P<name>[^}]+)}',
     r'<literal role="hook">\g<name></literal>'),
    (r'\\tplfilter{(?P<name>[^}]+)}',
     r'<literal role="template-filter">\g<name></literal>'),
    (r'\\tplkword{(?P<name>[^}]+)}',
     r'<literal role="template-keyword">\g<name></literal>'),
    (r'\\tplkwfilt{(?P<tpl>[^}]+)}{(?P<name>[^}]+)}',
     r'<literal role="template-kw-filt-\g<tpl>">\g<name></literal>'),
    # \verb|text| with an arbitrary delimiter.  The lazy '.+?' followed by
    # the backreference stops at the first repeat of the delimiter; a
    # backreference cannot be used inside a character class, where '\1'
    # means the character chr(1).
    (r'\\[vV]erb(.)(?P<text>.+?)\1', r'<literal>\g<text></literal>'),
    (r'\\package{(?P<name>[^}]+)}',
     r'<literal role="package">\g<name></literal>'),
    (r'\\hgcmdargs{(?P<cmd>[^}]+)}{(?P<args>[^}]+)}',
     r'<command role="hg-cmd">hg \g<cmd> \g<args></command>'),
    (r'\\cmdargs{(?P<cmd>[^}]+)}{(?P<args>[^}]+)}',
     r'<command>\g<cmd> \g<args></command>'),
    )]

_SECTION_RE = re.compile(
    r'\\(chapter|section|subsection|subsubsection){(.*)}')
_ENV_RE = re.compile(r'\s*\\(begin|end){(?P<env>[^}]+)}')


def _convert_inline(line):
    """Apply the literal substitutions, then every inline macro rule."""
    for old, new in _CHAR_REPLACEMENTS:
        line = line.replace(old, new)
    for pattern, template in _INLINE_RULES:
        line = pattern.sub(template, line)
    return line


def process(ifp, ofp):
    """Convert LaTeX read from *ifp* into XML written to *ofp*.

    ifp -- iterable of text lines (for example an open file).
    ofp -- object with a ``write(str)`` method.

    Reading stops at the first line that starts with
    '%%% Local Variables:'.  Sections still open at the end of input are
    closed, innermost first.

    Raises KeyError when a \\begin{...} or \\end{...} names an environment
    that has no entry in ``envs``.
    """
    # NOTE(review): the original text of the header written here and of the
    # footer written at the end of this function could not be recovered
    # (presumably XML comments); only the surviving newlines are emitted.
    ofp.write('\n\n')

    stack = []     # XML names of the currently open sections, outermost first
    para = True    # True while no <para> is open
    inlist = 0     # 0: not in a list, 1: list opened, 2: first \item seen

    for line in ifp:
        if line.startswith('%%% Local Variables:'):
            break
        line = _convert_inline(line.rstrip())

        m = _SECTION_RE.match(line)
        if m:
            kind, content = m.groups()
            sec = sections[kind]
            # Close every open section at the same or a deeper level.
            while stack and stack[-1] >= sec:
                ofp.write('</%s>\n' % stack.pop())
            stack.append(sec)
            ofp.write('<%s>\n<title>%s</title>\n' % (sec, content))
            continue

        m = _ENV_RE.match(line)
        if m:
            # An environment boundary ends whatever paragraph is open.
            if not para:
                ofp.write('</para>\n')
                if inlist:
                    ofp.write('</listitem>')
                para = True
            state, env = m.groups()
            env = envs[env]
            if state == 'begin':
                ofp.write('<')
                if env in _LIST_ENVS:
                    inlist = 1
            else:
                ofp.write('</')
                # Leave list mode so that later paragraphs are not wrapped
                # in <listitem>.
                if env in _LIST_ENVS:
                    inlist = 0
            ofp.write(env + '>\n')
            continue

        if line.startswith('\\item '):
            if inlist > 1:
                # A previous item is still open: close it first.
                ofp.write('</para>\n')
                ofp.write('</listitem>\n')
            else:
                inlist = 2
            para = True
            line = line[6:]    # drop the leading '\item '
        if line and para:
            # First non-blank line after a break opens a new paragraph.
            if inlist:
                ofp.write('<listitem>')
            ofp.write('<para>')
            para = False
        if not line and not para:
            # A blank line ends the open paragraph.
            ofp.write('</para>\n')
            if inlist:
                ofp.write('</listitem>')
            para = True
        ofp.write(line + '\n')

    while stack:
        ofp.write('</%s>\n' % stack.pop())
    ofp.write('\n')


def main(argv):
    """Convert each ``*.tex`` path in *argv* to a sibling ``*.xml`` file.

    Paths that do not end in '.tex' are skipped silently.
    """
    for name in argv:
        if not name.endswith('.tex'):
            continue
        newname = name[:-3] + 'xml'
        ofp = StringIO.StringIO()
        with open(name) as ifp:
            process(ifp, ofp)
        # Pull each closing tag up onto the last line of its paragraph.
        # No count or flags argument is passed: the fourth positional
        # parameter of re.sub() is 'count', so passing re.M there would
        # limit the substitution to the first eight matches.
        s = re.sub('\n+</para>', '</para>', ofp.getvalue())
        with open(newname, 'w') as out:
            out.write(s)


if __name__ == '__main__':
    main(sys.argv[1:])