#!/usr/bin/python
#
# This is the most horrible of hacks.  Pretend you're not looking.
#
# Line-oriented converter from the book's LaTeX sources (*.tex) to DocBook
# XML (*.xml).  Every input line is run through a fixed list of textual
# substitutions; sectioning commands, \begin/\end environments, \item lines
# and blank lines then drive a tiny state machine that emits the block
# structure.
#
# NOTE(review): the copy of this script that was reviewed had everything
# between '<' and '>' stripped out (regex group names, DocBook tags, the
# '&amp;' / '&lt;' / '&gt;' entities).  The group names and element names
# below were reconstructed from the LaTeX macro names and ordinary DocBook
# vocabulary -- confirm the exact tags, role attributes and the issue
# tracker URL against the upstream script before relying on the output.

import re
import sys

try:
    import cStringIO as StringIO   # Python 2
except ImportError:
    import io as StringIO          # Python 3: io.StringIO plays the same role

# LaTeX sectioning command -> DocBook element.  process() compares the
# element names as plain strings, which works only because they sort in
# nesting order: 'chapter' < 'sect1' < 'sect2' < 'sect3'.
sections = {
    'chapter': 'chapter',
    'section': 'sect1',
    'subsection': 'sect2',
    'subsubsection': 'sect3',
    }

# LaTeX environment name -> DocBook element.  Environments that are not
# listed here make process() raise KeyError.
envs = {
    'codesample2': 'programlisting',
    'codesample4': 'programlisting',
    'enumerate': 'orderedlist',
    'figure': 'informalfigure',
    'itemize': 'itemizedlist',
    'note': 'note',
    'quote': 'blockquote',
    }

# DocBook elements that put the state machine into "list" mode.
_LIST_ELEMENTS = ('itemizedlist', 'orderedlist')

# Literal replacements, applied in this order before any regex runs.
# Order matters: '&' must be escaped before '&emdash;' is introduced, and
# '<' / '>' must be escaped before any markup is inserted.
_CHAR_REPLACEMENTS = (
    ('~', ' '),
    ('&', '&amp;'),
    # NOTE(review): '&emdash;' is kept as found; DocBook's own entity is
    # '&mdash;' -- confirm which one the book's DTD setup defines.
    ('---', '&emdash;'),
    ('\\_', '_'),
    ('\\{', '{'),
    ('\\}', '}'),
    ('\\$', '$'),
    ('\\%', '%'),
    ('\\#', '#'),
    ('<', '&lt;'),
    ('>', '&gt;'),
    ('``', '<quote>'),
    ("''", '</quote>'),
    )

# (pattern, replacement) pairs applied to every line, in this order.
_INLINE_RULES = [(re.compile(_pat), _repl) for _pat, _repl in (
    (r'\s*\\(?:centering|small)\b\s*', ''),
    # The leading space is what is left of '~' after the replacement above.
    (r'\\(?:hgrc\\|hgrc)\b',
     r'<filename role="special"> /.hgrc</filename>'),
    # '\\item' (not '\item'): unknown letter escapes in a replacement
    # template are an error on Python >= 3.7.
    (r'\\item\[(?P<key>[^]]+)\]', r'\\item \g<key>:'),
    (r'\\bug{(?P<id>\d+)}',
     r'<ulink role="hg-bug" '
     r'url="http://www.selenic.com/mercurial/bts/issue\g<id>">'
     r'issue \g<id></ulink>'),
    (r'\\cite{([^}]+)}', r'<citation>\1</citation>'),
    (r'\\hggopt{(?P<opt>[^}]+)}',
     r'<option role="hg-opt-global">\g<opt></option>'),
    (r'\\hgxopt{(?P<ext>[^}]+)}{(?P<cmd>[^}]+)}{(?P<opt>[^}]+)}',
     r'<option role="hg-ext-\g<ext>-cmd-\g<cmd>-opt">\g<opt></option>'),
    (r'\\hgxcmd{(?P<ext>[^}]+)}{(?P<cmd>[^}]+)}',
     r'<command role="hg-ext-\g<ext>">\g<cmd></command>'),
    (r'\\hgext{(?P<ext>[^}]+)}',
     r'<literal role="hg-ext">\g<ext></literal>'),
    (r'\\hgopt{(?P<cmd>[^}]+)}{(?P<opt>[^}]+)}',
     r'<option role="hg-opt-\g<cmd>">\g<opt></option>'),
    (r'\\cmdopt{(?P<cmd>[^}]+)}{(?P<opt>[^}]+)}',
     r'<option role="cmd-opt-\g<cmd>">\g<opt></option>'),
    (r'\\hgcmd{(?P<cmd>[^}]+)}',
     r'<command role="hg-cmd">hg \g<cmd></command>'),
    (r'\\caption{(?P<text>[^}]+?)}',
     r'<caption><para>\g<text></para></caption>'),
    (r'\\grafix{(?P<name>[^}]+)}',
     r'<mediaobject><imageobject><imagedata fileref="\g<name>"/>'
     r'</imageobject><textobject><phrase>XXX add text</phrase>'
     r'</textobject></mediaobject>'),
    (r'\\envar{(?P<name>[^}]+)}', r'<envar>\g<name></envar>'),
    (r'\\rcsection{(?P<sect>[^}]+)}',
     r'<literal role="rc-\g<sect>">\g<sect></literal>'),
    (r'\\rcitem{(?P<sect>[^}]+)}{(?P<name>[^}]+)}',
     r'<envar role="rc-item-\g<sect>">\g<name></envar>'),
    (r'\\dirname{(?P<dir>[^}]+?)}',
     r'<filename class="directory">\g<dir></filename>'),
    (r'\\filename{(?P<file>[^}]+?)}', r'<filename>\g<file></filename>'),
    (r'\\tildefile{(?P<file>[^}]+)}',
     r'<filename role="home">~/\g<file></filename>'),
    (r'\\sfilename{(?P<file>[^}]+)}',
     r'<filename role="special">\g<file></filename>'),
    (r'\\sdirname{(?P<dir>[^}]+)}',
     r'<filename role="special" class="directory">\g<dir></filename>'),
    (r'\\interaction{(?P<id>[^}]+)}', r'<!-- &interaction.\g<id>; -->'),
    (r'\\excode{(?P<id>[^}]+)}', r'<!-- &example.\g<id>; -->'),
    (r'\\pymod{(?P<mod>[^}]+)}',
     r'<literal role="py-mod">\g<mod></literal>'),
    (r'\\pymodclass{(?P<mod>[^}]+)}{(?P<cls>[^}]+)}',
     r'<literal role="py-mod-\g<mod>">\g<cls></literal>'),
    (r'\\url{(?P<url>[^}]+)}', r'<ulink url="\g<url>">\g<url></ulink>'),
    (r'\\href{(?P<url>[^}]+)}{(?P<text>[^}]+)}',
     r'<ulink url="\g<url>">\g<text></ulink>'),
    (r'\\command{(?P<cmd>[^}]+)}', r'<command>\g<cmd></command>'),
    (r'\\option{(?P<opt>[^}]+)}', r'<option>\g<opt></option>'),
    (r'\\ref{(?P<id>[^}]+)}', r'<xref linkend="\g<id>"/>'),
    (r'\\emph{(?P<txt>[^}]+)}', r'<emphasis>\g<txt></emphasis>'),
    (r'\\texttt{(?P<txt>[^}]+)}', r'<literal>\g<txt></literal>'),
    (r'\\textbf{(?P<txt>[^}]+)}',
     r'<emphasis role="bold">\g<txt></emphasis>'),
    (r'\\hook{(?P<name>[^}]+)}',
     r'<literal role="hook">\g<name></literal>'),
    (r'\\tplfilter{(?P<name>[^}]+)}',
     r'<literal role="template-filter">\g<name></literal>'),
    (r'\\tplkword{(?P<name>[^}]+)}',
     r'<literal role="template-keyword">\g<name></literal>'),
    (r'\\tplkwfilt{(?P<tpl>[^}]+)}{(?P<name>[^}]+)}',
     r'<literal role="template-kw-filt-\g<tpl>">\g<name></literal>'),
    # Inside a character class '\1' is the character chr(1), not a
    # backreference, so '[^\1]+?' is effectively "the shortest run of
    # characters up to the next copy of the delimiter".
    (r'\\[vV]erb(.)(?P<txt>[^\1]+?)\1', r'<literal>\g<txt></literal>'),
    (r'\\package{(?P<name>[^}]+)}',
     r'<literal role="package">\g<name></literal>'),
    (r'\\hgcmdargs{(?P<cmd>[^}]+)}{(?P<args>[^}]+)}',
     r'<command role="hg-cmd">hg \g<cmd> \g<args></command>'),
    (r'\\cmdargs{(?P<cmd>[^}]+)}{(?P<args>[^}]+)}',
     r'<command>\g<cmd> \g<args></command>'),
    )]

_SECTION_RE = re.compile(
    r'\\(chapter|section|subsection|subsubsection){(.*)}')
_ENV_RE = re.compile(r'\s*\\(begin|end){(?P<env>[^}]+)}')


def _convert_inline(line):
    """Return *line* with character escapes and inline macros converted."""
    for old, new in _CHAR_REPLACEMENTS:
        line = line.replace(old, new)
    for pattern, repl in _INLINE_RULES:
        line = pattern.sub(repl, line)
    return line


def _close_para(ofp, inlist):
    """Write the end of the open paragraph (and of its list item, if any)."""
    ofp.write('</para>\n')
    if inlist:
        ofp.write('</listitem>')


def _collapse_para_ends(text):
    """Pull every '</para>' up onto the end of the preceding text line."""
    # No flags argument: the pattern has no anchors, and passing re.M as
    # the fourth positional argument of re.sub() sets *count* (to 8), which
    # silently limits the rewrite to the first eight matches.
    return re.sub('\n+</para>', '</para>', text)


def process(ifp, ofp):
    """Convert LaTeX lines read from *ifp* to DocBook written to *ofp*.

    ifp -- iterable of text lines (e.g. an open file).  Reading stops at
           the first line starting with '%%% Local Variables:'.
    ofp -- object with a write() method that receives the output.

    Raises KeyError for a \\begin/\\end environment that is not in `envs`.
    """
    ofp.write('\n\n')
    stack = []     # DocBook section elements currently open, outermost first
    para = True    # True while no <para> is open
    inlist = 0     # 0: outside a list, 1: list opened, 2: an \item was seen
    for line in ifp:
        if line.startswith('%%% Local Variables:'):
            break
        line = _convert_inline(line.rstrip())

        m = _SECTION_RE.match(line)
        if m:
            kind, content = m.groups()
            sec = sections[kind]
            # Close every open section at the same or a deeper level.
            while stack and stack[-1] >= sec:
                ofp.write('</%s>\n' % stack.pop())
            stack.append(sec)
            ofp.write('<%s>\n<title>%s</title>\n' % (sec, content))
            continue

        m = _ENV_RE.match(line)
        if m:
            if not para:
                _close_para(ofp, inlist)
                para = True
            state, env = m.groups()
            env = envs[env]
            is_list = env in _LIST_ELEMENTS
            if state == 'begin':
                ofp.write('<')
                if is_list:
                    inlist = 1
            else:
                ofp.write('</')
                # NOTE(review): this branch was lost in the reviewed copy;
                # leaving list mode here is the reconstruction.
                if is_list:
                    inlist = 0
            ofp.write(env + '>\n')
            continue

        if line.startswith('\\item '):
            if inlist > 1:
                # Close the previous item only if it is still open; a blank
                # line may already have closed it.
                if not para:
                    _close_para(ofp, inlist)
                    ofp.write('\n')
            else:
                inlist = 2
            para = True
            line = line[6:]
        if line and para:
            if inlist:
                ofp.write('<listitem>')
            ofp.write('<para>')
            para = False
        if not line and not para:
            _close_para(ofp, inlist)
            para = True
        ofp.write(line + '\n')

    # Input may end in the middle of a paragraph; close it so the section
    # end tags below do not land inside an open <para>.
    if not para:
        _close_para(ofp, inlist)
        ofp.write('\n')
    while stack:
        ofp.write('</%s>\n' % stack.pop())
    ofp.write('\n')


def _convert_file(name):
    """Convert the file NAME.tex, writing the result to NAME.xml."""
    newname = name[:-3] + 'xml'
    buf = StringIO.StringIO()
    with open(name) as ifp:
        process(ifp, buf)
    with open(newname, 'w') as out:
        out.write(_collapse_para_ends(buf.getvalue()))


if __name__ == '__main__':
    for name in sys.argv[1:]:
        if not name.endswith('.tex'):
            continue
        _convert_file(name)