#!/usr/bin/python
#
# HG changeset patch
# User Bryan O'Sullivan
# Date 1233820920 28800
# Node ID cf006cabe489e775bbdbdb3845bb5625b779785b
# Parent f72b7e6cbe9007f23cc14832a8aea1e33145b658
# Snapshot of conversion script (tools/latex-to-docbook)
#
# This is the most horrible of hacks. Pretend you're not looking.
#
# NOTE(review): the copy of this script that was reviewed had everything
# that looks like markup stripped from its string literals (regex group
# names, ``\g<name>`` references, DocBook tags, the ``~`` and quote
# literals).  The element names, ``role`` attributes, URL and trailer text
# below are a best-effort reconstruction -- confirm them against the
# upstream script before relying on the exact output.

try:
    import cStringIO as StringIO  # Python 2
except ImportError:
    import io as StringIO  # Python 3
import re
import sys

# LaTeX sectioning command -> DocBook element.  process() compares the
# element names as strings to decide which open sections to close; that
# works only because 'chapter' < 'sect1' < 'sect2' < 'sect3' sorts in
# nesting order.
sections = {
    'chapter': 'chapter',
    'section': 'sect1',
    'subsection': 'sect2',
    'subsubsection': 'sect3',
    }

# LaTeX environment -> DocBook element.
envs = {
    'codesample2': 'programlisting',
    'codesample4': 'programlisting',
    'enumerate': 'orderedlist',
    'figure': 'figure',
    'itemize': 'itemizedlist',
    'note': 'note',
    'quote': 'blockquote',
    }

# DocBook list elements whose paragraphs must be wrapped in <listitem>.
_LIST_ELEMENTS = ('itemizedlist', 'orderedlist')

# Plain-text replacements, applied in this order before macro expansion.
# '&' must be escaped before any entity or tag is introduced, and '<'/'>'
# must be escaped before the <quote> tags are inserted.
_TEXT_ESCAPES = (
    ('~', ' '),  # presumably the LaTeX non-breaking space -- TODO confirm
    ('&', '&amp;'),
    ('&amp;emdash;', '&emdash;'),  # keep entities already in the input
    ('\\_', '_'),
    ('\\{', '{'),
    ('\\}', '}'),
    ('\\$', '$'),
    ('\\%', '%'),
    ('\\#', '#'),
    ('<', '&lt;'),
    ('>', '&gt;'),
    ('``', '<quote>'),
    ("''", '</quote>'),
    )

# (pattern, replacement) pairs applied to every line, in this order.
# Compiled once here rather than on every input line.
_MACROS = [(re.compile(_pat), _repl) for _pat, _repl in (
    (r'\s*\\(?:centering|small)\b\s*', ''),
    (r'\\(?:hgrc\\|hgrc)\b',
     r'<filename role="special">~/.hgrc</filename>'),
    # The doubled backslash matters: a bare '\i' in a replacement template
    # is rejected as a bad escape by Python 3.7+.
    (r'\\item\[(?P<key>[^]]+)\]', r'\\item \g<key>:'),
    (r'\\bug\{(?P<id>\d+)\}',
     r'<ulink role="hg-bug" '
     r'url="http://www.selenic.com/mercurial/bts/issue\g<id>">'
     r'issue \g<id></ulink>'),
    (r'\\cite\{([^}]+)\}', r'<citation>\1</citation>'),
    (r'\\hggopt\{(?P<opt>[^}]+)\}',
     r'<option role="hg-opt-global">\g<opt></option>'),
    (r'\\hgxopt\{(?P<ext>[^}]+)\}\{(?P<cmd>[^}]+)\}\{(?P<opt>[^}]+)\}',
     r'<option role="hg-ext-\g<ext>-cmd-\g<cmd>-opt">\g<opt></option>'),
    (r'\\hgxcmd\{(?P<ext>[^}]+)\}\{(?P<cmd>[^}]+)\}',
     r'<command role="hg-ext-\g<ext>">\g<cmd></command>'),
    (r'\\hgext\{(?P<ext>[^}]+)\}',
     r'<literal role="hg-ext">\g<ext></literal>'),
    (r'\\hgopt\{(?P<cmd>[^}]+)\}\{(?P<opt>[^}]+)\}',
     r'<option role="hg-opt-\g<cmd>">\g<opt></option>'),
    (r'\\cmdopt\{(?P<cmd>[^}]+)\}\{(?P<opt>[^}]+)\}',
     r'<option role="cmd-opt-\g<cmd>">\g<opt></option>'),
    (r'\\hgcmd\{(?P<cmd>[^}]+)\}',
     r'<command role="hg-cmd">hg \g<cmd></command>'),
    (r'\\caption\{(?P<text>[^}]+?)\}',
     r'<caption><para>\g<text></para></caption>'),
    (r'\\grafix\{(?P<name>[^}]+)\}',
     r'<mediaobject><imageobject><imagedata fileref="\g<name>"/>'
     r'</imageobject><textobject><phrase>XXX add text</phrase>'
     r'</textobject></mediaobject>'),
    (r'\\envar\{(?P<name>[^}]+)\}', r'<envar>\g<name></envar>'),
    (r'\\rcsection\{(?P<sect>[^}]+)\}',
     r'<literal role="rc-\g<sect>">\g<sect></literal>'),
    (r'\\rcitem\{(?P<sect>[^}]+)\}\{(?P<name>[^}]+)\}',
     r'<envar role="rc-item-\g<sect>">\g<name></envar>'),
    (r'\\dirname\{(?P<dir>[^}]+?)\}',
     r'<filename class="directory">\g<dir></filename>'),
    (r'\\filename\{(?P<file>[^}]+?)\}', r'<filename>\g<file></filename>'),
    (r'\\tildefile\{(?P<file>[^}]+)\}',
     r'<filename role="home">~/\g<file></filename>'),
    (r'\\sfilename\{(?P<file>[^}]+)\}',
     r'<filename role="special">\g<file></filename>'),
    (r'\\sdirname\{(?P<dir>[^}]+)\}',
     r'<filename role="special" class="directory">\g<dir></filename>'),
    (r'\\interaction\{(?P<id>[^}]+)\}', r'<!-- &interaction.\g<id>; -->'),
    (r'\\excode\{(?P<id>[^}]+)\}', r'<!-- &example.\g<id>; -->'),
    (r'\\pymod\{(?P<mod>[^}]+)\}',
     r'<literal role="py-mod">\g<mod></literal>'),
    (r'\\pymodclass\{(?P<mod>[^}]+)\}\{(?P<cls>[^}]+)\}',
     r'<literal role="py-mod-\g<mod>">\g<cls></literal>'),
    (r'\\url\{(?P<url>[^}]+)\}', r'<ulink url="\g<url>">\g<url></ulink>'),
    (r'\\href\{(?P<url>[^}]+)\}\{(?P<text>[^}]+)\}',
     r'<ulink url="\g<url>">\g<text></ulink>'),
    (r'\\command\{(?P<cmd>[^}]+)\}', r'<command>\g<cmd></command>'),
    (r'\\option\{(?P<opt>[^}]+)\}', r'<option>\g<opt></option>'),
    (r'\\ref\{(?P<id>[^}]+)\}', r'<xref linkend="\g<id>"/>'),
    (r'\\emph\{(?P<text>[^}]+)\}', r'<emphasis>\g<text></emphasis>'),
    (r'\\texttt\{(?P<text>[^}]+)\}', r'<literal>\g<text></literal>'),
    (r'\\textbf\{(?P<text>[^}]+)\}',
     r'<emphasis role="bold">\g<text></emphasis>'),
    (r'\\hook\{(?P<name>[^}]+)\}',
     r'<literal role="hook">\g<name></literal>'),
    (r'\\tplfilter\{(?P<name>[^}]+)\}',
     r'<literal role="template-filter">\g<name></literal>'),
    (r'\\tplkword\{(?P<name>[^}]+)\}',
     r'<literal role="template-keyword">\g<name></literal>'),
    (r'\\tplkwfilt\{(?P<tpl>[^}]+)\}\{(?P<name>[^}]+)\}',
     r'<literal role="template-kw-filt-\g<tpl>">\g<name></literal>'),
    # A backreference cannot be used inside a character class ('[^\1]' is
    # an octal escape), so rely on the lazy '.+?' stopping at the first
    # repeat of the delimiter instead.
    (r'\\[vV]erb(.)(?P<text>.+?)\1', r'<literal>\g<text></literal>'),
    (r'\\package\{(?P<name>[^}]+)\}',
     r'<literal role="package">\g<name></literal>'),
    (r'\\hgcmdargs\{(?P<cmd>[^}]+)\}\{(?P<args>[^}]+)\}',
     r'<command role="hg-cmd">hg \g<cmd> \g<args></command>'),
    (r'\\cmdargs\{(?P<cmd>[^}]+)\}\{(?P<args>[^}]+)\}',
     r'<command>\g<cmd> \g<args></command>'),
    )]

_SECTION_RE = re.compile(
    r'\\(chapter|section|subsection|subsubsection)\{(.*)\}')
_ENV_RE = re.compile(r'\s*\\(begin|end)\{(?P<env>[^}]+)\}')

# Editor hints appended to every generated document.
_TRAILER = (
    '<!--',
    'local variables: ',
    'sgml-parent-document: ("00book.xml" "book" "chapter")',
    'end:',
    '-->',
    )


def _escape_text(line):
    """Return *line* with the plain-text replacements applied in order."""
    for old, new in _TEXT_ESCAPES:
        line = line.replace(old, new)
    return line


def _expand_macros(line):
    """Return *line* with every known LaTeX macro rewritten as DocBook."""
    for pattern, replacement in _MACROS:
        line = pattern.sub(replacement, line)
    return line


def _close_para(ofp, in_list):
    """Write the closing tags for the open paragraph (and its list item)."""
    ofp.write('</para>\n')
    if in_list:
        ofp.write('</listitem>')


def process(ifp, ofp):
    """Convert the LaTeX lines read from *ifp* to DocBook written to *ofp*.

    ifp -- iterable of text lines (for example an open file).
    ofp -- object with a ``write`` method that receives the XML text.

    Reading stops at a line starting with ``%%% Local Variables:``.  Any
    paragraph and sections still open at that point are closed and an
    editor-hint comment is appended.

    Raises KeyError for a ``\\begin``/``\\end`` environment that is not
    listed in ``envs``.
    """
    stack = []       # DocBook section elements currently open
    para = True      # True while no <para> is open
    list_depth = 0   # number of list environments currently open
    for line in ifp:
        if line.startswith('%%% Local Variables:'):
            break
        line = _expand_macros(_escape_text(line.rstrip()))

        m = _SECTION_RE.match(line)
        if m:
            kind, content = m.groups()
            sec = sections[kind]
            if not para:
                # A heading ends whatever paragraph came before it.
                _close_para(ofp, list_depth)
                para = True
            # Close every open section at the same or a deeper level.
            while stack and stack[-1] >= sec:
                ofp.write('</%s>\n' % stack.pop())
            stack.append(sec)
            ofp.write('<%s>\n<title>%s</title>\n' % (sec, content))
            continue

        m = _ENV_RE.match(line)
        if m:
            if not para:
                _close_para(ofp, list_depth)
                para = True
            state, env = m.groups()
            env = envs[env]
            if state == 'begin':
                ofp.write('<')
                if env in _LIST_ELEMENTS:
                    list_depth += 1
            else:
                ofp.write('</')
                if env in _LIST_ELEMENTS:
                    # Clamp so a stray \end cannot drive the count negative.
                    list_depth = max(0, list_depth - 1)
            ofp.write(env + '>\n')
            continue

        if line.startswith('\\item '):
            if not para:
                # Consecutive items are usually not separated by a blank
                # line, so the previous item must be closed here.
                _close_para(ofp, list_depth)
            para = True
            line = line[6:]
        if line and para:
            if list_depth:
                ofp.write('<listitem>')
            ofp.write('<para>')
            para = False
        if not line and not para:
            # A blank line ends the current paragraph.
            _close_para(ofp, list_depth)
            para = True
        ofp.write(line + '\n')

    if not para:
        _close_para(ofp, list_depth)
    while stack:
        ofp.write('</%s>\n' % stack.pop())
    ofp.write('\n'.join(_TRAILER))


def main(argv):
    """Convert each ``*.tex`` path in *argv* to a sibling ``*.xml`` file.

    Paths that do not end in ``.tex`` are skipped.
    """
    for name in argv:
        if not name.endswith('.tex'):
            continue
        newname = name[:-3] + 'xml'
        ofp = StringIO.StringIO()
        with open(name) as ifp:
            process(ifp, ofp)
        # Pull each closing tag up onto the last line of its paragraph.
        # No count is given: the original passed re.M in the count slot,
        # which silently limited this to the first 8 paragraphs.
        s = re.sub('\n+</para>', '</para>', ofp.getvalue())
        with open(newname, 'w') as out:
            out.write(s)


if __name__ == '__main__':
    main(sys.argv[1:])