hgbook
changeset 552:cf006cabe489
Snapshot of conversion script
author | Bryan O'Sullivan <bos@serpentine.com> |
---|---|
date | Thu Feb 05 00:02:00 2009 -0800 (2009-02-05) |
parents | f72b7e6cbe90 |
children | 863a82f13901 |
files | tools/latex-to-docbook |
line diff
#!/usr/bin/python
#
# This is the most horrible of hacks.  Pretend you're not looking.

"""Rough line-by-line converter from LaTeX sources to DocBook XML.

Usage: latex-to-docbook FILE.tex [FILE.tex ...]

Every argument ending in ``.tex`` is converted and written to a sibling
file with an ``.xml`` extension; other arguments are skipped.  The
conversion is purely line based: known macros are rewritten with regular
expressions, sectioning commands and a fixed set of environments become
DocBook elements, and runs of text separated by blank lines become
``<para>`` elements.

The code avoids ``print`` and imports ``cStringIO`` with a fallback to
``io`` so that it runs under both Python 2 and Python 3.
"""

import re
import sys

try:
    import cStringIO as StringIO  # Python 2
except ImportError:
    import io as StringIO  # Python 3: io.StringIO plays the same role

# LaTeX sectioning command -> DocBook section element.
sections = {
    'chapter': 'chapter',
    'section': 'sect1',
    'subsection': 'sect2',
    'subsubsection': 'sect3',
    }

# Nesting depth of each DocBook section element.  Made explicit so that
# closing enclosing sections does not depend on the element names
# happening to sort alphabetically in nesting order.
_section_depth = {
    'chapter': 0,
    'sect1': 1,
    'sect2': 2,
    'sect3': 3,
    }

# LaTeX environment -> DocBook element.  An environment that is not listed
# here makes process() raise KeyError.
envs = {
    'codesample2': 'programlisting',
    'codesample4': 'programlisting',
    'enumerate': 'orderedlist',
    'figure': 'figure',
    'itemize': 'itemizedlist',
    'note': 'note',
    'quote': 'blockquote',
    }

# Plain-text replacements applied, in this order, before any macro is
# expanded.  '&', '<' and '>' must be escaped here, while the line is still
# pure LaTeX, so that the markup generated afterwards is left alone.
_escapes = (
    ('~', ' '),
    ('&', '&amp;'),
    ('---', '&emdash;'),
    ('\\_', '_'),
    ('\\{', '{'),
    ('\\}', '}'),
    ('\\$', '$'),
    ('\\%', '%'),
    ('\\#', '#'),
    ('<', '&lt;'),
    ('>', '&gt;'),
    ('``', '<quote>'),
    ("''", '</quote>'),
    )

# (pattern, replacement template) pairs applied in order to every line.
# Compiled once at import time instead of once per input line.
_substitutions = [(re.compile(_pat), _repl) for _pat, _repl in (
    (r'\s*\\(?:centering|small)\b\s*', ''),
    (r'\\(?:hgrc\\|hgrc)\b',
     r'<filename role="special">~/.hgrc</filename>'),
    # The doubled backslash emits a literal "\item"; a bare "\i" in a
    # replacement template is an error on Python 3.
    (r'\\item\[(?P<key>[^]]+)\]', r'\\item \g<key>:'),
    (r'\\bug{(?P<id>\d+)}',
     r'<ulink role="hg-bug" url="http://www.selenic.com/mercurial/bts/issue\g<id>">issue \g<id></ulink>'),
    (r'\\cite{([^}]+)}', r'<citation>\1</citation>'),
    (r'\\hggopt{(?P<opt>[^}]+)}',
     r'<option role="hg-opt-global">\g<opt></option>'),
    (r'\\hgxopt{(?P<ext>[^}]+)}{(?P<cmd>[^}]+)}{(?P<opt>[^}]+)}',
     r'<option role="hg-ext-\g<ext>-cmd-\g<cmd>-opt">\g<opt></option>'),
    (r'\\hgxcmd{(?P<ext>[^}]+)}{(?P<cmd>[^}]+)}',
     r'<command role="hg-ext-\g<ext>">\g<cmd></command>'),
    (r'\\hgext{(?P<ext>[^}]+)}',
     r'<literal role="hg-ext">\g<ext></literal>'),
    (r'\\hgopt{(?P<cmd>[^}]+)}{(?P<opt>[^}]+)}',
     r'<option role="hg-opt-\g<cmd>">\g<opt></option>'),
    (r'\\cmdopt{(?P<cmd>[^}]+)}{(?P<opt>[^}]+)}',
     r'<option role="cmd-opt-\g<cmd>">\g<opt></option>'),
    (r'\\hgcmd{(?P<cmd>[^}]+)}',
     r'<command role="hg-cmd">hg \g<cmd></command>'),
    (r'\\caption{(?P<text>[^}]+?)}',
     r'<caption>\g<text></caption>'),
    (r'\\grafix{(?P<name>[^}]+)}',
     r'<mediaobject><imageobject><imagedata fileref="\g<name>"/></imageobject><textobject><phrase>XXX add text</phrase></textobject></mediaobject>'),
    (r'\\envar{(?P<name>[^}]+)}',
     r'<envar>\g<name></envar>'),
    (r'\\rcsection{(?P<sect>[^}]+)}',
     r'<literal role="rc-\g<sect>">\g<sect></literal>'),
    (r'\\rcitem{(?P<sect>[^}]+)}{(?P<name>[^}]+)}',
     r'<envar role="rc-item-\g<sect>">\g<name></envar>'),
    (r'\\dirname{(?P<dir>[^}]+?)}',
     r'<filename class="directory">\g<dir></filename>'),
    (r'\\filename{(?P<file>[^}]+?)}',
     r'<filename>\g<file></filename>'),
    (r'\\tildefile{(?P<file>[^}]+)}',
     r'<filename role="home">~/\g<file></filename>'),
    (r'\\sfilename{(?P<file>[^}]+)}',
     r'<filename role="special">\g<file></filename>'),
    (r'\\sdirname{(?P<dir>[^}]+)}',
     r'<filename role="special" class="directory">\g<dir></filename>'),
    (r'\\interaction{(?P<id>[^}]+)}',
     r'<!-- &interaction.\g<id>; -->'),
    (r'\\excode{(?P<id>[^}]+)}',
     r'<!-- &example.\g<id>; -->'),
    (r'\\pymod{(?P<mod>[^}]+)}',
     r'<literal role="py-mod">\g<mod></literal>'),
    # Opening and closing tags must match; the module goes into "role",
    # like every other <literal> produced here.
    (r'\\pymodclass{(?P<mod>[^}]+)}{(?P<class>[^}]+)}',
     r'<literal role="py-mod-\g<mod>">\g<class></literal>'),
    (r'\\url{(?P<url>[^}]+)}',
     r'<ulink url="\g<url>">\g<url></ulink>'),
    (r'\\href{(?P<url>[^}]+)}{(?P<text>[^}]+)}',
     r'<ulink url="\g<url>">\g<text></ulink>'),
    (r'\\command{(?P<cmd>[^}]+)}',
     r'<command>\g<cmd></command>'),
    (r'\\option{(?P<opt>[^}]+)}',
     r'<option>\g<opt></option>'),
    # NOTE(review): DocBook's xref normally points at its target with
    # "linkend" rather than "id" -- confirm before relying on this output.
    (r'\\ref{(?P<id>[^}]+)}', r'<xref id="\g<id>"/>'),
    (r'\\emph{(?P<txt>[^}]+)}',
     r'<emphasis>\g<txt></emphasis>'),
    (r'\\texttt{(?P<txt>[^}]+)}',
     r'<literal>\g<txt></literal>'),
    (r'\\textbf{(?P<txt>[^}]+)}',
     r'<emphasis role="bold">\g<txt></emphasis>'),
    (r'\\hook{(?P<name>[^}]+)}',
     r'<literal role="hook">\g<name></literal>'),
    (r'\\tplfilter{(?P<name>[^}]+)}',
     r'<literal role="template-filter">\g<name></literal>'),
    (r'\\tplkword{(?P<name>[^}]+)}',
     r'<literal role="template-keyword">\g<name></literal>'),
    (r'\\tplkwfilt{(?P<tpl>[^}]+)}{(?P<name>[^}]+)}',
     r'<literal role="template-kw-filt-\g<tpl>">\g<name></literal>'),
    # The delimiter is whatever character follows \verb.  A back-reference
    # cannot be used inside a character class (there "\1" means the
    # character 0x01), so a lazy ".+?" up to the delimiter is used instead.
    (r'\\[vV]erb(.)(?P<txt>.+?)\1',
     r'<literal>\g<txt></literal>'),
    (r'\\package{(?P<name>[^}]+)}',
     r'<literal role="package">\g<name></literal>'),
    (r'\\hgcmdargs{(?P<cmd>[^}]+)}{(?P<args>[^}]+)}',
     r'<command role="hg-cmd">hg \g<cmd> \g<args></command>'),
    (r'\\cmdargs{(?P<cmd>[^}]+)}{(?P<args>[^}]+)}',
     r'<command>\g<cmd> \g<args></command>'),
    )]

_section_re = re.compile(r'\\(chapter|section|subsection|subsubsection){(.*)}')
_env_re = re.compile(r'\s*\\(begin|end){(?P<sect>[^}]+)}')


def _close_para(ofp, inlist):
    """Write the end of the open paragraph, and of its list item if any."""
    ofp.write('</para>\n')
    if inlist:
        ofp.write('</listitem>')


def process(ifp, ofp):
    r"""Convert LaTeX lines read from *ifp* into DocBook written to *ofp*.

    *ifp* is any iterable of text lines and *ofp* any object with a
    ``write`` method.  Reading stops at the first line starting with
    ``%%% Local Variables:``.  Each line is right-stripped, escaped,
    macro-expanded and then handled as one of:

    * a sectioning command at the start of the line: enclosing sections of
      the same or deeper level are closed and a new section with a
      ``<title>`` is opened;
    * a ``\begin{...}`` / ``\end{...}`` line: the environment is looked up
      in ``envs`` and the matching open or close tag is written;
    * anything else: paragraph text.  A non-empty line opens a ``<para>``
      if none is open, an empty line closes the open one, and the line
      itself is then written out.

    A paragraph still open when a section heading, an environment
    boundary, a new ``\item`` or the end of input is reached is closed
    first, so tags stay balanced.  After the last line all open sections
    are closed and an Emacs "local variables" comment is appended.

    Raises KeyError for an environment name that is not in ``envs``.

    Limitation: list items are only tracked for ``itemize`` and nesting is
    not tracked, so ``enumerate`` items and nested lists come out without
    correct ``<listitem>`` wrappers.
    """
    stack = []        # open DocBook section elements, outermost first
    para = True       # True while no <para> is open
    inlist = False    # True between \begin{itemize} and \end{itemize}

    for line in ifp:
        if line.startswith('%%% Local Variables:'):
            break
        line = line.rstrip()
        for old, new in _escapes:
            line = line.replace(old, new)
        for pattern, repl in _substitutions:
            line = pattern.sub(repl, line)

        m = _section_re.match(line)
        if m:
            kind, content = m.groups()
            sec = sections[kind]
            if not para:
                _close_para(ofp, inlist)
                para = True
            while stack and _section_depth[stack[-1]] >= _section_depth[sec]:
                ofp.write('</%s>\n' % stack.pop())
            stack.append(sec)
            ofp.write('<%s>\n<title>%s</title>\n' % (sec, content))
            continue

        m = _env_re.match(line)
        if m:
            if not para:
                _close_para(ofp, inlist)
                para = True
            state, env = m.groups()
            env = envs[env]
            if state == 'begin':
                ofp.write('<')
                if env == 'itemizedlist':
                    inlist = True
            else:
                ofp.write('</')
                if env == 'itemizedlist':
                    inlist = False
            ofp.write(env + '>\n')
            continue

        if line.startswith('\\item '):
            # A new item ends the previous item's paragraph even when no
            # blank line separates the two.
            if not para:
                _close_para(ofp, inlist)
            para = True
            line = line[6:]
        if line and para:
            if inlist:
                ofp.write('<listitem>')
            ofp.write('<para>')
            para = False
        if not line and not para:
            _close_para(ofp, inlist)
            para = True
        ofp.write(line + '\n')

    if not para:
        _close_para(ofp, inlist)
    while stack:
        ofp.write('</%s>\n' % stack.pop())
    ofp.write('\n'.join(['<!--',
                         'local variables: ',
                         'sgml-parent-document: ("00book.xml" "book" "chapter")',
                         'end:',
                         '-->']))


def tidy_paragraphs(text):
    """Return *text* with every ``</para>`` pulled up onto the preceding line.

    All occurrences are rewritten.  Flags must never be passed as the
    fourth positional argument of ``re.sub``: that slot is ``count`` and
    would silently cap the number of replacements.
    """
    return re.sub('\n+</para>', '</para>', text)


def main(args):
    """Convert every ``*.tex`` path in *args* to a sibling ``*.xml`` file."""
    for name in args:
        if not name.endswith('.tex'):
            continue
        newname = name[:-3] + 'xml'
        ofp = StringIO.StringIO()
        with open(name) as ifp:
            process(ifp, ofp)
        with open(newname, 'w') as out:
            out.write(tidy_paragraphs(ofp.getvalue()))


if __name__ == '__main__':
    main(sys.argv[1:])