hgbook

annotate en/fixhtml.py @ 258:1a55ba6ceca1

Make run-example a bit more user friendly.
author Bryan O'Sullivan <bos@serpentine.com>
date Wed Jun 06 08:11:08 2007 -0700 (2007-06-06)
parents 16f02802f448
children ec6a3bb10986
rev   line source
bos@149 1 #!/usr/bin/env python
bos@251 2 #
bos@251 3 # This script attempts to work around some of the more bizarre and
bos@251 4 # quirky behaviours of htlatex.
bos@251 5 #
bos@251 6 # - We've persuaded htlatex to produce UTF-8, which unfortunately
bos@251 7 # causes it to use huge character sequences to represent even the
bos@251 8 # safe 7-bit ASCII subset of UTF-8. We fix that up.
bos@251 9 #
bos@251 10 # - BUT we have to treat angle brackets (for example, redirections in
bos@251 11 # shell script snippets) specially, otherwise they'll break the
bos@251 12 # generated HTML. (Reported by Johannes Hoff.)
bos@251 13 #
bos@251 14 # - For some reason, htlatex gives a unique ID to each fancyvrb
bos@251 15 # environment, which makes writing a sane, small CSS stylesheet
bos@251 16 # impossible. We squish all those IDs down to nothing.
bos@149 17
bos@149 18 import os
bos@149 19 import sys
bos@149 20 import re
bos@149 21
# Numeric entities for '<' (3C) and '>' (3E), written with upper-case hex
# digits.  The whole entity is captured so it can be rewritten as a unit.
angle_re = re.compile(r'(&#x003[CE];)')
# Numeric entities for the 7-bit range 00-7F; only upper-case hex digits
# match, and the two hex digits are captured.
unicode_re = re.compile(r'&#x00([0-7][0-9A-F]);')
# Per-environment fancyvrb ids (id="fancyvrb<digits>"), in any letter case.
fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.IGNORECASE)

# Suffix for the scratch file written next to each input; it embeds this
# process's pid.
tmpsuffix = '.tmp.%d' % os.getpid()
bos@149 27
def hide_angle(m):
    """Return the text of group 1 of match *m*, converted to lower case.

    Used as the replacement callback for ``angle_re``: the lower-cased
    entity no longer matches ``unicode_re`` (which only accepts upper-case
    hex digits), so angle brackets stay encoded as entities.
    """
    entity = m.group(1)
    return entity.lower()
bos@251 30
def fix_ascii(m):
    """Return the character whose code is the hex number in group 1 of *m*.

    Used as the replacement callback for ``unicode_re``, turning a numeric
    character entity back into the plain character it stands for.
    """
    hex_digits = m.group(1)
    code = int(hex_digits, 16)
    return chr(code)
bos@149 33
def _fix_file(name):
    """Rewrite the HTML file *name* in place with the htlatex fix-ups applied.

    The result is written to a temporary file next to the original
    (``name + tmpsuffix``) and then renamed over it, so the original is only
    replaced once the whole file has been processed.  If anything fails
    before the rename, the temporary file is removed and the exception
    propagates.

    The file is read and written as UTF-8, the encoding the script's header
    comment says htlatex produces; input that is not valid UTF-8 raises
    UnicodeDecodeError.
    """
    # Local import: the file-level import block does not provide io.
    import io

    tmpname = name + tmpsuffix
    try:
        # io.open replaces the Python 2-only file() builtin and behaves the
        # same on Python 2.7 and 3.  newline='' turns off newline
        # translation so existing line endings are written back unchanged.
        with io.open(name, encoding='utf-8', newline='') as ifp, \
                io.open(tmpname, 'w', encoding='utf-8', newline='') as ofp:
            for line in ifp:
                # Order matters: angle-bracket entities are lower-cased first
                # so the upper-case-only unicode_re leaves them encoded.
                line = angle_re.sub(hide_angle, line)
                line = unicode_re.sub(fix_ascii, line)
                # Collapse every numbered fancyvrb id to one shared id.
                line = fancyvrb_re.sub('id="fancyvrb"', line)
                ofp.write(line)
        os.rename(tmpname, name)
    finally:
        # After a successful rename the temporary name no longer exists, so
        # this only cleans up after a failure.
        if os.path.exists(tmpname):
            os.remove(tmpname)


# Process every file named on the command line.
for name in sys.argv[1:]:
    _fix_file(name)