hgbook

annotate en/fixhtml.py @ 479:5236357a00b2

finished hook.tex
author Javier Rojas <jerojasro@devnull.li>
date Sun Jan 04 21:51:51 2009 -0500 (2009-01-04)
parents 2e73abddad21
children
rev   line source
bos@149 1 #!/usr/bin/env python
#
# This script attempts to work around some of the more bizarre and
# quirky behaviours of htlatex.
#
# - We've persuaded htlatex to produce UTF-8, which unfortunately
#   causes it to use huge character sequences to represent even the
#   safe 7-bit ASCII subset of UTF-8.  We fix that up.
#
# - BUT we have to treat angle brackets (for example, redirections in
#   shell script snippets) specially, otherwise they'll break the
#   generated HTML.  (Reported by Johannes Hoff.)
#
# - For some reason, htlatex gives a unique ID to each fancyvrb
#   environment, which makes writing a sane, small CSS stylesheet
#   impossible.  We squish all those IDs down to nothing.
bos@149 17
bos@149 18 import os
bos@149 19 import sys
bos@149 20 import re
bos@149 21
# Numeric character references for '<' (U+003C) and '>' (U+003E), written
# with an upper-case hex digit.
angle_re = re.compile(r'(&#x003[CE];)')
# Numeric character references for the 7-bit range U+0000..U+007F.  Only
# UPPER-case hex digits match, so a reference whose hex letter has been
# lower-cased (e.g. '&#x003c;') is deliberately not matched.
unicode_re = re.compile(r'&#x00([0-7][0-9A-F]);')
# Per-environment ids such as id="fancyvrb12" (matched case-insensitively).
fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.I)
# Latin ligature references U+FB00..U+FB04; group 1 is the last hex digit.
ligature_re = re.compile(r'&#xFB0([0-4]);')
bos@149 26
# Suffix appended to an input file name to form its temporary output name;
# it embeds the current process id.
tmpsuffix = '.tmp.' + str(os.getpid())
bos@149 28
def hide_angle(m):
    """Return the matched character reference converted to lower case.

    Intended as a replacement callback for ``angle_re.sub``.  Lower-casing
    turns e.g. ``&#x003C;`` into ``&#x003c;``, which ``unicode_re`` (it only
    accepts upper-case hex digits) no longer matches, so the reference is
    left escaped by the later ASCII fix-up pass.

    m -- match object whose group 1 is the whole character reference.
    """
    return m.group(1).lower()
bos@251 31
def fix_ascii(m):
    """Return the literal character for a 7-bit numeric character reference.

    Intended as a replacement callback for ``unicode_re.sub``.

    m -- match object whose group 1 holds the two hex digits of the code
         point (for example ``41`` for ``&#x0041;``).

    The HTML metacharacters ``<``, ``>`` and ``&`` are NOT decoded: the
    original reference text is returned unchanged for them, because emitting
    them raw would alter or break the surrounding markup (``&#x0026;lt;``
    would otherwise collapse into the entity ``&lt;``).
    """
    ch = chr(int(m.group(1), 16))
    if ch in '<>&':
        # Keep markup-significant characters escaped.
        return m.group(0)
    return ch
bos@149 34
# Plain-letter expansions for the ligature code points U+FB00..U+FB04,
# indexed by the final hex digit of the code point.
ligatures = ['ff', 'fi', 'fl', 'ffi', 'ffl']

def expand_ligature(m):
    """Return the plain letters for a matched ligature character reference.

    Intended as a replacement callback for ``ligature_re.sub``.

    m -- match object whose group 1 is the last hex digit (0-4) of the
         ligature's code point; it is used as an index into ``ligatures``.
    """
    return ligatures[int(m.group(1))]
bos@260 39
def _fix_file(name):
    """Rewrite the file *name* in place with the htlatex fix-ups applied.

    Each line is passed, in order, through the angle-bracket hiding, the
    7-bit ASCII decoding, the ligature expansion and the fancyvrb id
    squashing substitutions.  The result is written to a temporary file
    (``name + tmpsuffix``) which is then renamed over the original.

    If anything fails before the rename completes, the temporary file is
    removed and the original file is left untouched; the error propagates.
    """
    # Imported here so the block does not depend on the file-level imports.
    # io.open() behaves the same on Python 2.6+ and 3.x, unlike the
    # Python-2-only file() builtin.
    import io

    tmpname = name + tmpsuffix
    try:
        # The header comment says htlatex was made to emit UTF-8, so decode
        # explicitly rather than relying on the locale.  newline='' turns
        # off newline translation so line endings are copied through as-is.
        with io.open(name, encoding='utf-8', newline='') as ifp:
            with io.open(tmpname, 'w', encoding='utf-8', newline='') as ofp:
                for line in ifp:
                    # Order matters: angle brackets must be hidden before
                    # the generic ASCII decoding runs.
                    line = angle_re.sub(hide_angle, line)
                    line = unicode_re.sub(fix_ascii, line)
                    line = ligature_re.sub(expand_ligature, line)
                    line = fancyvrb_re.sub('id="fancyvrb"', line)
                    ofp.write(line)
        os.rename(tmpname, name)
    except BaseException:
        # Do not leave a stray temporary file behind (also on Ctrl-C).
        if os.path.exists(tmpname):
            os.remove(tmpname)
        raise

# Process every file named on the command line.
for name in sys.argv[1:]:
    _fix_file(name)