hgbook
annotate en/fixhtml.py @ 251:2e73abddad21
Avoid converting UTF8-encoded angle brackets into ASCII (per Johannes Hoff).
Finally write up what fixhtml.py is actually doing.
Finally write up what fixhtml.py is actually doing.
author | Bryan O'Sullivan <bos@serpentine.com> |
---|---|
date | Wed May 30 21:50:21 2007 -0700 (2007-05-30) |
parents | 16f02802f448 |
children | ec6a3bb10986 |
rev | line source |
---|---|
bos@149 | 1 #!/usr/bin/env python |
bos@251 | 2 # |
bos@251 | 3 # This script attempts to work around some of the more bizarre and |
bos@251 | 4 # quirky behaviours of htlatex. |
bos@251 | 5 # |
bos@251 | 6 # - We've persuaded htlatex to produce UTF-8, which unfortunately |
bos@251 | 7 # causes it to use huge character sequences to represent even the |
bos@251 | 8 # safe 7-bit ASCII subset of UTF-8. We fix that up. |
bos@251 | 9 # |
bos@251 | 10 # - BUT we have to treat angle brackets (for example, redirections in |
bos@251 | 11 # shell script snippets) specially, otherwise they'll break the |
bos@251 | 12 # generated HTML. (Reported by Johannes Hoff.) |
bos@251 | 13 # |
bos@251 | 14 # - For some reason, htlatex gives a unique ID to each fancyvrb |
bos@251 | 15 # environment, which makes writing a sane, small CSS stylesheet |
bos@251 | 16 # impossible. We squish all those IDs down to nothing. |
bos@149 | 17 |
bos@149 | 18 import os |
bos@149 | 19 import sys |
bos@149 | 20 import re |
bos@149 | 21 |
bos@251 | 22 angle_re = re.compile(r'(&#x003[CE];)') |
bos@251 | 23 unicode_re = re.compile(r'&#x00([0-7][0-9A-F]);') |
bos@149 | 24 fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.I) |
bos@149 | 25 |
bos@149 | 26 tmpsuffix = '.tmp.' + str(os.getpid()) |
bos@149 | 27 |
bos@251 | 28 def hide_angle(m): |
bos@251 | 29 return m.group(1).lower() |
bos@251 | 30 |
bos@149 | 31 def fix_ascii(m): |
bos@149 | 32 return chr(int(m.group(1), 16)) |
bos@149 | 33 |
bos@149 | 34 for name in sys.argv[1:]: |
bos@149 | 35 tmpname = name + tmpsuffix |
bos@149 | 36 ofp = file(tmpname, 'w') |
bos@149 | 37 for line in file(name): |
bos@251 | 38 line = angle_re.sub(hide_angle, line) |
bos@149 | 39 line = unicode_re.sub(fix_ascii, line) |
bos@149 | 40 line = fancyvrb_re.sub('id="fancyvrb"', line) |
bos@149 | 41 ofp.write(line) |
bos@149 | 42 ofp.close() |
bos@149 | 43 os.rename(tmpname, name) |