hgbook
annotate ja/fixhtml.py @ 1114:527b86d55d4a
inotify: update installation information
inotify is shipped in Mercurial since 1.0, which greatly simplifies the installation process
inotify is shipped in Mercurial since 1.0, which greatly simplifies the installation process
author | Nicolas Dumazet <nicdumz.commits@gmail.com> |
---|---|
date | Sun Dec 13 16:35:56 2009 +0900 (2009-12-13) |
parents | |
children |
rev | line source |
---|---|
foozy@708 | 1 #!/usr/bin/env python |
foozy@708 | 2 # |
foozy@708 | 3 # This script attempts to work around some of the more bizarre and |
foozy@708 | 4 # quirky behaviours of htlatex. |
foozy@708 | 5 # |
foozy@708 | 6 # - We've persuaded htlatex to produce UTF-8, which unfortunately |
foozy@708 | 7 # causes it to use huge character sequences to represent even the |
foozy@708 | 8 # safe 7-bit ASCII subset of UTF-8. We fix that up. |
foozy@708 | 9 # |
foozy@708 | 10 # - BUT we have to treat angle brackets (for example, redirections in |
foozy@708 | 11 # shell script snippets) specially, otherwise they'll break the |
foozy@708 | 12 # generated HTML. (Reported by Johannes Hoff.) |
foozy@708 | 13 # |
foozy@708 | 14 # - For some reason, htlatex gives a unique ID to each fancyvrb |
foozy@708 | 15 # environment, which makes writing a sane, small CSS stylesheet |
foozy@708 | 16 # impossible. We squish all those IDs down to nothing. |
foozy@708 | 17 |
foozy@708 | 18 import os |
foozy@708 | 19 import sys |
foozy@708 | 20 import re |
foozy@708 | 21 |
foozy@708 | 22 angle_re = re.compile(r'(&#x003[CE];)') |
foozy@708 | 23 unicode_re = re.compile(r'&#x00([0-7][0-9A-F]);') |
foozy@708 | 24 fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.I) |
foozy@708 | 25 ligature_re = re.compile(r'&#xFB0([0-4]);') |
foozy@708 | 26 |
foozy@708 | 27 tmpsuffix = '.tmp.' + str(os.getpid()) |
foozy@708 | 28 |
foozy@708 | 29 def hide_angle(m): |
foozy@708 | 30 return m.group(1).lower() |
foozy@708 | 31 |
foozy@708 | 32 def fix_ascii(m): |
foozy@708 | 33 return chr(int(m.group(1), 16)) |
foozy@708 | 34 |
foozy@708 | 35 ligatures = ['ff', 'fi', 'fl', 'ffi', 'ffl'] |
foozy@708 | 36 |
foozy@708 | 37 def expand_ligature(m): |
foozy@708 | 38 return ligatures[int(m.group(1))] |
foozy@708 | 39 |
foozy@708 | 40 for name in sys.argv[1:]: |
foozy@708 | 41 tmpname = name + tmpsuffix |
foozy@708 | 42 ofp = file(tmpname, 'w') |
foozy@708 | 43 for line in file(name): |
foozy@708 | 44 line = angle_re.sub(hide_angle, line) |
foozy@708 | 45 line = unicode_re.sub(fix_ascii, line) |
foozy@708 | 46 line = ligature_re.sub(expand_ligature, line) |
foozy@708 | 47 line = fancyvrb_re.sub('id="fancyvrb"', line) |
foozy@708 | 48 ofp.write(line) |
foozy@708 | 49 ofp.close() |
foozy@708 | 50 os.rename(tmpname, name) |