rev |
line source |
foozy@708
|
1 #!/usr/bin/env python
|
foozy@708
|
2 #
|
foozy@708
|
3 # This script attempts to work around some of the more bizarre and
|
foozy@708
|
4 # quirky behaviours of htlatex.
|
foozy@708
|
5 #
|
foozy@708
|
6 # - We've persuaded htlatex to produce UTF-8, which unfortunately
|
foozy@708
|
7 # causes it to use huge character sequences to represent even the
|
foozy@708
|
8 # safe 7-bit ASCII subset of UTF-8. We fix that up.
|
foozy@708
|
9 #
|
foozy@708
|
10 # - BUT we have to treat angle brackets (for example, redirections in
|
foozy@708
|
11 # shell script snippets) specially, otherwise they'll break the
|
foozy@708
|
12 # generated HTML. (Reported by Johannes Hoff.)
|
foozy@708
|
13 #
|
foozy@708
|
14 # - For some reason, htlatex gives a unique ID to each fancyvrb
|
foozy@708
|
15 # environment, which makes writing a sane, small CSS stylesheet
|
foozy@708
|
16 # impossible. We squish all those IDs down to nothing.
|
foozy@708
|
17
|
foozy@708
|
18 import os
|
foozy@708
|
19 import sys
|
foozy@708
|
20 import re
|
foozy@708
|
21
|
foozy@708
|
# htlatex writes characters as numeric character references of the form
# "&#xHHHH;".  The patterns below match those references literally.

# "&#x003C;" / "&#x003E;" are "<" and ">"; they must stay encoded or they
# would break the generated HTML.
angle_re = re.compile(r'(&#x003[CE];)')

# Any other reference in the 7-bit ASCII range (00-7F).  Only upper-case hex
# digits are matched, so references that were lower-cased beforehand are
# deliberately skipped.
unicode_re = re.compile(r'&#x00([0-7][0-9A-F]);')

# Per-environment ids such as id="fancyvrb12", matched case-insensitively.
fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.I)

# Latin ligatures U+FB00..U+FB04 (ff, fi, fl, ffi, ffl); the captured digit
# is the index of the ligature within that range.
ligature_re = re.compile(r'&#xFB0([0-4]);')

# Per-process suffix for the temporary file each input is rewritten into.
tmpsuffix = '.tmp.' + str(os.getpid())
|
foozy@708
|
28
|
foozy@708
|
def hide_angle(m):
    """Return the text captured by group 1 of match *m*, lower-cased.

    Intended as a replacement callback for ``re.sub``.  Lower-casing the
    hex digits of a matched reference keeps it out of reach of a later
    pattern that only accepts upper-case hex digits.
    """
    return m.group(1).lower()
|
foozy@708
|
31
|
foozy@708
|
def fix_ascii(m):
    """Return the character whose hexadecimal code is group 1 of match *m*.

    Intended as a replacement callback for ``re.sub``: the captured hex
    digits are parsed as an integer and turned into the corresponding
    single character.
    """
    return chr(int(m.group(1), 16))
|
foozy@708
|
34
|
foozy@708
|
# Plain-letter spellings of the ligatures, indexed by the digit that
# expand_ligature() receives in group 1 of its match.
ligatures = ['ff', 'fi', 'fl', 'ffi', 'ffl']


def expand_ligature(m):
    """Return the plain-letter expansion selected by group 1 of match *m*.

    Intended as a replacement callback for ``re.sub``.  Group 1 must be a
    decimal digit that is a valid index into ``ligatures``.
    """
    return ligatures[int(m.group(1))]
|
foozy@708
|
39
|
foozy@708
|
# Rewrite every file named on the command line in place.
for name in sys.argv[1:]:
    # Write to a per-process sibling file first, so the original is only
    # replaced once the whole rewrite has succeeded.
    tmpname = name + tmpsuffix
    try:
        # open() instead of the Python 2-only file() builtin; the with
        # blocks close both handles even when a read or write fails.
        with open(name) as ifp:
            with open(tmpname, 'w') as ofp:
                for line in ifp:
                    # Order matters: angle-bracket references are
                    # lower-cased first, so the ASCII pass (which only
                    # matches upper-case hex digits) leaves them encoded.
                    line = angle_re.sub(hide_angle, line)
                    line = unicode_re.sub(fix_ascii, line)
                    line = ligature_re.sub(expand_ligature, line)
                    # Collapse every numbered fancyvrb id to one value.
                    line = fancyvrb_re.sub('id="fancyvrb"', line)
                    ofp.write(line)
    except Exception:
        # Do not leave a stale temporary file behind on failure.
        if os.path.exists(tmpname):
            os.remove(tmpname)
        raise
    os.rename(tmpname, name)
|