hgbook

diff fr/fixhtml.py @ 954:2cd5d582c956

Work in progress + typos
author Romain PELISSE <belaran@gmail.com>
date Wed Feb 18 20:38:12 2009 +0100 (2009-02-18)
parents ec6a3bb10986
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/fr/fixhtml.py	Wed Feb 18 20:38:12 2009 +0100
     1.3 @@ -0,0 +1,50 @@
     1.4 +#!/usr/bin/env python
     1.5 +#
     1.6 +# This script attempts to work around some of the more bizarre and
     1.7 +# quirky behaviours of htlatex.
     1.8 +#
     1.9 +# - We've persuaded htlatex to produce UTF-8, which unfortunately
    1.10 +#   causes it to use huge character sequences to represent even the
    1.11 +#   safe 7-bit ASCII subset of UTF-8.  We fix that up.
    1.12 +#
    1.13 +# - BUT we have to treat angle brackets (for example, redirections in
    1.14 +#   shell script snippets) specially, otherwise they'll break the
    1.15 +#   generated HTML.  (Reported by Johannes Hoff.)
    1.16 +#
    1.17 +# - For some reason, htlatex gives a unique ID to each fancyvrb
    1.18 +#   environment, which makes writing a sane, small CSS stylesheet
    1.19 +#   impossible.  We squish all those IDs down to nothing.
    1.20 +
    1.21 +import os
    1.22 +import sys
    1.23 +import re
    1.24 +
    1.25 +angle_re = re.compile(r'(&#x003[CE];)')
    1.26 +unicode_re = re.compile(r'&#x00([0-7][0-9A-F]);')
    1.27 +fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.I)
    1.28 +ligature_re = re.compile(r'&#xFB0([0-4]);')
    1.29 +
    1.30 +tmpsuffix = '.tmp.' + str(os.getpid())
    1.31 +
    1.32 +def hide_angle(m):
    1.33 +    return m.group(1).lower()
    1.34 +
    1.35 +def fix_ascii(m):
    1.36 +    return chr(int(m.group(1), 16))
    1.37 +
    1.38 +ligatures = ['ff', 'fi', 'fl', 'ffi', 'ffl']
    1.39 +
    1.40 +def expand_ligature(m):
    1.41 +    return ligatures[int(m.group(1))]
    1.42 +
    1.43 +for name in sys.argv[1:]:
    1.44 +    tmpname = name + tmpsuffix
    1.45 +    ofp = file(tmpname, 'w')
    1.46 +    for line in file(name):
    1.47 +        line = angle_re.sub(hide_angle, line)
    1.48 +        line = unicode_re.sub(fix_ascii, line)
    1.49 +        line = ligature_re.sub(expand_ligature, line)
    1.50 +        line = fancyvrb_re.sub('id="fancyvrb"', line)
    1.51 +        ofp.write(line)
    1.52 +    ofp.close()
    1.53 +    os.rename(tmpname, name)