hgbook

annotate en/autoid.py @ 594:0b45854f0b7b

Generate and include images properly.
author Bryan O'Sullivan <bos@serpentine.com>
date Thu Mar 26 22:00:53 2009 -0700 (2009-03-26)
parents
children
rev   line source
bos@584 1 #!/usr/bin/env python
bos@584 2 #
bos@584 3 # Add unique ID attributes to para tags. This script should only be
bos@584 4 # run by one person, since otherwise it introduces the possibility of
bos@584 5 # chaotic conflicts among tags.
bos@584 6
bos@584 7 import glob, os, re, sys
bos@584 8
# Matches a <para ...> opening tag that already carries an id of the form
# "x_<hex>"; group 1 captures the hexadecimal digits.
tagged = re.compile(r'<para[^>]* id="x_([0-9a-f]+)"[^>]*>')
# Matches only the bare, attribute-less <para> opening tag.
untagged = re.compile(r'<para>')

# Chapter and appendix files in the current directory. glob.glob() returns
# names in arbitrary order, so sort them to make the order in which new IDs
# are handed out reproducible from run to run.
names = sorted(glob.glob('ch*.xml')) + sorted(glob.glob('app*.xml'))
bos@584 13
# First pass: find the highest-numbered paragraph ID, and report any ID
# that appears more than once across the input files.

biggest_id = 0   # largest ID seen so far (as an integer)
seen = set()     # every ID encountered, used to detect duplicates
errs = 0         # number of duplicate IDs reported

for name in names:
    # Read the whole file up front and close the handle right away.
    with open(name) as fp:
        text = fp.read()
    for m in tagged.finditer(text):
        i = int(m.group(1), 16)
        if i in seen:
            # Report the ID in the same "x_<hex>" form it has in the XML so
            # the message can be searched for directly.
            sys.stderr.write('%s: duplication of ID x_%x\n' % (name, i))
            errs += 1
        seen.add(i)
        if i > biggest_id:
            biggest_id = i
bos@584 29
def retag(s):
    """Return a <para> opening tag carrying the next unused ID.

    Intended as the replacement callback for ``untagged.sub``; ``s`` is the
    match object supplied by ``re.sub`` and is not inspected. Each call
    increments the module-level ``biggest_id`` counter and formats the new
    value in lowercase hexadecimal as ``x_<hex>``.
    """
    global biggest_id
    biggest_id += 1
    return '<para id="x_%x">' % biggest_id
bos@584 34
# Second pass: add IDs to paragraphs that currently lack them.

for name in names:
    with open(name) as fp:
        old_text = fp.read()
    new_text = untagged.sub(retag, old_text)
    if new_text != old_text:
        # Write the result to a sibling temporary file and rename it over
        # the original only after the write has completed and the file is
        # closed.
        tmpname = name + '.tmp'
        with open(tmpname, 'w') as fp:
            fp.write(new_text)
        os.rename(tmpname, name)

# Exit with the number of duplicate IDs found. POSIX keeps only the low
# 8 bits of the status, so clamp it to stop an exact multiple of 256
# duplicates from being reported as success.
sys.exit(min(errs, 255))