hgbook

view web/hgbook/converter.py @ 911:f06879de9b01

Develop per-chapter templates and adapt the element creation script to deal with them.
author dukebody <dukebody@gmail.com>
date Tue Oct 27 00:10:56 2009 +0100 (2009-10-27)
parents 53a382ea07aa
children
line source
"""Assign hash-based ids to the commentable elements of one chapter.

Usage: converter.py <path-to-html-file>

The first ``div.chapter`` element and every element matched by ``sel`` get
an ``id`` built from MD5 digests, an ``Element`` record is saved for each of
those elements, and the rewritten document is printed to stdout.
"""
from lxml import etree
from lxml import html
from lxml.cssselect import CSSSelector
import hashlib
import sys

args = sys.argv[1:]

# django stuff
from django.core.management import setup_environ
import settings # Assumed to be in the same directory.
setup_environ(settings) # ugly django collateral effects :(
# Must come after setup_environ(): the models need a configured Django.
from comments.models import Element

doc_id = 'MMSC'
sel = CSSSelector('div.chapter p, pre, h1, table.equation')
chapter_sel = CSSSelector('div.chapter')

# Report a missing argument as a usage message instead of a traceback.
if not args:
    sys.exit("Usage: %s <path-to-html-file>" % __file__)
filename = args[0]

tree = etree.parse(filename, html.HTMLParser(remove_blank_text=True))
root = tree.getroot()

# Only the first chapter div of the document is considered.
chapters = chapter_sel(root)
if not chapters:
    sys.exit("%s: no div.chapter element found" % filename)
chapter = chapters[0]

# The chapter title is the second ':'-separated field of the original id,
# so an absent or colon-less id cannot be converted.
chapter_id = chapter.get('id')
if chapter_id is None or ':' not in chapter_id:
    sys.exit("%s: chapter id %r is not of the form <prefix>:<title>"
             % (filename, chapter_id))
chapter_title = chapter_id.split(':')[1]
chapter_hash = hashlib.md5(chapter_id.encode('utf8')).hexdigest()

chapter.set('id', chapter_hash)

for element in sel(root):
    # Hash the element's leading text, else its alt attribute, else its
    # serialized markup.
    hsh_source = element.text or element.get('alt') or etree.tostring(element)
    if not hsh_source:
        continue

    hsh = hashlib.md5(hsh_source.encode('utf8')).hexdigest()
    # NOTE(review): elements with identical hash sources get identical ids;
    # presumably Element.id is the primary key, so later saves would reuse
    # the same record -- confirm against comments.models.
    element_id = '%s-%s' % (chapter_hash, hsh)
    element.set('id', element_id)

    # create the commentable element in the DB
    e = Element()
    e.id = element_id
    e.chapter = chapter_hash
    e.title = chapter_title
    e.save()


print(etree.tostring(root)) # pipe to a file if you wish