hgbook

view web/hgbook/converter.py @ 892:86073756fe77

Register script to insert ids and generate the associated elements in the database.
author dukebody <dukebody@gmail.com>
date Mon Oct 19 20:01:15 2009 +0200 (2009-10-19)
parents
children 40e41b3eeaf6
line source
"""Insert ids into an HTML document and register them in the database.

Usage: pass the path of an HTML file as the first command-line argument.

The script parses the file, sets the id of the <body> element to ``doc_id``
and gives every element matched by ``sel`` an id of the form
``<doc_id>-<md5 hexdigest>``.  For each such element an ``Element`` row is
saved with the same id.  The modified document is printed to stdout.
"""
import hashlib
import sys

from lxml import etree
from lxml import html
from lxml.cssselect import CSSSelector

args = sys.argv[1:]
if not args:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit('usage: %s FILE.html' % sys.argv[0])

# django stuff
from django.core.management import setup_environ
import settings  # Assumed to be in the same directory.
setup_environ(settings)  # ugly django collateral effects :(
# Must stay after setup_environ(): the models need a configured environment.
from comments.models import Element

doc_id = 'MMSC'
sel = CSSSelector('p, pre, h1, table.equation')
body = CSSSelector('body')

filename = args[0]
tree = etree.parse(filename, html.HTMLParser())
root = tree.getroot()

body(root)[0].set('id', doc_id)

for element in sel(root):
    # Hash the first non-empty of: the element's leading text, its 'alt'
    # attribute, or its serialized markup.
    hsh_source = element.text or element.get('alt') or etree.tostring(element)
    if not hsh_source:
        continue

    # hashlib.md5 replaces the deprecated md5.new; the digest is the same.
    hsh = hashlib.md5(hsh_source.encode('utf8')).hexdigest()

    # NOTE(review): the id depends only on hsh_source, so two elements with
    # the same source text receive the same id -- confirm this is intended.
    element_id = '%s-%s' % (doc_id, hsh)
    element.set('id', element_id)

    # create the commentable element in the DB
    e = Element()
    e.id = element_id
    e.chapter = doc_id
    e.title = hsh
    e.save()

# Single-argument print(...) behaves exactly like the print statement here.
print(etree.tostring(root))  # pipe to a file if you wish