hgbook

changeset 892:86073756fe77

Add a script that inserts ids into the HTML and creates the associated elements in the database.
author dukebody <dukebody@gmail.com>
date Mon Oct 19 20:01:15 2009 +0200 (2009-10-19)
parents 2aebffe8609d
children 40e41b3eeaf6
files web/hgbook/converter.py web/hgbook/urls.py
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/web/hgbook/converter.py	Mon Oct 19 20:01:15 2009 +0200
     1.3 @@ -0,0 +1,45 @@
     1.4 +from lxml import etree
     1.5 +from lxml import html
     1.6 +from lxml.cssselect import CSSSelector
     1.7 +import md5
     1.8 +import sys
     1.9 +
    1.10 +
    1.11 +args = sys.argv[1:]
    1.12 +
    1.13 +# django stuff
    1.14 +from django.core.management import setup_environ
    1.15 +import settings # Assumed to be in the same directory.
    1.16 +setup_environ(settings)       # ugly Django side effects :(
    1.17 +from comments.models import Element
    1.18 +
    1.19 +doc_id = 'MMSC'
    1.20 +sel = CSSSelector('p, pre, h1, table.equation')
    1.21 +body = CSSSelector('body')
    1.22 +
    1.23 +filename = args[0]
    1.24 +tree = etree.parse(filename, html.HTMLParser())
    1.25 +root = tree.getroot()
    1.26 +
    1.27 +
    1.28 +body(root)[0].set('id', doc_id)
    1.29 +
    1.30 +for element in sel(root):
    1.31 +    hsh_source = element.text or element.get('alt') or etree.tostring(element)
    1.32 +
    1.33 +    if hsh_source:
    1.34 +        hsh_source_encoded = hsh_source.encode('utf8')
    1.35 +        hsh = md5.new(hsh_source_encoded).hexdigest()
    1.36 +        element.set('id', '%s-%s' % (doc_id, hsh))
    1.37 +    
    1.38 +        # create the commentable element in the DB
    1.39 +        e = Element()
    1.40 +        e.id = '%s-%s' % (doc_id, hsh)
    1.41 +        e.chapter = doc_id
    1.42 +        e.title = hsh
    1.43 +        e.save()
    1.44 +
    1.45 +
    1.46 +
    1.47 +print etree.tostring(root)      # pipe to a file if you wish
    1.48 +
     2.1 --- a/web/hgbook/urls.py	Mon Oct 19 20:00:01 2009 +0200
     2.2 +++ b/web/hgbook/urls.py	Mon Oct 19 20:01:15 2009 +0200
     2.3 @@ -1,4 +1,4 @@
     2.4 -import os
     2.5 +import os, sys
     2.6  from django.conf.urls.defaults import *
     2.7  import hgbook.comments.feeds as feeds
     2.8  from django.contrib import admin
     2.9 @@ -16,9 +16,12 @@
    2.10       {'feed_dict': feeds}),          
    2.11  
    2.12      # Only uncomment this for local testing without Apache.
    2.13 -    # (r'^html/(?P<path>.*)$', 'django.views.static.serve',
    2.14 -    # {'document_root': os.path.realpath(os.path.dirname(
    2.15 -    #    sys.modules[__name__].__file__) + '/../../en/html')}),
    2.16 +     (r'^html/(?P<path>.*)$', 'django.views.static.serve',
    2.17 +     {'document_root': os.path.realpath(os.path.dirname(
    2.18 +        sys.modules[__name__].__file__) + '/../html')}),
    2.19 +     (r'^support/(?P<path>.*)$', 'django.views.static.serve',
    2.20 +     {'document_root': os.path.realpath(os.path.dirname(
    2.21 +        sys.modules[__name__].__file__) + '/../javascript')}),
    2.22  
    2.23      # Uncomment this for admin:
    2.24      (r'^admin/(.*)', admin.site.root),