theonlineoasis

As part of a literature survey on multimedia forensics, I compiled a multimedia forensics bibliography.

The pages are generated using the Django web framework for Python, using content from an SQLite database which I populate using a custom Python script that parses a BibTeX file. This page shows the code listings for (1) cleaning a BibTeX file (including heuristics to make author names consistent, and reformatting the file), (2) storing the cleaned entries in an SQLite database and (3) a Django web application which shows the contents of the database. The web application is unsuitable for handling many requests for the bibliography – I use it to generate pages which are then served as static HTML.

To use this code you will need to set up a Django web application and provide user interface images. I plan to provide it in a more conveniently packaged form soon.

Code samples on this website come without any guarantees – use them at your own risk! I am grateful for comments or patches sent to my email address: first name.last name@cl.cam.ac.uk.

The first step in setting up the bibliography scripts is to create a Django web application. Use the following as your models.py:

from django.db import models

class Entry(models.Model):
  """One bibliography entry, identified by a string key.

  The import script in this listing fills the 'id' and 'type' columns from the
  BibTeX entry key and entry type, and inserts 'processed' as 0.
  """
  # Primary key, stored in the database column named 'id'.
  identifier = models.CharField(max_length=255, db_column='id', primary_key=True)
  # Entry type, stored in the database column named 'type'; not editable in forms.
  typename = models.CharField(max_length=255, db_column='type', editable=False)
  # Boolean flag; the index view copies it into each entry as 'processed'.
  processed = models.BooleanField()

  def __str__(self):
    """Return the entry identifier as the display string."""
    return self.identifier
    
  class Admin:
    # NOTE(review): empty inner class; presumably registers the model with the
    # legacy Django admin -- confirm against the Django version in use.
    pass

class Attribute(models.Model):
  """A single key/value attribute belonging to an Entry.

  The import script in this listing writes one row per BibTeX field (plus
  generated keys such as 'bibtex') into this model's table.
  """
  # Owning entry; the import script addresses this column as 'identifier_id'.
  identifier = models.ForeignKey(Entry)
  # Attribute name, e.g. 'title' or 'author'.
  key = models.CharField(max_length=255)
  # Attribute text, limited to 255 characters by the field definition.
  value = models.CharField(max_length=255)
  
  def __str__(self):
    """Return the attribute key as the display string."""
    return self.key
    
  class Admin:
    # NOTE(review): empty inner class; presumably registers the model with the
    # legacy Django admin -- confirm against the Django version in use.
    pass

Synchronise the database so that Django creates an empty database file containing the model tables. The following script cleans up BibTeX source code and puts the entries into that web application database.

#!/usr/bin/python
# -*- coding: utf-8 -*-

import sqlite3 as sqlite
import sys
import os
import re

# ---------------------------------------------------------------------------
# Script configuration and command-line handling.
#
# Usage: <script> bibtexfile databasefile
# The BibTeX file is opened here and read by the parsing loop further down.
# ---------------------------------------------------------------------------

# Prefix to custom BibTeX entries
customPrefix = "!"

# Keys to omit (nb. keys starting customPrefix will not be added either.)
keysOmitBibTeX = []
keysOmitWeb = []

# Whether to download all the URLs
downloadUrls = True
downloadDestination = '/tmp/bibliography_papers/'

if len(sys.argv) != 3:
  # Incorrect usage is an error: report it on stderr and exit with a non-zero
  # status so that calling scripts can detect the failure.
  sys.stderr.write("Usage: %s bibtexfile databasefile\n" % sys.argv[0])
  sys.exit(2)
  
f = open(sys.argv[1], "r")

# Whether to replace each entry identifier with one built from authors and title.
rewriteIDs = False
entries = []        # Completed entries, in file order
entry = {}          # Entry currently being parsed
combine = ""        # Text of an unfinished attribute carried to the next line

authorHomes = {}    # Author -> Home page
highlight = {}      # www_section -> colour
omitSectionWeb = [] # List of categories to omit
names = []          # Every author name seen, used to unify spellings

# Canonicalise author names with first initial only
firstInitialOnly = True

# Whether authors in double braces {{ }} should be left alone.
keepProtectedAuthors = False

# Whether to deal with authors in preformatted form A A, B B, and C C
allowPreformattedAuthors = True

# Keys which should not be included in the BibTeX code.
bibTeXExcludeKeys = ['abstract', 'www_section']

# If the entries must be in the same order as the file, store the sort order here.
storeEntrySortOrder = False
entrySortOrder = 0

# Whether to store the order of the keys in each BibTeX entry.
storeKeyOrder = False
keyOrder = []

# Utility: run a list of [pattern, replacement] regular expression substitutions over a string, in list order.
def applySubstitutions(entries, v):
  """Return v after applying every (pattern, replacement) pair with re.sub.

  entries -- iterable of two-item (pattern, replacement) sequences
  v       -- the string to transform

  Each substitution operates on the result of the previous one.
  """
  text = v
  for rule in entries:
    (regex, substitute) = rule
    text = re.sub(regex, substitute, text)
  return text

# Substitution tables for applySubstitutions. Each table is written with the
# most general rules first and is then reversed, so that the most specific
# rules (listed last, e.g. "---" after "--") are applied first.
#
# list.reverse() reverses in place and returns None, so it must be called as a
# separate statement; assigning its result would leave every table set to None.
#
# NOTE(review): several rules below map a character to itself or use literal
# dashes/accented characters; presumably HTML entities were decoded when this
# listing was published -- verify against the original source file.

# BibTeX/LaTeX markup -> text used by the web pages.
contentSubstitutions = [
  ("([^\\\\])\\\\\"", "\\1\""),           # Quotation marks
  ("--", "–"),                      # En-dash               #!
  ("---", "—"),                     # Em-dash               #!
  ("\\\\\\$", "$"),                       # Escaped dollar
  ("\\\\o", "ø"),                  # o with slash
  ("(``|'')", "\""),                      # Double quotations     #!
  ("^\\{", ""),                           # Parenthesis at start  #!
  ("([^\\\\])\\{", "\\1"),                # Parentheses           #!
  ("([^\\\\])\\}", "\\1"),                # Parentheses           #!
  ("([^\\\\])\\\\{", "\\1{"),             # Parentheses
  ("([^\\\\])\\\\}", "\\1}"),             # Parentheses
  ("{\\\\.({)?I(})?}", "İ"),         # Extended accent
  ("Ş", "Ş"),                 # Extended accent
  ("ş", "ş"),                 # Extended accent
  ("ć", "ć"),                 # Extended accent
  ("ě", "ě"),                 # Extended accent
  ("ž", "ž"),                 # Extended accent
  ("ź", "ź"),                 # Extended accent
  ("{\"\\\\i\"}", "ı"),              # Extended accent (undotted i in Azeri)
  ("(?:{)?\\\\'({)?(.)(})?(?:})?", "&\\2acute;"),   # Acute
  ("{\\\\`({)?(.)(})?}", "&\\2grave;"),   # Grave
  ("{\\\\\\^({)?(.)(})?}", "&\\2circ;"),  # Circumflex (same pattern as before, written without the invalid \^ escape)
  ("(?:{)?\\\\\"({)?(.)(})?(?:})?", "&\\2uml;"),    # Umlauts
  ("{\\\\~({)?(.)(})?}", "&\\2tilde;"),   # Tilde
  ("{\\\\v({)?(.)(})?}", "&\\2caron;"),   # Caron
  ("{\\\\c({)?(.)(})?}", "&\\2cedil;"),   # Cedilla
  ("{\\\\ss{}}", "ß"),              # German sz ligature
  ("\\\\protect{(.*?)}", "\\1"),          # Remove LaTeX instructions
  ]
contentSubstitutions.reverse()

# Web text -> LaTeX markup, used for ordinary attribute values in the cleaned file.
valueSubstitutions = [
  ("–", "--"),              # En-dash               #!
  ("—", "---"),             # Em-dash               #!
  ("ć", "\\\\'{c}"),         # Extended accent.
  ("ě", "\\\\v{e}"),         # Extended accent.
  ("ž", "\\\\v{z}"),         # Extended accent.
  ("&(.)acute;", "\\\\'{\\1}"),   # Acute
  ("&(.)grave;", "\\\\`{\\1}"),   # Grave
  ("&(.)circ;", "\\\\^{\\1}"),    # Circumflex
  ("&(.)uml;", "\\\\\"{\\1}"),    # Umlauts
  ("&(.)tilde;", "\\\\~{\\1}"),   # Tilde
  ("&(.)caron;", "\\\\v{\\1}"),   # Caron
  ("&(.)cedil;", "\\\\c{\\1}"),   # Cedilla
  ("ß", "\\\\ss{}"),        # Sz
  ]
valueSubstitutions.reverse()
  
# Web text -> LaTeX markup, used for the preserved LaTeX form of titles.
titleSubstitutions = [
  ("–", "--"),              # En-dash               #!
  ("—", "---"),             # Em-dash               #!
  ("ć", "\\\\'{c}"),         # Extended accent.
  ("ě", "\\\\v{e}"),         # Extended accent.
  ("ž", "\\\\v{z}"),         # Extended accent.
  ("&(.)acute;", "\\\\'{\\1}"),   # Acute
  ("&(.)grave;", "\\\\`{\\1}"),   # Grave
  ("&(.)circ;", "\\\\^{\\1}"),    # Circumflex
  ("&(.)uml;", "\\\\\"{\\1}"),    # Umlauts
  ("&(.)tilde;", "\\\\~{\\1}"),   # Tilde
  ("&(.)caron;", "\\\\v{\\1}"),   # Caron
  ("&(.)cedil;", "\\\\c{\\1}"),   # Cedilla
  ]
titleSubstitutions.reverse()
  
# Parse the BibTeX file line by line.
#
# Recognised lines:
#   * "% Home:", "% HighlightSection:", "% OmitSectionWeb:", "% StoreEntrySortOrder"
#     and "% StoreKeyOrder" directives, which fill the configuration above;
#   * "@type{id," which starts an entry;
#   * "}" on its own, which ends the entry and appends it to 'entries';
#   * "key = {value}," or "key = "value"," attributes;
#   * the start of an attribute whose value continues on following lines.
for line in f:
  # Combining lines: if the previous line(s) started an attribute value without
  # finishing it, continue that attribute with this line. The carried text has
  # had its line break replaced by a single space (see the end of this loop),
  # because '.' in the attribute patterns below does not match a newline and a
  # value spanning several lines would otherwise never match and be dropped.
  if combine:
    line = combine + line.lstrip()
  combine = None

  # Check for custom entries.
  match = re.match("^% Home: (.*?), (.*)$", line)
  if match is not None:
    authorHomes[match.group(1).strip()] = match.group(2).strip()
  match = re.match("^% HighlightSection: (.*?), (.*)$", line)
  if match is not None:
    highlight[match.group(1).strip()] = match.group(2).strip()
  match = re.match("^% OmitSectionWeb: (.*?)$", line)
  if match is not None:
    omitSectionWeb.append(match.group(1).strip())
  match = re.match("^% StoreEntrySortOrder\s*$", line)
  if match is not None:
    storeEntrySortOrder = True
  match = re.match("^% StoreKeyOrder\s*$", line)
  if match is not None:
    storeKeyOrder = True
  
  # Check for the beginning of an entry.
  match = re.match("\s*@(.*?){(.*?),\s*", line)
  
  if match is not None:
    entry[customPrefix + "type"] = match.group(1).lower().strip()
    entry[customPrefix + "id"] = match.group(2).strip()
    continue
  
  # End of an entry
  match = re.match("\s*}\s*$", line)
  if match is not None:
    # Store the identifier if we must sort. It is stored as a string because
    # every non-custom value is later decoded as text when written to the database.
    if storeEntrySortOrder:
      entry['sortCounter'] = str(entrySortOrder)
      entrySortOrder = entrySortOrder + 1
    
    if storeKeyOrder:
      entry[customPrefix + 'KeyOrder'] = ','.join(keyOrder)
      keyOrder = []
    
    # Prepare blank BibTeX entry to append later.
    entry['bibtex'] = ''
    entries.append(entry)
    entry = {}
    continue
  
  # Check for a whole entry attribute.
  match = re.match("\s*(.*?)\s*=\s*(\"|\{)(.*?[^\\\\])?(\"\s*(,\s*)?|\}\s*(,\s*)?)$", line, re.I)
  if match is not None:
    content = match.group(3) if match.group(3) is not None else ""
    originalContent = content
    latextitle = ""
    key = match.group(1).lower().strip()
    if storeKeyOrder:
      keyOrder.append(key)
    
    if key == 'title':
      # Fix the capitalization of the title before altering the string, so that LaTeX instructions remain.
      # nb. depend on locale here for string lengths.
      original = content
      # Lower-case everything, then capitalise the first character after any leading non-letters.
      content = re.sub("^([^A-Za-z]*)(.)", lambda m: m.group(1) + m.group(2).capitalize(), content.lower())
      # Restore the original spelling of every brace-protected span, by position.
      content = re.sub("\\{.*?\\}", lambda m: original[m.start(0):m.end(0)], content)
      latextitle = content
      if latextitle == "":
        # Keep a non-empty placeholder so the LaTeX title is still stored below.
        latextitle = " "
      
    # Process content for accents etc.
    # TODO: Synchronize with replacements below.
    content = applySubstitutions(contentSubstitutions, content)
    entry[key] = content.strip()
    
    if latextitle != "":
      entry[customPrefix + 'latexTitle'] = latextitle
    
    if key == 'author':
      if not (originalContent.startswith('{') and originalContent.endswith('}') and keepProtectedAuthors):
        content = content.replace('.', ' ').replace('  ', ' ').replace(',,', ',')
        authors = content.split(' and ')
        newauthors = []
        
        if allowPreformattedAuthors and len(authors) == 2 and len(content.split(',')) > 3:
          authors = content.replace(' and ', ' ').split(',')
          
        if len(authors) == 1 and len(content.split(',')) > 1:
          # Detect list of author names separated by ',' with no 'and', and not last, first name.
          n = content.split(',')
          if len(n[0].strip().split(' ')) > 1 and len(n[1].strip().split(' ')) > 1:
            authors = content.split(',')
          
        for a in authors:
          # Deal with names of the form "Surname, A.", remove whitespace and control characters.
          name = a
          csep = name.split(',')
          if len(csep) == 2:
            name = "%s %s" % (csep[1].strip(), csep[0].strip())
          name = name.replace('~', '').strip()
          # Put a full stop after single-character initials.
          name = re.sub("^(.) ", "\\1. ", name)
          name = re.sub(" (.) ", " \\1. ", name)
          names.append(name)
          newauthors.append(name)
        entry[key] = " and ".join(newauthors)
      else:
        # Protected author: keep it verbatim, escaping 'and' so it is not split later.
        entry[key] = originalContent.replace(" and ", " \\and ")
    
    continue

  # Check for an incomplete entry attribute.
  match = re.match("\s*(.*?)\s*=\s*(\"|\{(\{)?)(.*?)\s*$", line, re.I)
  if match is not None:
    # Carry the unfinished attribute to the next line, replacing the line break
    # by one space so that the joined text can match the single-line patterns.
    combine = line.rstrip() + " "
    continue

f.close()

# Unify author name spellings across the whole file.
#
# 1. Group every name seen under a canonical "initial(s) surname" form.
# 2. For each group, choose the longest spelling (preferring one without a
#    full stop when lengths tie) as the name to display.
# 3. Rewrite every entry's author list to use the chosen spellings.

# Canonical form -> list of spellings (the canonical form itself comes first).
uniquenames = {}
for name in names:
  # An empty author field yields a blank name with no initial to take; skip it
  # rather than failing on the [0] index below.
  if name.strip() == "":
    continue
  # Canonicalise to initial(s) then surname.
  comp = name.strip().split(' ')
  if firstInitialOnly:
    canon = comp[0].strip()[0]
  else:
    # Ignore empty components produced by repeated spaces.
    canon = ' '.join([part.strip()[0] for part in comp[:-1] if part.strip() != ""])
  canon = "%s %s" % (canon.strip(), comp[-1])
  # Reduce character entities such as &eacute; to their base letter.
  canon = re.sub("&(.).*?;", "\\1", canon)
  if canon not in uniquenames:
    uniquenames[canon] = [canon]
  if canon != name:
    uniquenames[canon].append(name)
    
# Chosen display spelling -> list of spellings it stands for.
uniquefullnames = {}
for (k, v) in uniquenames.items():
  longest = v[0]
  for n in v:
    if len(n) > len(longest) or (len(n) == len(longest) and '.' in longest):
      longest = n
  uniquefullnames[longest] = v

for entry in entries:
  if "author" not in entry:
    continue
  author = entry["author"]
  if not (author.startswith("{") and author.endswith("}")):
    newauthors = []
    for n in author.split(' and '):
      # Names with no known spelling group (e.g. blank names) are dropped.
      for (k, v) in uniquefullnames.items():
        if n in v:
          newauthors.append(k)
          break
    entry["author"] = " and ".join(newauthors)
  else:
    # Remove parentheses.
    entry["author"] = author[1:-1]

# Optionally regenerate identifiers in the form Author1Author2..._PaperTitle:
# authors joined by '-', whitespace turned into '_', then every non-word
# character removed.
if rewriteIDs:
  for entry in entries:
    entry.setdefault('author', "")
    joinedAuthors = '-'.join(entry["author"].split(" and "))
    newid = "%s_%s" % (joinedAuthors, entry['title'])
    newid = re.sub("\s", "_", newid)
    newid = re.sub("\W", "", newid)
    entry[customPrefix + "id"] = newid
  
# Give every entry without a (non-blank) web section the default section.
for entry in entries:
  section = entry.get('www_section')
  if section is None or section.strip() == "":
    entry['www_section'] = "Uncategorized"

# Put cleaned entries in a new file, "<input>.cleaned.bib", and at the same
# time build each entry's 'bibtex' text (the form stored in the database,
# which leaves out the keys listed in bibTeXExcludeKeys).
processed = open(sys.argv[1] + ".cleaned.bib", "w")
try:
  for entry in entries:
    if 'options' in entry:
      # An 'omitlatex' option (comma separated, case-insensitive) keeps the
      # entry out of the cleaned file and leaves its 'bibtex' text empty.
      omitLaTeX = False
      for opt in entry['options'].split(','):
        if opt.strip().lower() == 'omitlatex':
          omitLaTeX = True
      if omitLaTeX:
        continue

    line = "@%s{%s,\n" % (entry[customPrefix + "type"], entry[customPrefix + "id"])
    processed.write(line)
    entry['bibtex'] += line
    # key -> formatted line, used only when the original key order is restored.
    keyOrderLines = {}
    # Iterate over a snapshot because entry['bibtex'] is updated inside the loop.
    for (key, value) in list(entry.items()):
      # Format attributes, except for bibtex and customPrefix entries.
      if key == 'bibtex' or key == 'sortCounter':
        continue

      if not key.startswith(customPrefix) and key != "title":
        v = applySubstitutions(valueSubstitutions, value)
        
        if key == 'author':
          v = v.replace(" \\and ", " and ")
        
        line = "  %s%s= {%s},\n" % (key, " " * (13 - len(key)), v)
        processed.write(line)
        # TODO: Replace with more efficient concatenation using StringIO.
        # Omit abstract and www_section from entry stored in database.
        if not key in bibTeXExcludeKeys:
          if not storeKeyOrder:
            entry['bibtex'] += line
          else:
            keyOrderLines[key] = line

      if key == customPrefix + 'latexTitle':
        # The title is written from its preserved LaTeX form rather than the web form.
        v = applySubstitutions(titleSubstitutions, value)

        line = "  %s%s= {%s},\n" % ("title", " " * (13 - len("title")), v)
        if not storeKeyOrder:
          # TODO: Replace with more efficient concatenation using StringIO.
          entry['bibtex'] += line
        else:
          keyOrderLines['title'] = line
        processed.write(line)
    
    # Write out ordered entry if requested.
    if storeKeyOrder:
      # Entries parsed before the StoreKeyOrder directive have no recorded
      # order, and custom-prefixed keys have no formatted line; skip both
      # instead of failing with a KeyError.
      for key in entry.get(customPrefix + 'KeyOrder', '').split(','):
        if key not in bibTeXExcludeKeys and key in keyOrderLines:
          entry['bibtex'] += keyOrderLines[key]
    
    line = "}\n\n"
    processed.write(line)
    entry['bibtex'] += line
finally:
  # Close the output file even if formatting an entry fails.
  processed.close()

# Attach home pages: for every author with a "% Home:" directive, add an
# 'authorPage<slug>' attribute holding "author, url".
for entry in entries:
  if 'author' not in entry:
    continue
  for author in entry['author'].split(' and '):
    if author.strip() not in authorHomes:
      continue
    underscored = re.sub('\\s+', '_', author)
    a = re.sub('[^\\w.-]', '', underscored).strip('_.- ').lower()
    entry['authorPage%s' % a] = "%s, %s" % (author, authorHomes[author])

# Apply section highlighting (first highlighted section wins), then drop the
# sections that must not appear on the web pages.
for entry in entries:
  if 'www_section' not in entry:
    continue
  sections = [s.strip() for s in entry['www_section'].split(',')]
  highlighted = [s for s in sections if s in highlight]
  if len(highlighted) > 0:
    entry['highlight'] = highlight[highlighted[0]]
  for o in omitSectionWeb:
    if o in sections:
      sections.remove(o)
  entry['www_section'] = ','.join(sections)

# Put the entries in the database.
db = sys.argv[2]

if os.path.exists(db):
  i = raw_input("Database file exists. Overwrite attributes present in input file? [Y/n] ")
  if i != "" and i != "Y":
    print "Nothing changed."
    sys.exit(0)
else:
  print "Synchronize the Django model to create tables in the database before running this script. Nothing was modified."
  sys.exit(0)

connection = sqlite.connect(db)
cursor = connection.cursor()
for entry in entries:
  if entry.has_key('options'):
    content = entry['options']
    omitWeb = False
    for opt in content.split(','):
      if opt.strip().lower() == 'omitweb':
        omitWeb = True
    if omitWeb:
      continue

  identifier = entry[customPrefix + "id"].decode('utf-8')
  typename = entry[customPrefix + "type"]
  if len(cursor.execute("SELECT id FROM main_entry WHERE id = ?", (identifier,)).fetchall()) > 0:
    cursor.execute("UPDATE main_entry SET type = ? WHERE id = ?", (typename, identifier))
  else:
    cursor.execute("INSERT INTO main_entry (id, type, processed) VALUES (?, ?, ?)", (identifier, typename, 0))
  for (key, value) in entry.iteritems():
    key = key.decode('utf-8')
    value = value.decode('utf-8')
    if not key.startswith(customPrefix):
      if len(cursor.execute("SELECT identifier_id FROM main_attribute WHERE identifier_id = ? AND key = ?", (identifier, key)).fetchall()) > 0:
        cursor.execute("UPDATE main_attribute SET value = ? WHERE identifier_id = ? AND key = ?", (value, identifier, key))
      else:
        cursor.execute("INSERT INTO main_attribute (identifier_id, key, value) VALUES (?, ?, ?)", (identifier, key, value))

connection.commit()
connection.close()

#import os

#if downloadUrls:
#  for entry in entries:
#    if entry.has_key('url'):
#      #if entry['url'].endswith('pdf'):
#      #  os.system('wget -O %s %s' % (downloadDestination + entry[customPrefix + 'id'], entry['url'],))
#      if not entry['url'].endswith('pdf'):
#        print entry['url']

Here is an example urls.py to match queries with sorting.

from django.conf.urls.defaults import *
from bibliography.main.views import index, abstract

# URL routes:
#   main/static/...           static files served from the 'template/' directory
#   main/abstract/<id>.html   abstract and BibTeX fragment for one entry
#   main/[sort[/<primary>[/<secondary>]]][/]   the bibliography index
# The '.' before 'html' is escaped so that only a literal ".html" suffix matches.
urlpatterns = patterns('',
  (r'^main/static/(.*)$', 'django.views.static.serve', {'document_root': 'template/'}),
  (r'^main/abstract/(.*?)\.html$', abstract),
  (r'^main/(?:(sort)?(?:/(category|author|title|type|year|subscription)(?:/(category|author|title|type|year|subscription))?)?)?(?:/)?$', index),
)

The pages are generated in this views.py file:

from django.shortcuts import render_to_response
from django.template import Context
from bibliography.main.models import Entry, Attribute
from copy import copy
import re

# Look up one attribute value for the abstract page, or return a fallback message.
def _attributeTextOrMessage(attributes, key, missingMessage):
  """Return the value of the attribute named key, or missingMessage.

  attributes     -- queryset of the entry's Attribute rows
  key            -- attribute name to look up
  missingMessage -- text returned when the attribute is absent, ambiguous or empty
  """
  try:
    value = attributes.get(key=key).value
  except (Attribute.DoesNotExist, Attribute.MultipleObjectsReturned):
    # Only "no such row" / "more than one row" mean the text is unavailable;
    # any other error (e.g. a database failure) is allowed to propagate.
    return missingMessage
  if not value:
    return missingMessage
  return value

# Serve pages with abstracts and BibTeX entries.
def abstract(request, entryID):
  """Render 'abstract.txt' with the abstract and BibTeX text of one entry.

  request -- the HTTP request (unused)
  entryID -- identifier of the entry, captured from the URL

  A placeholder message is rendered for each text that is not available,
  including when the entry itself is not in the database.
  """
  e = Entry.objects.filter(identifier=entryID)
  if len(e) < 1:
    abresult = btresult = "This entry is not in the database."
  else:
    attributes = e[0].attribute_set.select_related()
    abresult = _attributeTextOrMessage(attributes, 'abstract', "There is no abstract in the database for this entry.")
    btresult = _attributeTextOrMessage(attributes, 'bibtex', "There is no BibTeX in the database for this entry.")

  return render_to_response('abstract.txt', {'abstract' : abresult, 'bibtex' : btresult})

def index(request, requestType, *params):
  """Render 'main.html': the whole bibliography, grouped and sorted.

  request     -- the HTTP request (unused)
  requestType -- first URL capture: 'sort' or None (None is treated as 'sort')
  params      -- primary and secondary sort names captured from the URL; either
                 may be None, in which case 'category' and 'year' are used

  When no sort is requested and the entries carry a 'sortCounter' attribute,
  categories are ordered by the position of their first entry in the BibTeX file.
  """
  # Set attributes as a dictionary, and construct forms.
  entries = []
  for entry in Entry.objects.all():
    e = {}
    e['type'] = entry.typename
    e['id'] = entry.identifier

    e['attributes'] = {}
    e['processed'] = entry.processed
    authorPages = []
    for a in entry.attribute_set.select_related():
      if a.key in ['title', 'author', 'year', 'abstract', 'url', 'urltype']:
        e[a.key] = a.value.strip()
      elif a.key.startswith('authorPage'):
        authorPages.append(a.value)
      else:
        e['attributes'][a.key] = a.value

    if len(authorPages) > 0:
      e['authorPages'] = authorPages
    
    entries.append(e)
  
  # Calculate the number of entries before sorting, as they may be copied.
  entriesBeforeSort = len(entries)
  
  # Restore any custom sort order from the BibTeX.
  # (The length check avoids indexing into an empty list when the database has no entries.)
  customSort = customCategorySort = False
  if params[0] == None and params[1] == None and len(entries) > 0 and 'sortCounter' in entries[0]['attributes']:
    for e in entries:
      # Zero-pad so that string comparison orders the counters numerically.
      e['attributes']['sortCounter'] = "%04d" % int(e['attributes']['sortCounter'])
    customSort = True
    customCategorySort = True
    entries.sort(key=(lambda x : x['attributes']['sortCounter']))
  
  # Whether to show extra headings.
  headings = None
  
  # Sorted links
  # Each sort has either an 'entryList' function (the entry is repeated once per
  # returned value, e.g. once per author) or a 'sortKey' naming the entry field to sort on.
  sorting = {'author'       : {'url' : 'author', 'text' : 'author', 'sortKeyFunction' : (lambda a : lastWord(a['sort']).lower() + a['sort'].lower()), 'entryList' : (lambda entry : entry['author'].split(' and ') if 'author' in entry else ['']), 'sortHeadingText' : {None : 'None'}}, 
#             'firstauthor'  : {'url' : 'firstauthor', 'text' : 'first author', 'sortKeyFunction' : (lambda a : lastWord(a['sort'].split(' and ', 1)[0])), 'sortKey' : 'author', 'suppressHeadings' : True},
#             'read'         : {'url' : 'read', 'text' : 'read/unread', 'sortKey' : 'processed', 'sortHeadingText' : {True : 'Read', False : 'Unread'}},
             'title'        : {'url' : 'title', 'text' : 'title', 'sortKey' : 'title', 'suppressHeadings' : True},
             # 'type'         : {'url' : 'type', 'text' : 'type', 'sortKey' : 'type'},
             'year'         : {'url' : 'year', 'text' : 'year', 'sortKey' : 'year', 'reverse' : True},
             # 'subscription' : {'url' : 'subscription', 'text' : 'subscription', 'sortKey' : 'urltype', 'suppressHeadings' : False, 'sortHeadingText' : {None : 'Open access'}},
             'category'     : {'url' : 'category', 'text' : 'category', 'entryList' : (lambda entry : map(lambda s: s.strip(), entry['attributes']['www_section'].split(',')))}}
             
  # Defaults
  if requestType == None:
    requestType = 'sort'
  params = list(params)
  if params[0] == None and requestType == 'sort':
    params[0] = 'category'
  if params[1] == None and requestType == 'sort':
    params[1] = 'year'
  
  # Custom category sort
  # Prefix every category with the zero-padded counter of the first entry that
  # uses it, so that sorting by category follows the BibTeX file order.
  csort = {}
  if customCategorySort:
    for e in entries:
      cats = e['attributes']['www_section'].split(',')
      for c in cats:
        if not c.strip() in csort:
          csort[c.strip()] = "%04d%s" % (int(e['attributes']['sortCounter']), c.strip())
    
    for e in entries:
      # Annotate each category as a whole item; a substring replace would also
      # alter other categories that merely contain this category's name.
      cats = e['attributes']['www_section'].split(',')
      e['attributes']['www_section'] = ','.join([csort[c.strip()] for c in cats])
  
  # Sort the entries.
  sortHeadings = []
  if requestType == 'sort':
    primarySort = None if params[0] == None else sorting[params[0]]
    secondarySort = None if params[1] == None else sorting[params[1]]

    # Sort by the secondary key first; the later sort by the primary key then
    # keeps that order among entries that compare equal.
    for s in (secondarySort, primarySort):
      if s == None:
        continue
      
      if 'entryList' in s:
        # Repeat the entry once per value (shallow copies: 'attributes' stays shared).
        sortEntries = []
        for entry in entries:
          for a in s['entryList'](entry):
            e = copy(entry)
            e['sort'] = a
            sortEntries.append(e)
        entries = sortEntries
      elif 'sortKey' in s:
        # Copy sort key into 'sort' attribute value.
        for e in entries:
          if not s['sortKey'] in e:
            e[s['sortKey']] = None
          e['sort'] = e[s['sortKey']]

      if not 'reverse' in s:
        s['reverse'] = False
      
      # Do the sort, then set the sorting by that parameter as enabled.
      if 'sortKeyFunction' in s:
        entries.sort(key=s['sortKeyFunction'], reverse=s['reverse'])
      else:
        entries.sort(key=(lambda x : x['sort']), reverse=s['reverse'])
    
    # Remove annotations if this is custom category sorting.
    if customCategorySort:
      # Copies of one entry share a single 'attributes' dict, so strip the
      # four-character annotation from each dict only once; stripping it again
      # for every copy would eat into the category names themselves.
      unannotated = set()
      for e in entries:
        attributes = e['attributes']
        if id(attributes) not in unannotated:
          unannotated.add(id(attributes))
          attributes['www_section'] = ','.join([c[4:] for c in attributes['www_section'].split(',')])
        # Custom category sorting always has 'category' as the primary sort, so
        # e['sort'] is one annotated category.
        e['sort'] = e['sort'][4:]
  
    # Remove escaping of 'and' in atomic author names.
    for e in entries:
      if 'author' in e:
        # Unescape '\and' in authors
        if e['author'].find(' \\and ') != -1:
          e['author'] = e['author'].replace(' \\and ', ' and ')
          if 'sort' in e:
            e['sort'] = e['sort'].replace(' \\and ', ' and ')
        else:
          # Reformat author string: "A and B and C" becomes "A, B and C".
          e['author'] = e['author'].replace(' and ', ', ')
          if e['author'].rfind(',') != -1:
            e['author'] = e['author'][0 : e['author'].rfind(',')] + ' and' + e['author'][e['author'].rfind(',') + 1:]

    # Put in headings for sort as required.
    if 'sortHeadingText' in primarySort:
      for e in entries:
        if e['sort'] in primarySort['sortHeadingText']:
          e['sort'] = primarySort['sortHeadingText'][e['sort']]
    elif 'suppressHeadings' in primarySort and primarySort['suppressHeadings'] == True:
      for e in entries:
        e['sort'] = None

    # Make headings list.
    for e in entries:
      if not e['sort'] == None:
        # Use non-breaking spaces in sort text. The template renders this value
        # unescaped, so the HTML entity is used.
        item = [e['sort'], e['sort'].replace(' ', '&nbsp;')]
        if not item in sortHeadings:
          sortHeadings.append(item)
    
    # Flags read by the template to mark the active sort links.
    primarySort['enabled'] = True
    if secondarySort != None:
      secondarySort['secondaryEnabled'] = True
    
  # Remaining post-processing.
  for e in entries:
    # Author home pages
    if 'authorPages' in e:
      authorPages = e['authorPages']
      for a in authorPages:
        # Each value is "author name, url".
        s = a.split(', ', 1)
        e['author'] = e['author'].replace(s[0], '<a href=\"%s\">%s</a>' % (s[1], s[0]))
  
  # Link icons:
  iconMapping = {'Web page' : 'reportlink', 'Default' : 'report', 'Subscription' : 'reportkey', 'Book' : 'book'}
  for e in entries:
    # Unknown or missing URL types fall back to the default icon.
    e['urlIcon'] = iconMapping.get(e.get('urltype'), iconMapping['Default'])
  
  return render_to_response('main.html', Context({'entries': entries, 'entriesBeforeSort': entriesBeforeSort, 'sorting': sorting, 'sortHeadings': sortHeadings, 'primarySort': params[0], 'secondarySort': params[1]}))

# Returns the last word of an entry's sort value, having replaced character entities.
def lastWord(a):
  """Return the final space-separated word of a.

  Character entities of the form &<letter>...; (for example &eacute;) are
  first reduced to their leading letter.
  """
  plain = re.sub('&(.)(.*?);','\\1', a)
  return plain.rsplit(' ', 1)[-1]

I use the following template for my bibliography:

{# Main bibliography page. Context: entries, entriesBeforeSort, sorting, sortHeadings, primarySort, secondarySort. #}
<html>
  <head>
    <title>Bibliography</title>
    <script type="text/javascript" src="http://www.theonlineoasis.co.uk/main/static/scripts.js"></script>
    <link rel="stylesheet" href="http://www.theonlineoasis.co.uk/main/static/styles.css" />
  </head>
  <body>
    <a name="top"></a><div id="central">
      {# Escaping is switched off because the view puts HTML (author links, character entities) into the values. #}
      {% autoescape off %}

      <p>There are {{ entriesBeforeSort }} entries.</p>
      {# Links for choosing the sort order; the active one is shown as plain text. #}
      <div id="sortOptions">
        <ul class="sortList">
          <li><b>Sort by</b></li>
          {% for kv in sorting.items %}
            <li class="{% if forloop.last %}sortItemLast{% else %}sortItem{% endif %}">
              {% if kv.1.enabled %}
                <span class="sortEnabled">{{ kv.1.text }}</span>
              {% else %}
                <a href="http://www.theonlineoasis.co.uk/main/sort/{{ kv.1.url }}.html">{{ kv.1.text }}</a>
              {% endif %}
            </li>
          {% endfor %}
        </ul>
      </div>
      
      {# Jump list of section headings, when the current sort produces headings. #}
      {% if sortHeadings %}
        <div id="sectionList">
          <ul class="sortList">
            {% for sortHeading in sortHeadings %}
              <li class="{% if forloop.last %}sortItemLast{% else %}sortItem{% endif %}">
                <a href="#{{ sortHeading.0|slugify }}">{{ sortHeading.1 }}</a>
              </li>
            {% endfor %}
          </ul>
        </div>
      {% endif %}
      
      {% for entry in entries %}

        {# Start a new section whenever the sort value changes. #}
        {% if entry.sort %}
          {% ifchanged entry.sort %}
            <p class="up">
              <a href="#top">top</a>
            </p>
            <a name="{{ entry.sort|slugify }}"></a><h2>{{ entry.sort }}</h2>
          {% endifchanged %}
        {% else %}
          {% if forloop.first %}
          <h2></h2>
          {% endif %}
        {% endif %}
        
        <div class="entryContainer">
          <a name="entry-{{ entry.id }}"></a>
          <h3>
            <div class="author">
              {# Clicking toggles the details and loads the abstract on first use. #}
              <span onclick="toggleView(event); if (document.getElementById('abstract{{ forloop.counter0 }}').innerHTML.length == 0) {loadAbstract('{{ entry.id }}', 'abstract{{ forloop.counter0 }}')}; return false;"><img src="http://www.theonlineoasis.co.uk/main/static/expand.png" alt="Expand this entry" /><img style="display: none;" src="http://www.theonlineoasis.co.uk/main/static/collapse.png" alt="Collapse this entry" /></span>{{ entry.author }}
            </div>

            <img src="http://www.theonlineoasis.co.uk/main/static/{{ entry.urlIcon }}.png" alt="View document" /><a onclick="event.cancelBubble = true;" class="unread" href="{% if entry.url %}{{ entry.url }}{% else %}#entry-{{ entry.id }}{% endif %}">{{ entry.title }}</a>

            {% ifnotequal entry.year None %}
              <span class="entryYear">
                {{ entry.year }}
              </span>
            {% endifnotequal %}
          </h3>
          {# Hidden until expanded; the empty div receives the abstract fragment. #}
          <div class="expandCollapse">
            <p style="display: none;"><a href="http://www.theonlineoasis.co.uk/main/abstract/{{ entry.id }}.html">View abstract and BibTeX</a></p>
            <div id="abstract{{ forloop.counter0 }}"></div>
          </div>
        </div>

      {% endfor %}

      {% endautoescape %}
    </div>
  </body>
</html>

Abstracts use this template:

{# Fragment rendered by the abstract view: the entry's abstract followed by its BibTeX text. #}
<p class="abstract"><b>Abstract:</b> {{ abstract }}</p>

<pre class="bibtex"><code>{{ bibtex }}</code></pre>

A link next to each entry title shows or hides its abstract. This is implemented using HTTP requests in JavaScript:

// Toggle viewing an abstract.
//
// e: the click event. The clicked element's ancestors are searched, nearest
// first, for one that has a child with class "expandCollapse"; that child is
// shown or hidden and the clicked image is switched between the expand and
// collapse icons. Nothing happens if no such ancestor exists.
function toggleView(e)
{
  // Older Internet Explorer reports the clicked element as srcElement instead
  // of target. Resolve it before remembering the image to update.
  var p = e.target || e.srcElement;
  var im = p;
  var n = null;
  // Stop at the top of the tree instead of dereferencing a missing parent.
  while (n == null && p != null && p.parentNode != null)
  {
    p = p.parentNode;
    var nodeList = p.childNodes;
    for (var i = 0; i < nodeList.length; i++)
    {
      if (nodeList[i].className == "expandCollapse")
      {
        n = nodeList[i];
        break;
      }
    }
  }

  if (n == null)
    return;

  // Inline display is initially empty (the stylesheet hides the panel), so
  // anything other than 'block' counts as hidden.
  var c = n.style.display;
  if (c == 'block')
  {
    n.style.display = 'none';
    im.src = '/main/static/expand.png';
  }
  else
  {
    n.style.display = 'block';
    im.src = '/main/static/collapse.png';
  }
}

// Request the abstract fragment for an entry and place it in a page element.
//
// identifier:  entry identifier, used to build "/main/abstract/<identifier>.html"
// destination: id of the element that shows "Loading" and then the response
//
// Returns false when no HTTP request object can be created; otherwise returns
// nothing and the element is filled in asynchronously by loadPage.
function loadAbstract(identifier, destination)
{
  var pr = false;
  if (window.XMLHttpRequest)
  {
    pr = new XMLHttpRequest()
  }
  else if (window.ActiveXObject)
  {
    // Microsoft IE 6
    try
    {
      pr = new ActiveXObject("Msxml2.XMLHTTP");
    }
    catch (e)
    {
      try
      {
        pr = new ActiveXObject("Microsoft.XMLHTTP");
      }
      catch (e2)
      {
        // Neither ActiveX control is available; handled by the check below.
      }
    }
  }

  // No request object could be created (unsupported browser, or both ActiveX
  // controls failed): give up rather than calling methods on a non-object.
  if (!pr)
    return false;

  pr.onreadystatechange = function() {loadPage(pr, destination);};
  document.getElementById(destination).innerHTML = "Loading";
  pr.open("GET", "/main/abstract/" + identifier + ".html", true);
  pr.send(null);
}

// Ready-state handler for loadAbstract: once the request has completed, copy
// the response text into the destination element.
//
// pr:          the HTTP request object
// destination: id of the element to fill
//
// A completed request is accepted when its status is 200, or regardless of
// status when the page address does not contain "http" (e.g. a local file).
function loadPage(pr, destination)
{
  if (pr.readyState != 4)
    return;

  var succeeded = (pr.status == 200);
  var notServedOverHttp = (window.location.href.indexOf("http") == -1);
  if (succeeded || notServedOverHttp)
  {
    var target = document.getElementById(destination);
    target.innerHTML = pr.responseText;
  }
}

I use the following stylesheet in my bibliography:

/* ---- Base elements ---- */

body
{
  font-family: sans-serif;
  font-size: 13px;
}

a
{
  color: #2030A0;
  text-decoration: none;
}

img
{
  border: 0;
  margin: 0px 5px 6px 2px;
  vertical-align: top;
}

/* Images inside paragraphs carry no margin. */
p img
{
  margin: 0px;
  vertical-align: top;
}

/* ---- Headings ---- */

h1
{
  color: #304050;
  font-size: 1.3em;
}

/* Section headings (one per sort value), underlined with a grey rule. */
h2
{
  border-bottom: 2px solid #E0E0E0;
  color: #304050;
  font-size: 13px;
  padding-bottom: 2px;
  margin-bottom: 0px;
}

/* Entry heading: author line, icon, title link and year. */
h3
{
  font-size: 13px;
  margin: 0;
}

h3 a
{
  text-decoration: none;
}

/* ---- Entries ---- */

/* The expand/collapse control in the author line is clickable. */
.author span
{
  cursor: pointer;
}

.author
{
  font-weight: normal;
}

.entryContainer 
{
  border-bottom: 1px solid #E0E0E0;
  padding-top: 5px;
  margin: 0;
}

.entryYear
{
  font-style: italic;
  font-weight: normal;
}

/* Details panel; hidden until the script sets display to block. */
.expandCollapse
{
  display: none;
}

.expandLink
{
  float: left;
  margin-left: -10px;
  margin-bottom: 10px;
}

/* The currently active sort option. */
.sortEnabled
{
  border-bottom: 1px solid #E0E0E0;
  padding: 2px 2px;
}

/* ---- Abstract fragment ---- */

pre.bibtex
{
  font-size: 0.8em;
  margin: 5px auto 10px auto;
  width: 1040px;
}

p.abstract
{
  font-size: 0.8em;
  text-align: justify;
  margin: 5px auto 10px auto;
  width: 1040px;
}

/* ---- "top" link shown beside each section heading ---- */

p.up
{
  float: right;
  text-align: right;
}

p.up a
{
  color: #909090;
  text-decoration: none;
}

/* ---- Inline lists used for sort options and section links ---- */

ul.sortList li
{
  display: inline;
  list-style: none;
  margin-left: 5px;
}

/* Intentionally empty rule. */
ul.sortList li a
{
}

/* Items are separated by a vertical rule, except the last one. */
ul.sortList li.sortItem
{
  border-right: 1px solid black;
  padding: 0 5px;
}

ul.sortList li.sortItemLast
{
  border-right: 0;
}

ul.sortList
{
  display: inline;
  margin-left: 0;
  padding-left: 0;
}

/* ---- Page layout ---- */

/* Fixed-width, centred page column. */
#central
{
  margin: 0 auto;
  width: 1100px;
}

#sectionList
{
  padding: 10px;
  text-align: center;
}

#sectionList a
{
  text-decoration: none;
}

#sortOptions 
{
  margin-bottom: 5px;
  text-align: center;
}

#sortOptions2
{
  margin-top: 5px;
  text-align: center;
}