#!/usr/bin/env python import sys try: import markdown import jinja2 except ImportError: print "You must have the Markdown and Jinja2 python libraries installed." sys.exit(-1) import os.path from HTMLParser import HTMLParser BASE_DIR = os.path.abspath(os.path.dirname(__file__)) SRC_DIR = os.path.join(BASE_DIR, 'src') OUT_DIR = os.path.join(BASE_DIR, 'out', 'doc') class TOCParser(HTMLParser): def __init__(self, tags=['h1', 'h2', 'h3']): HTMLParser.__init__(self) self.tags = tags self.toc = [] self.output = [] self.accum = None self.header_index = 1 self.online = True # Are we outputting the content we find? self.title = None def _convert_attr(self, attr): return "".join([ ' %s="%s"' % (key, value) for key, value in attr ]) def handle_starttag(self, tag, attr): if tag in self.tags: # Start tracking content assert self.accum is None self.accum = [] # Check if we're the first top level (interpret that as # the page title). level = self.tags.index(tag) if level == 0 and self.title is None: self.online = False return # Add a reference to this header id = 'h_%d' % self.header_index attr.append(('id', id)) # Link back to the top, unless we're already there. if self.header_index > 1: self.output.append('
') self.output.append( '
'+ '▲
' ) # Accumulate the transformed version content = "<%s%s>" % (tag, self._convert_attr(attr)) self.output.append(content) def handle_startendtag(self, tag, attr): if self.online: content = "<%s%s>" % (tag, self._convert_attr(attr)) self.output.append(content) def handle_entityref(self, name): if self.online: self.output.append("&%s;" % name) def handle_charref(self, name): if self.online: self.output.append("&#%s;" % name) def handle_data(self, data): if self.accum is not None: self.accum.append(data) if self.online: self.output.append(data) def handle_endtag(self, tag): if tag in self.tags: content = "".join(self.accum) level = self.tags.index(tag) self.accum = None if level == 0 and self.title is None: self.online = True self.title = content return self.toc.append((level, '#h_%d' % self.header_index, content)) self.output.append("" % tag) if self.header_index > 1: self.output.append('
') self.header_index += 1 else: self.output.append("" % tag) def get_html(self): return "".join(self.output) def build(): # Make sure we have the output directory if not os.path.exists(OUT_DIR): os.makedirs(OUT_DIR) # Find the template we'll use env = jinja2.Environment(loader=jinja2.FileSystemLoader(SRC_DIR)) template = env.get_template('base.html') # Convert all our markdown source files. for filename in os.listdir(SRC_DIR): if filename.endswith('.md'): # Load the markdown, convert it md = open(os.path.join(SRC_DIR, filename),'r').read() html = markdown.markdown(md) # Parse the generated HTML to extract the TOC parser = TOCParser() parser.feed(html) parser.close() # Create the final result result = template.render( title = parser.title, content = parser.get_html(), toc = parser.toc ) # Write it out destination = os.path.join(OUT_DIR, "%s.html" % filename[:-3]) open(destination, 'w').write(result) print "Built: %s" % destination if __name__ == '__main__': build()