import os import time import sys import string # all characters besides lowercase characters NLOWER = [i for i in string.printable if i not in string.ascii_lowercase] HTML_HEADER = """ &_FTITLE """ HTML_BOILER_MAINHEAD = """ &_FCONTENTS """ HTML_BOILER_BODY = """ &_FBODY """ def read_file(name): try: x = open(name, 'r') c = x.read() x.close() return c except: return None def parse_article(filepath): c = read_file(filepath) if c is None: return None metadata = {} have_content = False metadata['raw_body'] = '' metadata['sections'] = [] metadata['content_list'] = '