|
|
#!/usr/bin/env python3
# Assumes that there's a directory named ~/src/www-home which is a git repo
# that the contents of output/ can be copied to, committed, & pushed to the
# production server.
# TODO: in template.html, add apple touch icon, maybe other favicon sizes.
# Requirements:
#   sudo apt install python3-markdown
#   sudo apt install python3-smartypants
#   sudo apt install python3-bs4
#   sudo apt install python3-lxml  (find_summary() asks BeautifulSoup for
#                                   the 'lxml' parser)
import argparse from bs4 import BeautifulSoup import datetime import glob import html from io import StringIO import markdown import operator import os import re import shutil
# Directory layout, relative to the directory the script is run from.
# Markdown sources are walked under input_directory, static assets under
# static_directory, and everything generated is written to output_directory.
input_directory = 'content'
static_directory = 'static'
output_directory = 'output'
# Checkout that the built site is copied into by copy_site()/deploy_site().
# The '~' is stored unexpanded.
deploy_directory = '~/src/www-home'

# Extension names passed to markdown.markdown() when rendering each page.
md_extensions = [ 'fenced_code', 'codehilite', 'nl2br', 'toc', 'smarty', 'tables', 'linkify']

# Filled by process_markdown_files() with one dict per page that carries a
# 'Posted YYYY-MM-DD' line; read by make_atom_feed().
blog_entries = []
def print_file(in_file, out_file):
    """Print a single 'source -> destination' progress line.

    The source is left-justified in a 62-character column (longer values are
    not truncated) so that the arrows of consecutive lines line up.
    """
    source_column = '{!s:<62}'.format(in_file)
    print(source_column + ' -> ' + str(out_file))
def copy_static_files():
    """Mirror every file under static_directory into output_directory.

    The sub-directory structure is preserved. Destination directories are
    created on demand, and only for source directories that actually contain
    files. Each copy is reported through print_file().
    """
    for walk_dir, _subdirs, names in os.walk(static_directory):
        if not names:
            # Nothing to copy from here, so no destination dir is created.
            continue
        # Path of this directory relative to the static root ('' at the top).
        relative_dir = walk_dir.replace(static_directory, '', 1).lstrip('/')
        target_dir = os.path.join(output_directory, relative_dir)
        os.makedirs(target_dir, exist_ok=True)
        for name in names:
            src_path = os.path.join(walk_dir, name)
            dst_path = os.path.join(target_dir, name)
            print_file(src_path, dst_path)
            shutil.copy2(src_path, dst_path)
def copy_style_css():
    """Copy only style.css from static_directory to output_directory.

    Used by main() for --fast builds in place of copy_static_files().
    """
    stylesheet = 'style.css'
    src_path, dst_path = (
        os.path.join(root, stylesheet)
        for root in (static_directory, output_directory))
    print_file(src_path, dst_path)
    shutil.copy2(src_path, dst_path)
def find_update_date(text):
    """Return the date of the first 'Posted YYYY-MM-DD' line, or None.

    A line qualifies when it begins with 'Posted <date>', optionally preceded
    by a single '*' (markdown emphasis). The date is returned as the matched
    string; it is not validated as a real calendar date.
    """
    posted_line = re.compile(r'^\*?Posted (\d{4}-\d{2}-\d{2})', re.MULTILINE)
    found = posted_line.search(text)
    return found.group(1) if found else None
def find_summary(html_content):
    """Build a short plain-text summary from a rendered HTML page.

    The markup is stripped with BeautifulSoup (lxml parser), the text is
    split on newlines, and the third and fourth lines are joined with a
    space. The result is HTML-escaped, leaving quotes untouched.

    NOTE(review): skipping the first two lines presumably drops the title
    and the line after it -- confirm against real page output.
    """
    soup = BeautifulSoup(html_content, features='lxml')
    text_lines = soup.get_text().split('\n')
    summary = ' '.join(text_lines[2:4])
    return html.escape(summary, quote=False)
def process_markdown_files():
    """Render every .md file under input_directory into output_directory.

    For each markdown file this:
      * makes sure the text starts with a top-level '# ' heading and uses
        that first line as the page title,
      * renders the markdown and fills template.html (read from the current
        working directory) with title, content and page_url,
      * writes the result to the mirrored path under output_directory with
        the '.md' suffix replaced by '.html',
      * appends a dict (title, url, date, summary, content) to the
        module-level blog_entries list when the text contains a
        'Posted YYYY-MM-DD' line.
    """
    with open('template.html') as template_file:
        template = template_file.read()

    for (dirpath, _, filenames) in os.walk(input_directory):
        for filename in filenames:
            if not filename.endswith('.md'):
                continue
            markdown_filename = os.path.join(dirpath, filename)

            with open(markdown_filename) as markdown_file:
                text = markdown_file.read()

            # Every page needs a top-level heading; promote the first line.
            if not text.startswith('# '):
                text = '# ' + text

            # The title is the first line without its heading marker. Using
            # split() also handles a file that has no newline at all, where
            # the heading marker used to be left in the title.
            title = text.split('\n', 1)[0].lstrip('# ')

            blog_entry = {'title': html.escape(title, quote=False)}

            # NOTE(review): the title handed to the template below is not
            # HTML-escaped, unlike the feed title above -- confirm that is
            # intended.
            if markdown_filename == os.path.join(input_directory, 'index.md'):
                title = 'Colin McMillen'
            else:
                title += ' | Colin McMillen'

            # Swap only the trailing extension; str.replace() would also
            # rewrite a '.md' that appears earlier in the name.
            out_filename = filename[:-len('.md')] + '.html'

            # output/<input_directory>/sub -> output/sub
            out_dirpath = os.path.join(output_directory, dirpath)
            out_dirpath = out_dirpath.replace('/' + input_directory, '', 1)
            out_fullpath = os.path.join(out_dirpath, out_filename)
            page_url = out_fullpath.replace(output_directory + '/', '', 1)
            # Directory-style URL for index pages. Compare the whole file
            # name so that e.g. 'reindex.html' is left alone.
            if out_filename == 'index.html':
                page_url = page_url[:-len('index.html')]

            html_content = markdown.markdown(
                text, extensions=md_extensions, output_format='html5')
            output = template.format(
                title=title, content=html_content, page_url=page_url)

            # Only pages carrying a 'Posted <date>' line go into the feed.
            update_date = find_update_date(text)
            if update_date:
                blog_entry['url'] = 'https://www.mcmillen.dev/' + page_url
                blog_entry['date'] = update_date
                blog_entry['summary'] = find_summary(html_content)
                blog_entry['content'] = html_content
                blog_entries.append(blog_entry)

            os.makedirs(out_dirpath, exist_ok=True)
            print_file(markdown_filename, out_fullpath)
            with open(out_fullpath, 'w') as out_file:
                out_file.write(output)
def make_sitemap():
    """Write sitemap.txt in output_directory: one URL per .html/.pdf file.

    Files whose path relative to output_directory starts with 'google' or
    'drafts' are left out. URLs are written sorted, one per line.

    This is done in pure Python rather than through a find/grep/perl shell
    pipeline: no GNU-specific tools are required, failures raise instead of
    being silently ignored, and only real '.html' / '.pdf' extensions match
    (the old pattern's unescaped dot also accepted names merely ending in
    'html' or 'pdf').
    """
    site_root = 'https://www.mcmillen.dev'
    excluded_prefixes = ('google', 'drafts')
    sitemap_filename = os.path.join(output_directory, 'sitemap.txt')

    urls = []
    for (dirpath, _, filenames) in os.walk(output_directory):
        for filename in filenames:
            if not filename.endswith(('.html', '.pdf')):
                continue
            rel_path = os.path.relpath(
                os.path.join(dirpath, filename), output_directory)
            # URLs always use forward slashes, whatever the OS separator.
            rel_path = rel_path.replace(os.sep, '/')
            if rel_path.startswith(excluded_prefixes):
                continue
            urls.append(site_root + '/' + rel_path)
    # os.walk order is filesystem-dependent; sort for reproducible output.
    urls.sort()

    print_file('', sitemap_filename)
    with open(sitemap_filename, 'w') as sitemap_file:
        sitemap_file.writelines(url + '\n' for url in urls)
def make_atom_feed():
    """Write feed.atom in output_directory from the collected blog_entries.

    blog_entries is sorted in place by its 'date' string (oldest first) and
    each entry becomes one <entry> whose HTML content is wrapped in a CDATA
    section. The entry's 'summary' value is not emitted; the entry template
    has no placeholder for it.

    The feed-level <updated> value is the current time as a timezone-aware
    UTC timestamp, so it is correct whatever the build machine's local
    timezone is. The file is written as UTF-8 to match the encoding named in
    the XML declaration.
    """
    atom_template = '''<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>Colin McMillen's Blog</title>
  <link href="https://www.mcmillen.dev"/>
  <link rel="self" href="https://www.mcmillen.dev/feed.atom"/>
  <updated>{last_update}</updated>
  <author>
    <name>Colin McMillen</name>
  </author>
  <id>https://www.mcmillen.dev/</id>
{entries}
</feed>
'''

    entry_template = '''
  <entry>
    <title>{title}</title>
    <id>{url}</id>
    <link rel="alternate" href="{url}"/>
    <content type="html">
      <![CDATA[
      {content}
      ]]>
    </content>
    <updated>{updated}</updated>
  </entry>
'''

    blog_entries.sort(key=operator.itemgetter('date'))

    rendered_entries = []
    for entry in blog_entries:
        # Posts only record a date, so pretend each one was written at noon
        # at UTC-04:00 (US Eastern daylight time).
        update_date = entry['date'] + 'T12:00:00-04:00'
        # A literal ']]>' would terminate the CDATA section early; split it
        # across two adjacent CDATA sections instead.
        safe_content = entry['content'].replace(']]>', ']]]]><![CDATA[>')
        rendered_entries.append(entry_template.format(
            url=entry['url'],
            title=entry['title'],
            updated=update_date,
            content=safe_content))

    now = datetime.datetime.now(datetime.timezone.utc)
    last_update = now.isoformat(timespec='seconds')
    atom_feed = atom_template.format(
        last_update=last_update,
        entries=''.join(rendered_entries))

    atom_filename = os.path.join(output_directory, 'feed.atom')
    print_file('', atom_filename)
    with open(atom_filename, 'w', encoding='utf-8') as atom_file:
        atom_file.write(atom_feed)
def copy_site():
    """Copy the built site into deploy_directory with a shell `cp -r`.

    The command is handed to os.system(), so the 'output/*' glob and the
    leading '~' of deploy_directory are left for the shell to expand. The
    command's exit status is not checked.
    """
    copy_command = 'cp -r output/* {}'.format(deploy_directory)
    os.system(copy_command)
def deploy_site():
    """Copy the site into the deploy checkout, then commit and push it.

    After copy_site(), the process changes its working directory to the
    expanded deploy_directory and runs `git add`, `git commit` and
    `git push` in that order. Each command runs regardless of whether the
    previous one succeeded, and the working directory is not restored.
    """
    copy_site()
    repo_dir = os.path.expanduser(deploy_directory)
    os.chdir(repo_dir)
    git_commands = (
        'git add .',
        'git commit -m "automated update from build.py"',
        'git push',
    )
    for git_command in git_commands:
        os.system(git_command)
def main():
    """Parse command-line flags and run the requested build steps.

    Flags:
      --clean   wipe output_directory before building.
      --fast    copy only style.css instead of all static files.
      --copy    copy the result into the www-home checkout.
      --deploy  copy, commit and push the www-home checkout; refused when
                combined with --fast.

    Markdown rendering, the sitemap and the Atom feed are always rebuilt.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--clean', action='store_true',
        help='wipe the output directory before running')
    parser.add_argument(
        '--fast', action='store_true',
        help='only rebuild content files')
    parser.add_argument(
        '--copy', action='store_true',
        help='copy output files to www-home git repo')
    parser.add_argument(
        '--deploy', action='store_true',
        help='deploy the site by pushing the www-home git repo to production')
    args = parser.parse_args()

    if args.clean:
        try:
            shutil.rmtree(output_directory)
        except FileNotFoundError:
            # Nothing to wipe, e.g. the very first build; that is fine.
            pass
    os.makedirs(output_directory, exist_ok=True)

    if args.fast:
        copy_style_css()
    else:
        copy_static_files()
    process_markdown_files()
    make_sitemap()
    make_atom_feed()

    # deploy_site() performs the copy itself, so don't copy twice.
    if args.copy and not args.deploy:
        copy_site()

    if args.deploy:
        if args.fast:
            print('cowardly refusing to deploy a site that was built with --fast')
        else:
            deploy_site()
# Run the build only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|