www-builder/build.py


								#!/usr/bin/env python3


								# Assumes that there's a directory named ~/src/www-home which is a git repo

								# that the contents of output/ can be copied to, committed, & pushed to the

								# production server.


								# TODO: in template.html, add apple touch icon, maybe other favicon sizes.


								# Requirements:

								# sudo apt install python3-markdown

								# sudo apt install python3-smartypants

								# sudo apt install python3-bs4


								import argparse

								from bs4 import BeautifulSoup

								import datetime

								import glob

								import html

								from io import StringIO

								import markdown

								import operator

								import os

								import re

								import shutil


								# Directory layout used by the build.
								input_directory = 'content'            # walked for .md pages
								static_directory = 'static'            # walked for files that are copied as-is
								output_directory = 'output'            # receives the generated site
								deploy_directory = '~/src/www-home'    # git checkout the output is copied into

								# Extensions handed to markdown.markdown() for every rendered page.
								md_extensions = [
								    'fenced_code',
								    'codehilite',
								    'nl2br',
								    'toc',
								    'smarty',
								    'tables',
								    'linkify',
								]

								# Filled in by process_markdown_files(); read when building the feed and the
								# blog index.
								blog_entries = []


								def print_file(in_file, out_file):
								  """Print a progress line: in_file left-justified to 62 columns, ' -> ', out_file."""
								  progress_line = '%-62s -> %s' % (in_file, out_file)
								  print(progress_line)


								def copy_static_files():
								  """Copy every file found under static_directory into output_directory.

								  The sub-directory structure below static_directory is kept. Destination
								  directories are created on demand, and each copy is reported through
								  print_file().
								  """
								  for (src_dir, _, names) in os.walk(static_directory):
								    # Remove the leading static_directory component (and the slash left
								    # behind) so the remainder can be re-rooted under output_directory.
								    relative_dir = src_dir.replace(static_directory, '', 1).lstrip('/')
								    target_dir = os.path.join(output_directory, relative_dir)
								    for name in names:
								      # Only create a directory once there is a file to put in it.
								      os.makedirs(target_dir, exist_ok=True)
								      src_file = os.path.join(src_dir, name)
								      target_file = os.path.join(target_dir, name)
								      print_file(src_file, target_file)
								      shutil.copy2(src_file, target_file)


								def copy_style_css():
								  """Copy just style.css from static_directory to output_directory."""
								  css_name = 'style.css'
								  source = os.path.join(static_directory, css_name)
								  dest = os.path.join(output_directory, css_name)
								  print_file(source, dest)
								  shutil.copy2(source, dest)


								def find_update_date(text):
								  """Return the date of the first 'Posted YYYY-MM-DD' marker in text, or None.

								  The marker must sit at the start of a line and may be preceded by a single
								  '*'. The date is returned as the matched 'YYYY-MM-DD' string.
								  """
								  posted = re.search(r'^\*?Posted (\d{4}-\d{2}-\d{2})', text, re.MULTILINE)
								  return posted.group(1) if posted else None


								def find_summary(html_content):
								  """Return a short, HTML-escaped plain-text summary of a rendered page.

								  The markup is stripped with BeautifulSoup and the text is split on newlines;
								  the lines at index 2 and 3 are joined with a space. '&', '<' and '>' are
								  escaped; quotes are left alone.
								  """
								  soup = BeautifulSoup(html_content, features='lxml')
								  text_lines = soup.get_text().split('\n')
								  # NOTE(review): presumably indexes 0-1 hold the title and the "Posted" line
								  # -- verify against the actual page layout.
								  summary = ' '.join(text_lines[2:4])
								  return html.escape(summary, quote=False)


								def process_markdown_files():
								  """Render every .md file under input_directory into output_directory.

								  Each page is converted with markdown, wrapped in template.html (which is
								  formatted with the fields title, content, page_url and summary) and written
								  to the matching path below output_directory with an .html extension.

								  Pages that contain a 'Posted YYYY-MM-DD' marker are also appended to the
								  module-level blog_entries list with the keys title, url, date, summary and
								  content.
								  """
								  with open('template.html') as template_file:
								    template = template_file.read()

								  for (dirpath, _, filenames) in os.walk(input_directory):
								    for filename in filenames:
								      if not filename.endswith('.md'):
								        continue
								      markdown_filename = os.path.join(dirpath, filename)

								      with open(markdown_filename) as markdown_file:
								        text = markdown_file.read()

								      # Make sure the first line is rendered as the page's top-level heading.
								      if not text.startswith('# '):
								        text = '# ' + text

								      page_title = _extract_title(text)
								      blog_entry = {'title': html.escape(page_title, quote=False)}

								      # The site's front page carries the bare site name as its title.
								      if markdown_filename == os.path.join(input_directory, 'index.md'):
								        title = 'Colin McMillen'
								      else:
								        title = page_title + ' | Colin McMillen'

								      # Swap only the real extension; other '.md' substrings in the name stay.
								      out_filename = os.path.splitext(filename)[0] + '.html'

								      # Re-root the directory from input_directory to output_directory.
								      relative_dir = os.path.relpath(dirpath, input_directory)
								      out_dirpath = os.path.normpath(
								          os.path.join(output_directory, relative_dir))
								      out_fullpath = os.path.join(out_dirpath, out_filename)

								      page_url = os.path.relpath(out_fullpath, output_directory)
								      page_url = page_url.replace(os.sep, '/')
								      # Directory indexes are addressed by their directory URL. Compare the
								      # whole file name so e.g. 'myindex.html' is left untouched.
								      if out_filename == 'index.html':
								        page_url = page_url[:-len('index.html')]

								      html_content = markdown.markdown(
								          text, extensions=md_extensions, output_format='html5')
								      summary = find_summary(html_content)
								      output = template.format(
								          title=title, content=html_content, page_url=page_url, summary=summary)

								      # Only pages with a "Posted" date count as blog entries.
								      update_date = find_update_date(text)
								      if update_date:
								        blog_entry['url'] = 'https://www.mcmillen.dev/' + page_url
								        blog_entry['date'] = update_date
								        blog_entry['summary'] = summary
								        blog_entry['content'] = html_content
								        blog_entries.append(blog_entry)

								      os.makedirs(out_dirpath, exist_ok=True)
								      print_file(markdown_filename, out_fullpath)
								      with open(out_fullpath, 'w') as out_file:
								        out_file.write(output)


								def _extract_title(text):
								  """Return the first line of text without its leading '#'/space heading marker."""
								  # Splitting (rather than matching up to a newline) also handles a file that
								  # consists of a single line with no trailing newline.
								  first_line = text.split('\n', 1)[0]
								  return first_line.lstrip('# ')


								def make_sitemap():
								  """Write output/sitemap.txt: one public URL per line for each page of the site.

								  Every .html and .pdf file below output/ is listed, except paths that begin
								  with output/google or output/drafts. The leading 'output' of each path is
								  replaced by https://www.mcmillen.dev. The list is sorted so repeated builds
								  produce the same file.
								  """
								  site_root = 'output'
								  base_url = 'https://www.mcmillen.dev'
								  excluded_prefixes = ('output/google', 'output/drafts')
								  sitemap_filename = 'output/sitemap.txt'

								  urls = []
								  for (dirpath, _, filenames) in os.walk(site_root):
								    for filename in filenames:
								      # Compare real extensions, so that a name which merely ends in
								      # 'html' or 'pdf' without the dot is not listed.
								      if not filename.endswith(('.html', '.pdf')):
								        continue
								      path = os.path.join(dirpath, filename).replace(os.sep, '/')
								      if path.startswith(excluded_prefixes):
								        continue
								      urls.append(base_url + path[len(site_root):])
								  urls.sort()

								  print_file('', sitemap_filename)
								  with open(sitemap_filename, 'w') as sitemap_file:
								    sitemap_file.writelines(url + '\n' for url in urls)


								def make_atom_feed():
								  """Write feed.atom in output_directory with one Atom entry per blog entry.

								  Sorts the module-level blog_entries list in place by date (oldest first) and
								  renders each entry's title, url, date and HTML content. The feed's own
								  <updated> value is the time of the build.
								  """
								  # The template literals below are reproduced exactly, whitespace included,
								  # so the text of the emitted feed does not change.
								  atom_template = '''<?xml version="1.0" encoding="utf-8"?>

								<feed xmlns="http://www.w3.org/2005/Atom">


								  <title>Colin McMillen's Blog</title>

								  <link href="https://www.mcmillen.dev"/>

								  <link rel="self" href="https://www.mcmillen.dev/feed.atom"/>

								  <updated>{last_update}</updated>

								  <author>

								    <name>Colin McMillen</name>

								  </author>

								  <id>https://www.mcmillen.dev/</id>


								  {entries}

								</feed>

								'''

								  entry_template = '''

								  <entry>

								    <title>{title}</title>

								    <id>{url}</id>

								    <link rel="alternate" href="{url}"/>

								    <content type="html">

								    <![CDATA[

								      {content}

								    ]]>

								    </content>

								    <updated>{updated}</updated>

								  </entry>

								'''

								  # Every timestamp in the feed is expressed at a fixed UTC-04:00 offset.
								  feed_timezone = datetime.timezone(datetime.timedelta(hours=-4))

								  blog_entries.sort(key=operator.itemgetter('date'))

								  rendered_entries = []
								  for entry in blog_entries:
								    # Only the date is known, so pretend every entry was written at noon
								    # (UTC-04:00).
								    updated = entry['date'] + 'T12:00:00-04:00'
								    # A literal ']]>' inside the HTML would end the CDATA section early;
								    # split it across two adjacent CDATA sections instead.
								    content = entry['content'].replace(']]>', ']]]]><![CDATA[>')
								    rendered_entries.append(entry_template.format(
								        url=entry['url'],
								        title=entry['title'],
								        updated=updated,
								        content=content))

								  # Take "now" in the feed's own offset so the '-04:00' suffix describes the
								  # real instant, whatever the build machine's local timezone is.
								  now = datetime.datetime.now(feed_timezone)
								  last_update = now.isoformat('T', 'seconds')
								  atom_feed = atom_template.format(
								      last_update=last_update,
								      entries=''.join(rendered_entries))

								  atom_filename = os.path.join(output_directory, 'feed.atom')
								  print_file('', atom_filename)
								  with open(atom_filename, 'w') as atom_file:
								    atom_file.write(atom_feed)


								def make_blog_index():
								  """Write blog/index.html in output_directory: a newest-first list of blog entries.

								  Sorts the module-level blog_entries list in place by date (newest first),
								  builds a markdown page with a heading, link, date and summary per entry,
								  renders it with markdown and wraps it in template.html.
								  """
								  blog_entries.sort(key=operator.itemgetter('date'), reverse=True)

								  parts = [
								      "# mcmillen's blog\n\n",
								      "Don't forget to follow with your favorite ",
								      "[RSS reader](https://mcmillen.dev/feed.atom)!\n\n",
								  ]
								  for blog_entry in blog_entries:
								    # Entries dated 2007 are left out of the index.
								    if blog_entry['date'].startswith('2007'):
								      continue
								    # Drop the absolute /blog/ prefix so the links are relative to this page.
								    url = blog_entry['url'].replace('https://www.mcmillen.dev/blog/', '')
								    parts.append('### [%s](%s) (%s)\n\n%s\n\n' % (
								        blog_entry['title'], url, blog_entry['date'], blog_entry['summary']))
								  markdown_text = ''.join(parts)

								  html_content = markdown.markdown(
								      markdown_text, extensions=md_extensions, output_format='html5')

								  with open('template.html') as template_file:
								    template = template_file.read()
								  # NOTE(review): page_url is an absolute URL here, while
								  # process_markdown_files() passes a site-relative path -- confirm which form
								  # template.html expects.
								  output = template.format(
								      title="Colin McMillen's Blog",
								      content=html_content,
								      page_url='https://mcmillen.dev/blog/',
								      summary="Colin McMillen's Blog")

								  # Create the blog directory first so the index can be written even when no
								  # page has been rendered into it yet.
								  index_dir = os.path.join(output_directory, 'blog')
								  os.makedirs(index_dir, exist_ok=True)
								  index_filename = os.path.join(index_dir, 'index.html')
								  with open(index_filename, 'w') as index_file:
								    index_file.write(output)


								def copy_site():
								  """Recursively copy everything under output/ into deploy_directory."""
								  # Run through the shell so that it expands the 'output/*' glob and the
								  # leading '~' of deploy_directory.
								  copy_command = 'cp -r output/* %s' % deploy_directory
								  os.system(copy_command)


								def deploy_site():
								  """Copy the built site into the deploy repo, then git add, commit and push there.

								  Changes the process's working directory to the deploy repo. The exit status
								  of each command is not checked.
								  """
								  copy_site()
								  repo_path = os.path.expanduser(deploy_directory)
								  os.chdir(repo_path)
								  git_commands = (
								      'git add .',
								      'git commit -m "automated update from build.py"',
								      'git push',
								  )
								  for command in git_commands:
								    os.system(command)


								def main():
								  """Parse the command-line flags and run the build, then optionally copy or deploy.

								  Flags:
								    --clean   wipe the output directory before building
								    --fast    copy only style.css instead of all static files
								    --copy    copy the result into the www-home git repo
								    --deploy  copy, commit and push the www-home git repo (refused with --fast)
								  """
								  parser = argparse.ArgumentParser()
								  parser.add_argument(
								      '--clean', action='store_true',
								      help='wipe the output directory before running')
								  parser.add_argument(
								      '--fast', action='store_true',
								      help='only rebuild content files')
								  parser.add_argument(
								      '--copy', action='store_true',
								      help='copy output files to www-home git repo')
								  parser.add_argument(
								      '--deploy', action='store_true',
								      help='deploy the site by pushing the www-home git repo to production')
								  args = parser.parse_args()

								  # Nothing to wipe on a first build; only remove the directory if it exists.
								  if args.clean and os.path.isdir(output_directory):
								    shutil.rmtree(output_directory)
								  os.makedirs(output_directory, exist_ok=True)

								  if args.fast:
								    copy_style_css()
								  else:
								    copy_static_files()
								  process_markdown_files()
								  make_sitemap()
								  make_atom_feed()
								  make_blog_index()

								  # deploy_site() does its own copy, so --copy is only acted on by itself.
								  if args.copy and not args.deploy:
								    copy_site()

								  if args.deploy:
								    if args.fast:
								      print('cowardly refusing to deploy a site that was built with --fast')
								    else:
								      deploy_site()


								# Run the build only when this file is executed as a script, not when it is
								# imported as a module.
								if __name__ == '__main__':

								  main()