2021-06-28 17:30:52 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2021-06-30 20:22:51 +00:00
|
|
|
# Assumes that there's a directory named ~/src/www-home which is a git repo
|
|
|
|
# that the contents of output/ can be copied to, committed, & pushed to the
|
|
|
|
# production server.
|
2021-06-29 16:13:30 +00:00
|
|
|
|
2021-06-30 16:25:57 +00:00
|
|
|
# TODO: in template.html, add apple touch icon, maybe other favicon sizes.
|
|
|
|
|
2021-07-01 05:00:33 +00:00
|
|
|
# Requirements:
|
|
|
|
# sudo apt install python3-markdown
|
|
|
|
# sudo apt install python3-smartypants
|
|
|
|
# sudo apt install python3-bs4
|
|
|
|
|
|
|
|
|
2021-06-30 20:22:51 +00:00
|
|
|
import argparse
|
2021-07-01 05:00:33 +00:00
|
|
|
from bs4 import BeautifulSoup
|
2021-06-28 17:30:52 +00:00
|
|
|
import glob
|
2021-06-30 22:06:34 +00:00
|
|
|
import html
|
|
|
|
from io import StringIO
|
2021-06-28 17:30:52 +00:00
|
|
|
import markdown
|
2021-06-30 22:06:34 +00:00
|
|
|
import operator
|
2021-06-28 17:30:52 +00:00
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import shutil
|
|
|
|
|
2021-06-30 19:17:28 +00:00
|
|
|
|
2021-06-28 17:30:52 +00:00
|
|
|
# Tree of Markdown sources; walked by process_markdown_files().
input_directory = 'content'
# Tree of files copied verbatim into the output by copy_static_files().
static_directory = 'static'
# Where the generated site (pages, static files, sitemap, feed) is written.
output_directory = 'output'
# Git checkout that copy_site()/deploy_site() copy the built site into.
deploy_directory = '~/src/www-home'

# Extension names handed to markdown.markdown() for every page.
md_extensions = [
    'fenced_code', 'codehilite', 'nl2br', 'toc', 'smarty', 'tables', 'linkify']

# One dict per dated page, appended by process_markdown_files() and read by
# make_atom_feed().
blog_entries = []
|
|
|
|
|
2021-06-28 17:30:52 +00:00
|
|
|
|
|
|
|
def print_file(in_file, out_file):
    """Print one build-log line of the form ``<in_file> -> <out_file>``.

    The input name is left-justified and padded to 62 columns so the arrows
    of consecutive log lines line up.
    """
    log_line = '%-62s -> %s' % (in_file, out_file)
    print(log_line)
|
2021-06-28 17:30:52 +00:00
|
|
|
|
2021-06-30 19:37:21 +00:00
|
|
|
|
|
|
|
def copy_static_files():
    """Mirror every file under ``static_directory`` into ``output_directory``.

    The sub-directory layout is preserved.  Destination directories are
    created on demand (only when they receive a file), and each copy is
    logged through ``print_file`` before it is made with ``shutil.copy2``.
    """
    for walk_dir, _, names in os.walk(static_directory):
        # Path of this directory relative to the static root ('' for the
        # root itself).
        relative_dir = walk_dir.replace(static_directory, '', 1).lstrip('/')
        target_dir = os.path.join(output_directory, relative_dir)
        for name in names:
            src_path = os.path.join(walk_dir, name)
            os.makedirs(target_dir, exist_ok=True)
            dst_path = os.path.join(target_dir, name)
            print_file(src_path, dst_path)
            shutil.copy2(src_path, dst_path)
|
|
|
|
|
|
|
|
|
2021-06-30 22:06:34 +00:00
|
|
|
def find_update_date(text):
    """Return the ``YYYY-MM-DD`` date from a page's "Posted" line, if any.

    A line qualifies when it starts with ``Posted <date>``, optionally
    preceded by a single ``*``.  Returns ``None`` when no line qualifies.
    """
    posted = re.search(r'^\*?Posted (\d{4}-\d{2}-\d{2})', text, re.MULTILINE)
    return posted.group(1) if posted else None
|
|
|
|
|
|
|
|
|
2021-07-01 05:00:33 +00:00
|
|
|
def find_summary(html_content):
    """Build a short plain-text summary from a page's rendered HTML.

    The markup is stripped with BeautifulSoup, the remaining text is split
    into lines, and the third and fourth lines are joined with a space
    (presumably skipping the heading and the line after it -- confirm
    against real pages).  The result is HTML-escaped, leaving quotes as-is.
    """
    soup = BeautifulSoup(html_content, features='lxml')
    text_lines = soup.get_text().split('\n')
    summary = ' '.join(text_lines[2:4])
    return html.escape(summary, quote=False)
|
|
|
|
|
|
|
|
|
2021-06-30 19:37:21 +00:00
|
|
|
def process_markdown_files():
    """Render every ``.md`` file under ``input_directory`` to an HTML page.

    For each Markdown file this:

    * treats the first line as the page heading (prefixing ``# `` when it is
      missing) and derives the page title from it;
    * converts the text with ``markdown.markdown`` using ``md_extensions``;
    * fills ``template.html`` (read from the current directory) with the
      ``title``, ``content`` and ``page_url`` fields;
    * writes the result under ``output_directory``, mirroring the source
      layout, with ``.md`` replaced by ``.html``.

    Pages whose text contains a "Posted YYYY-MM-DD" line (see
    ``find_update_date``) are also appended to the module-level
    ``blog_entries`` list for the Atom feed.
    """
    with open('template.html') as template_file:
        template = template_file.read()

    for (dirpath, _, filenames) in os.walk(input_directory):
        for filename in filenames:
            if not filename.endswith('.md'):
                continue
            markdown_filename = os.path.join(dirpath, filename)

            with open(markdown_filename) as markdown_file:
                text = markdown_file.read()

            # The first line is always rendered as the top-level heading.
            if not text.startswith('# '):
                text = '# ' + text

            # Title = first line without heading markup.  partition() also
            # covers a one-line file with no trailing newline, which would
            # otherwise keep its '# ' prefix.
            first_line = text.partition('\n')[0]
            page_title = first_line.lstrip('# ')

            # The site's front page gets the bare site name as its title.
            if markdown_filename == os.path.join(input_directory, 'index.md'):
                title = 'Colin McMillen'
            else:
                title = page_title + ' | Colin McMillen'

            # Swap only the trailing extension; a '.md' elsewhere in the
            # name must be left alone.
            out_filename = filename[:-len('.md')] + '.html'

            # output/<input_directory>/sub -> output/sub
            out_dirpath = os.path.join(output_directory, dirpath)
            out_dirpath = out_dirpath.replace('/' + input_directory, '', 1)
            out_fullpath = os.path.join(out_dirpath, out_filename)

            # Site-relative URL; directory indexes are linked without the
            # trailing 'index.html'.
            page_url = out_fullpath.replace(output_directory + '/', '', 1)
            if page_url.endswith('index.html'):
                page_url = page_url[:-len('index.html')]

            html_content = markdown.markdown(
                text, extensions=md_extensions, output_format='html5')
            output = template.format(
                title=title, content=html_content, page_url=page_url)

            # Only dated pages become feed entries.
            update_date = find_update_date(text)
            if update_date:
                blog_entries.append({
                    'title': html.escape(page_title, quote=False),
                    'url': 'https://www.mcmillen.dev/' + page_url,
                    'date': update_date,
                    'summary': find_summary(html_content),
                    'content': html_content,
                })

            os.makedirs(out_dirpath, exist_ok=True)
            print_file(markdown_filename, out_fullpath)
            with open(out_fullpath, 'w') as out_file:
                out_file.write(output)
|
2021-06-28 17:30:52 +00:00
|
|
|
|
2021-06-30 19:17:28 +00:00
|
|
|
|
|
|
|
def make_sitemap():
    """Write ``output/sitemap.txt`` listing the site's HTML and PDF pages.

    Runs a shell pipeline that finds every path under ``output`` ending in
    ``.html`` or ``.pdf``, drops paths beginning with ``output/google`` or
    ``output/drafts``, and rewrites the leading ``output`` to the site's
    base URL.
    """
    pipeline = [
        # The dot before the extension is escaped so that only real
        # '.html' / '.pdf' suffixes match.
        r"find output -regextype posix-extended -regex '.*\.(html|pdf)$'",
        'grep -v ^output/google',
        'grep -v ^output/drafts',
        "perl -pe 's|output|https://www.mcmillen.dev|'",
    ]
    sitemap_command = ' | '.join(pipeline) + ' > output/sitemap.txt'
    print_file('', 'output/sitemap.txt')
    os.system(sitemap_command)
|
|
|
|
|
|
|
|
|
2021-06-30 22:06:34 +00:00
|
|
|
def make_atom_feed():
    """Write ``feed.atom`` in ``output_directory`` from ``blog_entries``.

    Entries are sorted in place by their ``date`` string (oldest first) and
    each is rendered with its title, URL, full HTML content (inside a CDATA
    section) and an ``updated`` timestamp.  The feed-level ``updated`` value
    is the timestamp of the newest entry.

    Note: when ``blog_entries`` is empty the feed is still written and its
    ``<updated>`` element contains the text ``None``.
    """
    atom_template = '''<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">

<title>Colin McMillen's Blog</title>
<link href="https://www.mcmillen.dev"/>
<link rel="self" href="https://www.mcmillen.dev/feed.atom"/>
<updated>{last_update}</updated>
<author>
<name>Colin McMillen</name>
</author>
<id>https://www.mcmillen.dev/</id>

{entries}
</feed>
'''

    entry_template = '''
<entry>
<title>{title}</title>
<id>{url}</id>
<link rel="alternate" href="{url}"/>
<content type="html">
<![CDATA[
{content}
]]>
</content>
<updated>{updated}</updated>
</entry>
'''

    blog_entries.sort(key=operator.itemgetter('date'))

    rendered_entries = []
    last_update = None
    for entry in blog_entries:
        # Posts only carry a date, so every entry is stamped as noon at
        # UTC-04:00 (US Eastern daylight time).
        update_date = entry['date'] + 'T12:00:00-04:00'
        # Entries are sorted oldest-first, so the final value is the newest.
        last_update = update_date
        # A literal "]]>" in the page would end the CDATA section early;
        # split it across two adjacent CDATA sections instead.
        content = entry['content'].replace(']]>', ']]]]><![CDATA[>')
        rendered_entries.append(entry_template.format(
            url=entry['url'],
            title=entry['title'],
            updated=update_date,
            content=content))

    atom_feed = atom_template.format(
        last_update=last_update,
        entries=''.join(rendered_entries))

    atom_filename = os.path.join(output_directory, 'feed.atom')
    print_file('', atom_filename)
    with open(atom_filename, 'w') as atom_file:
        atom_file.write(atom_feed)
|
|
|
|
|
|
|
|
|
|
|
|
def copy_site():
    """Recursively copy everything in ``output/`` into ``deploy_directory``.

    The copy is done by a shell ``cp -r`` command, so the ``~`` in
    ``deploy_directory`` is expanded by the shell.
    """
    copy_command = 'cp -r output/* %s' % deploy_directory
    os.system(copy_command)
|
2021-06-30 19:17:28 +00:00
|
|
|
|
|
|
|
|
2021-06-30 20:22:51 +00:00
|
|
|
def deploy_site():
    """Copy the built site into the deploy repo, then commit and push it.

    After ``copy_site()`` the process changes its working directory to
    ``deploy_directory`` and runs ``git add``, ``git commit`` and
    ``git push`` there, in that order.  Exit statuses are not checked.
    """
    copy_site()
    repo_path = os.path.expanduser(deploy_directory)
    os.chdir(repo_path)
    git_commands = (
        'git add .',
        'git commit -m "automated update from build.py"',
        'git push',
    )
    for git_command in git_commands:
        os.system(git_command)
|
|
|
|
|
|
|
|
|
2021-06-30 19:17:28 +00:00
|
|
|
def main():
    """Parse command-line flags and build (and optionally publish) the site.

    Flags:
      --clean   wipe the output directory before building
      --fast    skip copying static files; only rebuild content
      --copy    copy the built site into the www-home git repo
      --deploy  copy, commit and push the www-home repo; refused when
                combined with --fast
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--clean', action='store_true',
        help='wipe the output directory before running')
    parser.add_argument(
        '--fast', action='store_true',
        help='only rebuild content files')
    parser.add_argument(
        '--copy', action='store_true',
        help='copy output files to www-home git repo')
    parser.add_argument(
        '--deploy', action='store_true',
        help='deploy the site by pushing the www-home git repo to production')
    args = parser.parse_args()

    if args.clean:
        try:
            shutil.rmtree(output_directory)
        except FileNotFoundError:
            # Nothing to wipe yet (e.g. first build in a fresh checkout).
            pass
    os.makedirs(output_directory, exist_ok=True)
    if not args.fast:
        copy_static_files()
    process_markdown_files()
    make_sitemap()
    make_atom_feed()

    # deploy_site() performs its own copy, so avoid copying twice.
    if args.copy and not args.deploy:
        copy_site()

    if args.deploy:
        if args.fast:
            print('cowardly refusing to deploy a site that was built with --fast')
        else:
            deploy_site()
|
2021-06-30 20:22:51 +00:00
|
|
|
|
2021-06-30 19:17:28 +00:00
|
|
|
|
|
|
|
# Run the build only when this file is executed as a script.
if __name__ == '__main__':
    main()
|