#!/usr/bin/env python3
# Assumes that there's a directory named ~/src/www-home which is a git repo
# that the contents of output/ can be copied to, committed, & pushed to the
# production server.
# TODO: in template.html, add apple touch icon, maybe other favicon sizes.
#
# Requirements:
#   sudo apt install python3-markdown
#   sudo apt install python3-smartypants
#   sudo apt install python3-bs4
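#
# Usage: ./build.py [--clean] [--fast] [--copy] [--deploy]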

import argparse
import datetime
import html
import operator
import os
import re
import shutil
from io import StringIO

import markdown
from bs4 import BeautifulSoup

input_directory = 'content'
static_directory = 'static'
output_directory = 'output'
deploy_directory = '~/src/www-home'

md_extensions = [
    'fenced_code', 'codehilite', 'nl2br', 'toc', 'smarty', 'tables', 'linkify']

blog_entries = []


def print_file(in_file, out_file):
    print('%-62s -> %s' % (in_file, out_file))
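

# Mirrors the static/ directory tree into output/, preserving subdirectories
# and file timestamps.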
def copy_static_files():
    for (dirpath, _, filenames) in os.walk(static_directory):
        for filename in filenames:
            source = os.path.join(dirpath, filename)
            out_path = dirpath.replace(static_directory, '', 1)
            out_path = out_path.lstrip('/')
            dest_dir = os.path.join(output_directory, out_path)
            os.makedirs(dest_dir, exist_ok=True)
            dest = os.path.join(dest_dir, filename)
            print_file(source, dest)
            shutil.copy2(source, dest)
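

# Copies only style.css, for --fast builds that skip the full static copy.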
def copy_style_css():
    style_source = os.path.join(static_directory, 'style.css')
    style_dest = os.path.join(output_directory, 'style.css')
    print_file(style_source, style_dest)
    shutil.copy2(style_source, style_dest)
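

# Extracts the date from a "Posted YYYY-MM-DD" (or "*Posted ...") line;
# returns None for pages that aren't dated blog entries.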
def find_update_date(text):
    match = re.search(r'^\*?Posted (\d{4}-\d{2}-\d{2})', text, re.MULTILINE)
    if not match:
        return None
    return match.group(1)
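

# Builds a plain-text summary for a page: strips the markup from the rendered
# HTML and joins a couple of lines of text that follow the title.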
def find_summary(html_content):
    text = BeautifulSoup(html_content, features='lxml').get_text()
    lines = text.split('\n')
    result = ' '.join(lines[2:4])
    return html.escape(result, quote=False)
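

# Renders every .md file under content/ through template.html, writes the
# result to the mirrored path under output/, and collects dated pages into
# blog_entries for the Atom feed.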
def process_markdown_files():
    with open('template.html') as template_file:
        template = template_file.read()
    for (dirpath, _, filenames) in os.walk(input_directory):
        for filename in filenames:
            markdown_filename = os.path.join(dirpath, filename)
            if not markdown_filename.endswith('.md'):
                continue
            blog_entry = {}
            with open(markdown_filename) as markdown_file:
                text = markdown_file.read()
            # Ensure the file starts with an <h1>; its first line is the title.
            if not text.startswith('# '):
                text = '# ' + text
            match = re.match(r'^(.*?)\n', text)
            if match:
                title = match.group(1).lstrip('# ')
            else:
                title = text
            blog_entry['title'] = html.escape(title, quote=False)
            title += ' | Colin McMillen'
            if markdown_filename == os.path.join(input_directory, 'index.md'):
                title = 'Colin McMillen'
            # Map content/foo/bar.md to output/foo/bar.html; the page URL is
            # the same path without the leading output/.
            out_filename = os.path.basename(markdown_filename).replace(
                '.md', '.html')
            out_dirpath = os.path.join(output_directory, dirpath)
            out_dirpath = out_dirpath.replace('/content', '', 1)
            out_fullpath = os.path.join(out_dirpath, out_filename)
            page_url = out_fullpath.replace('output/', '', 1)
            if page_url.endswith('index.html'):  # strip off index.html
                page_url = page_url[:-len('index.html')]
            html_content = markdown.markdown(
                text, extensions=md_extensions, output_format='html5')
            summary = find_summary(html_content)
            output = template.format(
                title=title, content=html_content, page_url=page_url,
                summary=summary)
            update_date = find_update_date(text)
            # Only dated pages become entries in the Atom feed.
            if update_date:
                blog_entry['url'] = 'https://www.mcmillen.dev/' + page_url
                blog_entry['date'] = update_date
                blog_entry['summary'] = summary
                blog_entry['content'] = html_content
                blog_entries.append(blog_entry)
            os.makedirs(out_dirpath, exist_ok=True)
            print_file(markdown_filename, out_fullpath)
            with open(out_fullpath, 'w') as out_file:
                out_file.write(output)
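

# Generates output/sitemap.txt: every .html/.pdf page, minus Google site
# verification files and drafts, rewritten as full URLs.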
def make_sitemap():
    sitemap_command = ' '.join("""
        find output -regextype posix-extended -regex '.*[.](html|pdf)$' |
        grep -v ^output/google |
        grep -v ^output/drafts |
        perl -pe 's|output|https://www.mcmillen.dev|'
        > output/sitemap.txt""".split('\n'))
    print_file('', 'output/sitemap.txt')
    os.system(sitemap_command)
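

# Writes output/feed.atom: an Atom feed of all dated blog entries, with the
# full post HTML embedded as CDATA.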
def make_atom_feed():
    atom_template = '''<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Colin McMillen's Blog</title>
<link href="https://www.mcmillen.dev"/>
<link rel="self" href="https://www.mcmillen.dev/feed.atom"/>
<updated>{last_update}</updated>
<author>
<name>Colin McMillen</name>
</author>
<id>https://www.mcmillen.dev/</id>
{entries}
</feed>
'''

    entry_template = '''
<entry>
<title>{title}</title>
<id>{url}</id>
<link rel="alternate" href="{url}"/>
<content type="html">
<![CDATA[
{content}
]]>
</content>
<updated>{updated}</updated>
</entry>
'''

    blog_entries.sort(key=operator.itemgetter('date'))
    entries_io = StringIO()
    for entry in blog_entries:
        # We lie and pretend that all entries were written at noon EDT (UTC-4).
        update_date = entry['date'] + 'T12:00:00-04:00'
        entries_io.write(entry_template.format(
            url=entry['url'],
            title=entry['title'],
            updated=update_date,
            content=entry['content']))
    now = datetime.datetime.now()
    last_update = datetime.datetime.isoformat(now, 'T', 'seconds') + '-04:00'
    atom_feed = atom_template.format(
        last_update=last_update,
        entries=entries_io.getvalue())
    entries_io.close()
    atom_filename = os.path.join(output_directory, 'feed.atom')
    print_file('', atom_filename)
    with open(atom_filename, 'w') as atom_file:
        atom_file.write(atom_feed)
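

# Copies the built site into the local deploy repo; deploy_site() then
# commits the result and pushes it to production.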
def copy_site():
    os.system('cp -r output/* %s' % deploy_directory)


def deploy_site():
    copy_site()
    os.chdir(os.path.expanduser(deploy_directory))
    os.system('git add .')
    os.system('git commit -m "automated update from build.py"')
    os.system('git push')
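

# Command-line driver: rebuilds the site into output/, then optionally copies
# or deploys it.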
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--clean', action='store_true',
        help='wipe the output directory before running')
    parser.add_argument(
        '--fast', action='store_true',
        help='only rebuild content files')
    parser.add_argument(
        '--copy', action='store_true',
        help='copy output files to www-home git repo')
    parser.add_argument(
        '--deploy', action='store_true',
        help='deploy the site by pushing the www-home git repo to production')
    args = parser.parse_args()

    if args.clean:
        shutil.rmtree(output_directory)
    os.makedirs(output_directory, exist_ok=True)
    if args.fast:
        copy_style_css()
    else:
        copy_static_files()
    process_markdown_files()
    make_sitemap()
    make_atom_feed()
    if args.copy and not args.deploy:
        copy_site()
    if args.deploy:
        if args.fast:
            print('cowardly refusing to deploy a site that was built with '
                  '--fast')
        else:
            deploy_site()


if __name__ == '__main__':
    main()