@ -8,7 +8,14 @@
# TODO: in template.html, add apple touch icon, maybe other favicon sizes.
# TODO: in template.html, add apple touch icon, maybe other favicon sizes.
# TODO: local mirrors of all papers in publications.html
# TODO: local mirrors of all papers in publications.html
# Requirements:
# sudo apt install python3-markdown
# sudo apt install python3-smartypants
# sudo apt install python3-bs4
import argparse
import argparse
from bs4 import BeautifulSoup
import glob
import glob
import html
import html
from io import StringIO
from io import StringIO
@ -48,12 +55,19 @@ def copy_static_files():
def find_update_date(text):
    """Return the date from the first "Posted YYYY-MM-DD" line in *text*.

    A line qualifies when it begins with ``Posted``, a single space and a
    ``YYYY-MM-DD`` date. One leading ``*`` (Markdown emphasis) is tolerated.
    Anything after the date on that line is ignored.

    Args:
        text: The Markdown source to scan.

    Returns:
        The date exactly as written (e.g. ``'2020-07-04'``), or None when no
        line matches.
    """
    # re.MULTILINE makes ^ anchor at the start of every line, not just the
    # start of the whole document.
    match = re.search(r'^\*?Posted (\d{4}-\d{2}-\d{2})', text, re.MULTILINE)
    if match is None:
        return None
    return match.group(1)
def find_summary(html_content):
    """Return a short, HTML-escaped plain-text summary of rendered HTML.

    The markup is stripped with BeautifulSoup (lxml parser), the resulting
    text is split into lines, and the lines at index 2 and 3 are joined with
    a single space.

    Args:
        html_content: Rendered HTML for one page.

    Returns:
        The summary text with ``&``, ``<`` and ``>`` escaped. Quotes are left
        untouched (``quote=False``). Empty if the text has fewer than three
        lines.
    """
    text = BeautifulSoup(html_content, features='lxml').get_text()
    lines = text.split('\n')
    # NOTE(review): indices 2-3 presumably skip the title and date lines at
    # the top of each post -- confirm against the page layout.
    summary = ' '.join(lines[2:4])
    return html.escape(summary, quote=False)
def process_markdown_files ( ) :
def process_markdown_files ( ) :
template = open ( ' template.html ' ) . read ( )
template = open ( ' template.html ' ) . read ( )
for ( dirpath , _ , filenames ) in os . walk ( input_directory ) :
for ( dirpath , _ , filenames ) in os . walk ( input_directory ) :
@ -77,7 +91,7 @@ def process_markdown_files():
else :
else :
title = text
title = text
blog_entry [ ' title ' ] = html . escape ( title )
blog_entry [ ' title ' ] = html . escape ( title , quote = False )
title + = ' | Colin McMillen '
title + = ' | Colin McMillen '
if markdown_filename == os . path . join ( input_directory , ' index.md ' ) :
if markdown_filename == os . path . join ( input_directory , ' index.md ' ) :
@ -92,17 +106,18 @@ def process_markdown_files():
if page_url . endswith ( ' index.html ' ) : # strip off index.html
if page_url . endswith ( ' index.html ' ) : # strip off index.html
page_url = page_url [ : - len ( ' index.html ' ) ]
page_url = page_url [ : - len ( ' index.html ' ) ]
html_content = markdown . markdown (
text , extensions = md_extensions , output_format = ' html5 ' )
output = template . format (
title = title , content = html_content , page_url = page_url )
update_date = find_update_date ( text )
update_date = find_update_date ( text )
if update_date :
if update_date :
blog_entry [ ' url ' ] = ' https://www.mcmillen.dev/ ' + page_url
blog_entry [ ' url ' ] = ' https://www.mcmillen.dev/ ' + page_url
blog_entry [ ' date ' ] = update_date
blog_entry [ ' date ' ] = update_date
blog_entry [ ' summary ' ] = find_summary ( html_content )
blog_entries . append ( blog_entry )
blog_entries . append ( blog_entry )
html_content = markdown . markdown (
text , extensions = md_extensions , output_format = ' html5 ' )
output = template . format (
title = title , content = html_content , page_url = page_url )
os . makedirs ( out_dirpath , exist_ok = True )
os . makedirs ( out_dirpath , exist_ok = True )
print_file ( markdown_filename , out_fullpath )
print_file ( markdown_filename , out_fullpath )
out_file = open ( out_fullpath , ' w ' )
out_file = open ( out_fullpath , ' w ' )
@ -143,8 +158,9 @@ def make_atom_feed():
< title > { title } < / title >
< title > { title } < / title >
< id > { url } < / id >
< id > { url } < / id >
< link rel = " alternate " href = " {url} " / >
< link rel = " alternate " href = " {url} " / >
< content type = " text/html " src = " {url} " / >
< updated > { updated } < / updated >
< updated > { updated } < / updated >
< summary > { summary } < / summary >
< summary > { summary } ( . . . ) < / summary >
< / entry >
< / entry >
'''
'''
@ -153,14 +169,14 @@ def make_atom_feed():
entries_io = StringIO ( )
entries_io = StringIO ( )
last_update = None
last_update = None
for entry in blog_entries :
for entry in blog_entries :
# We lie and pretend that all entries were written at noon UTC .
update_date = entry [ ' date ' ] + ' T12:00:00+00 :00 '
# We lie and pretend that all entries were written at noon EDT (UTC-04:00).
update_date = entry [ ' date ' ] + ' T12:00:00-04 :00 '
last_update = update_date
last_update = update_date
entries_io . write ( entry_template . format (
entries_io . write ( entry_template . format (
url = entry [ ' url ' ] ,
url = entry [ ' url ' ] ,
title = entry [ ' title ' ] ,
title = entry [ ' title ' ] ,
updated = update_date ,
updated = update_date ,
summary = ' TODO: fill this out. ' ) )
summary = entry [ ' summary ' ] ) )
entries_text = entries_io . getvalue ( )
entries_text = entries_io . getvalue ( )