Better Atom feed summaries and links; use "Posted" instead of "Published" as the date marker.

This commit is contained in:
Colin McMillen 2021-07-01 01:00:33 -04:00
parent bfba1c36d0
commit 13cfc84d8b
4 changed files with 31 additions and 15 deletions

View File

@ -8,7 +8,14 @@
# TODO: in template.html, add apple touch icon, maybe other favicon sizes. # TODO: in template.html, add apple touch icon, maybe other favicon sizes.
# TODO: local mirrors of all papers in publications.html # TODO: local mirrors of all papers in publications.html
# Requirements:
# sudo apt install python3-markdown
# sudo apt install python3-smartypants
# sudo apt install python3-bs4
import argparse import argparse
from bs4 import BeautifulSoup
import glob import glob
import html import html
from io import StringIO from io import StringIO
@ -48,12 +55,19 @@ def copy_static_files():
def find_update_date(text):
  """Return the post date (YYYY-MM-DD) declared in a page's markdown source.

  Matches a line beginning with '*Posted YYYY-MM-DD' (the current
  convention) or the older 'Published: YYYY-MM-DD' marker, so legacy
  posts that were never migrated still get a feed date.

  Args:
    text: the raw markdown source of a page.

  Returns:
    The date string 'YYYY-MM-DD', or None if no date marker is present.
  """
  # MULTILINE so '^' anchors to the start of any line, not just the file.
  match = re.search(
      r'^(?:\*?Posted|Published:?) (\d{4}-\d{2}-\d{2})', text, re.MULTILINE)
  if not match:
    return None
  return match.group(1)
def find_summary(html_content):
  """Derive a short plain-text summary from a page's rendered HTML.

  Strips all markup, then takes the 3rd and 4th text lines (the first
  two are the title and the 'Posted' date line), joined with a space.
  The result is entity-escaped for embedding in the Atom feed; quotes
  are left alone since the summary is element content, not an attribute.
  """
  plain_text = BeautifulSoup(html_content, features='lxml').get_text()
  summary_lines = plain_text.split('\n')[2:4]
  return html.escape(' '.join(summary_lines), quote=False)
def process_markdown_files(): def process_markdown_files():
template = open('template.html').read() template = open('template.html').read()
for (dirpath, _, filenames) in os.walk(input_directory): for (dirpath, _, filenames) in os.walk(input_directory):
@ -77,7 +91,7 @@ def process_markdown_files():
else: else:
title = text title = text
blog_entry['title'] = html.escape(title) blog_entry['title'] = html.escape(title, quote=False)
title += ' | Colin McMillen' title += ' | Colin McMillen'
if markdown_filename == os.path.join(input_directory, 'index.md'): if markdown_filename == os.path.join(input_directory, 'index.md'):
@ -92,17 +106,18 @@ def process_markdown_files():
if page_url.endswith('index.html'): # strip off index.html if page_url.endswith('index.html'): # strip off index.html
page_url = page_url[:-len('index.html')] page_url = page_url[:-len('index.html')]
update_date = find_update_date(text)
if update_date:
blog_entry['url'] = 'https://www.mcmillen.dev/' + page_url
blog_entry['date'] = update_date
blog_entries.append(blog_entry)
html_content = markdown.markdown( html_content = markdown.markdown(
text, extensions=md_extensions, output_format='html5') text, extensions=md_extensions, output_format='html5')
output = template.format( output = template.format(
title=title, content=html_content, page_url=page_url) title=title, content=html_content, page_url=page_url)
update_date = find_update_date(text)
if update_date:
blog_entry['url'] = 'https://www.mcmillen.dev/' + page_url
blog_entry['date'] = update_date
blog_entry['summary'] = find_summary(html_content)
blog_entries.append(blog_entry)
os.makedirs(out_dirpath, exist_ok=True) os.makedirs(out_dirpath, exist_ok=True)
print_file(markdown_filename, out_fullpath) print_file(markdown_filename, out_fullpath)
out_file = open(out_fullpath, 'w') out_file = open(out_fullpath, 'w')
@ -143,8 +158,9 @@ def make_atom_feed():
<title>{title}</title> <title>{title}</title>
<id>{url}</id> <id>{url}</id>
<link rel="alternate" href="{url}"/> <link rel="alternate" href="{url}"/>
<content type="text/html" src="{url}"/>
<updated>{updated}</updated> <updated>{updated}</updated>
<summary>{summary}</summary> <summary>{summary} (...)</summary>
</entry> </entry>
''' '''
@ -153,14 +169,14 @@ def make_atom_feed():
entries_io = StringIO() entries_io = StringIO()
last_update = None last_update = None
for entry in blog_entries: for entry in blog_entries:
# We lie and pretend that all entries were written at noon UTC. # We lie and pretend that all entries were written at noon EDT (UTC-4).
update_date = entry['date'] + 'T12:00:00+00:00' update_date = entry['date'] + 'T12:00:00-04:00'
last_update = update_date last_update = update_date
entries_io.write(entry_template.format( entries_io.write(entry_template.format(
url=entry['url'], url=entry['url'],
title=entry['title'], title=entry['title'],
updated=update_date, updated=update_date,
summary='TODO: fill this out.')) summary=entry['summary']))
entries_text = entries_io.getvalue() entries_text = entries_io.getvalue()

View File

@ -1,6 +1,6 @@
# My first paper in 10 years?! # My first paper in 10 years?!
Published: 2019-04-03. *Posted 2019-04-03.*
It's been nearly two months since my last day at Google, so I guess I should finally make use of this newsletter :) It's been nearly two months since my last day at Google, so I guess I should finally make use of this newsletter :)

View File

@ -1,6 +1,6 @@
# A new year & a sneaky new project # A new year & a sneaky new project
Published: 2020-02-09 *Posted 2020-02-09.*
I can't believe it's here so quickly, but: today marks a year since my last day at Google. That seemed like a good occasion to dust off this newsletter & let you know what I've been up to: making a videogame! I can't believe it's here so quickly, but: today marks a year since my last day at Google. That seemed like a good occasion to dust off this newsletter & let you know what I've been up to: making a videogame!

View File

@ -1,6 +1,6 @@
# 93% of Paint Splatters are Valid Perl Programs # 93% of Paint Splatters are Valid Perl Programs
Published 2019-04-01. *Posted 2019-04-01.*
TLDR: [read the paper](2019.pdf) and [view the gallery of pretty Perl programs](splatters.html). TLDR: [read the paper](2019.pdf) and [view the gallery of pretty Perl programs](splatters.html).