Since last April I’ve been posting collections of links to Netbehaviour. These are links that I’ve found during my web browsing that are on the subject of art, technology and society. I try to arrange them to create associations or narratives wherever possible.
I’ve written a script to convert a calendar year’s worth of links from emails to an HTML page for browsing.
Here it is:
#!/usr/bin/env python
# Copyright 2012 Rhea Myers <[email protected]>
# Licenced GPLv3 or later
################################################################################
# Imports
################################################################################
import cgi
import email
import mailbox
import re
import sys
import time
################################################################################
# Configuration
################################################################################
# The calendar year to extract; matched as a substring of each message's
# Date header and used in the generated page title.
links_year = "2011"
# Filesystem path to the mbox file holding the NetBehaviour list emails.
mailbox_path = "/path/to/mailbox/2011"
################################################################################
# The messages
################################################################################
# Collect the "Links" posts for the target year from the mbox.
# Guard against missing Subject *and* missing Date headers: header access
# returns None when absent, and `links_year in None` would raise TypeError.
messages = [message for message in mailbox.mbox(mailbox_path).itervalues()
            if message['subject']
            and message['subject'].startswith('[NetBehaviour] Links')
            and message['date']
            and links_year in message['date']]
# Sort messages by date, as they may have been filed out of order.
# Wasteful as we parse the Date header again later for display.
messages.sort(key=lambda m: time.mktime(email.utils.parsedate(m['Date'])))
################################################################################
# Reformat and print the links with their commentary
################################################################################
print "<html><head><title>Links For %s</title></head><body>" % links_year
print "<h1>Links For %s</h1><hr />" % links_year
for message in messages:
# Keep track of whether the last line was commentary (or links/whitespace)
last_line_was_commentary = False
# Print a YYYY-MM-DD date as the title
date = email.utils.parsedate(message['Date'])
print '<h2>%s-%s-%s</h2><br />' % (date[0], date[1], date[2])
# Email structure is...interesting...
for part in message.walk():
if part.get_content_type() == "text/plain":
body = part.get_payload(decode=True)
break
elif part.get_content_type() == "text/html":
body = part.get_payload(decode=True)
# Strip html tags to give plain text
body = re.sub(r'<.*?>', '', body)
# Keep trying to find text
# Strip footer
try:
body = body.split('_______________________')[0]
except:
print >> sys.stderr, "Can't get body for %s %s" % (message['date'],
message['subject'])
pass
# Regularize leading and trailing whitespace
body = body.strip()
for line in body.split('\n'):
stripped = line.strip()
if '://' in stripped:
print '<br /><br /><br />'
print '<a href="%s">%s</a>' % (stripped, stripped)
print '<br /><br /><br />'
last_line_was_commentary = False
elif stripped != '':
# Join multi-line commentary into single line
if last_line_was_commentary:
print ' ',
print '%s' % cgi.escape(line)
last_line_was_commentary = True
else:
last_line_was_commentary = False
print '<hr />'
print 'Links curated by <a href="/">Rhea Myers.</a><hr />'
print '</body></html>'
And you can download an archive of the links here: links-2011.html.gz
There are a couple of glitches in the file as a result of the ad-hoc nature of the original emails. Finding them is left as an exercise for the reader.