Since last April I’ve been posting collections of links to Netbehaviour. These are links that I’ve found during my web browsing that are on the subject of art, technology and society. I try to arrange them to create associations or narratives wherever possible.
I’ve written a script to convert a calendar year’s worth of links from emails to an HTML page for browsing.
Here it is:
#!/usr/bin/env python3
# Copyright 2012 Rhea Myers <[email protected]>
# Licenced GPLv3 or later

"""Convert a calendar year's worth of '[NetBehaviour] Links' emails from an
mbox file into a single HTML page for browsing.

Links (lines containing '://') become anchors; everything else is treated as
commentary, HTML-escaped, and joined across continuation lines."""

################################################################################
# Imports
################################################################################

import email
import email.utils
import html
import mailbox
import re
import sys
import time

################################################################################
# Configuration
################################################################################

LINKS_YEAR = "2011"
MAILBOX_PATH = "/path/to/mailbox/2011"

# Mailman appends the list footer after a long run of underscores.
FOOTER_MARKER = '_______________________'
# Crude tag-stripper used on text/html parts when no text/plain part exists.
TAG_RE = re.compile(r'<.*?>')


def links_messages(mailbox_path, links_year):
    """Return the '[NetBehaviour] Links' messages for links_year from the
    mbox at mailbox_path, sorted by date."""
    messages = [message for message in mailbox.mbox(mailbox_path).values()
                if message['subject']
                and message['subject'].startswith('[NetBehaviour] Links')
                and links_year in message['date']]
    # Sort messages by parsed date, as they may have been filed out of order.
    messages.sort(key=lambda m: time.mktime(email.utils.parsedate(m['Date'])))
    return messages


def message_body(message):
    """Return the message's plain-text body with the list footer and
    surrounding whitespace stripped, or None if no text part can be decoded.

    Prefers a text/plain part; falls back to a tag-stripped text/html part,
    but keeps walking in case a text/plain part appears later."""
    body = None
    for part in message.walk():
        payload = part.get_payload(decode=True)
        if payload is None:
            # Multipart container or undecodable part; keep looking.
            continue
        text = payload.decode(part.get_content_charset() or 'utf-8',
                              errors='replace')
        content_type = part.get_content_type()
        if content_type == 'text/plain':
            body = text
            break
        elif content_type == 'text/html':
            # Strip html tags to give plain text; keep trying to find
            # a text/plain part.
            body = TAG_RE.sub('', text)
    if body is None:
        return None
    # Strip the mailing-list footer, then regularize leading and
    # trailing whitespace.
    return body.split(FOOTER_MARKER)[0].strip()


def format_body(body):
    """Return a list of HTML lines for one message body.

    Lines containing '://' become anchors set off by <br /> runs; non-blank
    lines are commentary, escaped and appended to the previous commentary
    line so that wrapped paragraphs read as one."""
    lines = []
    # Track whether the last line was commentary (vs. links/whitespace).
    last_line_was_commentary = False
    for line in body.split('\n'):
        stripped = line.strip()
        if '://' in stripped:
            # Escape the URL too: a bare '&' in a query string would
            # otherwise produce invalid HTML.
            url = html.escape(stripped, quote=True)
            lines.append('<br /><br /><br />')
            lines.append('<a href="%s">%s</a>' % (url, url))
            lines.append('<br /><br /><br />')
            last_line_was_commentary = False
        elif stripped:
            if last_line_was_commentary:
                # Join multi-line commentary into a single line.
                lines[-1] += ' ' + html.escape(stripped)
            else:
                lines.append(html.escape(stripped))
            last_line_was_commentary = True
        else:
            # Blank line ends the current commentary paragraph.
            last_line_was_commentary = False
    return lines


def render_page(messages, links_year):
    """Return the complete HTML page for messages as a single string.

    Messages whose body cannot be extracted are reported on stderr and
    skipped (best-effort, matching the original behaviour)."""
    parts = [
        '<html><head><title>Links For %s</title></head><body>' % links_year,
        '<h1>Links For %s</h1><hr />' % links_year,
    ]
    for message in messages:
        # Print a YYYY-MM-DD date as the title (unpadded, e.g. 2011-1-5).
        date = email.utils.parsedate(message['Date'])
        parts.append('<h2>%s-%s-%s</h2><br />' % (date[0], date[1], date[2]))
        body = message_body(message)
        if body is None:
            # Explicit check replaces the original's unreachable bare
            # except: str.split never raises, and without this check a
            # bodyless message would reuse the previous message's body.
            print("Can't get body for %s %s"
                  % (message['date'], message['subject']), file=sys.stderr)
            continue
        parts.extend(format_body(body))
        parts.append('<hr />')
    parts.append('Links curated by <a href="/">Rhea Myers.</a><hr />')
    parts.append('</body></html>')
    return '\n'.join(parts)


def main():
    """Read the configured mailbox and print the rendered page to stdout."""
    messages = links_messages(MAILBOX_PATH, LINKS_YEAR)
    print(render_page(messages, LINKS_YEAR))


if __name__ == '__main__':
    main()
And you can download an archive of the links here: links-2011.html.gz
There are a couple of glitches in the file as a result of the ad-hoc nature of the original emails. Finding them is left as an exercise for the reader.