User:Matt Crypto/aotd-update.py

A script to create an "Article of the Day" box

#!/usr/bin/python
# 
# Script to update an "article of the day" box on Wikipedia.  Cobbled
# together by [[User:Matt Crypto]] (matt_crypto@yahoo.co.uk) and
# released into the public domain.
#
# OPTIONS:

# A file containing a list of articles.  Every [[...]] wiki link found in
# this file is taken as one article name.
articleListFile = "./articles.dat"

# A file to output the table.  It is opened for writing on every run, so
# any previous contents are replaced.
outputTable = "./table.txt"

# How many rows to include before today's article (0 or more)
previousDays = 2

# How many rows to include after (0 or more)
nextDays = 2 

# Scramble the order of the articles?  The shuffle is seeded with a fixed
# value (42) so that the scrambled order is intended to be repeatable.
scrambleOrder = True

# Current article pointer: a file holding the index (into the article
# list) of today's article.  The script rewrites it with the next index
# at the end of each run.
articleIndexFile = "./index.dat"

# (Set to "False" if you're generating before midnight for the following day)
# The flag is added to each row's day offset, so True moves every date in
# the table one day later than the current UTC date.
# NOTE(review): that looks inverted relative to the sentence above --
# confirm the intended meaning before relying on it.
afterMidnight = True

# Table of scores: article title -> score on a 0-10 scale.  The score
# decides how many of the ten cells in a row's MRR bar are coloured red;
# an article that is not listed here is treated as having score 0.
# NOTE(review): the "Vigenère cipher" key is non-ASCII; under Python 2 the
# file needs a PEP 263 source-encoding declaration (none is visible
# here) -- confirm.
scoreBase = {
    "Beale ciphers": 6,
    "Caesar cipher": 8,
    "Certificate authority": 6,
    "Cryptanalysis of the Enigma": 3,
    "Data Encryption Standard": 10,
    "El Gamal": 2,
    "Enigma machine": 10,
    "Frequency analysis": 8,
    "International Data Encryption Algorithm": 6,
    "Plaintext": 7,
    "Public key infrastructure": 5,
    "RC5": 5,
    "Rotor machine": 6,
    "ROT13": 10,
    "Snake oil (cryptography)": 4,
    "Substitution cipher": 6,
    "Timeline of cryptography": 0,
    "Vigenère cipher": 8,
}

# END OF OPTIONS

from time import time, gmtime, strftime
from random import shuffle, seed
from urllib import *
import urllib
import re

# Length of one day; the row dates are stepped by multiples of this.
oneDayInSeconds = 60 * 60 * 24

# strftime pattern for the date column: full weekday name, day of the
# month, full month name.
timeFmt = "%A, %d %B"

# Regexp for scraping page names: matches a [[...]] wiki link and captures
# the text between the brackets.  The match is non-greedy, so several
# links on one line are captured separately.
r1 = re.compile(r'\[\[(.*?)\]\]', re.M)

# urllib.quote only exists on Python 2; Python 3 moved it to urllib.parse.
# Resolve it once here so the helper works under either interpreter.
try:
    from urllib import quote as _quote
except ImportError:
    from urllib.parse import quote as _quote


def urlSafe(articleName):
    """Return articleName encoded for use as the title= value in a URL.

    Spaces are turned into underscores first.  The result is then
    percent-encoded, except for the characters quote() always leaves
    alone and the extra safe characters "(", ")", " ", ":", "/" and "'".
    """
    # A plain string replace is enough: the pattern is a single literal
    # character, so no regular expression is needed.
    return _quote(articleName.replace(' ', '_'), safe='() :/\'')

print "Creating an ``article of the day'' box."

# Get a list of articles
try:
    f = open(articleListFile)
    text = "".join(f.readlines())
    f.close()
except:
    print "Error: Couldn't get an article list..."
    raise
articleList = r1.findall(text)
numberOfArticles = len(articleList)

# Scramble the article order in a deterministic way
if scrambleOrder:
    seed(42)
    shuffle(articleList)

print "Found %d articles:" % len(articleList)
print " - ".join(articleList)

# Read in index
try:
    f = open(articleIndexFile)
    articleIndex = int("".join(f.readlines())) % numberOfArticles
    f.close()
except:
    print "Couldn't find a current article pointer, so starting at 0."
print "Today's article is [[%s]]." %  articleList[articleIndex]

# Wikicode for the table header.  It opens an outer wrapper table and a
# <div>, then the inner bordered table, whose first row is the green title
# bar (spanning four columns) followed by the "MRR" column heading.  The
# text is written to the output file exactly as it appears here.
header = \
"""{| <!-- This table is generated using a script -->
|
<div style="position:relative;left:0px;z-index:10;font-size:85%;padding:.3em 0 .3em 0;margin:0">
{| border="1" cellspacing="0" width="640px"
|- style="background:#deffad;font-size:120%"
| colspan="4" align="center" | <div>'''[[Wikipedia:WikiReader/Cryptography|WikiReader Cryptography]] — article of the day''' <small>{{ed|WikiReaderCryptographyAOTD-Verbose|}}</small></div>
| align="center" | [[User:Matt Crypto/MRR|MRR]]
"""
# The commented-out quotes on the next line have no effect on the program;
# presumably they re-balance quote matching for an editor's syntax
# highlighter -- TODO confirm.
#"""

rowList = []

# Produce a row for each article
for dayShift in range(-previousDays, nextDays + 1):

    # Calculate the name of the day for this row
    dateStr = strftime(timeFmt, gmtime(time() + \
               (dayShift + afterMidnight) * oneDayInSeconds))

    # Find the article name for this day
    index = (articleIndex + dayShift) % numberOfArticles
    article = articleList[index]

    # For the current article (dayShift==0), do some different formatting
    # A different background and font weight
    if dayShift == 0:
        style = 'style="background:#fffd89;font-weight:bold"'
        t1 = "background:#fffd89"
        t2 = 'style="background:#eeec78"|'
    else:
        style = ''
        t1 = ''
        t2 = 'style="background:#eeeeee"|'

    # Get the 0-10 score for this article
    try:
        articleScore = scoreBase[article]
    except KeyError:
        articleScore = 0
    
    print "Row: %s\t(score %d)\t[[%s]]." % (dateStr, articleScore, article)

    # Use this information to produce wikicode using the following linenoise:
    tableRow = '|- %s\n|%s\n|[[%s]]\n|align="center"|[[Talk:%s|(Talk)]]\n|align="center"|[http://en.wikipedia.org/w/wiki.phtml?title=%s&action=history (History)]\n' % (style, dateStr, article, article, urlSafe(article))   
    mrr = ('|\n{| style="font-size:50%%;%s"\n' % t1) + '|style="background:#ff0000"| \n' * articleScore + ('|%s \n' % t2) * (10 - articleScore) + '|}\n'
    rowList.append(tableRow + mrr)


# The end of the table: a notes row, the to-do list for today's article,
# and the markup that closes the inner table, the <div> and the outer
# table.  Each %s below is a placeholder for today's article name.
footer = \
"""|-
| colspan="5" | '''Notes:''' If you find problems that you can't fix (or it's too much effort), it would be very helpful of you could place a note on the Talk: page. Articles need to be checked for 1) ''Accuracy'' (Factchecking: Are there any mistakes? Is the writing precise? Are sources cited?), 2) ''Completeness'' (Any obvious omissions? Does it need illustration?) 3) ''Quality of writing'' (Copyedits: Grammar and spelling, phrasing, structure) 4) ''Neutrality'' (Is it written from the [[NPOV]]? Do we document all relevant points of view?)
|- 
| colspan="5" align="center" style="background:#deffad;font-size:120%"  | [[Image:Evolution-tasks.png]] — '''To-do list for %s'''<small> {{ed2|Talk:%s/to do|}}</small>
|-
| colspan="5" |
{{Talk:%s/to do}}
|-
| colspan="5" align="center" style="background:#deffad" |  
|-
|}
</div>
|}
"""
#"""
# Fill in the placeholders with a literal string replace.  The % operator
# cannot be used because the text also contains a bare "%" (font-size:120%),
# and a regex substitution would interpret backslashes in the article name
# as escapes or group references instead of inserting them verbatim.
footer = footer.replace('%s', articleList[articleIndex])

def _writeFile(path, text):
    """Write text to path, replacing any existing contents.

    The handle is closed even when the write fails; the IOError raised by
    open() or write() is passed on to the caller.
    """
    f = open(path, "w")
    try:
        f.write(text)
    finally:
        f.close()


table = header + "".join(rowList) + footer

# Write out the article table
try:
    _writeFile(outputTable, table)
except IOError:
    print "Error: Couldn't write article table..."
    raise

print "Table written to %s" % outputTable

# Update the index pointing to the current article: store the following
# index (wrapping at the end of the list) for the next run to pick up.
try:
    _writeFile(articleIndexFile, "%d" % ((articleIndex + 1) % numberOfArticles))
except IOError:
    print "Error: Couldn't write article index pointer..."
    raise

print "Done."