#!/usr/bin/env python
"""
Usage: pic_roster.py [-show] [infile [outfile]]

infile is generated by saving an Oncourse roster with photos, which may be
obtained in Firefox with
    right click in picture frame > this frame > show only this frame >
    save page as > infile.

Writes to outfile a compact photo gallery with pic_roster.num_cols columns.
The heading is the infile name (excluding its file extension).

-show invokes Firefox on outfile.

infile defaults to roster.html and outfile to the infile name with its
extension replaced by .htm
"""

# dependence imports
import amara    # http://uche.ogbuji.net/tech/4suite/amara/
import tidy     # http://utidylib.berlios.de/
import xmlhelp  # http://www.cs.indiana.edu/~chaynes/lib/xmlhelp.py

import os
import os.path
import re
import subprocess
import sys

# Number of student cells per gallery row.
num_cols = 8
# When true, main() opens the generated gallery in Firefox.
show = False
# Browser executable launched when `show` is set.
firefox_exe = r'C:\Program Files\Mozilla Firefox\firefox.exe'


def main(in_file, out_file):
    """Convert a saved roster page into a compact photo gallery.

    Reads `in_file`, cleans it up into XHTML with tidy, and collects one
    table cell (photo, first name, last name, user id) for every roster row
    whose status column is 'student'.  The cells are sorted by name, laid
    out `num_cols` per row, and written to `out_file`.  If the module-level
    `show` flag is set, Firefox is then launched on `out_file`.

    Raises ValueError if a student's name is not in "Last, First" form.
    """
    with open(in_file) as roster:
        text = roster.read()
    text = str(tidy.parseString(text, output_xhtml=1, indent=1))
    text = re.sub(r'xmlns=".*?"', '', text)  # keep xml_xpath happy
    doc = amara.parse(text)
    rows = doc.xml_xpath(u"//table[@id='_id2:allUserRoster']/tbody/tr")

    students = []
    for tr in rows:
        _, img_td, name_td, userid, email, status = tr.td
        if str(status).strip() != u'student':
            continue
        img = ['img', {'src': str(img_td.img.src)}]
        name = str(name_td).strip()
        match = re.match(r'(.*?), (\S*)', name)
        if match is None:
            # Fail with the offending value instead of an opaque
            # AttributeError on None.
            raise ValueError('roster name not in "Last, First" form: %r'
                             % name)
        last, first = match.groups()
        td = ['td', img, ['br'], ['strong', first, ['br'], last], ['br'],
              str(userid)]
        students.append([name, td])

    # Sort on the name only, so that two students with the same name never
    # fall through to comparing the nested cell structures.
    students.sort(key=lambda entry: entry[0])

    table = ['table', {'cellpadding': '5'}]
    for index, (_, td) in enumerate(students):
        if index % num_cols == 0:
            # Start a new gallery row every num_cols students.
            row = ['tr']
            table.append(row)
        row.append(td)

    title = os.path.splitext(in_file)[0]
    pics = [['html',
             ['head', ['title', title + ' pics']],
             ['body', ['h2', title], table]]]

    # Render before opening the output so that a rendering failure does not
    # leave a truncated file behind.
    rendered = xmlhelp.make_document(pics).xml(indent=u'yes')
    with open(out_file, 'w') as fo:
        fo.write(rendered)

    if show:
        # An argument list keeps paths containing spaces or shell
        # metacharacters intact.
        subprocess.call([firefox_exe, out_file])


if __name__ == '__main__':
    if len(sys.argv) >= 2 and sys.argv[1] == '-show':
        show = True
        del sys.argv[1]
    if len(sys.argv) >= 2:
        in_file = sys.argv[1]
    else:
        in_file = 'roster.html'
    if len(sys.argv) == 3:
        out_file = sys.argv[2]
    else:
        out_file = os.path.splitext(in_file)[0] + '.htm'
    main(in_file, out_file)