# ebert_movies.py, by chaynes@indiana.edu
'''
Get the text of Roger Ebert's recommendations page and print the
recommended movie titles.

NOTE(review): `http.get(url)` is presumably provided by a local helper
module named `http` that returns the page body as a string; the Python 3
standard-library `http` package has no `get` function -- confirm which
module is picked up on the import path.
'''

import http

ebert_recommends_url = 'http://rogerebert.suntimes.com/apps/pbcs.dll/'
ebert_recommends_url += 'section?category=REVIEWS05'

# Search between the first occurrence of '-- main content --' and the next
# '!Middle1' for movie names.  Names start after the first 'b>' after
# 'href="/apps/pbcs.dll' and end with the following '<'.
_SECTION_START = '-- main content --'
_SECTION_END = '!Middle1'
_LINK_PREFIX = 'href="/apps/pbcs.dll'
_NAME_START = 'b>'
_NAME_END = '<'


def extract_titles(text):
    '''Return the list of movie titles found in the recommendations page.

    text -- the full HTML source of the page, as a string.

    Only links that begin between the '-- main content --' marker and the
    following '!Middle1' marker are considered.  For each such link the
    title is the text between the next 'b>' and the next '<'.

    Raises ValueError if either section marker is missing from the text.
    '''
    section_start = text.index(_SECTION_START)
    section_end = text.index(_SECTION_END, section_start)

    titles = []
    position = section_start
    while True:
        link = text.find(_LINK_PREFIX, position, section_end)
        if link == -1:
            break
        name_marker = text.find(_NAME_START, link)
        if name_marker == -1:
            # No title follows this link.  Stop here: carrying on with the
            # -1 result would restart the scan near the top of the page
            # and could loop forever.
            break
        name_begin = name_marker + len(_NAME_START)
        name_end = text.find(_NAME_END, name_begin)
        if name_end == -1:
            # Unterminated title: take the rest of the text rather than
            # slicing with -1, which would drop the final character.
            name_end = len(text)
        titles.append(text[name_begin:name_end])
        # Resume after the start of this title so every pass advances.
        position = name_begin
    return titles


def main():
    '''Fetch the recommendations page and print one movie title per line.'''
    text = http.get(ebert_recommends_url)
    for title in extract_titles(text):
        # Single-argument call form behaves the same on Python 2 and 3.
        print(title)


if __name__ == '__main__':
    main()