Revision: 6942
Author: purodha
Date: 2009-06-07 05:24:19 +0000 (Sun, 07 Jun 2009)
Log Message:
-----------
Initial version of a recentchanges page generator (via API)
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-06-07 04:40:46 UTC (rev 6941)
+++ trunk/pywikipedia/pagegenerators.py 2009-06-07 05:24:19 UTC (rev 6942)
@@ -133,6 +133,10 @@
-gorandom Specifies that the robot should starting at the random pages
returned by [[Special:Random]].
+
+-recentchanges Work on new and edited pages returned by [[Special:Recentchanges]].
+ Can also be given as "-recentchanges:n" where n is the number
+ of pages to be returned, else 100 pages are returned.
"""
@@ -403,6 +407,12 @@
site = wikipedia.getSite()
for i in range(number):
yield site.randomredirectpage()
+
+def RecentchangesPageGenerator(number = 100, site = None):
+    """Yield the page of every entry returned by site.recentchanges().
+
+    number is forwarded to site.recentchanges() as its 'number' argument.
+    When site is None, the site from wikipedia.getSite() is used.
+    """
+    if site is not None:
+        wiki = site
+    else:
+        wiki = wikipedia.getSite()
+    for change in wiki.recentchanges(number=number):
+        # Only the first element of each entry (the page) is passed on.
+        yield change[0]
def TextfilePageGenerator(filename=None, site=None):
'''
@@ -954,6 +964,11 @@
gen = RandomPageGenerator()
else:
gen = RandomPageGenerator(number = int(arg[8:]))
+ elif arg.startswith('-recentchanges'):
+ if len(arg) == 14:
+ gen = RecentchangesPageGenerator()
+ else:
+ gen = RecentchangesPageGenerator(number = int(arg[15:]))
elif arg.startswith('-file'):
textfilename = arg[6:]
if not textfilename:
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-06-07 04:40:46 UTC (rev 6941)
+++ trunk/pywikipedia/wikipedia.py 2009-06-07 05:24:19 UTC (rev 6942)
@@ -4274,6 +4274,7 @@
deadendpages(): Special:Deadendpages
ancientpages(): Special:Ancientpages
lonelypages(): Special:Lonelypages
+ recentchanges(): Special:Recentchanges
unwatchedpages(): Special:Unwatchedpages (sysop accounts only)
uncategorizedcategories(): Special:Uncategorizedcategories (yields
Category objects)
@@ -5342,6 +5343,76 @@
if not repeat:
break
+    def recentchanges(self, number = 100, rcstart = None, rcend = None,
+                      rcshow = None, rctype = 'edit|new', repeat = False):
+        """Yield one tuple per recent change, fetched through the API.
+
+        Each yielded item is
+            (Page, timestamp, user, comment, loginfo)
+        where comment and loginfo are '' if the API result omits them.
+
+        The request has the form:
+        action=query&list=recentchanges&rctype=edit|new&rclimit=500
+
+        Arguments (each maps to the API option of the same name,
+        except number which is sent as rclimit):
+            number  - How many total changes to return (rclimit).
+                      The API allows no more than 500 (5000 for bots).
+            rcstart - The timestamp to start enumerating from.
+                      Only sent when not None.
+            rcend   - The timestamp to end enumerating.
+                      Only sent when not None.
+            rcshow  - Show only items that meet this criteria.
+                      For example, to see only minor edits done by
+                      logged-in users, set rcshow='minor|!anon'
+                      Values (separate with '|'):
+                        minor, !minor, bot, !bot, anon, !anon,
+                        redirect, !redirect, patrolled, !patrolled
+                      Only sent when not None.
+            rctype  - Which types of changes to show.
+                      Values (separate with '|'): edit, new, log
+                      None is treated as 'edit|new'.
+            repeat  - If true, the same request is issued again after
+                      each batch has been yielded, without end;
+                      otherwise a single request is made.
+
+        API options that are not exposed by this method:
+            rcdir   - never sent, so the API default applies.
+            rctoken - never sent.
+            rcprop  - always 'user|comment|timestamp|title|ids|loginfo'.
+
+        Raises ServerError if the API answer does not contain
+        ['query']['recentchanges'].
+        """
+        if rctype is None:
+            rctype = 'edit|new'
+        params = {
+            'action'  : 'query',
+            'list'    : 'recentchanges',
+            'rctype'  : rctype,
+            # Not requested: flags, sizes, redirect, patrolled.
+            'rcprop'  : 'user|comment|timestamp|title|ids|loginfo',
+            'rclimit' : int(number),
+        }
+        # Optional filters are only sent when the caller supplied them.
+        if rcstart is not None:
+            params['rcstart'] = rcstart
+        if rcend is not None:
+            params['rcend'] = rcend
+        if rcshow is not None:
+            params['rcshow'] = rcshow
+        while True:
+            data = query.GetData(params,
+                                 useAPI = True, encodeTitle = False)
+            try:
+                rcData = data['query']['recentchanges']
+            except KeyError:
+                raise ServerError(
+                    "The APIs don't return data, the site may be down")
+
+            for rcItem in rcData:
+                # comment and loginfo may be absent from an entry;
+                # title, timestamp and user are required.
+                yield (Page(self, rcItem['title']),
+                       rcItem['timestamp'],
+                       rcItem['user'],
+                       rcItem.get('comment', ''),
+                       rcItem.get('loginfo', ''))
+            if not repeat:
+                break
+
def uncategorizedimages(self, number = 10, repeat = False):
"""Yield ImagePages from
Special:Uncategorizedimages."""
seen = set()