Revision: 8717
Author: shizhao
Date: 2010-11-09 15:54:51 +0000 (Tue, 09 Nov 2010)
Log Message:
-----------
fullVersionHistory(): add API query support
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-11-07 22:20:04 UTC (rev 8716)
+++ trunk/pywikipedia/wikipedia.py 2010-11-09 15:54:51 UTC (rev 8717)
@@ -2780,33 +2780,89 @@
result += '|}\n'
return result
- def fullVersionHistory(self):
+ def fullVersionHistory(self, getAll=False, skipFirst=False, reverseOrder=False,
+ revCount=500):
"""Iterate previous versions including wikitext.
Gives a list of tuples consisting of revision ID, edit date/time, user name and
content
"""
- address = self.site().export_address()
- predata = {
- 'action': 'submit',
- 'pages': self.title()
- }
- get_throttle(requestsize = 10)
- now = time.time()
- response, data = self.site().postForm(address, predata)
- data = data.encode(self.site().encoding())
+ if not self.site().has_api() or self.site().versionnumber() < 8:
+ address = self.site().export_address()
+ predata = {
+ 'action': 'submit',
+ 'pages': self.title()
+ }
+ get_throttle(requestsize = 10)
+ now = time.time()
+ response, data = self.site().postForm(address, predata)
+ data = data.encode(self.site().encoding())
# get_throttle.setDelay(time.time() - now)
- output = []
+ output = []
# TODO: parse XML using an actual XML parser instead of regex!
- r = re.compile("\<revision\>.*?\<id\>(?P<id>.*?)\<\/id\>.*?\<timestamp\>(?P<timestamp>.*?)\<\/timestamp\>.*?\<(?:ip|username)\>(?P<user>.*?)\</(?:ip|username)\>.*?\<text.*?\>(?P<content>.*?)\<\/text\>",re.DOTALL)
+ r = re.compile("\<revision\>.*?\<id\>(?P<id>.*?)\<\/id\>.*?\<timestamp\>(?P<timestamp>.*?)\<\/timestamp\>.*?\<(?:ip|username)\>(?P<user>.*?)\</(?:ip|username)\>.*?\<text.*?\>(?P<content>.*?)\<\/text\>",re.DOTALL)
#r = re.compile("\<revision\>.*?\<timestamp\>(.*?)\<\/timestamp\>.*?\<(?:ip|username)\>(.*?)\<",re.DOTALL)
- return [ (match.group('id'),
- match.group('timestamp'),
- unescape(match.group('user')),
- unescape(match.group('content')))
- for match in r.finditer(data) ]
+ return [ (match.group('id'),
+ match.group('timestamp'),
+ unescape(match.group('user')),
+ unescape(match.group('content')))
+ for match in r.finditer(data) ]
+
+ """Load history informations by API query. """
+ dataQ = []
+ thisHistoryDone = False
+ params = {
+ 'action': 'query',
+ 'prop': 'revisions',
+ 'titles': self.title(),
+ 'rvprop': 'ids|timestamp|user|content',
+ 'rvlimit': revCount,
+ }
+ while not thisHistoryDone:
+ if reverseOrder:
+ params['rvdir'] = 'newer'
+
+ result = query.GetData(params, self.site())
+ if 'error' in result:
+ raise RuntimeError("%s" % result['error'])
+ pageInfo = result['query']['pages'].values()[0]
+ if result['query']['pages'].keys()[0] == "-1":
+ if 'missing' in pageInfo:
+ raise NoPage(self.site(), self.aslink(forceInterwiki=True),
+ "Page does not exist.")
+ elif 'invalid' in pageInfo:
+ raise BadTitle('BadTitle: %s' % self)
+
+ if 'query-continue' in result and getAll:
+ params['rvstartid'] = result['query-continue']['revisions']['rvstartid']
+ else:
+ thisHistoryDone = True
+
+ if skipFirst:
+ skipFirst = False
+ else:
+ for r in pageInfo['revisions']:
+ c = ''
+ if 'comment' in r:
+ c = r['comment']
+ #revision id, edit date/time, user name, edit summary
+ (revidStrr, timestampStrr, userStrr) = (None, None, None)
+ if 'revid' in r:
+ revidStrr = r['revid']
+ if 'timestamp' in r:
+ timestampStrr = r['timestamp']
+ if 'user' in r:
+ userStrr = r['user']
+ s='' #Will return -1 if not found
+ if '*' in r:
+ s = r['*']
+ dataQ.append((revidStrr, timestampStrr, userStrr, s))
+ if len(result['query']['pages'].values()[0]['revisions']) < revCount:
+ thisHistoryDone = True
+ return dataQ
+
def contributingUsers(self, step=None, total=None):
"""Return a set of usernames (or IPs) of users who edited this
page.
Show replies by date