Revision: 7991
Author: xqt
Date: 2010-03-12 09:03:31 +0000 (Fri, 12 Mar 2010)
Log Message:
-----------
site.getmagicwords() from site.siteinfo() instead of from the family file
Modified Paths:
--------------
trunk/pywikipedia/family.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2010-03-12 08:58:10 UTC (rev 7990)
+++ trunk/pywikipedia/family.py 2010-03-12 09:03:31 UTC (rev 7991)
@@ -1,9 +1,17 @@
# -*- coding: utf-8 -*-
-import config, urllib, re
-from datetime import timedelta, datetime
+#
+# (C) Pywikipedia bot team, 2004-2010
+#
+# Distributed under the terms of the MIT license.
+#
__version__='$Id$'
+import config
+import re
+import urllib
+from datetime import timedelta, datetime
+
# Parent class for all wiki families
class Family:
@@ -2678,8 +2686,8 @@
},
}
- # letters that can follow a wikilink and are regarded as part
- # of this link
+ # letters that can follow a wikilink and are regarded as part of
+ # this link
# This depends on the linktrail setting in LanguageXx.php and on
# [[MediaWiki:Linktrail]].
# Note: this is a regular expression.
@@ -3113,7 +3121,9 @@
# A list with the name for cross-project cookies.
# default for wikimedia centralAuth extensions.
- self.cross_projects_cookies = ['centralauth_Session', 'centralauth_Token', 'centralauth_User']
+ self.cross_projects_cookies = ['centralauth_Session',
+ 'centralauth_Token',
+ 'centralauth_User']
self.cross_projects_cookie_username = 'centralauth_User'
# A list with the name in the cross-language flag permissions
@@ -3449,51 +3459,6 @@
def category_namespaces(self, code):
return self.namespace(code, 14, all = True)
- # So can be pagename code
- pagename = {
- 'bg': [u'СТРАНИЦА'],
- 'he': [u'שם הדף'],
- 'kk': [u'БЕТАТАУЫ'],
- 'nn': ['SIDENAMN', 'SIDENAVN'],
- 'ru': [u'НАЗВАНИЕСТРАНИЦЫ'],
- 'sr': [u'СТРАНИЦА'],
- 'tt': [u'BİTİSEME']
- }
-
- pagenamee = {
- 'he': [u'שם הדף מקודד'],
- 'kk': [u'БЕТАТАУЫ2'],
- 'nn': ['SIDENAMNE', 'SIDENAVNE'],
- 'ru': [u'НАЗВАНИЕСТРАНИЦЫ2'],
- 'sr': [u'СТРАНИЦЕ']
- }
-
- def pagenamecodes(self, code):
- pos = ['PAGENAME']
- pos2 = []
- if code in self.pagename:
- pos = pos + self.pagename[code]
- elif code == 'als':
- return self.pagenamecodes('de')
- elif code == 'bm':
- return self.pagenamecodes('fr')
- for p in pos:
- pos2 += [p, p.lower()]
- return pos2
-
- def pagename2codes(self, code):
- pos = ['PAGENAME']
- pos2 = []
- if code in self.pagenamee:
- pos = pos + self.pagenamee[code]
- elif code == 'als':
- return self.pagename2codes('de')
- elif code == 'bm':
- return self.pagename2codes('fr')
- for p in pos:
- pos2 += [p, p.lower()]
- return pos2
-
# Methods
def protocol(self, code):
"""
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-03-12 08:58:10 UTC (rev 7990)
+++ trunk/pywikipedia/wikipedia.py 2010-03-12 09:03:31 UTC (rev 7991)
@@ -2156,7 +2156,7 @@
text = self.get()
# Replace {{PAGENAME}} by its value
- for pagenametext in self.site().family.pagenamecodes(
+ for pagenametext in self.site().pagenamecodes(
self.site().language()):
text = text.replace(u"{{%s}}" % pagenametext, self.title())
@@ -2363,8 +2363,8 @@
# {{#if: }}
if name.startswith('#'):
continue
- # {{DEFAULTSORT:...}} or {{#if: }}
- defaultKeys = self.site().siteinfo('magicwords')['defaultsort']
+ # {{DEFAULTSORT:...}}
+ defaultKeys = self.site().getmagicwords('defaultsort')
found = False
for key in defaultKeys:
if name.startswith(key):
@@ -6757,19 +6757,18 @@
return True
return False
- def redirect(self, default = False):
+ def getmagicwords(self, word):
+ """Return list of localized "word" magic words for the site."""
+ return self.siteinfo('magicwords').get(word)
+
+ def redirect(self, default=False):
"""Return the localized redirect tag for the site.
- If default is True, falls back to 'REDIRECT' if the site has no
- special redirect tag.
+ Argument is ignored (but maintained for backwards-compatibility).
"""
- tag = self.siteinfo('magicwords').get('redirect')
- if tag:
- # remove first "#" letter
- return tag[0][1:]
- elif default:
- return u'REDIRECT'
+ # return the magic word without the preceding '#' character
+ return self.getmagicwords('redirect')[0].lstrip("#")
def redirectRegex(self):
"""Return a compiled regular expression matching on redirect pages.
@@ -6780,7 +6779,7 @@
#NOTE: this is needed, since the API can give false positives!
default = 'REDIRECT'
try:
- keywords = self.siteinfo('magicwords')['redirect']
+ keywords = self.getmagicwords('redirect')
pattern = r'(?:' + '|'.join(keywords) + ')'
except KeyError:
# no localized keyword for redirects
@@ -6797,6 +6796,14 @@
+ '\s*:?\s*\[\[(.+?)(?:\|.*?)?\]\]',
re.IGNORECASE | re.UNICODE | re.DOTALL)
+ def pagenamecodes(self, default=True):
+ """Return list of localized PAGENAME tags for the site."""
+ return self.getmagicwords('pagename')
+
+ def pagename2codes(self, default=True):
+ """Return list of localized PAGENAMEE tags for the site."""
+ return self.getmagicwords('pagenamee')
+
def resolvemagicwords(self, wikitext):
"""Replace the {{ns:xx}} marks in a wikitext with the namespace names"""
Revision: 7988
Author: xqt
Date: 2010-03-11 17:44:03 +0000 (Thu, 11 Mar 2010)
Log Message:
-----------
test api with new has_api() method; move throttle.log to control file
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Property Changed:
----------------
trunk/pywikipedia/pywikibot/
Property changes on: trunk/pywikipedia/pywikibot
___________________________________________________________________
Modified: svn:ignore
- *.pyc
+ *.pyc
*.ctrl
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-03-11 15:01:00 UTC (rev 7987)
+++ trunk/pywikipedia/wikipedia.py 2010-03-11 17:44:03 UTC (rev 7988)
@@ -4144,12 +4144,13 @@
self.releasepid = 1200 # Free the process id
self.lastwait = 0.0
self.delay = 0
- if multiplydelay:
+ self.multiplydelay = multiplydelay
+ if self.multiplydelay:
self.checkMultiplicity()
self.setDelay(mindelay)
def logfn(self):
- return config.datafilepath('logs', 'throttle.log')
+ return config.datafilepath('pywikibot', 'throttle.ctrl')
def checkMultiplicity(self):
self.lock.acquire()
@@ -4211,7 +4212,7 @@
def getDelay(self):
thisdelay = self.delay
- if self.pid: # If self.pid, we're checking for multiple processes
+ if self.multiplydelay: # If self.pid, we're checking for multiple processes
if time.time() > self.checktime + self.checkdelay:
self.checkMultiplicity()
if thisdelay < (self.mindelay * self.next_multiplicity):
@@ -4595,6 +4596,7 @@
mediawiki_message: Retrieve the text of a specified MediaWiki message
has_mediawiki_message: True if this site defines specified MediaWiki
message
+ has_api: True if this site's family provides api interface
shared_image_repository: Return tuple of image repositories used by this
site.
@@ -5677,21 +5679,13 @@
"""Return the MediaWiki message text for key "key" """
# Allmessages is retrieved once for all per created Site object
if (not self._mediawiki_messages) or forceReload:
- api = False
+ api = self.has_api()
if verbose:
output(u"Retrieving mediawiki messages from Special:Allmessages")
# Only MediaWiki r27393/1.12 and higher support XML output for Special:Allmessages
if self.versionnumber() < 12:
usePHP = True
else:
- try:
- if config.use_api:
- x = self.api_address()
- del x
- api = True
- except NotImplementedError:
- api = False
-
usePHP = False
elementtree = True
try:
@@ -5797,6 +5791,17 @@
return True
except KeyError:
return False
+
+ def has_api(self):
+ """Return True if this sites family has api interface."""
+ try:
+ if config.use_api:
+ x = self.apipath()
+ del x
+ return True
+ except NotImplementedError:
+ pass
+ return False
def _load(self, sysop = False, force = False):
"""
@@ -5814,16 +5819,10 @@
if verbose:
output(u'Getting information for site %s' % self)
- try:
- api_url = self.api_address()
- del api_url
- except NotImplementedError:
- config.use_api = False
-
# Get data
# API Userinfo is available from version 1.11
# preferencetoken available from 1.14
- if config.use_api and self.versionnumber() >= 11:
+ if self.has_api() and self.versionnumber() >= 11:
#Query userinfo
params = {
'action': 'query',
@@ -5860,66 +5859,47 @@
Use API when enabled use_api and version >= 1.11,
or use Special:Search.
"""
- try:
- if config.use_api and self.versionnumber() >= 11:
- apiUrl = self.site().api_address()
- del apiUrl
- else:
- raise NotImplementedError
- except NotImplementedError:
- _search = self._search_without_api
+ if self.has_api() and self.versionnumber() >= 11:
+ #Yield search results (using api) for query.
+ params = {
+ 'action': 'query',
+ 'list': 'search',
+ 'srsearch': q,
+ 'srlimit': number
+ }
+ if namespaces:
+ params['srnamespace'] = namespaces
+
+ offset = 0
+ while True:
+ params['sroffset'] = offset
+ data = query.GetData(params, self)['query']
+ if 'error' in data:
+ raise RuntimeError('%s' % data['error'])
+ if not data['search']:
+ break
+ for s in data['search']:
+ offset += 1
+ page = Page(self, s['title'])
+ yield page, s['snippet'], '', s['size'], s['wordcount'], s['timestamp']
else:
- _search = self._search_with_api
- return _search(query, number, namespaces)
+ #Yield search results (using Special:Search page) for query.
+ throttle = True
+ path = self.search_address(urllib.quote_plus(query.encode('utf-8')),
+ n=number, ns=namespaces)
+ get_throttle()
+ html = self.getUrl(path)
+ entryR = re.compile(ur'<li><a href=".+?" title="(?P<title>.+?)">.+?</a>',
+ re.DOTALL)
+ for m in entryR.finditer(html):
+ page = Page(self, m.group('title'))
+ yield page, '', '', '', '', ''
- def _search_with_api(self, q, number, namespaces):
- """Yield search results (using api) for query."""
- params = {
- 'action': 'query',
- 'list': 'search',
- 'srsearch': q,
- 'srlimit': number
- }
- if namespaces:
- params['srnamespace'] = namespaces
-
- offset = 0
- while True:
- params['sroffset'] = offset
- data = query.GetData(params, self)['query']
- if 'error' in data:
- raise RuntimeError('%s' % data['error'])
- if not data['search']:
- break
- for s in data['search']:
- offset += 1
- page = Page(self, s['title'])
- yield page, s['snippet'], '', s['size'], s['wordcount'], s['timestamp']
-
- def _search_without_api(self, query, number, namespaces):
- """Yield search results (using Special:Search page) for query."""
- throttle = True
- path = self.search_address(urllib.quote_plus(query.encode('utf-8')),
- n=number, ns=namespaces)
- get_throttle()
- html = self.getUrl(path)
-
- entryR = re.compile(ur'<li><a href=".+?" title="(?P<title>.+?)">.+?</a>',
- re.DOTALL)
-
- for m in entryR.finditer(html):
- page = Page(self, m.group('title'))
- yield page, '', '', '', '', ''
-
# TODO: avoid code duplication for the following methods
def logpages(self, number=50, mode='', user=None, repeat=False, namespace=[], offset=-1):
- if config.use_api:
- apiURL = self.api_address()
- del apiURL
- else:
- raise NotImplementedError
- if mode not in ('block', 'protect', 'rights', 'delete', 'upload',
+ if not self.has_api() or \
+ mode not in ('block', 'protect', 'rights', 'delete', 'upload',
'move', 'import', 'patrol', 'merge', 'suppress',
'review', 'stable', 'gblblock', 'renameuser',
'globalauth', 'gblrights', 'abusefilter', 'newusers'):
@@ -5986,14 +5966,9 @@
# should use both offset and limit parameters, and have an
# option to fetch older rather than newer pages
seen = set()
- try:
- d = self.apipath()
- del d
- except NotImplementedError:
- config.use_api = False
-
+ api = self.has_api()
while True:
- if config.use_api and self.versionnumber() >= 10:
+ if api and self.versionnumber() >= 10:
params = {
'action': 'query',
'list': 'recentchanges',
@@ -6668,18 +6643,12 @@
"""Yield Pages from results of Special:Linksearch for 'siteurl'."""
cache = []
R = re.compile('title ?=\"([^<>]*?)\">[^<>]*</a></li>')
- #Check API can work
- if config.use_api:
- try:
- d = self.api_address()
- del d
- except NotImplementedError:
- config.use_api = False
-
+ api = self.has_api()
urlsToRetrieve = [siteurl]
if not siteurl.startswith('*.'):
urlsToRetrieve.append('*.' + siteurl)
- if config.use_api and self.versionnumber() >= 11:
+
+ if api and self.versionnumber() >= 11:
output(u'Querying API exturlusage...')
for url in urlsToRetrieve:
params = {
Revision: 7986
Author: russblau
Date: 2010-03-11 14:54:20 +0000 (Thu, 11 Mar 2010)
Log Message:
-----------
Merge recent changes from trunk.
Modified Paths:
--------------
branches/rewrite/scripts/solve_disambiguation.py
Modified: branches/rewrite/scripts/solve_disambiguation.py
===================================================================
--- branches/rewrite/scripts/solve_disambiguation.py 2010-03-10 15:45:02 UTC (rev 7985)
+++ branches/rewrite/scripts/solve_disambiguation.py 2010-03-11 14:54:20 UTC (rev 7986)
@@ -27,6 +27,9 @@
-just only use the alternatives given on the command line, do not
read the page for other possibilities
+ -dnskip Skip links already marked with a disambiguation-needed
+ template (e.g., {{dn}})
+
-primary "primary topic" disambiguation (Begriffsklärung nach Modell 2).
That's titles where one topic is much more important, the
disambiguation page is saved somewhere else, and the important
@@ -55,7 +58,7 @@
wiki that is defined (to the bot) as the category containing
disambiguation pages, starting at XY. If only '-start' or
'-start:' is given, it starts at the beginning.
-
+
-min:XX (XX being a number) only work on disambiguation pages for which
at least XX are to be worked on.
@@ -191,6 +194,21 @@
'uk': u'Виправлення посилання на багатозначність за допомогою бота: %s вилучено',
}
+# Disambiguation Needed template
+dn_template = {
+ 'en' : u'{{dn}}',
+ }
+
+# Summary message when adding Disambiguation Needed template
+msg_dn = {
+ 'en' : u'Robot-assisted disambiguation: %s - Marked as needing expert attention',
+ }
+
+# Summary message when adding Disambiguation Needed template to a redirect link
+msg_redir_dn = {
+ 'en' : u'Robot-assisted disambiguation: %s - Marked as needing expert attention',
+ }
+
# Summary message to (unknown)
unknown_msg = {
'ar' : u'(غير معروف)',
@@ -417,7 +435,7 @@
self.primaryIgnoreManager = PrimaryIgnoreManager(disambPage,
enabled=primary)
self.minimum = minimum
-
+
def __iter__(self):
# TODO: start yielding before all referring pages have been found
refs = [page for page in
@@ -510,17 +528,18 @@
u'{{[Pp]rocessing}}',
),
}
-
+
primary_redir_template = {
# Page.templates() format, first letter uppercase
'hu': u'Egyért-redir',
}
-
- def __init__(self, always, alternatives, getAlternatives, generator,
+
+ def __init__(self, always, alternatives, getAlternatives, dnSkip, generator,
primary, main_only, minimum = 0):
self.always = always
self.alternatives = alternatives
self.getAlternatives = getAlternatives
+ self.dnSkip = dnSkip
self.generator = generator
self.primary = primary
self.main_only = main_only
@@ -579,9 +598,12 @@
# group linktrail is the link trail, that's letters after ]] which
# are part of the word.
# note that the definition of 'letter' varies from language to language.
- self.linkR = re.compile(
- r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>'
- + linktrail + ')')
+ self.linkR = re.compile(r'''
+ \[\[ (?P<title> [^\[\]\|#]*)
+ (?P<section> \#[^\]\|]*)?
+ (\|(?P<label> [^\]]*))? \]\]
+ (?P<linktrail>%s)''' % linktrail,
+ flags=re.X)
def treat(self, refPage, disambPage):
"""
@@ -591,10 +613,11 @@
refPage - A page linking to disambPage
Returns False if the user pressed q to completely quit the program.
Otherwise, returns True.
-
+
"""
# TODO: break this function up into subroutines!
+ dn_template_str = pywikibot.translate(self.mysite, dn_template)
include = False
unlink = False
new_targets = []
@@ -647,6 +670,7 @@
original_text = text
n = 0
curpos = 0
+ dn = False
edited = False
# This loop will run until we have finished the current page
while True:
@@ -670,11 +694,23 @@
if foundlink.site != disambPage.site():
continue
# check whether the link found is to disambPage
- if foundlink.canonical_title() != disambPage.title():
+ try:
+ if foundlink.canonical_title() != disambPage.title():
+ continue
+ except pywikibot.Error:
+ # must be a broken link
+ pywikibot.output("Invalid link [[%s]] in page [[%s]]"
+ % (m.group('title'), refPage.title()),
+ level=pywikibot.VERBOSE)
continue
n += 1
# how many bytes should be displayed around the current link
context = 60
+ #there's a {{dn}} here already
+ already_dn = text[m.end() : m.end() + 8].find(dn_template_str[:4]) > -1
+ if already_dn and self.dnSkip:
+ continue
+
# This loop will run while the user doesn't choose an option
# that will actually change the page
while True:
@@ -697,10 +733,12 @@
if edited:
choice = pywikibot.input(
u"Option (#, r#, s=skip link, e=edit page, n=next page, u=unlink, q=quit,\n"
+u" ?=tag with " + dn_template_str + ",\n"
u" m=more context, l=list, a=add new, x=save in this form):")
else:
choice = pywikibot.input(
u"Option (#, r#, s=skip link, e=edit page, n=next page, u=unlink, q=quit,\n"
+u" ?=tag with " + dn_template_str + ",\n"
u" m=more context, d=show disambiguation page, l=list, a=add new):")
else:
choice = self.always
@@ -775,7 +813,21 @@
if trailing_chars:
link_text += trailing_chars
- if choice in ['u', 'U']:
+ if choice in ['?', '/']:
+ #small chunk of text to search
+ search_text = text[m.end() : m.end() + context]
+ #figure out where the link (and sentance) ends, put note there
+ end_of_word_match = re.search("\s", search_text)
+ if end_of_word_match:
+ position_split = end_of_word_match.start(0)
+ else:
+ position_split = 0
+ #insert dab needed template
+ text = text[:m.end() + position_split] + dn_template_str \
+ + text[m.end() + position_split:]
+ dn = True
+ continue
+ elif choice in ['u', 'U']:
# unlink - we remove the section if there's any
text = text[:m.start()] + link_text + text[m.end():]
unlink = True
@@ -852,7 +904,7 @@
pywikibot.showDiff(original_text, text)
pywikibot.output(u'')
# save the page
- self.setSummaryMessage(disambPage, new_targets, unlink)
+ self.setSummaryMessage(disambPage, new_targets, unlink, dn)
try:
refPage.put_async(text,comment=self.comment)
except pywikibot.LockedPage:
@@ -936,7 +988,8 @@
self.alternatives += links
return True
- def setSummaryMessage(self, disambPage, new_targets=[], unlink=False):
+ def setSummaryMessage(self, disambPage, new_targets=[], unlink=False,
+ dn=False):
# make list of new targets
targets = ''
for page_title in new_targets:
@@ -968,23 +1021,25 @@
elif disambPage.isRedirectPage():
# when working on redirects, there's another summary message
if unlink and not new_targets:
- self.comment = pywikibot.translate(
- self.mysite,
- msg_redir_unlink
- ) % disambPage.title()
+ self.comment = pywikibot.translate(self.mysite,
+ msg_redir_unlink) \
+ % disambPage.title()
+ elif dn and not new_targets:
+ self.comment = pywikibot.translate(self.mysite, msg_redir_dn) \
+ % disambPage.title()
else:
- self.comment = pywikibot.translate(
- self.mysite, msg_redir
- ) % (disambPage.title(), targets)
+ self.comment = pywikibot.translate(self.mysite, msg_redir) \
+ % (disambPage.title(), targets)
else:
if unlink and not new_targets:
- self.comment = pywikibot.translate(
- self.mysite, msg_unlink
- ) % disambPage.title()
+ self.comment = pywikibot.translate(self.mysite, msg_unlink) \
+ % disambPage.title()
+ elif dn and not new_targets:
+ self.comment = pywikibot.translate(self.mysite, msg_dn) \
+ % disambPage.title()
else:
- self.comment = pywikibot.translate(
- self.mysite, msg
- ) % (disambPage.title(), targets)
+ self.comment = pywikibot.translate(self.mysite, msg) \
+ % (disambPage.title(), targets)
def run(self):
if self.main_only:
@@ -1029,6 +1084,7 @@
always = None
alternatives = []
getAlternatives = True
+ dnSkip = False
# if the -file argument is used, page titles are dumped in this array.
# otherwise it will only contain one page.
generator = None
@@ -1074,6 +1130,8 @@
alternatives.append(arg[5:])
elif arg == '-just':
getAlternatives = False
+ elif arg == '-dnskip':
+ dnSkip = True
elif arg == '-main':
main_only = True
elif arg.startswith('-min:'):
@@ -1106,7 +1164,7 @@
page = pywikibot.Page(pywikibot.Link(pageTitle, pywikibot.getSite()))
generator = iter([page])
- # if no disambiguation pages was given as an argument, and none was
+ # if no disambiguation page was given as an argument, and none was
# read from a file, query the user
if not generator:
pageTitle = pywikibot.input(
@@ -1114,12 +1172,12 @@
page = pywikibot.Page(pywikibot.Link(pageTitle, pywikibot.getSite()))
generator = iter([page])
- bot = DisambiguationRobot(always, alternatives, getAlternatives, generator,
- primary, main_only, minimum=minimum)
+ bot = DisambiguationRobot(always, alternatives, getAlternatives, dnSkip,
+ generator, primary, main_only,
+ minimum=minimum)
bot.run()
-
if __name__ == "__main__":
try:
main()
Revision: 7984
Author: russblau
Date: 2010-03-09 19:07:34 +0000 (Tue, 09 Mar 2010)
Log Message:
-----------
Implement Assert Edit extension (note: using assert=user, *not* assert=bot, because it may be possible to use the framework without a bot flag on some wikis); also convert various debug statements to pywikibot.output calls.
Modified Paths:
--------------
branches/rewrite/pywikibot/data/api.py
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2010-03-09 08:14:34 UTC (rev 7983)
+++ branches/rewrite/pywikibot/data/api.py 2010-03-09 19:07:34 UTC (rev 7984)
@@ -79,7 +79,7 @@
Returns a dict containing the JSON data returned by the wiki. Normally,
one of the dict keys will be equal to the value of the 'action'
parameter. Errors are caught and raise an APIError exception.
-
+
Example:
>>> r = Request(site=mysite, action="query", meta="userinfo")
@@ -99,7 +99,7 @@
[u'query']
>>> data[u'query'].keys()
[u'userinfo', u'namespaces']
-
+
@param site: The Site to which the request will be submitted. If not
supplied, uses the user's configured default Site.
@param mime: If true, send in "multipart/form-data" format (default False)
@@ -123,6 +123,15 @@
if "action" not in kwargs:
raise ValueError("'action' specification missing from Request.")
self.update(**kwargs)
+ self.write = self.params["action"] in (
+ "edit", "move", "rollback", "delete", "undelete",
+ "protect", "block", "unblock", "watch", "patrol",
+ "import", "userrights", "upload"
+ )
+ if self.write:
+ pywikibot.output(u"Adding user assertion",
+ level=pywikibot.DEBUG)
+ self.params["assert"] = "user" # make sure user is logged in
# implement dict interface
def __getitem__(self, key):
@@ -151,7 +160,13 @@
for key in self.params:
if isinstance(self.params[key], basestring):
+ # convert a stringified sequence into a list
self.params[key] = self.params[key].split("|")
+ try:
+ iter(self.params[key])
+ except TypeError:
+ # convert any non-iterable value into a single-element list
+ self.params[key] = [str(self.params[key])]
if self.params["action"] == ['query']:
meta = self.params.get("meta", [])
if "userinfo" not in meta:
@@ -187,27 +202,23 @@
+ "/api.php?"
+ self.http_params()
)
-
+
def submit(self):
"""Submit a query and parse the response.
@return: The data retrieved from api.php (a dict)
-
+
"""
from pywikibot.comms import http
from email.mime.multipart import MIMEMultipart
from email.mime.nonmultipart import MIMENonMultipart
- params = self.http_params()
+ paramstring = self.http_params()
if self.site._loginstatus == -3:
self.site.login(False)
while True:
action = self.params.get("action", "")
- write = action in (
- "edit", "move", "rollback", "delete", "undelete",
- "protect", "block", "unblock"
- )
- self.site.throttle(write=write)
+ self.site.throttle(write=self.write)
uri = self.site.scriptpath() + "/api.php"
try:
ssl = False
@@ -256,22 +267,24 @@
rawdata = http.request(self.site, uri, ssl, method="POST",
headers={'Content-Type':
'application/x-www-form-urlencoded'},
- body=params)
+ body=paramstring)
except Server504Error:
- logger.debug(u"Caught 504 error")
+ pywikibot.output(u"Caught 504 error",
+ level=pywikibot.DEBUG)
raise
#TODO: what other exceptions can occur here?
except Exception, e:
# for any other error on the http request, wait and retry
pywikibot.output(traceback.format_exc(),
level=pywikibot.ERROR)
- pywikibot.output(u"%s, %s" % (uri, params),
+ pywikibot.output(u"%s, %s" % (uri, paramstring),
level=pywikibot.VERBOSE)
self.wait()
continue
if not isinstance(rawdata, unicode):
rawdata = rawdata.decode(self.site.encoding())
- logger.debug(u"API response received:\n" + rawdata)
+ pywikibot.output(u"API response received:\n" + rawdata,
+ level=pywikibot.DEBUG)
if rawdata.startswith(u"unknown_action"):
raise APIError(rawdata[:14], rawdata[16:])
try:
@@ -281,8 +294,10 @@
# problem. Wait a few seconds and try again
pywikibot.output(
"Non-JSON response received from server %s; the server may be down."
- % self.site, level=pywikibot.WARNING)
- logger.debug(rawdata)
+ % self.site,
+ level=pywikibot.WARNING)
+ pywikibot.output(rawdata,
+ level=pywikibot.DEBUG)
self.wait()
continue
if not result:
@@ -389,6 +404,7 @@
self.site = kwargs["site"]
except KeyError:
self.site = pywikibot.Site()
+ kwargs["site"] = self.site
# make sure request type is valid, and get limit key if any
for modtype in ("generator", "list", "prop", "meta"):
if modtype in kwargs:
@@ -417,7 +433,7 @@
def get_module(self):
"""Query api on self.site for paraminfo on querymodule=self.module"""
-
+
paramreq = Request(site=self.site, action="paraminfo",
querymodules=self.module)
data = paramreq.submit()
@@ -456,7 +472,7 @@
"""
self.limit = int(value)
-
+
def update_limit(self):
"""Set query_limit for self.module based on api response"""
@@ -472,9 +488,10 @@
self.query_limit = int(param["max"])
if self.prefix is None:
self.prefix = _modules[mod]["prefix"]
- logger.debug(u"%s: Set query_limit to %i."
- % (self.__class__.__name__,
- self.query_limit))
+ pywikibot.output(u"%s: Set query_limit to %i."
+ % (self.__class__.__name__,
+ self.query_limit),
+ level=pywikibot.DEBUG)
return
def set_namespace(self, namespaces):
@@ -522,27 +539,34 @@
self.set_query_increment(old_limit // 2)
continue
if not self.data or not isinstance(self.data, dict):
- logger.debug(
+ pywikibot.output(
u"%s: stopped iteration because no dict retrieved from api."
- % self.__class__.__name__)
+ % self.__class__.__name__,
+ level=pywikibot.DEBUG)
return
if not ("query" in self.data
and self.resultkey in self.data["query"]):
- logger.debug(
+ pywikibot.output(
u"%s: stopped iteration because 'query' and '%s' not found in api response."
- % (self.__class__.__name__, self.resultkey))
- logger.debug(unicode(self.data))
+ % (self.__class__.__name__, self.resultkey),
+ level=pywikibot.DEBUG)
+ pywikibot.output(unicode(self.data),
+ level=pywikibot.DEBUG)
return
resultdata = self.data["query"][self.resultkey]
if isinstance(resultdata, dict):
- logger.debug(u"%s received %s; limit=%s"
- % (self.__class__.__name__, resultdata.keys(),
- self.limit))
+ pywikibot.output(u"%s received %s; limit=%s"
+ % (self.__class__.__name__,
+ resultdata.keys(),
+ self.limit),
+ level=pywikibot.DEBUG)
resultdata = [resultdata[k] for k in sorted(resultdata.keys())]
else:
- logger.debug(u"%s received %s; limit=%s"
- % (self.__class__.__name__, resultdata,
- self.limit))
+ pywikibot.output(u"%s received %s; limit=%s"
+ % (self.__class__.__name__,
+ resultdata,
+ self.limit),
+ level=pywikibot.DEBUG)
if "normalized" in self.data["query"]:
self.normalized = dict((item['to'], item['from'])
for item in
@@ -581,13 +605,13 @@
This class can be used for any of the query types that are listed in the
API documentation as being able to be used as a generator. Instances of
this class iterate Page objects.
-
+
"""
def __init__(self, generator, **kwargs):
"""
Required and optional parameters are as for C{Request}, except that
action=query is assumed and generator is required.
-
+
@param generator: the "generator=" type from api.php
@type generator: str
@@ -614,7 +638,7 @@
This can be overridden in subclasses to return a different type
of object.
-
+
"""
p = pywikibot.Page(self.site, pagedata['title'], pagedata['ns'])
update_page(p, pagedata)
@@ -657,7 +681,7 @@
"""
Required and optional parameters are as for C{Request}, except that
action=query is assumed and prop is required.
-
+
@param prop: the "property=" type from api.php
@type prop: str
@@ -684,7 +708,7 @@
"""
Required and optional parameters are as for C{Request}, except that
action=query is assumed and listaction is required.
-
+
@param listaction: the "list=" type from api.php
@type listaction: str
@@ -700,7 +724,7 @@
def __init__(self, logtype, **kwargs):
ListGenerator.__init__(self, "logevents", **kwargs)
- import logentries
+ import logentries
self.entryFactory = logentries.LogEntryFactory(logtype)
def result(self, pagedata):
@@ -715,7 +739,7 @@
Parameters are all ignored.
Returns cookie data if succesful, None otherwise.
-
+
"""
if hasattr(self, '_waituntil'):
if datetime.now() < self._waituntil: