http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9912
Revision: 9912
Author: drtrigon
Date: 2012-02-19 20:39:42 +0000 (Sun, 19 Feb 2012)
Log Message:
-----------
removed artifact from DrTrigonBot
Modified Paths:
--------------
trunk/pywikipedia/clean_sandbox.py
Modified: trunk/pywikipedia/clean_sandbox.py
===================================================================
--- trunk/pywikipedia/clean_sandbox.py 2012-02-19 14:24:41 UTC (rev 9911)
+++ trunk/pywikipedia/clean_sandbox.py 2012-02-19 20:39:42 UTC (rev 9912)
@@ -185,7 +185,7 @@
if self.user:
endpos = pos + len(translatedContent.strip())
if (pos < 0) or (endpos == len(text)):
- pywikibot.output(u'The user sandbox is still clean or not set up, no change necessary.')
+ pywikibot.output(u'The user sandbox is still clean, no change necessary.')
else:
sandboxPage.put(text[:endpos], translatedMsg)
pywikibot.output(u'Standard content was changed, user sandbox cleaned.')
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9911
Revision: 9911
Author: drtrigon
Date: 2012-02-19 14:24:41 +0000 (Sun, 19 Feb 2012)
Log Message:
-----------
Adding capabilities of DrTrigonBot 'textlib' script; 'removeHTMLParts'
(this is a follow-up or bug fix for r9902 also)
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/textlib.py
Modified: trunk/pywikipedia/pywikibot/textlib.py
===================================================================
--- trunk/pywikipedia/pywikibot/textlib.py 2012-02-19 12:49:35 UTC (rev 9910)
+++ trunk/pywikipedia/pywikibot/textlib.py 2012-02-19 14:24:41 UTC (rev 9911)
@@ -16,6 +16,7 @@
import wikipedia as pywikibot
import re
+from HTMLParser import HTMLParser
def unescape(s):
"""Replace escaped HTML-special characters by their originals"""
@@ -219,6 +220,40 @@
return toRemoveR.sub('', text)
+def removeHTMLParts(text, keeptags = ['tt', 'nowiki', 'small', 'sup']):
+ """
+ Return text without portions where HTML markup is disabled
+
+ Parts that can/will be removed are --
+ * HTML and all wiki tags
+
+ The exact set of parts which should NOT be removed can be passed as the
+ 'keeptags' parameter, which defaults to ['tt', 'nowiki', 'small', 'sup'].
+ """
+ # try to merge with 'removeDisabledParts()' above into one generic function
+
+ # thanks to http://www.hellboundhackers.org/articles/841-using-python-39;s-htmlparser-c…
+ parser = _GetDataHTML()
+ parser.keeptags = keeptags
+ parser.feed(text)
+ parser.close()
+ return parser.textdata
+
+# thanks to http://docs.python.org/library/htmlparser.html
+class _GetDataHTML(HTMLParser):
+ textdata = u''
+ keeptags = []
+
+ def handle_data(self, data):
+ self.textdata += data
+
+ def handle_starttag(self, tag, attrs):
+ if tag in self.keeptags: self.textdata += u"<%s>" % tag
+
+ def handle_endtag(self, tag):
+ if tag in self.keeptags: self.textdata += u"</%s>" % tag
+
+
def isDisabled(text, index, tags = ['*']):
"""
Return True if text[index] is disabled, e.g. by a comment or by nowiki tags.
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9909
Revision: 9909
Author: drtrigon
Date: 2012-02-17 23:09:15 +0000 (Fri, 17 Feb 2012)
Log Message:
-----------
follow-up to r9905 '-debug' replaced with new '-simulate' that allows switching off write access
Modified Paths:
--------------
trunk/pywikipedia/clean_sandbox.py
trunk/pywikipedia/config.py
trunk/pywikipedia/query.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/clean_sandbox.py
===================================================================
--- trunk/pywikipedia/clean_sandbox.py 2012-02-17 20:13:21 UTC (rev 9908)
+++ trunk/pywikipedia/clean_sandbox.py 2012-02-17 23:09:15 UTC (rev 9909)
@@ -187,12 +187,10 @@
if (pos < 0) or (endpos == len(text)):
pywikibot.output(u'The user sandbox is still clean or not set up, no change necessary.')
else:
- if not pywikibot.debug:
- sandboxPage.put(text[:endpos], translatedMsg)
+ sandboxPage.put(text[:endpos], translatedMsg)
pywikibot.output(u'Standard content was changed, user sandbox cleaned.')
else:
- if not pywikibot.debug:
- sandboxPage.put(translatedContent, translatedMsg)
+ sandboxPage.put(translatedContent, translatedMsg)
pywikibot.output(u'Standard content was changed, sandbox cleaned.')
else:
diff = minutesDiff(sandboxPage.editTime(), time.strftime("%Y%m%d%H%M%S", time.gmtime()))
@@ -200,8 +198,7 @@
print sandboxPage.editTime(), time.strftime("%Y%m%d%H%M%S", time.gmtime())
#Is the last edit more than 5 minutes ago?
if diff >= self.delay:
- if not pywikibot.debug:
- sandboxPage.put(translatedContent, translatedMsg)
+ sandboxPage.put(translatedContent, translatedMsg)
else: #wait for the rest
pywikibot.output(u'Sleeping for %d minutes.' % (self.delay-diff))
time.sleep((self.delay-diff)*60)
@@ -238,9 +235,6 @@
pywikibot.showHelp('clean_sandbox')
return
- if pywikibot.debug:
- pywikibot.output(u'\03{lightyellow}DEBUG: write actions blocked.\03{default}')
-
bot = SandboxBot(hours, no_repeat, delay, user)
try:
bot.run()
Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2012-02-17 20:13:21 UTC (rev 9908)
+++ trunk/pywikipedia/config.py 2012-02-17 23:09:15 UTC (rev 9909)
@@ -464,6 +464,13 @@
# up to 30 minutes)
retry_on_fail = True
+# Defines what actions the bots are NOT allowed to do (e.g. 'edit') on wikipedia
+# servers. Setting this to:
+# actions_to_block = ['edit', 'watch', 'move', 'delete', 'undelete', 'protect']
+# allows simulation runs of bots to be carried out without changing any page on
+# the server side.
+actions_to_block = []
+
# How many pages should be put to a queue in asynchroneous mode.
# If maxsize is <= 0, the queue size is infinite.
# Increasing this value will increase memory space but could speed up
Modified: trunk/pywikipedia/query.py
===================================================================
--- trunk/pywikipedia/query.py 2012-02-17 20:13:21 UTC (rev 9908)
+++ trunk/pywikipedia/query.py 2012-02-17 23:09:15 UTC (rev 9909)
@@ -42,6 +42,18 @@
sysop=False, back_response=False):
"""Get data from the query api, and convert it into a data object
"""
+ if ('action' in params) and (params['action'] in pywikibot.config.actions_to_block):
+ pywikibot.output(u'\03{lightyellow}SIMULATION: %s action blocked.\03{default}'%\
+ params['action'])
+ jsontext_dummy = {params['action']: {u'result':u''}}
+ if back_response:
+ import StringIO
+ res_dummy = StringIO.StringIO()
+ res_dummy.__dict__.update({u'code': 0, u'msg': u''})
+ return res_dummy, jsontext_dummy
+ else:
+ return jsontext_dummy
+
if not site:
site = pywikibot.getSite()
data = {}
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2012-02-17 20:13:21 UTC (rev 9908)
+++ trunk/pywikipedia/wikipedia.py 2012-02-17 23:09:15 UTC (rev 9909)
@@ -7762,6 +7762,9 @@
elif arg == '-debug':
debug = True
config.special_page_limit = 500
+ elif arg == '-simulate':
+ config.actions_to_block = ['edit', 'watch', 'move', 'delete',
+ 'undelete', 'protect']
else:
# the argument is not global. Let the specific bot script care
# about it.
@@ -7827,10 +7830,14 @@
-verbose Have the bot provide additional output that may be
-v useful in debugging.
+-debug
-cosmeticchanges Toggles the cosmetic_changes setting made in config.py or
-cc user_config.py to its inverse and overrules it. All other
settings and restrictions are untouched.
+
+-simulate Toggles writing to the wikipedia server. Useful for testing
+ and debugging of new code.
'''# % moduleName
output(globalHelp, toStdout=True)
try:
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9908
Revision: 9908
Author: drtrigon
Date: 2012-02-17 20:13:21 +0000 (Fri, 17 Feb 2012)
Log Message:
-----------
follow-up to r9899 more and clearer docu
Modified Paths:
--------------
trunk/pywikipedia/clean_sandbox.py
Modified: trunk/pywikipedia/clean_sandbox.py
===================================================================
--- trunk/pywikipedia/clean_sandbox.py 2012-02-17 19:56:22 UTC (rev 9907)
+++ trunk/pywikipedia/clean_sandbox.py 2012-02-17 20:13:21 UTC (rev 9908)
@@ -19,8 +19,9 @@
> ATTENTION: on most wiki THIS IS FORBIDEN FOR BOTS ! <
> (please talk with your admin first) <
Since it is considered bad style to edit user page with-
- out permission, the 'user_sandboxTitle' for given
+ out permission, the 'user_sandboxTemplate' for given
language has to be set-up (no fall-back will be used).
+ All pages containing that template will get cleaned.
Please be also aware that the rules when to clean the
user sandbox differ from those for project sandbox.
@@ -118,7 +119,7 @@
'de': u'{{Benutzer:DrTrigonBot/Spielwiese}}',
}
-user_sandboxTitle = {
+user_sandboxTemplate = {
'de': u'User:DrTrigonBot/Spielwiese',
}
@@ -133,12 +134,12 @@
self.user = user
self.site = pywikibot.getSite()
if self.user:
- localSandboxTitle = pywikibot.translate(self.site, user_sandboxTitle)
+ localSandboxTitle = pywikibot.translate(self.site, user_sandboxTemplate)
localSandbox = pywikibot.Page(self.site, localSandboxTitle)
content.update(user_content)
sandboxTitle[self.site.lang] = [item.title() \
for item in localSandbox.getReferences(onlyTemplateInclusion=True)]
- if self.site.lang not in user_sandboxTitle:
+ if self.site.lang not in user_sandboxTemplate:
sandboxTitle[self.site.lang] = []
pywikibot.output(u'Not properly set-up to run in user namespace!')
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9907
Revision: 9907
Author: binbot
Date: 2012-02-17 19:56:22 +0000 (Fri, 17 Feb 2012)
Log Message:
-----------
Nothing serious. :-) Minor corrections in comments.
Modified Paths:
--------------
trunk/pywikipedia/cosmetic_changes.py
Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py 2012-02-17 16:36:18 UTC (rev 9906)
+++ trunk/pywikipedia/cosmetic_changes.py 2012-02-17 19:56:22 UTC (rev 9907)
@@ -51,7 +51,7 @@
"""
#
# (C) xqt, 2009-2011
-# (C) Pywikipedia bot team, 2006-2010
+# (C) Pywikipedia bot team, 2006-2012
#
# Distributed under the terms of the MIT license.
#
@@ -66,9 +66,9 @@
warning = """
ATTENTION: You can run this script as a stand-alone for testing purposes.
-However, the changes are that are made are only minor, and other users
+However, the changes that are made are only minor, and other users
might get angry if you fill the version histories and watchlists with such
-irrelevant changes."""
+irrelevant changes. Some wikis prohibit stand-alone running."""
docuReplacements = {
'&params;': pagegenerators.parameterHelp,
@@ -235,7 +235,7 @@
# The PyWikipediaBot is no longer allowed to touch categories on the
# German Wikipedia. See
- # http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…
+ # http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Positi…
# ignoring nn-wiki of cause of the comment line above iw section
if not self.template and not '{{Personendaten' in text:
categories = pywikibot.getCategoryLinks(text, site = self.site)
@@ -392,7 +392,8 @@
return match.group()
# Remove unnecessary initial and final spaces from label.
- # Please note that some editors prefer spaces around pipes. (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
+ # Please note that some editors prefer spaces around pipes.
+ #(See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
if label is not None:
# Remove unnecessary leading spaces from label,
# but remember if we did this because we want
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9904
Revision: 9904
Author: drtrigon
Date: 2012-02-17 13:29:07 +0000 (Fri, 17 Feb 2012)
Log Message:
-----------
bug fix; scope has changed, thus output was not working
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/comms/http.py
Modified: trunk/pywikipedia/pywikibot/comms/http.py
===================================================================
--- trunk/pywikipedia/pywikibot/comms/http.py 2012-02-17 12:19:57 UTC (rev 9903)
+++ trunk/pywikipedia/pywikibot/comms/http.py 2012-02-17 13:29:07 UTC (rev 9904)
@@ -102,12 +102,12 @@
u'Page %s could not be retrieved. Check your virus wall.'
% url)
elif e.code == 504:
- output(u'HTTPError: %s %s' % (e.code, e.msg))
+ pywikibot.output(u'HTTPError: %s %s' % (e.code, e.msg))
if retry:
retry_attempt += 1
if retry_attempt > config.maxretries:
raise MaxTriesExceededError()
- output(
+ pywikibot.output(
u"WARNING: Could not open '%s'.Maybe the server or\n your connection is down. Retrying in %i minutes..."
% (url, retry_idle_time))
time.sleep(retry_idle_time * 60)
@@ -119,15 +119,15 @@
continue
raise
else:
- output(u"Result: %s %s" % (e.code, e.msg))
+ pywikibot.output(u"Result: %s %s" % (e.code, e.msg))
raise
except Exception, e:
- output(u'%s' %e)
+ pywikibot.output(u'%s' %e)
if retry:
retry_attempt += 1
if retry_attempt > config.maxretries:
raise MaxTriesExceededError()
- output(
+ pywikibot.output(
u"WARNING: Could not open '%s'. Maybe the server or\n your connection is down. Retrying in %i minutes..."
% (url, retry_idle_time))
time.sleep(retry_idle_time * 60)
@@ -160,7 +160,7 @@
# We need to split it to get a value
content_length = int(headers.get('content-length', '0').split(',')[0])
if content_length != len(text) and 'content-length' in headers:
- output(
+ pywikibot.output(
u'Warning! len(text) does not match content-length: %s != %s'
% (len(text), content_length))
return request(site, uri, retry, sysop, data, compress, no_hostname,
@@ -175,7 +175,7 @@
charset = m.group(1)
else:
if verbose:
- output(u"WARNING: No character set found.")
+ pywikibot.output(u"WARNING: No character set found.")
# UTF-8 as default
charset = 'utf-8'
# Check if this is the charset we expected
@@ -186,9 +186,9 @@
except UnicodeDecodeError, e:
print e
if no_hostname:
- output(u'ERROR: Invalid characters found on %s, replaced by \\ufffd.' % uri)
+ pywikibot.output(u'ERROR: Invalid characters found on %s, replaced by \\ufffd.' % uri)
else:
- output(u'ERROR: Invalid characters found on %s://%s%s, replaced by \\ufffd.' % (site.protocol(), site.hostname(), uri))
+ pywikibot.output(u'ERROR: Invalid characters found on %s://%s%s, replaced by \\ufffd.' % (site.protocol(), site.hostname(), uri))
# We use error='replace' in case of bad encoding.
text = unicode(text, charset, errors = 'replace')