Revision: 7346
Author: siebrand
Date: 2009-10-02 09:22:46 +0000 (Fri, 02 Oct 2009)
Log Message:
-----------
Preparing for 'rewrite branch readiness' changes
Added Paths:
-----------
branches/rewrite/scripts/blockpageschecker.py
Copied: branches/rewrite/scripts/blockpageschecker.py (from rev 7345, trunk/pywikipedia/blockpageschecker.py)
===================================================================
--- branches/rewrite/scripts/blockpageschecker.py (rev 0)
+++ branches/rewrite/scripts/blockpageschecker.py 2009-10-02 09:22:46 UTC (rev 7346)
@@ -0,0 +1,439 @@
+# -*- coding: utf-8 -*-
+"""
+This is a script originally written by Wikihermit and then rewritten by Filnik,
+to delete the templates used to warn in the pages that a page is blocked,
+when the page isn't blocked at all. Indeed, very often sysops block the pages
+for a setted time but then the forget to delete the warning! This script is useful
+if you want to delete those useless warning left in these pages.
+
+Parameters:
+
+These command line parameters can be used to specify which pages to work on:
+
+¶ms;
+
+-xml Retrieve information from a local XML dump (pages-articles
+ or pages-meta-current, see http://download.wikimedia.org).
+ Argument can also be given as "-xml:filename".
+
+-page Only edit a specific page.
+ Argument can also be given as "-page:pagetitle". You can
+ give this parameter multiple times to edit multiple pages.
+
+-protectedpages: Check all the blocked pages (useful when you have not categories
+ or when you have problems with them. (add the namespace after ":" where
+ you want to check - default checks all protected pages)
+
+-moveprotected: Same as -protectedpages, for moveprotected pages
+
+Furthermore, the following command line parameters are supported:
+
+-always Doesn't ask every time if the bot should make the change or not, do it always.
+
+-debug When the bot can't delete the template from the page (wrong regex or something like that)
+ it will ask you if it should open the page on your browser.
+ (attention: pages included may give false positives..)
+
+-move The bot will check if the page is blocked also for the move option, not only for edit
+
+--- Warning! ---
+You have to edit this script in order to add your preferences
+otherwise the script won't work!
+
+If you have problems, ask on botwiki ( http://botwiki.sno.cc )
+or on IRC (#pywikipediabot)
+
+--- Example of how to use the script ---
+
+python blockpageschecker.py -always
+
+python blockpageschecker.py -cat:Geography -always
+
+python blockpageschecker.py -debug -protectedpages:4
+
+"""
+#
+# (C) Monobi a.k.a. Wikihermit, 2007
+# (C) Filnik, 2007-2008-2009
+# (C) NicDumZ, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id: blockpageschecker.py,v 1.5 2008/04/24 19.40.00 filnik Exp$'
+#
+
+import re, webbrowser
+import wikipedia, catlib, pagegenerators, config
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+ '¶ms;': pagegenerators.parameterHelp,
+}
+
+#######################################################
+#--------------------- PREFERENCES -------------------#
+################### -- Edit below! -- #################
+
+# Every table below is keyed by project language code. In the four
+# protection tables and in templateUnique, a value of None means that no
+# template of that kind is configured for the project; otherwise the value is
+# a list of regexes, each one matching a spelling of the warning template.
+#
+# NOTE(review): in the 'zh' patterns the "|" right after the template name
+# (e.g. "Protected|(?:[Ss]|...") is not escaped, so it acts as a top-level
+# regex alternation instead of matching a literal pipe - presumably "\|" (or
+# "/", as used in templateNoRegex) was intended; confirm with zh users.
+
+# Regex to get the semi-protection template
+templateSemiProtection = {
+ 'en': None,
+ 'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccoparziale(?:|[ _]scad\|.*?|\|.*?)\}\}',
+ r'\{\{(?:[Tt]emplate:|)[Aa]bp(?:|[ _]scad\|(?:.*?))\}\}'],
+ 'fr': [ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)[Ss]emi[- ]?protection(|[^\}]*)\}\}'],
+ 'ja':[ur'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)半保護(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
+ 'zh':[ur'\{\{(?:[Tt]emplate:|)Protected|(?:[Ss]|[Ss]emi|半)(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Mini-protected|(?:[Ss]|[Ss]emi|半)(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Protected-logo|(?:[Ss]|[Ss]emi|半)(?:\|.+|)\}\}(\n+?|)'],
+ }
+# Regex to get the total-protection template
+templateTotalProtection = {
+ 'en': None,
+ 'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad\|(?:.*?)|minaccia|cancellata)\}\}',
+ r'\{\{(?:[Tt]emplate:|)(?:[Cc][Tt]|[Cc]anc fatte|[Cc][Ee])\}\}', r'<div class="toccolours[ _]itwiki[ _]template[ _]avviso">(?:\s|\n)*?[Qq]uesta pagina'],
+ 'fr':[ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)[Pp]rotection(|[^\}]*)\}\}',
+ ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)(?:[Pp]age|[Aa]rchive|[Mm]odèle) protégée?(|[^\}]*)\}\}'],
+ 'ja':[ur'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)保護(?:性急|)(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
+ 'zh':[r'\{\{(?:[Tt]emplate:|)Protected|(?:[Nn]|[Nn]ormal)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Mini-protected|(?:[Nn]|[Nn]ormal)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Protected-logo|(?:[Nn]|[Nn]ormal)(?:\|.+|)\}\}(\n+?|)'],
+ }
+# Regex to get the semi-protection move template
+templateSemiMoveProtection = {
+ 'en': None,
+ 'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[ _]scad\|.*?|\|.*?)\}\}'],
+ 'ja':[ur'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)移動半保護(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
+ 'zh':[r'\{\{(?:[Tt]emplate:|)Protected|(?:MS|ms)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Mini-protected|(?:MS|ms)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Protected-logo|(?:MS|ms)(?:\|.+|)\}\}(\n+?|)'],
+ }
+# Regex to get the total-protection move template
+templateTotalMoveProtection = {
+ 'en': None,
+ 'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[ _]scad\|.*?|\|.*?)\}\}'],
+ 'ja':[ur'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)移動保護(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
+ 'zh':[ur'\{\{(?:[Tt]emplate:|)Protected|(?:[Mm]|[Mm]ove|移[動动])(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Mini-protected|(?:[Mm]|[Mm]ove|移[動动])(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Protected-logo|(?:[Mm]|[Mm]ove|移[動动])(?:\|.+|)\}\}(\n+?|)'],
+ }
+
+# If you use only one template for all the types of protection, put it here.
+# You may use only one template, or a unique template plus some other "old"
+# templates that the script should still check (as on it.wikipedia).
+templateUnique = {
+ 'en': None,
+ 'it': [r'\{\{(?:[Tt]emplate:|)[Pp]rotetta\}\}'],
+}
+
+# Literal template text (no regex) that the bot writes into the page when it
+# replaces a wrong template with the right one.
+# Array: 0 => Semi-block, 1 => Total Block, 2 => Semi-Move, 3 => Total-Move, 4 => template-unique
+# None in a slot means there is no template of that kind to insert.
+templateNoRegex = {
+ 'it':['{{Avvisobloccoparziale}}', '{{Avvisoblocco}}', None, None, '{{Protetta}}'],
+ 'fr':['{{Semi-protection}}', '{{Protection}}', None, None, None],
+ 'ja':[u'{{半保護}}', u'{{保護}}', u'{{移動半保護}}', u'{{移動保護}}', None],
+ 'zh':[u'{{Protected/semi}}',u'{{Protected}}',u'{{Protected/ms}}',u'{{Protected/move}}', None],
+ }
+
+# Categories whose pages are checked when no other page generator is given
+categoryToCheck = {
+ 'en':[u'Category:Protected'],
+ 'ar':[u'تصنيف:محتويات محمية'],
+ 'fr':[u'Category:Page semi-protégée', u'Category:Page protégée', u'Catégorie:Article protégé'],
+ 'he':[u'קטגוריה:ויקיפדיה: דפים מוגנים', u'קטגוריה:ויקיפדיה: דפים מוגנים חלקית'],
+ 'it':[u'Categoria:Pagine protette - scadute', u'Categoria:Pagine semiprotette', u'Categoria:Voci protette'],
+ 'ja':[u'Category:編集保護中の記事',u'Category:編集半保護中の記事',
+ u'Category:移動保護中の記事',],
+ 'pt':[u'Category:!Páginas protegidas', u'Category:!Páginas semiprotegidas'],
+ 'zh':[u'Category:被保护的页面',u'Category:被保護的模板',u'Category:暂时不能移动的页面',
+ u'Category:被半保护的页面',],
+ }
+# Comment (edit summary) used when the Bot edits
+comment = {
+ 'en':u'Bot: Deleting out-dated template',
+ 'ar':u'بوت: حذف قالب قديم',
+ 'fr':u'Robot: Mise à jour des bandeaux de protection',
+ 'he':u'בוט: מסיר תבנית שעבר זמנה',
+ 'it':u'Bot: Tolgo o sistemo template di avviso blocco',
+ 'ja':u'ロボットによる: 保護テンプレート除去',
+ 'pt':u'Bot: Retirando predefinição de proteção',
+ 'zh':u'機器人: 移除過期的保護模板',
+ }
+# Check list to block the users that haven't set their preferences: main()
+# refuses to run when config.mylang is not in this list.
+# NOTE(review): 'en' and 'pt' are listed although they have no entry in
+# templateNoRegex (and 'pt' has none in the regex tables either) - confirm
+# that this is intended.
+project_inserted = ['en', 'fr', 'it', 'ja', 'pt', 'zh']
+
+#######################################################
+#------------------ END PREFERENCES ------------------#
+################## -- Edit above! -- ##################
+
+def understandBlock(text, TTP, TSP, TSMP, TTMP, TU):
+ """ Understand if the page is blocked and if it has the right template """
+ if TTP != None:
+ for catchRegex in TTP: # TTP = templateTotalProtection
+ resultCatch = re.findall(catchRegex, text)
+ if resultCatch:
+ return ('sysop-total', catchRegex)
+ if TSP != None:
+ for catchRegex in TSP:
+ resultCatch = re.findall(catchRegex, text)
+ if resultCatch:
+ return ('autoconfirmed-total', catchRegex)
+ if TU != None:
+ for catchRegex in TU:
+ resultCatch = re.findall(catchRegex, text)
+ if resultCatch:
+ return ('unique', catchRegex)
+ if TSMP != None and TTMP != None and TTP != TTMP and TSP != TSMP:
+ for catchRegex in TTMP:
+ resultCatch = re.findall(catchRegex, text)
+ if resultCatch:
+ return ('sysop-move', catchRegex)
+ for catchRegex in TSMP:
+ resultCatch = re.findall(catchRegex, text)
+ if resultCatch:
+ return ('autoconfirmed-move', catchRegex)
+ return ('editable', r'\A\n') # If editable means that we have no regex, won't change anything with this regex
+
+def debugQuest(site, page):
+ quest = wikipedia.input(u'Do you want to open the page on your [b]rowser, [g]ui or [n]othing?')
+ pathWiki = site.family.nicepath(site.lang)
+ url = 'http://%s%s%s?&redirect=no' % (wikipedia.getSite().hostname(), pathWiki, page.urlname())
+ while 1:
+ if quest.lower() in ['b', 'B']:
+ webbrowser.open(url)
+ break
+ elif quest.lower() in ['g', 'G']:
+ import editarticle
+ editor = editarticle.TextEditor()
+ text = editor.edit(page.get())
+ break
+ elif quest.lower() in ['n', 'N']:
+ break
+ else:
+ wikipedia.output(u'wrong entry, type "b", "g" or "n"')
+ continue
+
+def main():
+ """ Main Function """
+ # Loading the comments
+ global categoryToCheck; global comment; global project_inserted
+ if config.mylang not in project_inserted:
+ wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
+ return
+ # always, define a generator to understand if the user sets one, defining what's genFactory
+ always = False; generator = False; debug = False
+ moveBlockCheck = False; genFactory = pagegenerators.GeneratorFactory()
+ # To prevent Infinite loops
+ errorCount = 0
+ # Load the right site
+ site = wikipedia.getSite()
+ # Loading the default options.
+ for arg in wikipedia.handleArgs():
+ if arg == '-always':
+ always = True
+ elif arg == '-move':
+ moveBlockCheck = True
+ elif arg == '-debug':
+ debug = True
+ elif arg.startswith('-protectedpages'):
+ if len(arg) == 15:
+ generator = site.protectedpages(namespace = 0)
+ else:
+ generator = site.protectedpages(namespace = int(arg[16:]))
+ elif arg.startswith('-moveprotected'):
+ if len(arg) == 14:
+ generator = site.protectedpages(namespace = 0, type = 'move')
+ else:
+ generator = site.protectedpages(namespace = int(arg[16:]),
+ type = 'move')
+ elif arg.startswith('-page'):
+ if len(arg) == 5:
+ generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
+ else:
+ generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+ else:
+ genFactory.handleArg(arg)
+
+ # Take the right templates to use, the category and the comment
+ TSP = wikipedia.translate(site, templateSemiProtection)
+ TTP = wikipedia.translate(site, templateTotalProtection)
+ TSMP = wikipedia.translate(site, templateSemiMoveProtection)
+ TTMP = wikipedia.translate(site, templateTotalMoveProtection)
+ TNR = wikipedia.translate(site, templateNoRegex)
+ TU = wikipedia.translate(site, templateUnique)
+
+ category = wikipedia.translate(site, categoryToCheck)
+ commentUsed = wikipedia.translate(site, comment)
+ if not generator:
+ gen = genFactory.getCombinedGenerator()
+ if not generator:
+ generator = list()
+ wikipedia.output(u'Loading categories...')
+ # Define the category if no other generator has been setted
+ for CAT in category:
+ cat = catlib.Category(site, CAT)
+ # Define the generator
+ gen = pagegenerators.CategorizedPageGenerator(cat)
+ for pageCat in gen:
+ generator.append(pageCat)
+ wikipedia.output(u'Categories loaded, start!')
+ # Main Loop
+ preloadingGen = pagegenerators.PreloadingGenerator(generator, pageNumber = 60)
+ for page in preloadingGen:
+ pagename = page.aslink()
+ wikipedia.output('Loading %s...' % pagename)
+ try:
+ text = page.get()
+ restrictions = page.getRestrictions()
+ except wikipedia.NoPage:
+ wikipedia.output("%s doesn't exist! Skipping..." % pagename)
+ continue
+ except wikipedia.IsRedirectPage:
+ wikipedia.output("%s is a redirect! Skipping..." % pagename)
+ if debug:
+ debugQuest(site, page)
+ continue
+ """
+ # This check does not work :
+ # PreloadingGenerator cannot set correctly page.editRestriction
+ # (see bug #1949476 )
+ if not page.canBeEdited():
+ wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
+ continue
+ """
+ editRestr = restrictions['edit']
+ if editRestr and editRestr[0] == 'sysop':
+ try:
+ config.sysopnames[site.family.name][site.lang]
+ except:
+ wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
+ continue
+
+ # Understand, according to the template in the page, what should be the protection
+ # and compare it with what there really is.
+ TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP, TU)
+ # Only to see if the text is the same or not...
+ oldtext = text
+ # keep track of the changes for each step (edit then move)
+ changes = -1
+
+ if not editRestr:
+ # page is not edit-protected
+ # Deleting the template because the page doesn't need it.
+ if TU != None:
+ replaceToPerform = u'|'.join(TTP + TSP + TU)
+ else:
+ replaceToPerform = u'|'.join(TTP + TSP)
+ text, changes = re.subn('<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
+ if changes == 0:
+ text, changes = re.subn('(%s)' % replaceToPerform, '', text)
+ wikipedia.output(u'The page is editable for all, deleting the template...')
+
+ elif editRestr[0] == 'sysop':
+ # total edit protection
+ if (TemplateInThePage[0] == 'sysop-total' and TTP != None) or (TemplateInThePage[0] == 'unique' and TU != None):
+ msg = 'The page is protected to the sysop'
+ if not moveBlockCheck:
+ msg += ', skipping...'
+ wikipedia.output(msg)
+ else:
+ wikipedia.output(u'The page is protected to the sysop, but the template seems not correct. Fixing...')
+ if TU != None:
+ text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
+ else:
+ text, changes = re.subn(TemplateInThePage[1], TNR[1], text)
+
+ elif TSP != None or TU != None:
+ # implicitely editRestr[0] = 'autoconfirmed', edit-Semi-protection
+ if TemplateInThePage[0] == 'autoconfirmed-total' or TemplateInThePage[0] == 'unique':
+ msg = 'The page is editable only for the autoconfirmed users'
+ if not moveBlockCheck:
+ msg += ', skipping...'
+ wikipedia.output(msg)
+ else:
+ wikipedia.output(u'The page is editable only for the autoconfirmed users, but the template seems not correct. Fixing...')
+ if TU != None:
+ text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
+ else:
+ text, changes = re.subn(TemplateInThePage[1], TNR[0], text)
+
+ if changes == 0:
+ # We tried to fix edit-protection templates, but it did not work.
+ wikipedia.output('Warning : No edit-protection template could be found')
+
+ if moveBlockCheck:
+ # checking move protection now
+ moveRestr = restrictions['move']
+ changes = -1
+
+ if not moveRestr:
+ wikipedia.output(u'The page is movable for all, deleting the template...')
+ # Deleting the template because the page doesn't need it.
+ if TU != None:
+ replaceToPerform = u'|'.join(TSMP + TTMP + TU)
+ else:
+ replaceToPerform = u'|'.join(TSMP + TTMP)
+ text, changes = re.subn('<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
+ if changes == 0:
+ text, changes = re.subn('(%s)' % replaceToPerform, '', text)
+ elif moveRestr[0] == 'sysop':
+ # move-total-protection
+ if (TemplateInThePage[0] == 'sysop-move' and TTMP != None) or (TemplateInThePage[0] == 'unique' and TU != None):
+ wikipedia.output(u'The page is protected from moving to the sysop, skipping...')
+ else:
+ wikipedia.output(u'The page is protected from moving to the sysop, but the template seems not correct. Fixing...')
+ if TU != None:
+ text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
+ else:
+ text, changes = re.subn(TemplateInThePage[1], TNR[3], text)
+
+ elif TSMP != None or TU != None:
+ # implicitely moveRestr[0] = 'autoconfirmed', move-semi-protection
+ if TemplateInThePage[0] == 'autoconfirmed-move' or TemplateInThePage[0] == 'unique':
+ wikipedia.output(u'The page is movable only for the autoconfirmed users, skipping...')
+ else:
+ wikipedia.output(u'The page is movable only for the autoconfirmed users, but the template seems not correct. Fixing...')
+ if TU != None:
+ text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
+ else:
+ text, changes = re.subn(TemplateInThePage[1], TNR[2], text)
+
+ if changes == 0:
+ # We tried to fix move-protection templates, but it did not work.
+ wikipedia.output('Warning : No move-protection template could be found')
+
+
+ if oldtext != text:
+ # Ok, asking if the change has to be performed and do it if yes.
+ wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
+ wikipedia.showDiff(oldtext, text)
+ if not always:
+ choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
+ if choice == 'a':
+ always = True
+ if always or choice == 'y':
+ while 1:
+ try:
+ page.put(text, commentUsed, force=True)
+ except wikipedia.EditConflict:
+ wikipedia.output(u'Edit conflict! skip!')
+ break
+ except wikipedia.ServerError:
+ # Sometimes there is this error that's quite annoying because
+ # can block the whole process for nothing.
+ errorCount += 1
+ if errorCount < 5:
+ wikipedia.output(u'Server Error! Wait..')
+ time.sleep(3)
+ continue
+ else:
+ # Prevent Infinite Loops
+ raise wikipedia.ServerError(u'Fifth Server Error!')
+ except wikipedia.SpamfilterError, e:
+ wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url))
+ break
+ except wikipedia.PageNotSaved, error:
+ wikipedia.output(u'Error putting page: %s' % (error.args,))
+ break
+ except wikipedia.LockedPage:
+ wikipedia.output(u'The page is still protected. Skipping...')
+ break
+ else:
+ # Break only if the errors are one after the other
+ errorCount = 0
+ break
+
+# Run the bot only when the file is executed as a script, not when imported.
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ # Always executed, even when main() raises or the user interrupts it.
+ wikipedia.stopme()