jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Scripts: Use more intelligent generators
......................................................................
[FEAT] Scripts: Use more intelligent generators
Instead of using the pagegenerators as often as possible, this only
uses them when they provide additional functionality. Otherwise it
uses the normal methods of a page. This removes all usages of the
NamespaceFilterPageGenerator in the scripts, which only filters out
invalid pages but still requests them.
Change-Id: Iddc36b040ff010467559ea8fd7523056a511cb6f
---
M pywikibot/pagegenerators.py
M scripts/add_text.py
M scripts/checkimages.py
M scripts/commonscat.py
M scripts/disambredir.py
M scripts/fixing_redirects.py
M scripts/nowcommons.py
M scripts/solve_disambiguation.py
M scripts/spamremove.py
M scripts/templatecount.py
M scripts/unlink.py
M scripts/weblinkchecker.py
12 files changed, 33 insertions(+), 80 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 792e092..601f809 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -1010,7 +1010,8 @@
def CategorizedPageGenerator(category, recurse=False, start=None,
- step=None, total=None, content=False):
+ step=None, total=None, content=False,
+ namespaces=None):
"""Yield all pages in a specific category.
If recurse is True, pages in subcategories are included as well; if
@@ -1026,7 +1027,7 @@
"""
kwargs = dict(recurse=recurse, step=step, total=total,
- content=content)
+ content=content, namespaces=namespaces)
if start:
kwargs['sortby'] = 'sortkey'
kwargs['startsort'] = start
@@ -1473,7 +1474,7 @@
ImageGenerator = FileGenerator
-def PageWithTalkPageGenerator(generator):
+def PageWithTalkPageGenerator(generator, return_talk_only=False):
"""Yield pages and associated talk pages from another generator.
Only yields talk pages if the original generator yields a non-talk page,
@@ -1481,7 +1482,8 @@
"""
for page in generator:
- yield page
+ if not return_talk_only or page.isTalkPage():
+ yield page
if not page.isTalkPage():
yield page.toggleTalkPage()
diff --git a/scripts/add_text.py b/scripts/add_text.py
index 3bfc2b9..ae21071 100644
--- a/scripts/add_text.py
+++ b/scripts/add_text.py
@@ -288,7 +288,6 @@
textfile = None
talkPage = False
reorderEnabled = True
- namespaces = []
# Put the text above or below the text?
up = False
@@ -346,14 +345,7 @@
pywikibot.error("The text to add wasn't given.")
return
if talkPage:
- generator = pagegenerators.PageWithTalkPageGenerator(generator)
- site = pywikibot.Site()
- for namespace in site.namespaces():
- index = site.getNamespaceIndex(namespace)
- if index % 2 == 1 and index > 0:
- namespaces += [index]
- generator = pagegenerators.NamespaceFilterPageGenerator(
- generator, namespaces, site)
+ generator = pagegenerators.PageWithTalkPageGenerator(generator, True)
for page in generator:
(text, newtext, always) = add_text(page, addText, summary, regexSkip,
regexSkipUrl, always, up, True,
diff --git a/scripts/checkimages.py b/scripts/checkimages.py
index 315c606..19f07e6 100644
--- a/scripts/checkimages.py
+++ b/scripts/checkimages.py
@@ -1770,7 +1770,7 @@
firstPageTitle = arg[7:]
firstPageTitle = firstPageTitle.split(":")[1:]
generator = pywikibot.Site().allpages(start=firstPageTitle,
- namespace=6)
+ namespace=6)
repeat = False
elif arg.startswith('-page'):
if len(arg) == 5:
@@ -1804,7 +1804,7 @@
catName = str(arg[5:])
catSelected = pywikibot.Category(pywikibot.Site(),
'Category:%s' % catName)
- generator = pg.CategorizedPageGenerator(catSelected)
+ generator = catSelected.articles(namespaces=[6])
repeat = False
elif arg.startswith('-ref'):
if len(arg) == 4:
@@ -1812,8 +1812,8 @@
u'The references of what page should I parse?'))
elif len(arg) > 4:
refName = str(arg[5:])
- generator = pg.ReferringPageGenerator(
- pywikibot.Page(pywikibot.Site(), refName))
+ ref = pywikibot.Page(pywikibot.Site(), refName)
+ generator = ref.getReferences(namespaces=[6])
repeat = False
if not generator:
@@ -1862,7 +1862,6 @@
Bot.takesettings()
if waitTime:
generator = Bot.wait(waitTime, generator, normal, limit)
- generator = pg.NamespaceFilterPageGenerator(generator, 6, site)
for image in generator:
# Setting the image for the main class
Bot.setParameters(image.title(withNamespace=False))
diff --git a/scripts/commonscat.py b/scripts/commonscat.py
index cc9fb19..6df29f7 100755
--- a/scripts/commonscat.py
+++ b/scripts/commonscat.py
@@ -500,10 +500,7 @@
@type args: list of unicode
"""
options = {}
- generator = None
checkcurrent = False
- ns = []
- ns.append(14)
# Process global args and prepare generator args parser
local_args = pywikibot.handle_args(args)
@@ -527,14 +524,10 @@
primaryCommonscat, commonscatAlternatives = \
CommonscatBot.getCommonscatTemplate(
site.code)
- generator = pagegenerators.NamespaceFilterPageGenerator(
- pagegenerators.ReferringPageGenerator(
- pywikibot.Page(site, u'Template:' + primaryCommonscat),
- onlyTemplateInclusion=True),
- ns,
- site)
-
- if not generator:
+ template_page = pywikibot.Page(site, u'Template:' + primaryCommonscat)
+ generator = template_page.getReferences(namespaces=14,
+ onlyTemplateInclusion=True)
+ else:
generator = genFactory.getCombinedGenerator()
if generator:
diff --git a/scripts/disambredir.py b/scripts/disambredir.py
index 4f3cdcf..8739f3a 100644
--- a/scripts/disambredir.py
+++ b/scripts/disambredir.py
@@ -156,7 +156,6 @@
"""
local_args = pywikibot.handle_args(args)
- generator = None
start = local_args[0] if local_args else '!'
mysite = pywikibot.Site()
@@ -164,17 +163,13 @@
mysite.disambcategory()
except pywikibot.Error as e:
pywikibot.output(e)
- else:
- generator = pagegenerators.CategorizedPageGenerator(
- mysite.disambcategory(), start=start)
-
- if not generator:
pywikibot.showHelp()
return
+ generator = pagegenerators.CategorizedPageGenerator(
+ mysite.disambcategory(), start=start, content=True, namespaces=[0])
+
# only work on articles
- generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
- generator = pagegenerators.PreloadingGenerator(generator)
pagestodo = []
pagestoload = []
for page in generator:
diff --git a/scripts/fixing_redirects.py b/scripts/fixing_redirects.py
index 10de73d..180163e 100644
--- a/scripts/fixing_redirects.py
+++ b/scripts/fixing_redirects.py
@@ -217,8 +217,7 @@
if featured:
featuredList = i18n.translate(mysite, featured_articles)
ref = pywikibot.Page(pywikibot.Site(), featuredList)
- gen = pagegenerators.ReferringPageGenerator(ref)
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
+ gen = ref.getReferences(namespaces=[0])
if not gen:
gen = genFactory.getCombinedGenerator()
if gen:
diff --git a/scripts/nowcommons.py b/scripts/nowcommons.py
index ebdf184..3125fdb 100644
--- a/scripts/nowcommons.py
+++ b/scripts/nowcommons.py
@@ -271,11 +271,10 @@
nowCommonsTemplates = [pywikibot.Page(self.site, title,
ns=10)
for title in self.ncTemplates()]
- gens = [pg.ReferringPageGenerator(t, followRedirects=True,
- onlyTemplateInclusion=True)
+ gens = [t.getReferences(followRedirects=True, namespaces=[6],
+ onlyTemplateInclusion=True)
for t in nowCommonsTemplates]
gen = pg.CombinedPageGenerator(gens)
- gen = pg.NamespaceFilterPageGenerator(gen, [6])
gen = pg.DuplicateFilterPageGenerator(gen)
gen = pg.PreloadingGenerator(gen)
return gen
diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py
index 83d1050..e5c07e2 100644
--- a/scripts/solve_disambiguation.py
+++ b/scripts/solve_disambiguation.py
@@ -1090,15 +1090,9 @@
minimum = int(arg[5:])
elif arg.startswith('-start'):
try:
- if len(arg) <= len('-start:'):
- generator = pagegenerators.CategorizedPageGenerator(
- pywikibot.Site().disambcategory())
- else:
- generator = pagegenerators.CategorizedPageGenerator(
- pywikibot.Site().disambcategory(),
- start=arg[7:])
- generator = pagegenerators.NamespaceFilterPageGenerator(
- generator, [0])
+ generator = pagegenerators.CategorizedPageGenerator(
+ pywikibot.Site().disambcategory(),
+ start=arg[7:], namespaces=[0])
except pywikibot.NoPage:
pywikibot.output("Disambiguation category for your wiki is not
known.")
raise
diff --git a/scripts/spamremove.py b/scripts/spamremove.py
index 92f70a2..0f8303e 100755
--- a/scripts/spamremove.py
+++ b/scripts/spamremove.py
@@ -34,7 +34,7 @@
#
import pywikibot
-from pywikibot import pagegenerators, i18n
+from pywikibot import i18n
from pywikibot.editor import TextEditor
@@ -67,10 +67,7 @@
return
mysite = pywikibot.Site()
- pages = mysite.exturlusage(spamSite)
- if namespaces:
- pages = pagegenerators.NamespaceFilterPageGenerator(pages, namespaces)
- pages = pagegenerators.PreloadingGenerator(pages)
+ pages = mysite.exturlusage(spamSite, namespaces=namespaces, content=True)
summary = i18n.twtranslate(mysite, 'spamremove-remove',
{'url': spamSite})
diff --git a/scripts/templatecount.py b/scripts/templatecount.py
index 036b012..8e36b94 100644
--- a/scripts/templatecount.py
+++ b/scripts/templatecount.py
@@ -40,7 +40,6 @@
import datetime
import pywikibot
-from pywikibot import pagegenerators
templates = ['ref', 'note', 'ref label', 'note label',
'reflist']
@@ -100,12 +99,8 @@
mytpl = mysite.ns_index(mysite.template_namespace())
for template in templates:
transcludingArray = []
- gen = pagegenerators.ReferringPageGenerator(
- pywikibot.Page(mysite, template, ns=mytpl),
- onlyTemplateInclusion=True)
- if namespaces:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen,
- namespaces)
+ gen = pywikibot.Page(mysite, template, ns=mytpl).getReferences(
+ namespaces=namespaces, onlyTemplateInclusion=True)
for page in gen:
transcludingArray.append(page)
yield template, transcludingArray
diff --git a/scripts/unlink.py b/scripts/unlink.py
index ea24f7c..de05fdd 100755
--- a/scripts/unlink.py
+++ b/scripts/unlink.py
@@ -31,7 +31,7 @@
import re
import pywikibot
from pywikibot.editor import TextEditor
-from pywikibot import pagegenerators, i18n, Bot
+from pywikibot import i18n, Bot
class UnlinkBot(Bot):
@@ -49,10 +49,8 @@
self.pageToUnlink = pageToUnlink
linktrail = self.pageToUnlink.site.linktrail()
- gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
- if self.getOption('namespaces') != []:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen,
self.getOption('namespaces'))
- self.generator = pagegenerators.PreloadingGenerator(gen)
+ self.generator = pageToUnlink.getReferences(
+ namespaces=self.getOption('namespaces'), content=True)
# The regular expression which finds links. Results consist of four
# groups:
#
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index a94bb79..ba33030 100644
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -214,7 +214,7 @@
continue
self.skipping = False
page = pywikibot.Page(self.site, entry.title)
- if not self.namespaces == []:
+ if self.namespaces:
if page.namespace() not in self.namespaces:
continue
found = False
@@ -852,9 +852,6 @@
"""
gen = None
xmlFilename = None
- # Which namespaces should be processed?
- # default to [] which means all namespaces will be processed
- namespaces = []
HTTPignore = []
day = 7
@@ -867,11 +864,6 @@
config.report_dead_links_on_talk = True
elif arg == '-notalk':
config.report_dead_links_on_talk = False
- elif arg.startswith('-namespace:'):
- try:
- namespaces.append(int(arg[11:]))
- except ValueError:
- namespaces.append(arg[11:])
elif arg == '-repeat':
gen = RepeatPageGenerator()
elif arg.startswith('-ignore:'):
@@ -897,13 +889,11 @@
xmlStart
except NameError:
xmlStart = None
- gen = XmlDumpPageGenerator(xmlFilename, xmlStart, namespaces)
+ gen = XmlDumpPageGenerator(xmlFilename, xmlStart, genFactory.namespaces)
if not gen:
gen = genFactory.getCombinedGenerator()
if gen:
- if namespaces != []:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
# fetch at least 240 pages simultaneously from the wiki, but more if
# a high thread number is set.
pageNumber = max(240, config.max_external_links * 2)
--
To view, visit
https://gerrit.wikimedia.org/r/185982
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Iddc36b040ff010467559ea8fd7523056a511cb6f
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>