jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/638017 )
Change subject: [bugfix] Enable APISite.exturlusage() with default parameters
......................................................................
[bugfix] Enable APISite.exturlusage() with default parameters
- the url parameter of APISite.exturlusage() is None by default, which
is not a sequence, so it must be checked before it is searched.
- use rpartition to retrieve protocol and url
Bug: T266989
Change-Id: I392fbbbdb5a41c6e435f41ba9673685c0b057c68
---
M pywikibot/site/__init__.py
1 file changed, 15 insertions(+), 13 deletions(-)
Approvals:
Mpaa: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/site/__init__.py b/pywikibot/site/__init__.py
index f4408a1..0c28349 100644
--- a/pywikibot/site/__init__.py
+++ b/pywikibot/site/__init__.py
@@ -3890,7 +3890,7 @@
@see: U{https://www.mediawiki.org/wiki/API:Exturlusage}
- @param url: The URL to search for (with ot without the protocol
+ @param url: The URL to search for (with or without the protocol
prefix); this may include a '*' as a wildcard, only at the start
of the hostname
@param namespaces: list of namespace numbers to fetch contribs from
@@ -3899,21 +3899,23 @@
@param protocol: Protocol to search for, likely http or https, http by
default. Full list shown on Special:LinkSearch wikipage
"""
- separator = '://'
- if separator in url:
- found_protocol = url[:url.index(separator)]
- url = url[url.index(separator) + len(separator):]
- if protocol and protocol != found_protocol:
- raise ValueError('Protocol was specified, but a different one '
- 'was found in searched url')
- protocol = found_protocol
+ if url is not None:
+ found_protocol, _, url = url.rpartition('://')
+
+ # If url is * we make it None in order to search for every page
+ # with any URL.
+ if url == '*':
+ url = None
+
+ if found_protocol:
+ if protocol and protocol != found_protocol:
+ raise ValueError('Protocol was specified, but a different '
+ 'one was found in searched url')
+ protocol = found_protocol
+
if not protocol:
protocol = 'http'
- # If url is * we make it None in order to search for every page
- # with any URL.
- if url == '*':
- url = None
return self._generator(api.PageGenerator, type_arg='exturlusage',
geuquery=url, geuprotocol=protocol,
namespaces=namespaces,
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/638017
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I392fbbbdb5a41c6e435f41ba9673685c0b057c68
Gerrit-Change-Number: 638017
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/458792 )
Change subject: [IMPR] Improvements for MakeCatBot.include()
......................................................................
[IMPR] Improvements for MakeCatBot.include()
- set sortKey directly in workingcat
- improve link checking in checklinks(), which uses chained generators
and prints progress dots while iterating over the linked pages
- introduce print_dot method for that
- simplify changing category by using change_category method
- "checked" container becomes a set
- removes checkforward which is always True
6th step detached from Id7ca3461d for easier reviewing
Change-Id: I32238bdb412783be2e84c04be7c09bd2c652b51f
---
M scripts/makecat.py
1 file changed, 50 insertions(+), 47 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/makecat.py b/scripts/makecat.py
index 7d72ca5..76c31bf 100755
--- a/scripts/makecat.py
+++ b/scripts/makecat.py
@@ -50,6 +50,7 @@
# Distributed under the terms of the MIT license.
#
import codecs
+from itertools import chain
from textwrap import fill
import pywikibot
@@ -77,7 +78,6 @@
})
super().__init__(**kwargs)
self.skipdates = self.opt.nodate
- self.checkforward = True
self.checkbackward = not self.opt.forward
self.checkbroken = not (self.opt.forward
and self.opt.exist)
@@ -107,6 +107,12 @@
color_format('\n>>> {lightpurple}{0}{default} <<<',
page.title()))
+ @staticmethod
+ def print_dot(condition=True):
+ """Print a single dot if conditon is True."""
+ if condition:
+ pywikibot.output('.', newline=False)
+
def needcheck(self, page):
"""Verify whether the current page may be processed."""
global checked
@@ -114,53 +120,50 @@
or page in checked
or self.skipdates and page.autoFormat()[0] is not None)
- def change_category(self, page, catlist):
+ def change_category(self, page, categories, summary):
"""Change the category of page."""
- pass
+ global workingcat, parentcats
+ for category in categories:
+ if self.removeparent and category in parentcats:
+ page.change_category(workingcat, summary=summary)
+ return True
+ return False
- def include(self, pl, checklinks=True, realinclude=True, linkterm=None,
+ def include(self, page, checklinks=True, realinclude=True, linkterm=None,
summary=''):
"""Include the current page to the working category."""
global workingcat, parentcats
- global checked, tocheck
- cl = checklinks
- mysite = self.site
+ actualworkingcat = workingcat
if linkterm:
- actualworkingcat = pywikibot.Category(mysite, workingcat.title(),
- sort_key=linkterm)
- else:
- actualworkingcat = workingcat
- if realinclude:
- try:
- text = pl.get()
- except pywikibot.NoPage:
- pass
- except pywikibot.IsRedirectPage:
- cl = True
+ actualworkingcat.sortKey = linkterm
+ if realinclude and page.exists():
+ if page.isRedirectPage():
+ checklinks = True
else:
- cats = list(pl.categories())
- if workingcat not in cats:
- for c in cats:
- if c in parentcats:
- if self.removeparent:
- pl.change_category(actualworkingcat,
- summary=summary)
- break
- else:
- pl.put(textlib.replaceCategoryLinks(
- text, cats + [actualworkingcat], site=pl.site),
- summary=summary)
- if cl:
- if self.checkforward:
- for page2 in pl.linkedPages():
- if self.needcheck(page2):
- tocheck.append(page2)
- checked[page2] = page2
- if self.checkbackward:
- for ref_page in pl.getReferences():
- if self.needcheck(ref_page):
- tocheck.append(ref_page)
- checked[ref_page] = ref_page
+ cats = list(page.categories())
+ if workingcat not in cats \
+ and not self.change_category(page, cats, summary):
+ newtext = textlib.replaceCategoryLinks(
+ page.text, cats + [actualworkingcat],
+ site=page.site)
+ page.put(newtext, summary=summary)
+
+ if checklinks:
+ self.checklinks(page)
+
+ def checklinks(self, page):
+ """Check whether the page has to be added to the tocheck deque."""
+ global checked, tocheck
+ pywikibot.output('\nChecking links for "{}"...'
+ .format(page.title()), newline=False)
+ generators = [page.linkedPages()]
+ if self.checkbackward:
+ generators.append(page.getReferences())
+ for i, linked_page in enumerate(chain(*generators)):
+ self.print_dot(not i % 25)
+ if self.needcheck(linked_page):
+ tocheck.append(linked_page)
+ checked.add(linked_page)
def skip_page(self, page):
"""Check whether the page is to be skipped."""
@@ -177,7 +180,7 @@
pl2 = pl.getRedirectTarget()
if self.needcheck(pl2):
tocheck.append(pl2)
- checked[pl2] = pl2
+ checked.add(pl2)
return
ctoshow = 500
pywikibot.output('')
@@ -225,7 +228,7 @@
elif answer == 'o':
pagetitle = pywikibot.input('Specify page to add:')
page = pywikibot.Page(pywikibot.Site(), pagetitle)
- if page not in checked.keys():
+ if page not in checked:
self.include(page, summary=summary)
elif answer == 's':
if not pl.exists():
@@ -268,7 +271,7 @@
global checked, tocheck
global excludefile
- checked = {}
+ checked = set()
tocheck = DequeGenerator()
workingcatname = ''
@@ -311,7 +314,7 @@
if not line:
continue
pl = pywikibot.Page(mysite, line)
- checked[pl] = pl
+ checked.add(pl)
excludefile = codecs.open(filename, 'a', encoding=mysite.encoding())
except IOError:
@@ -331,7 +334,7 @@
for cat in subcatlist:
artlist = list(cat.articles())
for page in artlist:
- checked[page] = page
+ checked.add(page)
# Fetch articles in category, and mark as already checked (seen)
# If category is empty, ask user if they want to look for pages
@@ -349,7 +352,7 @@
articles = [pl]
for pl in articles:
- checked[pl] = pl
+ checked.add(pl)
bot.include(pl, summary=summary)
gen = pagegenerators.DequePreloadingGenerator(tocheck)
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/458792
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I32238bdb412783be2e84c04be7c09bd2c652b51f
Gerrit-Change-Number: 458792
Gerrit-PatchSet: 5
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Dalba <dalba.wiki(a)gmail.com>
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/458774 )
Change subject: [IMPR] Use input_choice with makecat.py
......................................................................
[IMPR] Use input_choice with makecat.py
- additional choice options [e]xtend, [r]educe, [h]elp, [q]uit,
initialized by the _setup_menubar() helper classmethod
- static method highlight_title for title highlighting
- improvements for [l]ist and [m]ore options
5th step detached from Id7ca3461d for easier reviewing
Change-Id: Iffb6e7a150bb49c5572c90ea1124722be37518a1
---
M scripts/makecat.py
1 file changed, 55 insertions(+), 14 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/makecat.py b/scripts/makecat.py
index e4f59af..7d72ca5 100755
--- a/scripts/makecat.py
+++ b/scripts/makecat.py
@@ -25,13 +25,16 @@
-all Work on all pages (default: only main namespace)
When running the bot, you will get one by one a number by pages.
-You can choose
+You can choose with small menu bar:
* [y]es - include the page
* [n]o - do not include the page or
* [i]gnore - do not include the page, but if you meet it again, ask again.
+* [e]xtend - extend menu bar
+* [h]elp - show options to be choosed
+* [q]uit - leave the bot
-Other possibilities
+Other possibilities with extended menu bar:
* [m]ore - show more content of the page starting from the beginning
* sort [k]ey - add with sort key like [[Category|Title]]
@@ -39,6 +42,7 @@
* [c]heck - check links to and from the page, but do not add the page itself
* [o]ther - add another page, which may have been included before
* [l]ist - show current list of pages to include or to check
+* [r]educe - reduce menu bar
"""
# (C) Pywikibot team, 2004-2020
@@ -46,6 +50,7 @@
# Distributed under the terms of the MIT license.
#
import codecs
+from textwrap import fill
import pywikibot
@@ -53,6 +58,7 @@
from pywikibot import pagegenerators, i18n, textlib
from pywikibot.tools import DequeGenerator
+from pywikibot.tools.formatter import color_format
class MakeCatBot(SingleSiteBot, NoRedirectPageBot):
@@ -63,6 +69,7 @@
"""Initializer."""
self.available_options.update({
'all': False,
+ 'catnames': None,
'exist': False,
'forward': False,
'keepparent': False,
@@ -77,6 +84,29 @@
self.removeparent = not self.opt.keepparent
self.main = not self.opt.all
+ self.workingcatname = self.getOption('catnames')
+ self._setup_menubar()
+
+ @classmethod
+ def _setup_menubar(cls):
+ """Setup treat_page option bar."""
+ small = [
+ ('yes', 'y'), ('no', 'n'), ('ignore', 'i'),
+ ('extend', 'e'), ('help', 'h')]
+ extended = small[:3] + [
+ ('more', 'm'), ('sort key', 'k'), ('skip', 's'), ('check', 'c'),
+ ('other', 'o'), ('list', 'l'), ('reduce', 'r'), ('help', 'h')]
+ cls.option_bar = {'e': extended, 'r': small}
+ cls.treat_options = cls.option_bar['r']
+
+ @staticmethod
+ def highlight_title(page, condition=True):
+ """Highlight a page title if conditon is True."""
+ if condition:
+ pywikibot.output(
+ color_format('\n>>> {lightpurple}{0}{default} <<<',
+ page.title()))
+
def needcheck(self, page):
"""Verify whether the current page may be processed."""
global checked
@@ -153,7 +183,9 @@
pywikibot.output('')
pywikibot.output('== {} =='.format(pl.title()))
while True:
- answer = pywikibot.input('[y]es/[n]o/[i]gnore/[h]elp for options?')
+ answer = pywikibot.input_choice(
+ 'Add to category {}?'.format(self.workingcatname),
+ self.treat_options, default='i')
if answer == 'y':
self.include(pl, summary=summary)
break
@@ -173,14 +205,22 @@
break
elif answer == 'i':
break
+ if answer in 'er':
+ self.treat_options = self.option_bar[answer]
elif answer == 'h':
pywikibot.output("""
+[y]es: Add the page and check links')
+[n]o: Never add the page, saved to exclusion list
+[i]gnore: Neither do not add the page not check links
[m]ore: show more content of the page starting from the beginning
sort [k]ey: Add with sort key like [[Category|Title]]
[s]kip: Add the page, but skip checking links
[c]heck: Do not add the page, but do check links
[o]ther: Add another page
[l]ist: Show a list of the pages to check
+[e]xtend: A more extended option list
+[r]educe: Reduce option list
+[q]uit: Save exclusion list and exit this script
""")
elif answer == 'o':
pagetitle = pywikibot.input('Specify page to add:')
@@ -198,20 +238,21 @@
self.include(pl, checklinks=False, summary=summary)
break
elif answer == 'l':
+ length = len(tocheck)
pywikibot.output('Number of pages still to check: {}'
- .format(len(tocheck)))
- pywikibot.output('Pages to be checked:')
- pywikibot.output(' - '.join(page.title() for page in tocheck))
- pywikibot.output('== {} =='.format(pl.title()))
+ .format(length))
+ if length:
+ pywikibot.output('Pages to be checked:')
+ pywikibot.output(
+ fill(' - '.join(page.title() for page in tocheck)))
+ self.highlight_title(page)
elif answer == 'm':
- pywikibot.output('== {} =='.format(pl.title()))
- try:
- pywikibot.output('' + pl.get(get_redirect=True)[0:ctoshow])
- except pywikibot.NoPage:
+ self.highlight_title(pl, ctoshow > 500)
+ if pl.exists():
+ pywikibot.output(pl.text[0:ctoshow])
+ else:
pywikibot.output('Page does not exist.')
ctoshow += 500
- else:
- pywikibot.output('Not understood.')
def main(*args):
@@ -238,7 +279,7 @@
option = arg[1:]
if not arg.startswith('-'):
if not workingcatname:
- workingcatname = arg
+ options['catnames'] = workingcatname = arg
else:
pywikibot.warning('Working category "{}" is already given.'
.format(workingcatname))
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/458774
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Iffb6e7a150bb49c5572c90ea1124722be37518a1
Gerrit-Change-Number: 458774
Gerrit-PatchSet: 7
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Dalba <dalba.wiki(a)gmail.com>
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/640707 )
Change subject: [IMPR] reduce code complexity of pagegenerators.py
......................................................................
[IMPR] reduce code complexity of pagegenerators.py
There is a small incompatibility:
- AssertionError is raised instead of ValueError, but this should
cause no harm as it is unlikely that anyone is currently
catching it.
Change-Id: I5d068a3a79861cbc358b32669602b490687ab389
---
M pywikibot/pagegenerators.py
M tests/pagegenerators_tests.py
2 files changed, 12 insertions(+), 11 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 5d1c164..e92f6a1 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -703,19 +703,22 @@
cats = self.site.siteinfo.get('linter') # Get linter categories.
valid_cats = [c for _list in cats.values() for c in _list]
- value = '' if value is None else value
+ value = value or ''
cat, _, lint_from = value.partition('/')
- if not lint_from:
- lint_from = None
+ lint_from = lint_from or None
- if cat == 'show': # Display categories of lint errors.
+ def show_available_categories(cats):
_i = ' ' * 4
+ _2i = 2 * _i
txt = 'Available categories of lint errors:\n'
for prio, _list in cats.items():
txt += '{indent}{prio}\n'.format(indent=_i, prio=prio)
- for c in _list:
- txt += '{indent}{cat}\n'.format(indent=2 * _i, cat=c)
+ txt += ''.join(
+ '{indent}{cat}\n'.format(indent=_2i, cat=c) for c in _list)
pywikibot.output('%s' % txt)
+
+ if cat == 'show': # Display categories of lint errors.
+ show_available_categories(cats)
sys.exit(0)
if not cat:
@@ -724,10 +727,8 @@
lint_cats = cats[cat]
else:
lint_cats = cat.split(',')
- for lint_cat in lint_cats:
- if lint_cat not in valid_cats:
- raise ValueError('Invalid category of lint errors: %s'
- % cat)
+ assert set(lint_cats) <= set(valid_cats), \
+ 'Invalid category of lint errors: %s' % cat
return self.site.linter_pages(
lint_categories='|'.join(lint_cats), namespaces=self.namespaces,
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 41302eb..1762769 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -1247,7 +1247,7 @@
self.skipTest('The site {0} does not use Linter extension'
.format(self.site))
gf = pagegenerators.GeneratorFactory(site=self.site)
- self.assertRaises(ValueError, gf.handleArg, '-linter:dummy')
+ self.assertRaises(AssertionError, gf.handleArg, '-linter:dummy')
def test_linter_generator_show(self):
"""Test generator of pages with lint errors."""
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/640707
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I5d068a3a79861cbc358b32669602b490687ab389
Gerrit-Change-Number: 640707
Gerrit-PatchSet: 2
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged