jenkins-bot has submitted this change and it was merged.
Change subject: Use sitename property
......................................................................
Use sitename property
sitename was changed to a property in ce164aa7.
For compatibility purposes sitename() can still be called,
but using the property should be preferred.
Change-Id: I9c36512f7796a160ba40a30ccb62e665fde8c0de
---
M pywikibot/cosmetic_changes.py
M pywikibot/textlib.py
M scripts/blockreview.py
M scripts/fixing_redirects.py
M scripts/imagetransfer.py
M scripts/interwiki.py
M scripts/noreferences.py
7 files changed, 17 insertions(+), 16 deletions(-)
Approvals:
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index c21868f..1064595 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -415,7 +415,7 @@
def translateAndCapitalizeNamespaces(self, text):
"""Use localized namespace names."""
# arz uses english stylish codes
- if self.site.sitename() == 'wikipedia:arz':
+ if self.site.sitename == 'wikipedia:arz':
return text
family = self.site.family
# wiki links aren't parsed here.
@@ -565,7 +565,7 @@
# TODO: Add a configuration variable for each site,
# which determines if the link target is written in
# uppercase
- if self.site.sitename() == 'wikipedia:de':
+ if self.site.sitename == 'wikipedia:de':
titleWithSection = first_upper(titleWithSection)
newLink = "[[%s|%s]]" % (titleWithSection, label)
# re-add spaces that were pulled out of the link.
@@ -898,7 +898,7 @@
[1]: https://commons.wikimedia.org/wiki/Commons:Tools/pywiki_file_description_cl…
"""
- if self.site.sitename() != u'commons:commons' or self.namespace == 6:
+ if self.site.sitename != 'commons:commons' or self.namespace == 6:
return
# section headers to {{int:}} versions
exceptions = ['comment', 'includeonly', 'math', 'noinclude', 'nowiki',
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index f56fa9b..77454fc 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1118,7 +1118,7 @@
marker = findmarker(oldtext)
if site is None:
site = pywikibot.Site()
- if site.sitename() == 'wikipedia:de' and "{{Personendaten" in oldtext:
+ if site.sitename == 'wikipedia:de' and '{{Personendaten' in oldtext:
raise pywikibot.Error(
'The Pywikibot is no longer allowed to touch categories on the '
'German\nWikipedia on pages that contain the Personendaten '
diff --git a/scripts/blockreview.py b/scripts/blockreview.py
index 62ec90b..08ec0ff 100755
--- a/scripts/blockreview.py
+++ b/scripts/blockreview.py
@@ -130,7 +130,7 @@
# Notify the blocking admin
if templates[1] == [] or templates[1][0] == u'1':
if self.info['action'] == 'block' or user.isBlocked():
- if self.site.sitename() == 'wikipedia:de':
+ if self.site.sitename == 'wikipedia:de':
admin = pywikibot.User(self.site, self.info['user'])
adminPage = admin.getUserTalkPage()
adminText = adminPage.get()
@@ -144,7 +144,7 @@
self.save(adminText, adminPage, comment, False)
# test for pt-wiki
# just print all sysops talk pages
- elif self.site.sitename() == 'wikipedia:pt':
+ elif self.site.sitename == 'wikipedia:pt':
from pywikibot import pagegenerators as pg
gen = pg.PreloadingGenerator(self.SysopGenerator())
for sysop in gen:
diff --git a/scripts/fixing_redirects.py b/scripts/fixing_redirects.py
index f689ec4..2cb94ec 100755
--- a/scripts/fixing_redirects.py
+++ b/scripts/fixing_redirects.py
@@ -127,7 +127,7 @@
genFactory.handleArg(arg)
mysite = pywikibot.Site()
- if mysite.sitename() == 'wikipedia:nl':
+ if mysite.sitename == 'wikipedia:nl':
pywikibot.output(
'\03{lightred}There is consensus on the Dutch Wikipedia that '
'bots should not be used to fix redirects.\03{default}')
diff --git a/scripts/imagetransfer.py b/scripts/imagetransfer.py
index 8f4329f..4dc25b7 100755
--- a/scripts/imagetransfer.py
+++ b/scripts/imagetransfer.py
@@ -178,9 +178,11 @@
try:
description = sourceImagePage.get()
# try to translate license templates
- if (sourceSite.sitename(), self.targetSite.sitename()) in licenseTemplates:
- for old, new in licenseTemplates[(sourceSite.sitename(),
- self.targetSite.sitename())].items():
+ if (sourceSite.sitename,
+ self.targetSite.sitename) in licenseTemplates:
+ for old, new in licenseTemplates[
+ (sourceSite.sitename,
+ self.targetSite.sitename)].items():
new = '{{%s}}' % new
old = re.compile('{{%s}}' % old)
description = textlib.replaceExcept(description, old, new,
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index fb05532..f6b4c5b 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -1722,7 +1722,7 @@
for (site, page) in new.items():
# edit restriction for some templates on zh-wiki where
# interlanguage keys are included by /doc subpage
- smallWikiAllowed = not (page.site.sitename() == 'wikipedia:zh' and
+ smallWikiAllowed = not (page.site.sitename == 'wikipedia:zh' and
page.namespace() == 10 and
u'Country data' in page.title(withNamespace=False))
# edit restriction on is-wiki
@@ -1733,8 +1733,8 @@
# or the last edit was 1 month ago
if (smallWikiAllowed and
globalvar.autonomous and
- (page.site.sitename() == 'wikipedia:is' or
- page.site.sitename() == 'wikipedia:zh' and
+ (page.site.sitename == 'wikipedia:is' or
+ page.site.sitename == 'wikipedia:zh' and
page.namespace() == 10
)):
old = {}
@@ -1772,7 +1772,7 @@
else:
pywikibot.output(
u'NOTE: number of edits are restricted at %s'
- % page.site.sitename()
+ % page.site.sitename
)
# if we have an account for this site
diff --git a/scripts/noreferences.py b/scripts/noreferences.py
index 709c1e8..df65e8c 100755
--- a/scripts/noreferences.py
+++ b/scripts/noreferences.py
@@ -671,8 +671,7 @@
pywikibot.output(u"Page %s is a disambig; skipping."
% page.title(asLink=True))
continue
- if self.site.sitename() == 'wikipedia:en' and \
- page.isIpEdit():
+ if self.site.sitename == 'wikipedia:en' and page.isIpEdit():
pywikibot.output(
u"Page %s is edited by IP. Possible vandalized"
% page.title(asLink=True))
--
To view, visit https://gerrit.wikimedia.org/r/227646
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9c36512f7796a160ba40a30ccb62e665fde8c0de
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Fix NESTED_TEMPLATE_REGEX
......................................................................
Fix NESTED_TEMPLATE_REGEX
Improve performance of NESTED_TEMPLATE_REGEX.
Also fixes bugs preventing the regex from capturing templates:
* with names containing numbers.
* with values containing non-adjacent single brackets
* captures arbitrary template levels safely
Bug: T63024
Bug: T105621
Change-Id: I61a92fb1b6d893de31fab738ab883af231917f4c
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 72 insertions(+), 10 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index a0b4d99..367d175 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -55,7 +55,7 @@
# It exists for backwards compatibility to the old 'TEMP_REGEX'
# which was the _ETP_REGEX.
TEMP_REGEX = DeprecatedRegex(r"""
-{{\s*(?:msg:)?\s*
+{{\s*(?:msg:\s*)?
(?P<name>[^{\|]+?)\s*
(?:\|(?P<params>[^{]*
(?:(?:{}|{{[A-Z]+(?:\:[^}])?}}|{{{[^}]+}}}) [^{]*)*
@@ -72,18 +72,26 @@
# Prefix msg: is not included in the 'name' group, but all others are
# included for backwards compatibility with TEMP_REGEX.
# Only parser functions using # are excluded.
+# When more than two levels of templates are found, this regex will
+# capture from the beginning of the first {{ to the end of the last }},
+# with wikitext between templates as part of the parameters of the first
+# template in the wikitext.
+# This ensures it falls back to a safe mode for replaceExcept, as it
+# ensures that any replacement will not occur within template text.
NESTED_TEMPLATE_REGEX = re.compile(r"""
-{{\s*(?:msg:)?\s*
- (?P<name>[^{\|#0-9][^{\|#0-9]*?)\s*
- (?:\|(?P<params>[^{]*
- (({{{[^}]+}}}
- |{{[^}|]+\|?[^}]*}}
- |{}
- ) [^{]*
- )*
- )?
+{{\s*(?:msg:\s*)?
+ (?P<name>[^{\|#0-9][^{\|#]*?)\s*
+ (?:\|(?P<params> [^{]*?
+ (({{{[^{}]+?}}}
+ |{{[^{}]+?}}
+ |{[^{}]*?}
+ ) [^{]*?
+ )*?
+ )?
)?
}}
+|
+(?P<unhandled_depth>{{\s*[^{\|#0-9][^{\|#]*?\s* [^{]* {{ .* }})
""", re.VERBOSE)
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index d6b8b69..1de1fa9 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -314,6 +314,10 @@
self.assertEqual(func('{{{a|b}}X}'),
[('a', OrderedDict((('1', 'b'), )))])
+ # sf.net bug 1575: unclosed template
+ self.assertEqual(func('{{a'), [])
+ self.assertEqual(func('{{a}}{{foo|'), [('a', OrderedDict())])
+
def _etp_regex_differs(self, func):
"""Common cases not handled the same by ETP_REGEX."""
self.assertEqual(func('{{a| b=c}}'), [('a', OrderedDict(((' b', 'c'), )))])
@@ -350,6 +354,18 @@
self._order_differs(func)
self._etp_regex_differs(func)
+ self.assertCountEqual(func('{{a|{{c|{{d}}}}}}'),
+ [('c', OrderedDict((('1', '{{d}}'), ))),
+ ('a', OrderedDict([('1', '{{c|{{d}}}}')])),
+ ('d', OrderedDict())
+ ])
+
+ self.assertCountEqual(func('{{a|{{c|{{d|}}}}}}'),
+ [('c', OrderedDict((('1', '{{d|}}'), ))),
+ ('a', OrderedDict([('1', '{{c|{{d|}}}}')])),
+ ('d', OrderedDict([('1', '')]))
+ ])
+
def test_extract_templates_params_regex(self):
"""Test using many complex regexes."""
func = functools.partial(textlib.extract_templates_and_params_regex,
@@ -367,6 +383,20 @@
func = textlib.extract_templates_and_params_regex
self.assertEqual(func('{{a|b=<!--{{{1}}}-->}}'),
[('a', OrderedDict((('b', ''), )))])
+
+ # Identical to mwpfh
+ self.assertCountEqual(func('{{a|{{c|{{d}}}}}}'),
+ [('c', OrderedDict((('1', '{{d}}'), ))),
+ ('a', OrderedDict([('1', '{{c|{{d}}}}')])),
+ ('d', OrderedDict())
+ ])
+
+ # However fails to correctly handle three levels of balanced brackets
+ # with empty parameters
+ self.assertCountEqual(func('{{a|{{c|{{d|}}}}}}'),
+ [('c', OrderedDict((('1', '{{d|}}}'), ))),
+ ('d', OrderedDict([('1', '}')]))
+ ])
def test_extract_templates_params(self):
"""Test that the normal entry point works."""
@@ -404,6 +434,13 @@
[(u'a', OrderedDict([('1', u'{{b'),
('2', u'c}}}'),
('3', u'd')]))])
+
+ # Safe fallback to handle arbitrary template levels
+ # by merging top level templates together.
+ # i.e. 'b' is not recognised as a template, and 'foo' is also
+ # consumed as part of 'a'.
+ self.assertEqual(func('{{a|{{c|{{d|{{e|}}}} }} }} foo {{b}}'),
+ [(None, OrderedDict())])
def test_regexes(self):
"""_ETP_REGEX, NESTED_TEMPLATE_REGEX and TEMP_REGEX tests."""
@@ -493,6 +530,17 @@
self.assertIsNotNone(func('{{a|{{c}} }}'))
self.assertIsNotNone(func('{{a|{{c|d}} }}'))
+
+ # All templates are captured when template depth is greater than 2
+ m = func('{{a|{{c|{{d|}} }} | foo = bar }} foo {{bar}} baz')
+ self.assertIsNotNone(m)
+ self.assertIsNotNone(m.group(0))
+ self.assertIsNone(m.group('name'))
+ self.assertIsNone(m.group(1))
+ self.assertIsNone(m.group('params'))
+ self.assertIsNone(m.group(2))
+ self.assertIsNotNone(m.group('unhandled_depth'))
+ self.assertTrue(m.group(0).endswith('foo {{bar}}'))
class TestReplaceLinks(TestCase):
@@ -962,6 +1010,12 @@
['template'], site=self.site),
'X' + template_sample[1:])
+ # sf.net bug 1575: unclosed template
+ template_sample = template_sample[:-2]
+ self.assertEqual(textlib.replaceExcept(template_sample, 'a', 'X',
+ ['template'], site=self.site),
+ 'X' + template_sample[1:])
+
def test_replace_source_reference(self):
"""Test replacing in text which contains back references."""
# Don't use a valid reference number in the original string, in case it
--
To view, visit https://gerrit.wikimedia.org/r/226531
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I61a92fb1b6d893de31fab738ab883af231917f4c
Gerrit-PatchSet: 11
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Minor improvements in README.rst
......................................................................
Minor improvements in README.rst
- Add link to doc.wikimedia.org for further documentation
in a more visible place.
- Add commands to install pywikibot via PyPI
Change-Id: Ica65a16befbe1a6fe7471a3ac44c5ca7ae5fe135
---
M README.rst
1 file changed, 8 insertions(+), 2 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/README.rst b/README.rst
index 93a1135..08a1991 100644
--- a/README.rst
+++ b/README.rst
@@ -8,6 +8,9 @@
Also included are various general function scripts that can be adapted for
different tasks.
+For further information about the library excluding scripts see
+the full `code documentation <https://doc.wikimedia.org/pywikibot/>`_.
+
Quick start
-----------
@@ -17,6 +20,11 @@
cd core
git submodule update --init
python pwb.py script_name
+
+Or to install using PyPI (excluding scripts)
+::
+
+ pip install pywikibot --pre
Our `installation
guide <https://www.mediawiki.org/wiki/Special:MyLanguage/Manual:Pywikibot/Installa…>`_
@@ -34,8 +42,6 @@
page = pywikibot.Page(site, 'Wikipedia:Sandbox')
page.text = page.text.replace('foo', 'bar')
page.save('Replacing "foo" with "bar"') # Saves the page
-
-For more documentation on pywikibot see our `docs <https://doc.wikimedia.org/pywikibot/>`_.
Required external programms
---------------------------
--
To view, visit https://gerrit.wikimedia.org/r/227211
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ica65a16befbe1a6fe7471a3ac44c5ca7ae5fe135
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Move watchlist functions to Site object from scripts/watchlist.py
......................................................................
Move watchlist functions to Site object from scripts/watchlist.py
It also fixes the bug in watchlist.py regarding caching.
"watchlist.py -all" failed if the cache entry wasn't a watchlistraw api call.
Bug: T59995
Change-Id: Iba40f2eef6d161ff08500950ec4c097bac97f505
---
M pywikibot/site.py
M scripts/watchlist.py
2 files changed, 35 insertions(+), 45 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/site.py b/pywikibot/site.py
index c4d53ce..1b76668 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -5722,6 +5722,29 @@
data = req.submit()
return data['flow']['view-topic']['result']['topic']
+ def watched_pages(self, sysop=False, force=False, step=None, total=None):
+ """
+ Return watchlist.
+
+ @param sysop: Returns watchlist of sysop user if true
+ @type sysop: bool
+ @param force: Reload watchlist
+ @type force: bool
+ @return: list of pages in watchlist
+ @rtype: list of pywikibot.Page objects
+ """
+ self.login(sysop=sysop)
+ if not total:
+ total = pywikibot.config.special_page_limit
+ if force:
+ gen = api.PageGenerator(site=self, generator='watchlistraw',
+ step=step, gwrlimit=total)
+ else:
+ gen = api.PageGenerator(
+ site=self, generator='watchlistraw', step=step,
+ expiry=pywikibot.config.API_config_expiry, gwrlimit=total)
+ return gen
+
# aliases for backwards compatibility
isBlocked = redirect_func(is_blocked, old_name='isBlocked',
class_name='APISite')
diff --git a/scripts/watchlist.py b/scripts/watchlist.py
index f3a0a76..6729e73 100755
--- a/scripts/watchlist.py
+++ b/scripts/watchlist.py
@@ -3,10 +3,7 @@
"""
Allows access to the bot account's watchlist.
-The function refresh() downloads the current watchlist and saves it to disk.
-It is run automatically when a bot first tries to save a page retrieved. The
-watchlist can be updated manually by running this script. The list will also
-be reloaded automatically once a month.
+The watchlist can be updated manually by running this script.
Syntax: python watchlist [-all | -new]
@@ -18,7 +15,7 @@
"""
#
# (C) Daniel Herding, 2005
-# (C) Pywikibot team, 2005-2014
+# (C) Pywikibot team, 2005-2015
#
# Distributed under the terms of the MIT license.
#
@@ -33,20 +30,12 @@
from pywikibot.data.api import CachedRequest
from scripts.maintenance.cache import CacheEntry
-cache = {}
-
def get(site=None):
"""Load the watchlist, fetching it if necessary."""
if site is None:
site = pywikibot.Site()
- if site in cache:
- # Use cached copy if it exists.
- watchlist = cache[site]
- else:
- # create cached copy
- watchlist = refresh(site)
- cache[site] = watchlist
+ watchlist = [p.title() for p in site.watched_pages()]
return watchlist
@@ -58,30 +47,8 @@
def refresh(site, sysop=False):
"""Fetch the watchlist."""
- if not site.logged_in(sysop=sysop):
- site.login(sysop=sysop)
-
- params = {
- 'action': 'query',
- 'list': 'watchlistraw',
- 'wrlimit': config.special_page_limit,
- }
-
pywikibot.output(u'Retrieving watchlist for %s via API.' % str(site))
- # pywikibot.put_throttle() # It actually is a get, but a heavy one.
- watchlist = []
- while True:
- req = CachedRequest(config.API_config_expiry, site=site, **params)
- data = req.submit()
- if 'error' in data:
- raise RuntimeError('ERROR: %s' % data)
- watchlist.extend([w['title'] for w in data['watchlistraw']])
-
- if 'query-continue' in data:
- params.update(data['query-continue']['watchlistraw'])
- else:
- break
- return watchlist
+ return list(site.watched_pages(sysop=sysop, force=True))
def refresh_all(sysop=False):
@@ -95,7 +62,7 @@
entry.parse_key()
entry._rebuild()
if entry.site not in seen:
- if entry._data['watchlistraw']:
+ if entry._data.get('watchlistraw'):
refresh(entry.site, sysop)
seen.append(entry.site)
@@ -106,10 +73,12 @@
'Downloading all watchlists for your accounts in user-config.py')
for family in config.usernames:
for lang in config.usernames[family]:
- refresh(pywikibot.Site(lang, family), sysop=sysop)
+ site = pywikibot.Site(lang, family)
+ refresh(site, sysop=sysop)
for family in config.sysopnames:
for lang in config.sysopnames[family]:
- refresh(pywikibot.Site(lang, family), sysop=sysop)
+ site = pywikibot.Site(lang, family)
+ refresh(site, sysop=sysop)
def main(*args):
@@ -137,12 +106,10 @@
refresh_new(sysop=sysop)
else:
site = pywikibot.Site()
- refresh(site, sysop=sysop)
-
- watchlist = get(site)
+ watchlist = refresh(site, sysop=sysop)
pywikibot.output(u'%i pages in the watchlist.' % len(watchlist))
- for pageName in watchlist:
- pywikibot.output(pageName, toStdout=True)
+ for page in watchlist:
+ pywikibot.output(page.title(), toStdout=True)
if __name__ == "__main__":
main()
--
To view, visit https://gerrit.wikimedia.org/r/215902
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Iba40f2eef6d161ff08500950ec4c097bac97f505
Gerrit-PatchSet: 21
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>