jenkins-bot has submitted this change and it was merged.
Change subject: Check Internet Archive API URL is available
......................................................................
Check Internet Archive API URL is available
The Internet Archive API responds with status code 503
when it is unavailable. The tests should not be run when
it is unavailable.
Re-enable Internet Archive tests.
Bug: T104761
Change-Id: I4f550efa380db41a4e489c4821b2400c70ada369
---
M tests/__init__.py
M tests/weblib_tests.py
2 files changed, 4 insertions(+), 4 deletions(-)
Approvals:
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/tests/__init__.py b/tests/__init__.py
index 4416e05..631b7ce 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -141,10 +141,7 @@
disabled_test_modules = [
'tests', # tests of the tests package
- # weblib is deprecated, the tests fail for weblib,
- # but the tests are run in weblinkchecker_tests.
'l10n',
- 'weblib',
]
if not i18n.messages_available():
disabled_test_modules.append('l10n')
@@ -156,6 +153,9 @@
'site_detect': [
'test_IWM', # very slow and tests include unnecessary sites
],
+ 'weblib': [
+ 'testWebCiteOlder', # fails. T110640
+ ],
}
diff --git a/tests/weblib_tests.py b/tests/weblib_tests.py
index 769080c..13c5a13 100644
--- a/tests/weblib_tests.py
+++ b/tests/weblib_tests.py
@@ -28,7 +28,7 @@
sites = {
'archive.org': {
- 'hostname': 'web.archive.org',
+ 'hostname': 'https://archive.org/wayback/available?url=invalid',
},
}
--
To view, visit https://gerrit.wikimedia.org/r/229620
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I4f550efa380db41a4e489c4821b2400c70ada369
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] Removing disabled parts without mwpfh
......................................................................
[FIX] Removing disabled parts without mwpfh
When `mwparserfromhell` is not installed, it won't remove disabled parts unless
that parameter is explicitly set to `True`. Before 13cd73de it was always set
to `True` when using the regex variant, but after that patch the default value
of `None` was only replaced when `mwparserfromhell` is used.
Bug: T113892
Change-Id: I255823fc574c9d03f8d9961350a3545f3bcea3fb
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 51 insertions(+), 4 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 5998ac0..36c6e81 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1291,9 +1291,8 @@
use_mwparserfromhell = (config.use_mwparserfromhell and
not isinstance(mwparserfromhell, Exception))
- if use_mwparserfromhell:
- if remove_disabled_parts is None:
- remove_disabled_parts = False
+ if remove_disabled_parts is None:
+ remove_disabled_parts = not use_mwparserfromhell
if remove_disabled_parts:
text = removeDisabledParts(text)
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index e50d5ef..0b0205e 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -23,6 +23,7 @@
from tests.aspects import (
unittest, require_modules, TestCase, DefaultDrySiteTestCase,
+ PatchingTestCase,
)
files = {}
@@ -415,7 +416,8 @@
def test_extract_templates_params(self):
"""Test that the normal entry point works."""
self._common_results(
- textlib.extract_templates_and_params)
+ functools.partial(textlib.extract_templates_and_params,
+ remove_disabled_parts=False))
def test_template_simple_regex(self):
"""Test using simple regex."""
@@ -557,6 +559,52 @@
self.assertTrue(m.group(0).endswith('foo {{bar}}'))
+class TestGenericTemplateParams(PatchingTestCase):
+
+ """Test whether the generic function forwards the call correctly."""
+
+ net = False
+
+ @PatchingTestCase.patched(textlib, 'extract_templates_and_params_mwpfh')
+ def extract_mwpfh(self, text, *args, **kwargs):
+ """Patched call to extract_templates_and_params_mwpfh."""
+ self._text = text
+ self._mwpfh = True
+
+ @PatchingTestCase.patched(textlib, 'extract_templates_and_params_regex')
+ def extract_regex(self, text, *args, **kwargs):
+ """Patched call to extract_templates_and_params_regex."""
+ self._text = text
+ self._mwpfh = False
+
+ def test_removing_disabled_parts_regex(self):
+ """Test removing disabled parts when using the regex variant."""
+ self.patch(config, 'use_mwparserfromhell', False)
+ textlib.extract_templates_and_params('{{a<!-- -->}}', True)
+ self.assertEqual(self._text, '{{a}}')
+ self.assertFalse(self._mwpfh)
+ textlib.extract_templates_and_params('{{a<!-- -->}}', False)
+ self.assertEqual(self._text, '{{a<!-- -->}}')
+ self.assertFalse(self._mwpfh)
+ textlib.extract_templates_and_params('{{a<!-- -->}}')
+ self.assertEqual(self._text, '{{a}}')
+ self.assertFalse(self._mwpfh)
+
+ @require_modules('mwparserfromhell')
+ def test_removing_disabled_parts_mwpfh(self):
+ """Test removing disabled parts when using the mwpfh variant."""
+ self.patch(config, 'use_mwparserfromhell', True)
+ textlib.extract_templates_and_params('{{a<!-- -->}}', True)
+ self.assertEqual(self._text, '{{a}}')
+ self.assertTrue(self._mwpfh)
+ textlib.extract_templates_and_params('{{a<!-- -->}}', False)
+ self.assertEqual(self._text, '{{a<!-- -->}}')
+ self.assertTrue(self._mwpfh)
+ textlib.extract_templates_and_params('{{a<!-- -->}}')
+ self.assertEqual(self._text, '{{a<!-- -->}}')
+ self.assertTrue(self._mwpfh)
+
+
class TestReplaceLinks(TestCase):
"""Test the replace_links function in textlib."""
--
To view, visit https://gerrit.wikimedia.org/r/241565
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I255823fc574c9d03f8d9961350a3545f3bcea3fb
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: jenkins-bot <>
Build Update for wikimedia/pywikibot-core
-------------------------------------
Build: #2918
Status: Failed
Duration: 33 minutes and 7 seconds
Commit: b66f9b5 (master)
Author: Fabian Neundorf
Message: [IMPROV] patrol: Replace add_to_tuples with defaultdict
Instead of using the static method it now just uses a `defaultdict`. This
also renames the method-local variable from `tuples` to `whitelist`,
since it is a `dict` of `list`s, so the name `tuples` isn't applicable.
Change-Id: I7cb543a4f1077e4a76c2abe8888ff95ae4dade0f
View the changeset: https://github.com/wikimedia/pywikibot-core/compare/0c8150f39c7e...b66f9b57…
View the full build log and details: https://travis-ci.org/wikimedia/pywikibot-core/builds/82352827
--
You can configure recipients for build notifications in your .travis.yml file. See http://docs.travis-ci.com/user/notifications
Build Update for wikimedia/pywikibot-core
-------------------------------------
Build: #2917
Status: Errored
Duration: 31 minutes and 20 seconds
Commit: 0c8150f (master)
Author: Fabian Neundorf
Message: [FIX] patrol: Replace mwlib with mwparserfromhell
This removes the dependency on `mwlib` (which doesn't work on Python 3)
and uses `mwparserfromhell` instead. Unfortunately, the `mwparserfromhell`
implementation does not support lists directly, so the parsed result has to be
filtered manually.
It now allows links outside of lists and normalizes the titles and checks
against the aliases for `Special:PrefixIndex`.
Bug: T95142
Change-Id: I4f7b5c7a67e0c90530319fce1f3ab0ca0c1a1138
View the changeset: https://github.com/wikimedia/pywikibot-core/compare/62b6743eb672...0c8150f3…
View the full build log and details: https://travis-ci.org/wikimedia/pywikibot-core/builds/82302454
--
You can configure recipients for build notifications in your .travis.yml file. See http://docs.travis-ci.com/user/notifications
jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] patrol: Replace mwlib with mwparserfromhell
......................................................................
[FIX] patrol: Replace mwlib with mwparserfromhell
This removes the dependency on `mwlib` (which doesn't work on Python 3)
and uses `mwparserfromhell` instead. Unfortunately, the `mwparserfromhell`
implementation does not support lists directly, so the parsed result has to be
filtered manually.
It now allows links outside of lists and normalizes the titles and checks
against the aliases for `Special:PrefixIndex`.
Bug: T95142
Change-Id: I4f7b5c7a67e0c90530319fce1f3ab0ca0c1a1138
---
M pywikibot/version.py
M requirements.txt
M scripts/patrol.py
M setup.py
M tests/patrolbot_tests.py
M tests/script_tests.py
M tests/utils.py
7 files changed, 67 insertions(+), 54 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/version.py b/pywikibot/version.py
index 7f5c64d..5ac6e1d 100644
--- a/pywikibot/version.py
+++ b/pywikibot/version.py
@@ -533,14 +533,6 @@
info['ver'] = package.__version__
elif name.startswith('unicodedata'):
info['ver'] = package.unidata_version
- elif name == 'mwlib': # mwlib 0.14.3 does not include a __init__.py
- module = __import__(name + '._version',
- fromlist=['_version'], level=0)
- if '__version__' in module.__dict__:
- info['ver'] = module.__version__
- path = module.__file__
- path = path[0:path.index('_version.')]
- info['path'] = path
# If builtins or standard_lib is None,
# only include package if a version was found.
diff --git a/requirements.txt b/requirements.txt
index cad79bf..66f32bf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -73,7 +73,7 @@
# incomplete core component botirc
irc ; python_version > '2.6'
-# textlib.py
+# textlib.py and patrol.py
mwparserfromhell>=0.3.3 ; python_version <= '3.4'
# The mysql generator in pagegenerators depends on either oursql or MySQLdb
diff --git a/scripts/patrol.py b/scripts/patrol.py
index e37907d..0adacee 100755
--- a/scripts/patrol.py
+++ b/scripts/patrol.py
@@ -5,9 +5,30 @@
This bot obtains a list of recent changes and newpages and marks the
edits as patrolled based on a whitelist.
-See http://en.wikisource.org/wiki/User:JVbot/patrol_whitelist
-Commandline parameters that are supported:
+Whitelist format
+================
+
+The whitelist is formatted as a number of list entries. Any links outside of
+lists are ignored and can be used for documentation. In a list the first link
+must be to the username which should be white listed and any other link
+following is adding that page to the white list of that username. If the user
+edited a page on their white list it gets patrolled. It will also patrol pages
+which start with the mentioned link (e.g. [[foo]] will also patrol [[foobar]]).
+
+To avoid redlinks it's possible to use Special:PrefixIndex as a prefix so that
+it will list all pages which will be patrolled. The page after the slash will be
+used then.
+
+On Wikisource, it'll also check if the page is on the author namespace in which
+case it'll also patrol pages which are linked from that page.
+
+An example can be found at:
+
+https://en.wikisource.org/wiki/User:Wikisource-bot/patrol_whitelist
+
+Commandline parameters
+======================
-namespace Filter the page generator to only yield pages in
specified namespaces
@@ -25,9 +46,9 @@
from __future__ import absolute_import, unicode_literals
__version__ = '$Id$'
-import mwlib.uparser # used to parse the whitelist
-import mwlib.parser # used to parse the whitelist
import time
+
+import mwparserfromhell
import pywikibot
@@ -92,6 +113,13 @@
self.rc_item_counter = 0 # counts how many items have been reviewed
self.patrol_counter = 0 # and how many times an action was taken
+ for entry in self.site.siteinfo['specialpagealiases']:
+ if entry['realname'] == 'Prefixindex':
+ self._prefixindex_aliases = set(alias.lower()
+ for alias in entry['aliases'])
+ break
+ else:
+ raise RuntimeError('No alias for "prefixindex"')
def load_whitelist(self):
"""Load most recent watchlist_page for further processing."""
@@ -183,34 +211,35 @@
"""Parse page details apart from 'user:' for use."""
tuples = {}
- # for any structure, the only first 'user:' page
- # is registered as the user the rest of the structure
- # refers to.
- def process_children(obj, current_user):
- pywikibot.debug(u'Parsing node: %s' % obj, _logger)
- for c in obj.children:
- temp = process_node(c, current_user)
- if temp and not current_user:
- current_user = temp
+ current_user = False
+ parsed = mwparserfromhell.parse(wikitext)
+ for node in parsed.nodes:
+ if isinstance(node, mwparserfromhell.nodes.tag.Tag):
+ if node.tag == 'li':
+ current_user = None
+ elif isinstance(node, mwparserfromhell.nodes.text.Text):
+ if node.endswith('\n'):
+ current_user = False
+ elif isinstance(node, mwparserfromhell.nodes.wikilink.Wikilink):
+ if current_user is False:
+ pywikibot.debug('Link to "{0}" ignored as outside '
+ 'list'.format(node.title), _logger)
+ continue
- def process_node(obj, current_user):
- # links are analysed; interwiki links are included because mwlib
- # incorrectly calls 'Wikisource:' namespace links an interwiki
- if isinstance(obj, mwlib.parser.NamespaceLink) or \
- isinstance(obj, mwlib.parser.InterwikiLink) or \
- isinstance(obj, mwlib.parser.ArticleLink):
+ obj = pywikibot.Link(node.title, self.site)
if obj.namespace == -1:
# the parser accepts 'special:prefixindex/' as a wildcard
# this allows a prefix that doesnt match an existing page
# to be a blue link, and can be clicked to see what pages
# will be included in the whitelist
- if obj.target[:20].lower() == 'special:prefixindex/':
- if len(obj.target) == 20:
+ name, sep, prefix = obj.title.partition('/')
+ if name.lower() in self._prefixindex_aliases:
+ if not prefix:
if pywikibot.config.verbose_output:
pywikibot.output(u'Whitelist everything')
page = ''
else:
- page = obj.target[20:]
+ page = prefix
if pywikibot.config.verbose_output:
pywikibot.output(u'Whitelist prefixindex hack '
u'for: %s' % page)
@@ -222,13 +251,12 @@
# if a target user hasn't been found yet, and the link is
# 'user:'
# the user will be the target of subsequent rules
- page_prefix_len = len(self.site.namespace(2))
- current_user = obj.target[(page_prefix_len + 1):]
+ current_user = obj.title
if pywikibot.config.verbose_output:
pywikibot.output(u'Whitelist user: %s' % current_user)
- return current_user
+ continue
else:
- page = obj.target
+ page = obj.canonical_title()
if current_user:
if not user or current_user == user:
@@ -246,11 +274,6 @@
u'another user: %s' % page)
else:
raise Exception(u'No user set for page %s' % page)
- else:
- process_children(obj, current_user)
-
- root = mwlib.uparser.parseString(title='Not used', raw=wikitext)
- process_children(root, None)
return tuples
diff --git a/setup.py b/setup.py
index 6a120b1..ad37305 100644
--- a/setup.py
+++ b/setup.py
@@ -69,6 +69,7 @@
'flickrripper.py': ['Pillow'],
'states_redirect.py': ['pycountry'],
'weblinkchecker.py': ['memento_client>=0.5.1'],
+ 'patrol.py': ['mwparserfromhell>=0.3.3'],
}
# flickrapi 1.4.4 installs a root logger in verbose mode; 1.4.5 fixes this.
# The problem doesnt exist in flickrapi 2.x.
@@ -133,9 +134,6 @@
script_deps['data_ingestion.py'] = extra_deps['csv']
- # mwlib is not available for py3
- script_deps['patrol'] = ['mwlib']
-
# Some of the ui_tests depend on accessing the console window's menu
# to set the console font and copy and paste, achieved using pywinauto
# which depends on pywin32.
@@ -157,11 +155,6 @@
# so all scripts can be compiled for script_tests, etc.
if 'PYSETUP_TEST_EXTRAS' in os.environ:
test_deps += list(itertools.chain(*(extra_deps.values())))
- # mwlib requires 'pyparsing>=1.4.11,<1.6', which conflicts with
- # pydot's requirement for pyparsing>=2.0.1.
- if 'mwlib' in test_deps:
- test_deps.remove('mwlib')
-
if 'oursql' in test_deps and os.name == 'nt':
test_deps.remove('oursql') # depends on Cython
diff --git a/tests/patrolbot_tests.py b/tests/patrolbot_tests.py
index 961d637..efaa29d 100644
--- a/tests/patrolbot_tests.py
+++ b/tests/patrolbot_tests.py
@@ -12,7 +12,7 @@
try:
from scripts import patrol
except ImportError:
- patrol = None # if mwlib is not installed
+ patrol = None # if mwparserfromhell is not installed
from tests.aspects import require_modules, unittest, DefaultDrySiteTestCase
@@ -21,17 +21,17 @@
== Header ==
* [[User:Test 1]]: [[Page 1]], [[Page 2]]
-* [[User:Test 2]]: [[Page 2]], [[Page 4]], [[Page 6]]
+* [[User:Test_2]]: [[Page 2]], [[Page 4]], [[Page 6]]
== Others ==
* [[User:Prefixed]]: [[Special:PrefixIndex/Page 1]], [[Special:PREFIXINDEX/Page 2]]
== More test 1 ==
-* [[User:Test 1]]: [[Page 3]]
+* [[User:Test_1]]: [[Page 3]]
"""
-@require_modules('mwlib')
+@require_modules('mwparserfromhell')
class TestPatrolBot(DefaultDrySiteTestCase):
"""Test the PatrolBot class."""
@@ -51,7 +51,7 @@
for i in range(1, 4)])
self.assertIn('Prefixed', tuples)
self.assertEqual(tuples['Prefixed'], ['Page 1', 'Page 2'])
- self.assertRaises(Exception, self.bot.parse_page_tuples, '[[link]]')
+ self.assertEqual(self.bot.parse_page_tuples('[[link]]'), {})
def test_in_list(self):
"""Test the method which returns whether a page is in the list."""
diff --git a/tests/script_tests.py b/tests/script_tests.py
index 10cd8d1..66bd657 100644
--- a/tests/script_tests.py
+++ b/tests/script_tests.py
@@ -44,7 +44,7 @@
'match_images': ['PIL.ImageTk'],
'panoramiopicker': ['BeautifulSoup'],
'states_redirect': ['pycountry'],
- 'patrol': ['mwlib'],
+ 'patrol': ['mwparserfromhell'],
}
if PYTHON_VERSION < (2, 7):
diff --git a/tests/utils.py b/tests/utils.py
index 69f2fae..b4335be 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -374,6 +374,11 @@
if self.family.name == 'wikisource':
extensions.append({'name': 'ProofreadPage'})
self._siteinfo._cache['extensions'] = (extensions, True)
+ aliases = []
+ for alias in ('PrefixIndex', ):
+ # TODO: Not all follow that scheme (e.g. "BrokenRedirects")
+ aliases.append({'realname': alias.capitalize(), 'aliases': [alias]})
+ self._siteinfo._cache['specialpagealiases'] = (aliases, True)
self._msgcache = {'*': 'dummy entry', 'hello': 'world'}
def _build_namespaces(self):
--
To view, visit https://gerrit.wikimedia.org/r/202011
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I4f7b5c7a67e0c90530319fce1f3ab0ca0c1a1138
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>