jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/492504 )
Change subject: [IMPR] Fix handling of interlanguage links in replace_links
......................................................................
[IMPR] Fix handling of interlanguage links in replace_links
- fix iw (en:Example) links detection (it just didn't work at all)
- fix il (:en:Example) links detection (site should not be derived
from the replacement pair) and behavior (until now replace_links made
[[:en:Foo]] > [[Bar]] for :en:Foo > :en:Bar)
- make site mandatory (as correct detection and handling of iw and il
links needs a site matching the text source/target)
- extracted from
https://gerrit.wikimedia.org/r/#/c/pywikibot/core/+/491673/
Change-Id: Iae273b5440bd697b84b574bf2649996c1cec4f32
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 62 insertions(+), 13 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 7c676c2..506fa93 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -644,8 +644,8 @@
in that case it will apply the second value from the sequence.
@type replace: sequence of pywikibot.Page/pywikibot.Link/str or
callable
- @param site: a Site object to use if replace is not a sequence or the link
- to be replaced is not a Link or Page instance.
+ @param site: a Site object to use. It should match the origin
+ or target site of the text
@type site: pywikibot.APISite
"""
def to_link(source):
@@ -689,14 +689,14 @@
if isinstance(replace_list[1], basestring):
replace_list[1] = pywikibot.Page(site, replace_list[1])
check_classes(replace_list[0])
- if replace_list[0].site != replace_list[1].site:
- raise ValueError('Both pages in the "replace" argument '
- 'must belong to the same site.')
- site = replace_list[0].site
replace = replace_callable
+ if site is None:
+ issue_deprecation_warning(
+ 'site=None',
+ 'a valid site for list or tuple parameter "replace"',
+ 2, since='20190223')
elif site is None:
- raise ValueError('If "replace" is not a tuple or list of pages, '
- 'the "site" argument must be provided.')
+ raise ValueError('The "site" argument must be provided.')
linktrail = site.linktrail()
link_pattern = re.compile(
@@ -710,10 +710,15 @@
m = link_pattern.search(text, pos=curpos)
if not m:
break
- # ignore links to sections of the same page
+ # Ignore links to sections of the same page
if not m.group('title').strip():
curpos = m.end()
continue
+ # Ignore interwiki links
+ if (site.isInterwikiLink(m.group('title').strip())
+ and not m.group('title').strip().startswith(':')):
+ curpos = m.end()
+ continue
groups = m.groupdict()
if groups['label'] and '[[' in groups['label']:
# TODO: Work on the link within the label too
@@ -740,10 +745,6 @@
# unrecognized iw prefix
curpos = end
continue
- # ignore interwiki links
- if link.site != site:
- curpos = end
- continue
# Check whether the link found should be replaced.
# Either None, False or tuple(Link, bool)
@@ -792,6 +793,9 @@
is_link = False
new_title = new_link.canonical_title()
+ # Make correct langlink if needed
+ if not new_link.site == site:
+ new_title = ':' + new_link.site.code + ':' + new_title
if is_link:
# Use link's label
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index 6b754da..860fbfb 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -1060,6 +1060,51 @@
ValueError, r'unicode \(str.*bytes \(str',
textlib.replace_links, self.text, callback, self.wp_site)
+ def test_replace_interwiki_links(self):
+ """Make sure interwiki links can not be replaced."""
+ link = '[[fr:how]]'
+ self.assertEqual(
+ textlib.replace_links(link, ('fr:how', 'de:are'), self.wp_site),
+ link)
+ self.assertEqual(
+ textlib.replace_links(link, (':fr:how', ':de:are'), self.wp_site),
+ link)
+ self.assertEqual(
+ textlib.replace_links(link, ('how', 'de:are'), self.wp_site),
+ link)
+ self.assertEqual(
+ textlib.replace_links(link, ('de:how', 'de:are'), self.wp_site),
+ link)
+
+
+class TestReplaceLinksNonDry(TestCase):
+ """Test the replace_links function in textlib non-dry."""
+
+ family = 'wikipedia'
+ code = 'en'
+
+ cached = True
+
+ def test_replace_interlanguage_links(self):
+ """Test replacing interlanguage links."""
+ link = '[[:fr:how]]'
+ self.assertEqual(
+ textlib.replace_links(link, (':fr:how', ':de:are'),
+ self.site),
+ '[[:de:Are|fr:how]]')
+ self.assertEqual(
+ textlib.replace_links(link, ('fr:how', 'de:are'),
+ self.site),
+ '[[:de:Are|fr:how]]')
+ self.assertEqual(
+ textlib.replace_links(link, ('how', ':de:are'),
+ self.site),
+ link)
+ self.assertEqual(
+ textlib.replace_links(link, (':de:how', ':de:are'),
+ self.site),
+ link)
+
class TestLocalDigits(TestCase):
--
To view, visit
https://gerrit.wikimedia.org/r/492504
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: Iae273b5440bd697b84b574bf2649996c1cec4f32
Gerrit-Change-Number: 492504
Gerrit-PatchSet: 7
Gerrit-Owner: Dvorapa <dvorapa(a)seznam.cz>
Gerrit-Reviewer: D3r1ck01 <alangiderick(a)gmail.com>
Gerrit-Reviewer: Dalba <dalba.wiki(a)gmail.com>
Gerrit-Reviewer: Dvorapa <dvorapa(a)seznam.cz>
Gerrit-Reviewer: Framawiki <framawiki(a)tools.wmflabs.org>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: Zhuyifei1999 <zhuyifei1999(a)gmail.com>
Gerrit-Reviewer: jenkins-bot (75)