jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/165712 )
Change subject: [IMPROV] Link: parser rewritten
......................................................................
[IMPROV] Link: parser rewritten
Instead of repeatedly stripping prefixes off the string, the parser now
only iterates over it and does not modify it until the very end, after
all prefixes have been analysed.
Change-Id: Ib93b32de6dafa9edb062df16ac55bfe580410088
---
M pywikibot/page.py
1 file changed, 22 insertions(+), 19 deletions(-)
Approvals:
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/pywikibot/page.py b/pywikibot/page.py
index dc83ae4..b96e15c 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -4983,40 +4983,38 @@
self._site = self._source
self._namespace = self._defaultns
self._is_interwiki = False
- t = self._text
ns_prefix = False
- # This code was adapted from Title.php : secureAndSplit()
- #
+ old_position = 0 if self._text.find(':') != 0 else 1
+ colon_position = self._text.find(':', old_position)
first_other_site = None
- while u":" in t:
- # Initial colon indicates main namespace rather than default
- if t.startswith(u":"):
- self._namespace = self._site.namespaces[0]
- # remove the colon but continue processing
- # remove any subsequent whitespace
- t = t.lstrip(u":").lstrip(u" ")
- continue
-
- prefix = t[:t.index(u":")].lower()
+ while colon_position >= 0:
+ prefix = self._text[old_position:colon_position].lower()
+ # All spaces after a prefix are discarded
+ colon_position += 1
+ while (len(self._text) > colon_position and
+ self._text[colon_position] == ' '):
+ colon_position += 1
ns = self._site.namespaces.lookup_name(prefix)
if ns:
- # Ordinary namespace
- t = t[t.index(u":"):].lstrip(u":").lstrip(u" ")
+ if len(self._text) <= colon_position:
+ raise pywikibot.InvalidTitle(
+ "'{0}' has no title.".format(self._text))
self._namespace = ns
ns_prefix = True
+ old_position = colon_position
break
+
try:
newsite = self._site.interwiki(prefix)
except KeyError:
break # text before : doesn't match any known prefix
except SiteDefinitionError as e:
raise SiteDefinitionError(
- u'{0} is not a local page on {1}, and the interwiki prefix '
- '{2} is not supported by Pywikibot!:\n{3}'.format(
- self._text, self._site, prefix, e))
+ '{0} is not a local page on {1}, and the interwiki '
+ 'prefix {2} is not supported by Pywikibot!\n{3}'
+ .format(self._text, self._site, prefix, e))
else:
- t = t[t.index(u":"):].lstrip(u":").lstrip(u" ")
if first_other_site:
if not self._site.local_interwiki(prefix):
raise pywikibot.InvalidTitle(
@@ -5027,6 +5025,11 @@
first_other_site = newsite
self._site = newsite
self._is_interwiki = True
+ old_position = colon_position
+ colon_position = self._text.find(':', old_position)
+
+ # Remove any namespaces/interwiki prefixes
+ t = self._text[old_position:]
if u"#" in t:
t, sec = t.split(u'#', 1)
--
To view, visit
https://gerrit.wikimedia.org/r/165712
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ib93b32de6dafa9edb062df16ac55bfe580410088
Gerrit-PatchSet: 7
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>