Revision: 8557
Author: xqt
Date: 2010-09-15 06:21:27 +0000 (Wed, 15 Sep 2010)
Log Message:
-----------
bugfix for r8556 (bug #3066490)
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-09-14 22:07:50 UTC (rev 8556)
+++ trunk/pywikipedia/wikipedia.py 2010-09-15 06:21:27 UTC (rev 8557)
@@ -2755,7 +2755,7 @@
@param total: iterate no more than this number of revisions in total
"""
- it total == None:
+ if total == None:
total = 500 #set to default of getVersionHistory
edits = self.getVersionHistory(revCount=total)
users = set([edit[2] for edit in edits])
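
With the `it` -> `if` typo fixed, contributingUsers() is callable again. A minimal usage sketch, assuming the compat trunk; the site and page title are hypothetical examples:

    # Hypothetical usage of the fixed method; site and title are examples.
    import wikipedia

    site = wikipedia.getSite('en', 'wikipedia')
    page = wikipedia.Page(site, u'Example')
    # total caps the revisions inspected; when omitted it falls back to
    # 500, matching the getVersionHistory default noted in the diff.
    users = page.contributingUsers(total=100)
    print u'%d distinct contributors' % len(users)
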
Revision: 8556
Author: xqt
Date: 2010-09-14 22:07:50 +0000 (Tue, 14 Sep 2010)
Log Message:
-----------
Add edit filter labels and page sizes to version history (feature request, bug #3054755)
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-09-14 16:23:48 UTC (rev 8555)
+++ trunk/pywikipedia/wikipedia.py 2010-09-14 22:07:50 UTC (rev 8556)
@@ -2525,9 +2525,10 @@
Return value is a list of tuples, where each tuple represents one
edit and is built of revision id, edit date/time, user name, and
edit summary. Starts with the most current revision, unless
- reverseOrder is True. Defaults to getting the first revCount edits,
- unless getAll is True.
+ reverseOrder is True.
+ Defaults to getting the first revCount edits, unless getAll is True.
+ @param revCount: iterate no more than this number of revisions in total
"""
# regular expression matching one edit in the version history.
@@ -2581,8 +2582,8 @@
return self._versionhistory[:revCount]
return self._versionhistory
- def _getVersionHistory(self, getAll = False, skipFirst = False, reverseOrder = False,
- revCount=500):
+ def _getVersionHistory(self, getAll=False, skipFirst=False, reverseOrder=False,
+ revCount=500):
"""Load history informations by API query.
Internal use for self.getVersionHistory(), don't use this function directly.
"""
@@ -2594,8 +2595,8 @@
'action': 'query',
'prop': 'revisions',
'titles': self.title(),
+ 'rvprop': 'ids|timestamp|flags|comment|user|size|tags',
'rvlimit': revCount,
- #'': '',
}
while not thisHistoryDone:
if reverseOrder:
@@ -2625,11 +2626,15 @@
timestampStrr = r['timestamp']
if 'user' in r:
userStrr = r['user']
- dataQ.append((revidStrr, timestampStrr, userStrr, c))
-
+ s=-1 #Will return -1 if not found
+ if 'size' in r:
+ s = r['size']
+ tags=[]
+ if 'tags' in r:
+ tags = r['tags']
+ dataQ.append((revidStrr, timestampStrr, userStrr, c, s, tags))
if len(result['query']['pages'].values()[0]['revisions']) < revCount:
thisHistoryDone = True
-
return dataQ
def _getVersionHistoryOld(self, getAll = False, skipFirst = False,
@@ -2697,26 +2702,30 @@
dataQ.extend(edits)
if len(edits) < revCount:
thisHistoryDone = True
-
return dataQ
def getVersionHistoryTable(self, forceReload=False, reverseOrder=False,
getAll=False, revCount=500):
"""Return the version history as a wiki table."""
+
result = '{| class="wikitable"\n'
- result += '! oldid || date/time || username || edit summary\n'
- for oldid, time, username, summary in self.getVersionHistory(forceReload = forceReload, reverseOrder = reverseOrder, getAll = getAll, revCount = revCount):
+ result += '! oldid || date/time || size || username || edit summary\n'
+ for oldid, time, username, summary, size, tags \
+ in self.getVersionHistory(forceReload=forceReload,
+ reverseOrder=reverseOrder,
+ getAll=getAll, revCount=revCount):
result += '|----\n'
- result += '| %s || %s || %s || <nowiki>%s</nowiki>\n' % (oldid, time, username, summary)
+ result += '| %s || %s || %d || %s || <nowiki>%s</nowiki>\n' \
+ % (oldid, time, size, username, summary)
result += '|}\n'
return result
def fullVersionHistory(self):
- """
- Return all previous versions including wikitext.
+ """Iterate previous versions including wikitext.
Gives a list of tuples consisting of revision ID, edit date/time, user name and
content
+
"""
address = self.site().export_address()
predata = {
@@ -2738,9 +2747,17 @@
unescape(match.group('content')))
for match in r.finditer(data) ]
- def contributingUsers(self):
- """Return a set of usernames (or IPs) of users who edited this page."""
- edits = self.getVersionHistory()
+ def contributingUsers(self, step=None, total=None):
+ """Return a set of usernames (or IPs) of users who edited this page.
+
+ @param step: limit each API call to this number of revisions
+ - not used yet, only in rewrite branch -
+ @param total: iterate no more than this number of revisions in total
+
+ """
+ it total == None:
+ total = 500 #set to default of getVersionHistory
+ edits = self.getVersionHistory(revCount=total)
users = set([edit[2] for edit in edits])
return users
@@ -2749,7 +2766,17 @@
"""Move this page to new title given by newtitle. If safe, don't try
to move and delete if not directly requested.
- * fixredirects has no effect in MW < 1.13"""
+ * fixredirects has no effect in MW < 1.13
+
+ @param newtitle: The new page title.
+ @param reason: The edit summary for the move.
+ @param movetalkpage: If true, move this page's talk page (if it exists)
+ @param sysop: Try to move using sysop account, if available
+ @param deleteAndMove: if move succeeds, delete the old page
+ (usually requires sysop privileges, depending on wiki settings)
+ @param safe: If false, attempt to delete existing page at newtitle
+ (if there is one) and then move this page to that title
+ """
if not self.site().has_api() or self.site().versionnumber() < 12:
return self._moveOld(newtitle, reason, movetalkpage, sysop,
throttle, deleteAndMove, safe, fixredirects, leaveRedirect)
@@ -2769,6 +2796,8 @@
if throttle:
put_throttle()
if reason is None:
+ pywikibot.output(u'Moving %s to [[%s]].'
+ % (self.title(asLink=True), newtitle))
reason = input(u'Please enter a reason for the move:')
if self.isTalkPage():
movetalkpage = False
@@ -2779,7 +2808,6 @@
'to': newtitle,
'token': self.site().getToken(sysop=sysop),
'reason': reason,
- #'': '',
}
if movesubpages:
params['movesubpages'] = 1
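
This revision grows the version history tuples from four to six items. A short sketch of consuming the new layout, assuming the same compat Page object as above (title is hypothetical):

    # Each tuple is now (revid, timestamp, user, comment, size, tags);
    # size falls back to -1 and tags to [] when the API omits them.
    import wikipedia

    page = wikipedia.Page(wikipedia.getSite('en', 'wikipedia'), u'Example')
    for oldid, time, username, summary, size, tags in \
            page.getVersionHistory(revCount=10):
        label = u', '.join(tags) if tags else u'(none)'
        print u'%s | %s | %s bytes | tags: %s' % (oldid, username, size, label)
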
Revision: 8555
Author: xqt
Date: 2010-09-14 16:23:48 +0000 (Tue, 14 Sep 2010)
Log Message:
-----------
change Page.aslink() to Page.title(asLink=True)
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2010-09-14 15:39:26 UTC (rev 8554)
+++ trunk/pywikipedia/interwiki.py 2010-09-14 16:23:48 UTC (rev 8555)
@@ -767,7 +767,7 @@
StoredPage.SPpath = path
StoredPage.SPstore = shelve.open(path)
- self.SPkey = self.aslink().encode('utf-8')
+ self.SPkey = self.title(asLink=True).encode('utf-8')
self.SPcontentSet = False
def SPgetContents(self):
@@ -1072,7 +1072,7 @@
if page == self.originPage:
try:
pywikibot.output(u"%s has a backlink from %s."
- % (page.aslink(), linkingPage.aslink()))
+ % (page.title(asLink=True), linkingPage.title(asLink=True)))
except UnicodeDecodeError:
pywikibot.output(u"Found a backlink for a page.")
self.makeForcedStop(counter)
@@ -1147,10 +1147,10 @@
def wiktionaryMismatch(self, page):
if globalvar.same=='wiktionary':
if page.title().lower() != self.originPage.title().lower():
- pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary mode" % (page.aslink(), self.originPage.aslink()))
+ pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary mode" % (page.title(asLink=True), self.originPage.title(asLink=True)))
return True
elif page.title() != self.originPage.title() and self.originPage.site().nocapitalize and page.site().nocapitalize:
- pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary mode because both languages are uncapitalized." % (page.aslink(), self.originPage.aslink()))
+ pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary mode because both languages are uncapitalized." % (page.title(asLink=True), self.originPage.title(asLink=True)))
return True
return False
@@ -1205,10 +1205,10 @@
def isIgnored(self, page):
if page.site().language() in globalvar.neverlink:
- pywikibot.output(u"Skipping link %s to an ignored language" % page.aslink())
+ pywikibot.output(u"Skipping link %s to an ignored language" % page.title(asLink=True))
return True
if page in globalvar.ignore:
- pywikibot.output(u"Skipping link %s to an ignored page" % page.aslink())
+ pywikibot.output(u"Skipping link %s to an ignored page" % page.title(asLink=True))
return True
return False
@@ -1343,12 +1343,12 @@
if self.addIfNew(redirectTargetPage, counter, page):
if config.interwiki_shownew or pywikibot.verbose:
pywikibot.output(u"%s: %s gives new %sredirect %s"
- % (self.originPage.aslink(), page.aslink(True), redir, redirectTargetPage.aslink(True)))
+ % (self.originPage.title(asLink=True), page.aslink(True), redir, redirectTargetPage.aslink(True)))
continue
# must be behind the page.isRedirectPage() part
# otherwise a redirect error would be raised
- if page.isEmpty() and not page.isCategory():
+ elif page.isEmpty() and not page.isCategory():
globalvar.remove.append(page.aslink(forceInterwiki=True))
if not globalvar.quiet or pywikibot.verbose:
pywikibot.output(u"NOTE: %s is empty. Skipping." % page.aslink(True))
@@ -1434,7 +1434,7 @@
for linkedPage in iw:
if globalvar.hintsareright:
if linkedPage.site in self.hintedsites:
- pywikibot.output(u"NOTE: %s: %s extra interwiki on hinted site ignored %s" % (self.originPage.aslink(), page.aslink(True), linkedPage.aslink(True)))
+ pywikibot.output(u"NOTE: %s: %s extra interwiki on hinted site ignored %s" % (self.originPage.title(asLink=True), page.aslink(True), linkedPage.aslink(True)))
break
if not self.skipPage(page, linkedPage, counter):
if globalvar.followinterwiki or page == self.originPage:
@@ -1450,7 +1450,7 @@
break
else:
if config.interwiki_shownew or pywikibot.verbose:
- pywikibot.output(u"%s: %s gives new interwiki %s"% (self.originPage.aslink(), page.aslink(True), linkedPage.aslink(True)))
+ pywikibot.output(u"%s: %s gives new interwiki %s"% (self.originPage.title(asLink=True), page.aslink(True), linkedPage.aslink(True)))
# These pages are no longer 'in progress'
self.pending = PageTree()
@@ -1803,10 +1803,10 @@
try:
if (new[ignorepage.site()] == ignorepage) and (ignorepage.site() != page.site()):
if (ignorepage not in interwikis):
- pywikibot.output(u"Ignoring link to %(to)s for %(from)s" % {'to': ignorepage.aslink(), 'from': page.aslink()})
+ pywikibot.output(u"Ignoring link to %(to)s for %(from)s" % {'to': ignorepage.title(asLink=True), 'from': page.title(asLink=True)})
new.pop(ignorepage.site())
else:
- pywikibot.output(u"NOTE: Not removing interwiki from %(from)s to %(to)s (exists both commented and non-commented)" % {'to': ignorepage.aslink(), 'from': page.aslink()})
+ pywikibot.output(u"NOTE: Not removing interwiki from %(from)s to %(to)s (exists both commented and non-commented)" % {'to': ignorepage.title(asLink=True), 'from': page.title(asLink=True)})
except KeyError:
pass
@@ -1874,7 +1874,7 @@
return False
pywikibot.showDiff(oldtext, newtext)
- # pywikibot.output(u"NOTE: Replace %s" % page.aslink())
+ # pywikibot.output(u"NOTE: Replace %s" % page.title(asLink=True))
# Determine whether we need permission to submit
ask = False
if removing and removing != [page.site()]: # Allow for special case of a self-pointing interwiki link
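
The mechanical rule applied throughout this commit, sketched for clarity (the page object is a hypothetical example):

    # Before: page.aslink()
    # After:  page.title(asLink=True)  -- same [[...]] rendering
    import wikipedia

    page = wikipedia.Page(wikipedia.getSite('en', 'wikipedia'), u'Example')
    print page.title(asLink=True)   # u'[[Example]]'

Note that call sites passing arguments, such as aslink(True) or aslink(forceInterwiki=True), are left untouched by this revision.
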
Revision: 8554
Author: xqt
Date: 2010-09-14 15:39:26 +0000 (Tue, 14 Sep 2010)
Log Message:
-----------
remove obsolete parameter from extract_templates_and_params, as reported by DrTrigon; some PEP 8 changes
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/textlib.py
Modified: trunk/pywikipedia/pywikibot/textlib.py
===================================================================
--- trunk/pywikipedia/pywikibot/textlib.py 2010-09-14 15:37:42 UTC (rev 8553)
+++ trunk/pywikipedia/pywikibot/textlib.py 2010-09-14 15:39:26 UTC (rev 8554)
@@ -109,11 +109,14 @@
if exc in exceptionRegexes:
dontTouchRegexes.append(exceptionRegexes[exc])
else:
- # nowiki, noinclude, includeonly, timeline, math ond other extensions
- dontTouchRegexes.append(re.compile(r'(?is)<%s>.*?</%s>' % (exc, exc)))
+ # nowiki, noinclude, includeonly, timeline, math ond other
+ # extensions
+ dontTouchRegexes.append(re.compile(r'(?is)<%s>.*?</%s>'
+ % (exc, exc)))
# handle alias
if exc == 'source':
- dontTouchRegexes.append(re.compile(r'(?is)<syntaxhighlight .*?</syntaxhighlight>'))
+ dontTouchRegexes.append(re.compile(
+ r'(?is)<syntaxhighlight .*?</syntaxhighlight>'))
else:
# assume it's a regular expression
dontTouchRegexes.append(exc)
@@ -167,8 +170,11 @@
groupMatch = groupR.search(replacement)
if not groupMatch:
break
- groupID = groupMatch.group('name') or int(groupMatch.group('number'))
- replacement = replacement[:groupMatch.start()] + match.group(groupID) + replacement[groupMatch.end():]
+ groupID = groupMatch.group('name') or \
+ int(groupMatch.group('number'))
+ replacement = replacement[:groupMatch.start()] + \
+ match.group(groupID) + \
+ replacement[groupMatch.end():]
text = text[:match.start()] + replacement + text[match.end():]
# continue the search on the remaining text
@@ -193,6 +199,7 @@
The exact set of parts which should be removed can be passed as the
'parts' parameter, which defaults to all.
+
"""
regexes = {
'comments' : r'<!--.*?-->',
@@ -247,7 +254,9 @@
striploopcontinue = True
while firstinseparator > 0 and striploopcontinue:
striploopcontinue = False
- if (firstinseparator >= lenseparator) and (separator == text[firstinseparator-lenseparator:firstinseparator]):
+ if (firstinseparator >= lenseparator) and \
+ (separator == text[firstinseparator - \
+ lenseparator : firstinseparator]):
firstinseparator -= lenseparator
striploopcontinue = True
elif text[firstinseparator-1] < ' ':
@@ -266,7 +275,7 @@
# or change links to a different project, or any that are formatted
# as in-line interwiki links (e.g., "[[:es:Articulo]]". (CONFIRM)
-def getLanguageLinks(text, insite = None, pageLink = "[[]]", template_subpage=False):
+def getLanguageLinks(text, insite=None, pageLink="[[]]", template_subpage=False):
"""
Return a dict of interlanguage links found in text.
@@ -303,7 +312,7 @@
# we want the actual page objects rather than the titles
site = insite.getSite(code = lang)
try:
- result[site] = pywikibot.Page(site, pagetitle, insite = insite)
+ result[site] = pywikibot.Page(site, pagetitle, insite=insite)
except pywikibot.InvalidTitle:
pywikibot.output(
u"[getLanguageLinks] Text contains invalid interwiki link [[%s:%s]]."
@@ -336,7 +345,7 @@
return text.strip()
-def removeLanguageLinksAndSeparator(text, site = None, marker = '', separator = ''):
+def removeLanguageLinksAndSeparator(text, site=None, marker='', separator=''):
"""
Return text with all interlanguage links, plus any preceeding whitespace
and separateor occurrences removed.
@@ -356,8 +365,8 @@
return removeLanguageLinks(text, site, marker)
-def replaceLanguageLinks(oldtext, new, site = None, addOnly = False,
- template = False, template_subpage = False):
+def replaceLanguageLinks(oldtext, new, site=None, addOnly=False,
+ template=False, template_subpage=False):
"""Replace interlanguage links in the text with a new set of links.
'new' should be a dict with the Site objects as keys, and Page or Link
@@ -402,9 +411,12 @@
site) + separator + s
newtext = replaceCategoryLinks(s2, cats, site=site,
addOnly=True)
- elif site.family.name == 'wikitravel': # for Wikitravel's language links position.
+ # for Wikitravel's language links position.
+ # (not supported by rewrite - no API)
+ elif site.family.name == 'wikitravel':
s = separator + s + separator
- newtext = s2[:firstafter].replace(marker,'') + s + s2[firstafter:]
+ newtext = s2[:firstafter].replace(marker,'') + s + \
+ s2[firstafter:]
else:
if template or template_subpage:
if template_subpage:
@@ -423,7 +435,8 @@
newtext = regexp.sub(s + includeOff, s2)
else:
# Put the langlinks at the end, inside noinclude's
- newtext = s2.replace(marker,'').strip() + separator + u'%s\n%s%s\n' % (includeOn, s, includeOff)
+ newtext = s2.replace(marker,'').strip() + separator + \
+ u'%s\n%s%s\n' % (includeOn, s, includeOff)
else:
newtext = s2.replace(marker,'').strip() + separator + s
else:
@@ -492,7 +505,7 @@
# Functions dealing with category links
#---------------------------------------
-def getCategoryLinks(text, site = None):
+def getCategoryLinks(text, site=None):
import catlib
"""Return a list of category links found in text.
@@ -501,15 +514,15 @@
"""
result = []
-
if site is None:
site = pywikibot.getSite()
-
# Ignore category links within nowiki tags, pre tags, includeonly tags,
# and HTML comments
text = removeDisabledParts(text)
catNamespace = '|'.join(site.category_namespaces())
- R = re.compile(r'\[\[\s*(?P<namespace>%s)\s*:\s*(?P<catName>.+?)(?:\|(?P<sortKey>.+?))?\s*\]\]' % catNamespace, re.I)
+ R = re.compile(r'\[\[\s*(?P<namespace>%s)\s*:\s*(?P<catName>.+?)'
+ r'(?:\|(?P<sortKey>.+?))?\s*\]\]'
+ % catNamespace, re.I)
for match in R.finditer(text):
cat = catlib.Category(site,
'%s:%s' % (match.group('namespace'),
@@ -519,7 +532,7 @@
return result
-def removeCategoryLinks(text, site = None, marker = ''):
+def removeCategoryLinks(text, site=None, marker=''):
"""Return text with all category links removed.
Put the string marker after the last replacement (at the end of the text
@@ -530,10 +543,8 @@
# interwiki link, plus trailing whitespace. The language code is grouped.
# NOTE: This assumes that language codes only consist of non-capital
# ASCII letters and hyphens.
-
if site is None:
site = pywikibot.getSite()
-
catNamespace = '|'.join(site.category_namespaces())
categoryR = re.compile(r'\[\[\s*(%s)\s*:.*?\]\]\s*' % catNamespace, re.I)
text = replaceExcept(text, categoryR, '',
@@ -555,10 +566,8 @@
if there is no replacement).
"""
-
if site is None:
site = pywikibot.getSite()
-
if separator:
mymarker = findmarker(text, u'@C@')
newtext = removeCategoryLinks(text, site, mymarker)
@@ -620,7 +629,8 @@
The PyWikipediaBot is no longer allowed to touch categories on the German
Wikipedia on pages that contain the Personendaten template because of the
non-standard placement of that template.
-See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…""")
+See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…
+""")
separator = site.family.category_text_separator
iseparator = site.family.interwiki_text_separator
separatorstripped = separator.strip()
@@ -646,12 +656,16 @@
if "</noinclude>" in s2[firstafter:]:
if separatorstripped:
s = separator + s
- newtext = s2[:firstafter].replace(marker,'') + s + s2[firstafter:]
+ newtext = s2[:firstafter].replace(marker, '') + s + \
+ s2[firstafter:]
elif site.language() in site.family.categories_last:
newtext = s2.replace(marker,'').strip() + separator + s
else:
interwiki = getLanguageLinks(s2)
- s2 = removeLanguageLinksAndSeparator(s2.replace(marker,''), site, '', iseparatorstripped) + separator + s
+ s2 = removeLanguageLinksAndSeparator(s2.replace(marker, ''),
+ site, '',
+ iseparatorstripped
+ ) + separator + s
newtext = replaceLanguageLinks(s2, interwiki, site=site,
addOnly=True)
else:
@@ -671,7 +685,7 @@
return ''
if insite is None:
insite = pywikibot.getSite()
- catLinks = [category.aslink(noInterwiki = True) for category in categories]
+ catLinks = [category.aslink(noInterwiki=True) for category in categories]
if insite.category_on_one_line():
sep = ' '
else:
@@ -717,7 +731,7 @@
# Functions dealing with templates
#----------------------------------
-def extract_templates_and_params(text, get_redirect=False):
+def extract_templates_and_params(text):
"""Return list of template calls found in text.
Return value is a list of tuples. There is one tuple for each use of a
@@ -954,10 +968,11 @@
if code == 'ckb':
return ['ku', 'ar']
#Chinese
- if code in ['minnan', 'zh', 'zh-classical', 'zh-min-nan', 'zh-tw', 'zh-hans', 'zh-hant']:
+ if code in ['minnan', 'zh', 'zh-classical', 'zh-min-nan', 'zh-tw',
+ 'zh-hans', 'zh-hant']:
return ['zh', 'zh-tw', 'zh-cn', 'zh-classical']
- if code in ['cdo', 'gan', 'hak', 'ii', 'wuu', 'za', 'zh-cdo', 'zh-classical',
- 'zh-cn', 'zh-yue']:
+ if code in ['cdo', 'gan', 'hak', 'ii', 'wuu', 'za', 'zh-cdo',
+ 'zh-classical', 'zh-cn', 'zh-yue']:
return ['zh', 'zh-cn', 'zh-tw', 'zh-classical']
#Scandinavian languages
if code in ['da', 'sv']:
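
Since get_redirect is dropped, extract_templates_and_params now takes the text alone. A hedged sketch with made-up wikitext:

    from pywikibot import textlib

    text = u'{{Infobox person|name=Ada Lovelace|born=1815}}'
    # One (name, parameters) entry per template call found in the text.
    for name, params in textlib.extract_templates_and_params(text):
        print name, params
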
Revision: 8553
Author: xqt
Date: 2010-09-14 15:37:42 +0000 (Tue, 14 Sep 2010)
Log Message:
-----------
remove obsolete parameter in extract_templates_and_params; enable getLanguageLinks on template subpages (update from trunk)
Modified Paths:
--------------
branches/rewrite/pywikibot/textlib.py
Modified: branches/rewrite/pywikibot/textlib.py
===================================================================
--- branches/rewrite/pywikibot/textlib.py 2010-09-14 05:47:44 UTC (rev 8552)
+++ branches/rewrite/pywikibot/textlib.py 2010-09-14 15:37:42 UTC (rev 8553)
@@ -109,11 +109,14 @@
if exc in exceptionRegexes:
dontTouchRegexes.append(exceptionRegexes[exc])
else:
- # nowiki, noinclude, includeonly, timeline, math ond other extensions
- dontTouchRegexes.append(re.compile(r'(?is)<%s>.*?</%s>' % (exc, exc)))
+ # nowiki, noinclude, includeonly, timeline, math ond other
+ # extensions
+ dontTouchRegexes.append(re.compile(r'(?is)<%s>.*?</%s>'
+ % (exc, exc)))
# handle alias
if exc == 'source':
- dontTouchRegexes.append(re.compile(r'(?is)<syntaxhighlight .*?</syntaxhighlight>'))
+ dontTouchRegexes.append(re.compile(
+ r'(?is)<syntaxhighlight .*?</syntaxhighlight>'))
else:
# assume it's a regular expression
dontTouchRegexes.append(exc)
@@ -167,11 +170,11 @@
groupMatch = groupR.search(replacement)
if not groupMatch:
break
- groupID = (groupMatch.group('name')
- or int(groupMatch.group('number')))
- replacement = (replacement[:groupMatch.start()]
- + match.group(groupID)
- + replacement[groupMatch.end():])
+ groupID = groupMatch.group('name') or \
+ int(groupMatch.group('number'))
+ replacement = replacement[:groupMatch.start()] + \
+ match.group(groupID) + \
+ replacement[groupMatch.end():]
text = text[:match.start()] + replacement + text[match.end():]
# continue the search on the remaining text
@@ -196,6 +199,7 @@
The exact set of parts which should be removed can be passed as the
'parts' parameter, which defaults to all.
+
"""
regexes = {
'comments' : r'<!--.*?-->',
@@ -250,9 +254,9 @@
striploopcontinue = True
while firstinseparator > 0 and striploopcontinue:
striploopcontinue = False
- if ( (firstinseparator >= lenseparator) and
- (separator ==
- text[firstinseparator-lenseparator:firstinseparator])):
+ if (firstinseparator >= lenseparator) and \
+ (separator == text[firstinseparator - \
+ lenseparator : firstinseparator]):
firstinseparator -= lenseparator
striploopcontinue = True
elif text[firstinseparator-1] < ' ':
@@ -271,7 +275,7 @@
# or change links to a different project, or any that are formatted
# as in-line interwiki links (e.g., "[[:es:Articulo]]". (CONFIRM)
-def getLanguageLinks(text, insite = None, pageLink = "[[]]"):
+def getLanguageLinks(text, insite=None, pageLink="[[]]", template_subpage=False):
"""
Return a dict of interlanguage links found in text.
@@ -285,7 +289,10 @@
result = {}
# Ignore interwiki links within nowiki tags, includeonly tags, pre tags,
# and HTML comments
- text = removeDisabledParts(text)
+ tags = ['comments', 'nowiki', 'pre', 'source']
+ if not template_subpage:
+ tags += ['includeonly']
+ text = removeDisabledParts(text, tags)
# This regular expression will find every link that is possibly an
# interwiki link.
@@ -332,7 +339,7 @@
return text.strip()
-def removeLanguageLinksAndSeparator(text, site = None, marker = '', separator = ''):
+def removeLanguageLinksAndSeparator(text, site=None, marker='', separator=''):
"""
Return text with all interlanguage links, plus any preceeding whitespace
and separateor occurrences removed.
@@ -352,8 +359,8 @@
return removeLanguageLinks(text, site, marker)
-def replaceLanguageLinks(oldtext, new, site = None, addOnly = False,
- template = False):
+def replaceLanguageLinks(oldtext, new, site=None, addOnly=False,
+ template=False, template_subpage=False):
"""Replace interlanguage links in the text with a new set of links.
'new' should be a dict with the Site objects as keys, and Page or Link
@@ -400,17 +407,25 @@
newtext = replaceCategoryLinks(s2, cats, site=site,
addOnly=True)
else:
- if template:
+ if template or template_subpage:
+ if template_subpage:
+ includeOn = '<includeonly>'
+ includeOff = '</includeonly>'
+ else:
+ includeOn = '<noinclude>'
+ includeOff = '</noinclude>'
+ separator = ''
# Do we have a noinclude at the end of the template?
- parts = s2.split('</noinclude>')
+ parts = s2.split(includeOff)
lastpart = parts[-1]
if re.match('\s*%s' % marker, lastpart):
# Put the langlinks back into the noinclude's
- regexp = re.compile('</noinclude>\s*%s' % marker)
- newtext = regexp.sub(s + '</noinclude>', s2)
+ regexp = re.compile('%s\s*%s' % (includeOff, marker))
+ newtext = regexp.sub(s + includeOff, s2)
else:
# Put the langlinks at the end, inside noinclude's
- newtext = s2.replace(marker,'').strip() + separator + u'<noinclude>\n%s</noinclude>\n' % s
+ newtext = s2.replace(marker,'').strip() + separator + \
+ u'%s\n%s%s\n' % (includeOn, s, includeOff)
else:
newtext = s2.replace(marker,'').strip() + separator + s
else:
@@ -506,7 +521,7 @@
return result
-def removeCategoryLinks(text, site, marker = ''):
+def removeCategoryLinks(text, site, marker=''):
"""Return text with all category links removed.
Put the string marker after the last replacement (at the end of the text
@@ -599,7 +614,8 @@
The PyWikipediaBot is no longer allowed to touch categories on the German
Wikipedia on pages that contain the Personendaten template because of the
non-standard placement of that template.
-See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…""")
+See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…
+""")
separator = site.family.category_text_separator
iseparator = site.family.interwiki_text_separator
separatorstripped = separator.strip()
@@ -626,15 +642,16 @@
if "</noinclude>" in s2[firstafter:]:
if separatorstripped:
s = separator + s
- newtext = (s2[:firstafter].replace(marker,'') + s
- + s2[firstafter:])
+ newtext = s2[:firstafter].replace(marker, '') + s + \
+ s2[firstafter:]
elif site.language() in site.family.categories_last:
newtext = s2.replace(marker,'').strip() + separator + s
else:
interwiki = getLanguageLinks(s2)
- s2 = removeLanguageLinksAndSeparator(
- s2.replace(marker,''), site, '', iseparatorstripped
- ) + separator + s
+ s2 = removeLanguageLinksAndSeparator(s2.replace(marker, ''),
+ site, '',
+ iseparatorstripped
+ ) + separator + s
newtext = replaceLanguageLinks(s2, interwiki, site=site,
addOnly=True)
else:
@@ -700,7 +717,7 @@
# Functions dealing with templates
#----------------------------------
-def extract_templates_and_params(text, get_redirect=False):
+def extract_templates_and_params(text):
"""Return list of template calls found in text.
Return value is a list of tuples. There is one tuple for each use of a
@@ -937,10 +954,11 @@
if code == 'ckb':
return ['ku', 'ar']
#Chinese
- if code in ['minnan', 'zh', 'zh-classical', 'zh-min-nan', 'zh-tw', 'zh-hans', 'zh-hant']:
+ if code in ['minnan', 'zh', 'zh-classical', 'zh-min-nan', 'zh-tw',
+ 'zh-hans', 'zh-hant']:
return ['zh', 'zh-tw', 'zh-cn', 'zh-classical']
- if code in ['cdo', 'gan', 'hak', 'ii', 'wuu', 'za', 'zh-cdo', 'zh-classical',
- 'zh-cn', 'zh-yue']:
+ if code in ['cdo', 'gan', 'hak', 'ii', 'wuu', 'za', 'zh-cdo',
+ 'zh-classical', 'zh-cn', 'zh-yue']:
return ['zh', 'zh-cn', 'zh-tw', 'zh-classical']
#Scandinavian languages
if code in ['da', 'sv']:
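
The new template_subpage flag keeps <includeonly> content visible to the link scanner. A sketch against the rewrite branch, with made-up wikitext (resolving the found links still requires a configured default site):

    from pywikibot import textlib

    text = u'<includeonly>[[de:Beispiel]]</includeonly>'
    # Default behaviour strips includeonly blocks, so nothing is found.
    print textlib.getLanguageLinks(text)                          # {}
    # On a template subpage the block is kept and the link is parsed.
    print textlib.getLanguageLinks(text, template_subpage=True)
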