http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11483
Revision: 11483
Author: xqt
Date: 2013-04-28 17:04:21 +0000 (Sun, 28 Apr 2013)
Log Message:
-----------
If the text has digits between closing and opening braces, like {{foo}}1{{bar}}, restoring from dict may fail. We
need to change the index: skip any marker index that occurs as such digits in the original text. Follow-up to r11406 and r11408.
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/textlib.py
Modified: trunk/pywikipedia/pywikibot/textlib.py
===================================================================
--- trunk/pywikipedia/pywikibot/textlib.py 2013-04-28 13:23:57 UTC (rev 11482)
+++ trunk/pywikipedia/pywikibot/textlib.py 2013-04-28 17:04:21 UTC (rev 11483)
@@ -133,30 +133,35 @@
Rmarker2 = re.compile('%(mark)s(\d+)%(mark)s' % {'mark':
marker2})
# hide the flat template marker
dontTouchRegexes.append(Rmarker1)
+ origin = text
values = {}
count = 0
for m in Rvalue.finditer(text):
count += 1
+ # If we have digits between brackets, restoring from dict may fail.
+ # So we need to change the index. We have to search in the origin.
+ while u'}}}%d{{{' % count in origin:
+ count += 1
item = m.group()
text = text.replace(item, '%s%d%s' % (marker2, count, marker2))
values[count] = item
inside = {}
seen = set()
+ count = 0
while TEMP_REGEX.search(text) is not None:
for m in TEMP_REGEX.finditer(text):
item = m.group()
if item in seen:
continue # speed up
seen.add(item)
- count = len(seen)
+ count += 1
+ while u'}}%d{{' % count in origin:
+ count += 1
text = text.replace(item, '%s%d%s' % (marker1, count, marker1))
# Make sure stored templates don't contain markers
- # We replace the last item first, otherwise inside templates
- # like {{A{{B}}{{C}}1{{D}}}} could fail
- for i in range(count - 1, 0, -1):
- item = item.replace('%s%d%s' % (marker1, i, marker1),
- inside[i])
+ for m2 in Rmarker1.finditer(item):
+ item = item.replace(m2.group(), inside[int(m2.group(1))])
for m2 in Rmarker2.finditer(item):
item = item.replace(m2.group(), values[int(m2.group(1))])
inside[count] = item
@@ -899,7 +904,7 @@
thistxt = removeDisabledParts(text)
# marker for inside templates or parameters
- marker = findmarker(thistxt)
+ marker1 = findmarker(thistxt)
# marker for links
marker2 = findmarker(thistxt, u'##', u'#')
@@ -913,7 +918,7 @@
result = []
Rmath = re.compile(ur'<math>[^<]+</math>')
Rvalue = re.compile(r'{{{.+?}}}')
- Rmarker = re.compile(ur'%s(\d+)%s' % (marker, marker))
+ Rmarker1 = re.compile(ur'%s(\d+)%s' % (marker1, marker1))
Rmarker2 = re.compile(ur'%s(\d+)%s' % (marker2, marker2))
Rmarker3 = re.compile(ur'%s(\d+)%s' % (marker3, marker3))
Rmarker4 = re.compile(ur'%s(\d+)%s' % (marker4, marker4))
@@ -931,12 +936,17 @@
count = 0
for m in Rvalue.finditer(thistxt):
count += 1
+ # If we have digits between brackets, restoring from dict may fail.
+ # So we need to change the index. We have to search in the origin text.
+ while u'}}}%d{{{' % count in text:
+ count += 1
item = m.group()
thistxt = thistxt.replace(item, '%s%d%s' % (marker4, count, marker4))
values[count] = item
inside = {}
seen = set()
+ count = 0
while TEMP_REGEX.search(thistxt) is not None:
for m in TEMP_REGEX.finditer(thistxt):
# Make sure it is not detected again
@@ -944,14 +954,15 @@
if item in seen:
continue # speed up
seen.add(item)
- count = len(seen)
- thistxt = thistxt.replace(item, '%s%d%s' % (marker, count, marker))
+ count += 1
+ while u'}}%d{{' % count in text:
+ count += 1
+ thistxt = thistxt.replace(item,
+ '%s%d%s' % (marker1, count, marker1))
+
# Make sure stored templates don't contain markers
- # We replace the last item first, otherwise inside templates
- # like {{A|{{B}}{{C}}1{{D}}}} could fail
- for i in range(count - 1, 0, -1):
- item = item.replace('%s%d%s' % (marker, count, marker),
- inside[i])
+ for m2 in Rmarker1.finditer(item):
+ item = item.replace(m2.group(), inside[int(m2.group(1))])
for m2 in Rmarker3.finditer(item):
item = item.replace(m2.group(), maths[int(m2.group(1))])
for m2 in Rmarker4.finditer(item):
@@ -960,7 +971,7 @@
# Name
name = m.group('name').strip()
- m2 = Rmarker.search(name) or Rmath.search(name)
+ m2 = Rmarker1.search(name) or Rmath.search(name)
if m2 is not None:
# Doesn't detect templates whose name changes,
# or templates whose name contains math tags
@@ -1019,10 +1030,9 @@
param_val = param
numbered_param += 1
count = len(inside)
- for i in range(count - 1, 0, -1):
- param_val = param_val.replace('%s%d%s'
- % (marker, i, marker),
- inside[i])
+ for m2 in Rmarker1.finditer(param_val):
+ param_val = param_val.replace(m2.group(),
+ inside[int(m2.group(1))])
for m2 in Rmarker2.finditer(param_val):
param_val = param_val.replace(m2.group(),
links[int(m2.group(1))])