Revision: 8239
Author: xqt
Date: 2010-06-03 05:30:45 +0000 (Thu, 03 Jun 2010)
Log Message:
-----------
solve_disambiguation.py: update from rewrite r7986, adds a command to tag the ambiguous
link with the Disambiguation Needed template (patch bug #2919950)
Modified Paths:
--------------
branches/rewrite/scripts/solve_disambiguation.py
trunk/pywikipedia/solve_disambiguation.py
Modified: branches/rewrite/scripts/solve_disambiguation.py
===================================================================
--- branches/rewrite/scripts/solve_disambiguation.py 2010-06-02 13:50:48 UTC (rev 8238)
+++ branches/rewrite/scripts/solve_disambiguation.py 2010-06-03 05:30:45 UTC (rev 8239)
@@ -86,8 +86,9 @@
# Application specific imports
import pywikibot
-from pywikibot import config, pagegenerators
from scripts import editarticle
+from pywikibot import pagegenerators
+from pywikibot import config
# Summary message when working on disambiguation pages
msg = {
@@ -501,8 +502,8 @@
self.ignorelist = []
filename = config.datafilepath(
- 'disambiguations',
- self.disambPage.title(as_filename=True) + '.txt')
+ 'disambiguations',
+ self.disambPage.title(as_filename=True) + '.txt')
try:
# The file is stored in the disambiguation/ subdir.
# Create if necessary.
@@ -525,8 +526,8 @@
if self.enabled:
# Skip this occurence next time.
filename = config.datafilepath(
- 'disambiguations',
- self.disambPage.title(asUrl=True) + '.txt')
+ 'disambiguations',
+ self.disambPage.title(asUrl=True) + '.txt')
try:
# Open file for appending. If none exists yet, create a new one.
f = codecs.open(filename, 'a', 'utf-8')
@@ -650,7 +651,7 @@
text=refPage.get(throttle=False)
ignoreReason = self.checkContents(text)
if ignoreReason:
- pywikibot.output('\n\nSkipping %s because it contains %s.\n'
+ pywikibot.output('\n\nSkipping %s because it contains %s.\n\n'
% (refPage.title(), ignoreReason))
else:
include = True
@@ -663,8 +664,8 @@
u'Do you want to make redirect %s point to %s?'
% (refPage.title(), target), ['yes', 'no'],
['y', 'N'], 'N')
if choice == 'y':
- redir_text = '#%s [[%s]]' % (
- self.mysite.redirect(default=True), target)
+ redir_text = '#%s [[%s]]' \
+ % (self.mysite.redirect(default=True), target)
try:
refPage.put_async(redir_text,comment=self.comment)
except pywikibot.PageNotSaved, error:
@@ -672,8 +673,8 @@
else:
choice = pywikibot.inputChoice(
u'Do you want to work on pages linking to %s?'
- % refPage.title(), ['yes', 'no', 'change redirect'],
- ['y', 'N', 'c'], 'N')
+ % refPage.title(), ['yes', 'no', 'change redirect'],
+ ['y', 'N', 'c'], 'N')
if choice == 'y':
gen = ReferringPageGeneratorWithIgnore(refPage,
self.primary)
@@ -718,7 +719,7 @@
# ignore interwiki links
if foundlink.site != disambPage.site:
continue
- # check whether the link found is to disambPage
+ # Check whether the link found is to disambPage.
try:
if foundlink.canonical_title() != disambPage.title():
continue
@@ -742,7 +743,7 @@
# Highlight the title in purple.
pywikibot.output(
u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
- % refPage.title())
+ % refPage.title())
if not self.always:
# at the beginning of the link, start red color.
@@ -756,12 +757,12 @@
if edited:
choice = pywikibot.input(
u"Option (#, r#, [s]kip link, [e]dit page, [n]ext page, [u]nlink, [q]uit,\n"
-u" ?=tag with " + dn_template_str + ",\n"
+u" [t]ag template " + dn_template_str + ",\n"
u" [m]ore context, [l]ist, [a]dd new, x=save in this form):")
else:
choice = pywikibot.input(
u"Option (#, r#, [s]kip link, [e]dit page, [n]ext page, [u]nlink, [q]uit,\n"
-u" ?=tag with " + dn_template_str + ",\n"
+u" [t]ag template " + dn_template_str + ",\n"
u" [m]ore context, show [d]isambiguation page, [l]ist, [a]dd new):")
else:
choice = self.always
@@ -789,7 +790,7 @@
disambigText = editor.edit(
disambPage.get(),
jumpIndex=m.start(),
- highlight = disambPage.title())
+ highlight=disambPage.title())
elif choice in ['l', 'L']:
self.listAlternatives()
elif choice in ['m', 'M']:
@@ -835,8 +836,8 @@
trailing_chars = m.group('linktrail')
if trailing_chars:
link_text += trailing_chars
-
- if choice in ['?', '/']:
+ # '?', '/' for old choice
+ if choice in ['t', 'T', '?', '/']:
#small chunk of text to search
search_text = text[m.end() : m.end() + context]
#figure out where the link (and sentance) ends, put note there
@@ -887,7 +888,7 @@
repPl = pywikibot.Page(pywikibot.Link(new_page_title,
disambPage.site))
if (new_page_title[0].isupper()
- or link_text[0].isupper()):
+ or link_text[0].isupper()):
new_page_title = repPl.title()
else:
new_page_title = repPl.title()
@@ -905,11 +906,11 @@
# check if we can create a link with trailing characters
# instead of a pipelink
elif len(new_page_title) <= len(link_text) \
- and firstcap(link_text[:len(new_page_title)]) \
- == firstcap(new_page_title) \
- and re.sub(self.trailR, '',
- link_text[len(new_page_title):]) == '' \
- and not section:
+ and firstcap(link_text[:len(new_page_title)]) \
+ == firstcap(new_page_title) \
+ and re.sub(self.trailR, '',
+ link_text[len(new_page_title):]) == '' \
+ and not section:
newlink = "[[%s]]%s" \
% (link_text[:len(new_page_title)],
link_text[len(new_page_title):])
@@ -939,26 +940,26 @@
def findAlternatives(self, disambPage):
if disambPage.isRedirectPage() and not self.primary:
if (disambPage.site.lang in self.primary_redir_template
- and self.primary_redir_template[disambPage.site.lang]
- in disambPage.templates(get_redirect = True)):
+ and self.primary_redir_template[disambPage.site.lang]
+ in disambPage.templates(get_redirect = True)):
baseTerm = disambPage.title()
for template in disambPage.templatesWithParams(
- get_redirect=True):
+ get_redirect=True):
if template[0] == self.primary_redir_template[
- disambPage.site.lang] \
- and len(template[1]) > 0:
+ disambPage.site.lang] \
+ and len(template[1]) > 0:
baseTerm = template[1][1]
disambTitle = primary_topic_format[self.mylang] % baseTerm
try:
disambPage2 = pywikibot.Page(
- pywikibot.Link(disambTitle, self.mysite))
+ pywikibot.Link(disambTitle, self.mysite))
links = disambPage2.linkedPages()
links = [correctcap(l,disambPage2.get()) for l in links]
except pywikibot.NoPage:
pywikibot.output(u"No page at %s, using redirect target."
% disambTitle)
links = disambPage.linkedPages()[:1]
- links = [correctcap(l, disambPage.get(get_redirect = True))
+ links = [correctcap(l, disambPage.get(get_redirect=True))
for l in links]
self.alternatives += links
else:
@@ -983,16 +984,16 @@
if self.primary:
try:
disambPage2 = pywikibot.Page(
- pywikibot.Link(
- primary_topic_format[self.mylang]
- % disambPage.title(),
- self.mysite))
+ pywikibot.Link(
+ primary_topic_format[self.mylang]
+ % disambPage.title(),
+ self.mysite))
links = disambPage2.linkedPages()
links = [correctcap(l, disambPage2.get())
for l in links]
except pywikibot.NoPage:
pywikibot.output(
- u"Page does not exist, using the first link in page %s."
+u"Page does not exist, using the first link in page %s."
% disambPage.title())
links = disambPage.linkedPages()[:1]
links = [correctcap(l, disambPage.get())
@@ -1000,7 +1001,7 @@
else:
try:
links = disambPage.linkedPages()
- links = [correctcap(l ,disambPage.get())
+ links = [correctcap(l, disambPage.get())
for l in links]
except pywikibot.NoPage:
pywikibot.output(u"Page does not exist, skipping.")
@@ -1025,44 +1026,44 @@
# first check whether user has customized the edit comment
if (self.mysite.family.name in config.disambiguation_comment
- and self.mylang in config.disambiguation_comment
+ and self.mylang in config.disambiguation_comment
[self.mysite.family.name]):
try:
self.comment = pywikibot.translate(
- self.mysite,
- config.disambiguation_comment
- [self.mysite.family.name]
- ) % (disambPage.title(), targets)
+ self.mysite,
+ config.disambiguation_comment[
+ self.mysite.family.name]
+ ) % (disambPage.title(), targets)
# Backwards compatibility, type error probably caused by too
# many arguments for format string
except TypeError:
self.comment = pywikibot.translate(
- self.mysite,
- config.disambiguation_comment
- [self.mysite.family.name]
- ) % disambPage.title()
+ self.mysite,
+ config.disambiguation_comment[
+ self.mysite.family.name]
+ ) % disambPage.title()
elif disambPage.isRedirectPage():
# when working on redirects, there's another summary message
if unlink and not new_targets:
self.comment = pywikibot.translate(self.mysite,
msg_redir_unlink) \
- % disambPage.title()
+ % disambPage.title()
elif dn and not new_targets:
self.comment = pywikibot.translate(self.mysite, msg_redir_dn) \
- % disambPage.title()
+ % disambPage.title()
else:
self.comment = pywikibot.translate(self.mysite, msg_redir) \
- % (disambPage.title(), targets)
+ % (disambPage.title(), targets)
else:
if unlink and not new_targets:
self.comment = pywikibot.translate(self.mysite, msg_unlink) \
- % disambPage.title()
+ % disambPage.title()
elif dn and not new_targets:
self.comment = pywikibot.translate(self.mysite, msg_dn) \
- % disambPage.title()
+ % disambPage.title()
else:
self.comment = pywikibot.translate(self.mysite, msg) \
- % (disambPage.title(), targets)
+ % (disambPage.title(), targets)
def run(self):
if self.main_only:
@@ -1133,10 +1134,10 @@
elif arg.startswith('-file'):
if len(arg) == 5:
generator = pagegenerators.TextfilePageGenerator(
- filename = None)
+ filename=None)
else:
generator = pagegenerators.TextfilePageGenerator(
- filename = arg[6:])
+ filename=arg[6:])
elif arg.startswith('-pos:'):
if arg[5]!=':':
mysite = pywikibot.getSite()
@@ -1167,7 +1168,7 @@
else:
generator = pagegenerators.CategorizedPageGenerator(
pywikibot.getSite().disambcategory(),
- start = arg[7:])
+ start=arg[7:])
generator = pagegenerators.NamespaceFilterPageGenerator(
generator, [0])
except pywikibot.NoPage:
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py 2010-06-02 13:50:48 UTC (rev 8238)
+++ trunk/pywikipedia/solve_disambiguation.py 2010-06-03 05:30:45 UTC (rev 8239)
@@ -27,6 +27,9 @@
-just only use the alternatives given on the command line, do not
read the page for other possibilities
+ -dnskip Skip links already marked with a disambiguation-needed
+ template (e.g., {{dn}})
+
-primary "primary topic" disambiguation (Begriffsklärung nach Modell 2).
That's titles where one topic is much more important, the
disambiguation page is saved somewhere else, and the important
@@ -55,7 +58,7 @@
wiki that is defined (to the bot) as the category containing
disambiguation pages, starting at XY. If only '-start' or
'-start:' is given, it starts at the beginning.
-
+
-min:XX (XX being a number) only work on disambiguation pages for which
at least XX are to be worked on.
@@ -83,7 +86,8 @@
# Application specific imports
import wikipedia as pywikibot
-import pagegenerators, editarticle
+import editarticle
+import pagegenerators
# Summary message when working on disambiguation pages
msg = {
@@ -190,6 +194,21 @@
'uk': u'Виправлення посилання на багатозначність за допомогою бота: %s вилучено',
}
+# Disambiguation Needed template
+dn_template = {
+ 'en' : u'{{dn}}',
+ }
+
+# Summary message when adding Disambiguation Needed template
+msg_dn = {
+ 'en' : u'Robot-assisted disambiguation: %s - Marked as needing expert attention',
+ }
+
+# Summary message when adding Disambiguation Needed template to a redirect link
+msg_redir_dn = {
+ 'en' : u'Robot-assisted disambiguation: %s - Marked as needing expert attention',
+ }
+
# Summary message to (unknown)
unknown_msg = {
'ar' : u'(غير معروف)',
@@ -426,7 +445,8 @@
return string[0].upper()+string[1:]
def correctcap(link, text):
- # If text links to a page with title link uncapitalized, uncapitalize link, otherwise capitalize it
+ # If text links to a page with title link uncapitalized, uncapitalize link,
+ # otherwise capitalize it
linkupper = link.title()
linklower = linkupper[0].lower() + linkupper[1:]
if "[[%s]]"%linklower in text or "[[%s|"%linklower in text:
@@ -440,13 +460,18 @@
self.minimum = minimum
def __iter__(self):
- generator = pagegenerators.ReferringPageGenerator(self.disambPage,
followRedirects = False, withTemplateInclusion = False)
- generator = pagegenerators.PageTitleFilterPageGenerator(generator, ignore_title)
+ generator = pagegenerators.ReferringPageGenerator(
+ self.disambPage,
+ followRedirects=False,
+ withTemplateInclusion=False)
+ generator = pagegenerators.PageTitleFilterPageGenerator(generator,
+ ignore_title)
refs = [page for page in generator]
if len(refs) < self.minimum:
- pywikibot.output(u"Found only %d pages to work on; skipping." %
len(refs))
+ pywikibot.output(u"Found only %d pages to work on; skipping."
+ % len(refs))
return
pywikibot.output(u"Will work on %d pages." % len(refs))
for ref in refs:
@@ -463,8 +488,9 @@
self.enabled = enabled
self.ignorelist = []
- filename = pywikibot.config.datafilepath('disambiguations',
- self.disambPage.titleForFilename() + '.txt')
+ filename = pywikibot.config.datafilepath(
+ 'disambiguations',
+ self.disambPage.titleForFilename() + '.txt')
try:
# The file is stored in the disambiguation/ subdir.
# Create if necessary.
@@ -486,11 +512,11 @@
def ignore(self, refPage):
if self.enabled:
# Skip this occurence next time.
- filename = pywikibot.config.datafilepath('disambiguations',
- self.disambPage.urlname() + '.txt')
+ filename = pywikibot.config.datafilepath(
+ 'disambiguations',
+ self.disambPage.urlname() + '.txt')
try:
# Open file for appending. If none exists yet, create a new one.
- # The file is stored in the disambiguation/ subdir. Create if necessary.
f = codecs.open(filename, 'a', 'utf-8')
f.write(refPage.urlname() + '\n')
f.close()
@@ -515,16 +541,18 @@
u'{{[Pp]rocessing}}',
),
}
-
+
primary_redir_template = {
# Page.templates() format, first letter uppercase
'hu': u'Egyért-redir',
}
-
- def __init__(self, always, alternatives, getAlternatives, generator, primary,
main_only, minimum = 0):
+
+ def __init__(self, always, alternatives, getAlternatives, dnSkip, generator,
+ primary, main_only, minimum = 0):
self.always = always
self.alternatives = alternatives
self.getAlternatives = getAlternatives
+ self.dnSkip = dnSkip
self.generator = generator
self.primary = primary
self.main_only = main_only
@@ -572,11 +600,16 @@
linktrail = self.mysite.linktrail()
self.trailR = re.compile(linktrail)
- # The regular expression which finds links. Results consist of four groups:
- # group title is the target page title, that is, everything before | or ].
- # group section is the page section. It'll include the # to make life easier
for us.
- # group label is the alternative link title, that's everything between | and
].
- # group linktrail is the link trail, that's letters after ]] which are part
of the word.
+ # The regular expression which finds links. Results consist of four
+ # groups:
+ # group title is the target page title, that is, everything before
+ # | or ].
+ # group section is the page section. It'll include the # to make life
+ # easier for us.
+ # group label is the alternative link title, that's everything
+ # between | and ].
+ # group linktrail is the link trail, that's letters after ]] which
+ # are part of the word.
# note that the definition of 'letter' varies from language to language.
self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>'
+ linktrail + ')')
@@ -588,10 +621,11 @@
refPage - A page linking to disambPage
Returns False if the user pressed q to completely quit the program.
Otherwise, returns True.
-
+
"""
# TODO: break this function up into subroutines!
+ dn_template_str = pywikibot.translate(self.mysite, dn_template)
include = False
unlink = False
new_targets = []
@@ -599,24 +633,33 @@
text=refPage.get(throttle=False)
ignoreReason = self.checkContents(text)
if ignoreReason:
- pywikibot.output('\n\nSkipping %s because it contains %s.\n\n' %
(refPage.title(), ignoreReason))
+ pywikibot.output('\n\nSkipping %s because it contains %s.\n\n'
+ % (refPage.title(), ignoreReason))
else:
include = True
except pywikibot.IsRedirectPage:
- pywikibot.output(u'%s is a redirect to %s' % (refPage.title(),
disambPage.title()))
+ pywikibot.output(u'%s is a redirect to %s'
+ % (refPage.title(), disambPage.title()))
if disambPage.isRedirectPage():
target = self.alternatives[0]
- choice = pywikibot.inputChoice(u'Do you want to make redirect %s
point to %s?' % (refPage.title(), target), ['yes', 'no'],
['y', 'N'], 'N')
+ choice = pywikibot.inputChoice(
+ u'Do you want to make redirect %s point to %s?'
+ % (refPage.title(), target), ['yes', 'no'],
['y', 'N'], 'N')
if choice == 'y':
- redir_text = '#%s [[%s]]' %
(self.mysite.redirect(default=True), target)
+ redir_text = '#%s [[%s]]' \
+ % (self.mysite.redirect(default=True), target)
try:
refPage.put_async(redir_text,comment=self.comment)
except pywikibot.PageNotSaved, error:
pywikibot.output(u'Page not saved: %s' % error.args)
else:
- choice = pywikibot.inputChoice(u'Do you want to work on pages linking
to %s?' % refPage.title(), ['yes', 'no', 'change redirect'],
['y', 'N', 'c'], 'N')
+ choice = pywikibot.inputChoice(
+ u'Do you want to work on pages linking to %s?'
+ % refPage.title(), ['yes', 'no', 'change redirect'],
+ ['y', 'N', 'c'], 'N')
if choice == 'y':
- gen = ReferringPageGeneratorWithIgnore(refPage, self.primary)
+ gen = ReferringPageGeneratorWithIgnore(refPage,
+ self.primary)
preloadingGen = pagegenerators.PreloadingGenerator(gen)
for refPage2 in preloadingGen:
# run until the user selected 'quit'
@@ -626,20 +669,24 @@
text=refPage.get(throttle=False,get_redirect=True)
include = "redirect"
except pywikibot.NoPage:
- pywikibot.output(u'Page [[%s]] does not seem to exist?! Skipping.' %
refPage.title())
+ pywikibot.output(
+ u'Page [[%s]] does not seem to exist?! Skipping.'
+ % refPage.title())
include = False
if include in (True, "redirect"):
# make a backup of the original text so we can show the changes later
original_text = text
n = 0
curpos = 0
+ dn = False
edited = False
# This loop will run until we have finished the current page
while True:
m = self.linkR.search(text, pos = curpos)
if not m:
if n == 0:
- pywikibot.output(u"No changes necessary in %s" %
refPage.title())
+ pywikibot.output(u"No changes necessary in %s"
+ % refPage.title())
return True
else:
# stop loop and save page
@@ -661,23 +708,39 @@
n += 1
# how many bytes should be displayed around the current link
context = 60
+ #there's a {{dn}} here already
+ already_dn = text[m.end() : m.end() + 8].find(dn_template_str[:4]) > -1
+ if already_dn and self.dnSkip:
+ continue
+
# This loop will run while the user doesn't choose an option
# that will actually change the page
while True:
# Show the title of the page where the link was found.
# Highlight the title in purple.
- pywikibot.output(u"\n\n>>>
\03{lightpurple}%s\03{default} <<<" % refPage.title())
+ pywikibot.output(
+ u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+ % refPage.title())
if not self.always:
# at the beginning of the link, start red color.
# at the end of the link, reset the color to default
- pywikibot.output(text[max(0, m.start() - context) : m.start()] +
'\03{lightred}' + text[m.start() : m.end()] + '\03{default}' +
text[m.end() : m.end() + context])
+ pywikibot.output(text[max(0, m.start() - context)
+ : m.start()]
+ + '\03{lightred}'
+ + text[m.start() : m.end()]
+ + '\03{default}'
+ + text[m.end() : m.end() + context])
if edited:
- choice = pywikibot.input(u"Option (#, r#, [s]kip link,
[e]dit page, [n]ext page, [u]nlink, [q]uit\n"
- " [m]ore context, [l]ist,
[a]dd new, x=save in this form):")
+ choice = pywikibot.input(
+u"Option (#, r#, [s]kip link, [e]dit page, [n]ext page, [u]nlink, [q]uit,\n"
+u" [t]ag template " + dn_template_str + ",\n"
+u" [m]ore context, [l]ist, [a]dd new, x=save in this form):")
else:
- choice = pywikibot.input(u"Option (#, r#, [s]kip link,
[e]dit page, [n]ext page, [u]nlink, [q]uit\n"
- " [m]ore context, show
[d]isambiguation page, [l]ist, [a]dd new):")
+ choice = pywikibot.input(
+u"Option (#, r#, [s]kip link, [e]dit page, [n]ext page, [u]nlink, [q]uit,\n"
+u" [t]ag template " + dn_template_str + ",\n"
+u" [m]ore context, show [d]isambiguation page, [l]ist, [a]dd new):")
else:
choice = self.always
if choice in ['a', 'A']:
@@ -686,7 +749,8 @@
self.listAlternatives()
elif choice in ['e', 'E']:
editor = editarticle.TextEditor()
- newText = editor.edit(text, jumpIndex = m.start(), highlight =
disambPage.title())
+ newText = editor.edit(text, jumpIndex=m.start(),
+ highlight=disambPage.title())
# if user didn't press Cancel
if newText and newText != text:
text = newText
@@ -695,9 +759,15 @@
editor = editarticle.TextEditor()
if disambPage.isRedirectPage():
disambredir = disambPage.getRedirectTarget()
- disambigText = editor.edit(disambredir.get(), jumpIndex =
m.start(), highlight = disambredir.title())
+ disambigText = editor.edit(
+ disambredir.get(),
+ jumpIndex=m.start(),
+ highlight=disambredir.title())
else:
- disambigText = editor.edit(disambPage.get(), jumpIndex =
m.start(), highlight = disambPage.title())
+ disambigText = editor.edit(
+ disambPage.get(),
+ jumpIndex=m.start(),
+ highlight=disambPage.title())
elif choice in ['l', 'L']:
self.listAlternatives()
elif choice in ['m', 'M']:
@@ -743,8 +813,22 @@
trailing_chars = m.group('linktrail')
if trailing_chars:
link_text += trailing_chars
-
- if choice in ['u', 'U']:
+ # '?', '/' for old choice
+ if choice in ['t', 'T', '?', '/']:
+ #small chunk of text to search
+ search_text = text[m.end() : m.end() + context]
+ #figure out where the link (and sentance) ends, put note there
+ end_of_word_match = re.search("\s", search_text)
+ if end_of_word_match:
+ position_split = end_of_word_match.start(0)
+ else:
+ position_split = 0
+ #insert dab needed template
+ text = text[:m.end() + position_split] + dn_template_str \
+ + text[m.end() + position_split:]
+ dn = True
+ continue
+ elif choice in ['u', 'U']:
# unlink - we remove the section if there's any
text = text[:m.start()] + link_text + text[m.end():]
unlink = True
@@ -768,7 +852,9 @@
curpos -= 1
continue
if choice >= len(self.alternatives) or choice < 0:
- pywikibot.output(u"Choice out of range. Please select a
number between 0 and %i." % (len(self.alternatives) - 1))
+ pywikibot.output(
+u"Choice out of range. Please select a number between 0 and %i."
+ % (len(self.alternatives) - 1))
# show list of possible choices
self.listAlternatives()
# step back to ask the user again what to do with the
@@ -777,22 +863,36 @@
continue
new_page_title = self.alternatives[choice]
repPl = pywikibot.Page(disambPage.site(), new_page_title)
- if (new_page_title[0].isupper()) or (link_text[0].isupper()):
+ if (new_page_title[0].isupper()
+ or link_text[0].isupper()):
new_page_title = repPl.title()
else:
new_page_title = repPl.title()
- new_page_title = new_page_title[0].lower() + new_page_title[1:]
+ new_page_title = new_page_title[0].lower() \
+ + new_page_title[1:]
if new_page_title not in new_targets:
new_targets.append(new_page_title)
if replaceit and trailing_chars:
- newlink = "[[%s%s]]%s" % (new_page_title, section,
trailing_chars)
- elif replaceit or (new_page_title == link_text and not section):
+ newlink = "[[%s%s]]%s" % (new_page_title,
+ section,
+ trailing_chars)
+ elif replaceit or (new_page_title == link_text
+ and not section):
newlink = "[[%s]]" % new_page_title
- # check if we can create a link with trailing characters instead of a
pipelink
- elif len(new_page_title) <= len(link_text) and
firstcap(link_text[:len(new_page_title)]) == firstcap(new_page_title) and
re.sub(self.trailR, '', link_text[len(new_page_title):]) == '' and not
section:
- newlink = "[[%s]]%s" %
(link_text[:len(new_page_title)], link_text[len(new_page_title):])
+ # check if we can create a link with trailing characters
+ # instead of a pipelink
+ elif len(new_page_title) <= len(link_text) \
+ and firstcap(link_text[:len(new_page_title)]) \
+ == firstcap(new_page_title) \
+ and re.sub(self.trailR, '',
+ link_text[len(new_page_title):]) == '' \
+ and not section:
+ newlink = "[[%s]]%s" \
+ % (link_text[:len(new_page_title)],
+ link_text[len(new_page_title):])
else:
- newlink = "[[%s%s|%s]]" % (new_page_title, section,
link_text)
+ newlink = "[[%s%s|%s]]" \
+ % (new_page_title, section, link_text)
text = text[:m.start()] + newlink + text[m.end():]
continue
@@ -804,7 +904,7 @@
pywikibot.showDiff(original_text, text)
pywikibot.output(u'')
# save the page
- self.setSummaryMessage(disambPage, new_targets, unlink)
+ self.setSummaryMessage(disambPage, new_targets, unlink, dn)
try:
refPage.put_async(text,comment=self.comment)
except pywikibot.LockedPage:
@@ -815,20 +915,28 @@
def findAlternatives(self, disambPage):
if disambPage.isRedirectPage() and not self.primary:
- if disambPage.site().lang in self.primary_redir_template and
self.primary_redir_template[disambPage.site().lang] in disambPage.templates(get_redirect =
True):
+ if (disambPage.site().lang in self.primary_redir_template
+ and self.primary_redir_template[disambPage.site().lang]
+ in disambPage.templates(get_redirect = True)):
baseTerm = disambPage.title()
- for template in disambPage.templatesWithParams(get_redirect = True):
- if template[0] == self.primary_redir_template[disambPage.site().lang]
and len(template[1]) > 0:
+ for template in disambPage.templatesWithParams(
+ get_redirect=True):
+ if template[0] == self.primary_redir_template[
+ disambPage.site().lang] \
+ and len(template[1]) > 0:
baseTerm = template[1][1]
disambTitle = primary_topic_format[self.mylang] % baseTerm
try:
- disambPage2 = pywikibot.Page(self.mysite, disambTitle)
+ disambPage2 = pywikibot.Page(
+ self.mysite, disambTitle)
links = disambPage2.linkedPages()
links = [correctcap(l,disambPage2.get()) for l in links]
except pywikibot.NoPage:
- pywikibot.output(u"No page at %s, using redirect target." %
disambTitle)
+ pywikibot.output(u"No page at %s, using redirect target."
+ % disambTitle)
links = disambPage.linkedPages()[:1]
- links = [correctcap(l,disambPage.get(get_redirect = True)) for l in
links]
+ links = [correctcap(l, disambPage.get(get_redirect=True))
+ for l in links]
self.alternatives += links
else:
try:
@@ -851,20 +959,25 @@
try:
if self.primary:
try:
- disambPage2 = pywikibot.Page(self.mysite,
- primary_topic_format[self.mylang]
- % disambPage.title()
- )
+ disambPage2 = pywikibot.Page(
+ self.mysite,
+ primary_topic_format[self.mylang]
+ % disambPage.title())
links = disambPage2.linkedPages()
- links = [correctcap(l,disambPage2.get()) for l in links]
+ links = [correctcap(l, disambPage2.get())
+ for l in links]
except pywikibot.NoPage:
- pywikibot.output(u"Page does not exist, using the first link in page %s." % disambPage.title())
+ pywikibot.output(
+u"Page does not exist, using the first link in page %s."
+ % disambPage.title())
links = disambPage.linkedPages()[:1]
- links = [correctcap(l,disambPage.get()) for l in links]
+ links = [correctcap(l, disambPage.get())
+ for l in links]
else:
try:
links = disambPage.linkedPages()
- links = [correctcap(l,disambPage.get()) for l in links]
+ links = [correctcap(l, disambPage.get())
+ for l in links]
except pywikibot.NoPage:
pywikibot.output(u"Page does not exist, skipping.")
return False
@@ -874,7 +987,8 @@
self.alternatives += links
return True
- def setSummaryMessage(self, disambPage, new_targets = [], unlink = False):
+ def setSummaryMessage(self, disambPage, new_targets=[], unlink=False,
+ dn=False):
# make list of new targets
targets = ''
for page_title in new_targets:
@@ -886,29 +1000,45 @@
targets = pywikibot.translate(self.mysite, unknown_msg)
# first check whether user has customized the edit comment
- if self.mysite.family.name in pywikibot.config.disambiguation_comment and
self.mylang in pywikibot.config.disambiguation_comment[self.mysite.family.name]:
+ if (self.mysite.family.name in pywikibot.config.disambiguation_comment
+ and self.mylang in pywikibot.config.disambiguation_comment
+ [self.mysite.family.name]):
try:
- self.comment = pywikibot.translate(self.mysite,
- pywikibot.config.disambiguation_comment[
- self.mysite.family.name]
- ) % (disambPage.title(), targets)
- #Backwards compatibility, type error probably caused by too many arguments
for format string
+ self.comment = pywikibot.translate(
+ self.mysite,
+ pywikibot.config.disambiguation_comment[
+ self.mysite.family.name]
+ ) % (disambPage.title(), targets)
+ # Backwards compatibility, type error probably caused by too
+ # many arguments for format string
except TypeError:
- self.comment = pywikibot.translate(self.mysite,
- pywikibot.config.disambiguation_comment[
- self.mysite.family.name]
- ) % disambPage.title()
+ self.comment = pywikibot.translate(
+ self.mysite,
+ pywikibot.config.disambiguation_comment[
+ self.mysite.family.name]
+ ) % disambPage.title()
elif disambPage.isRedirectPage():
# when working on redirects, there's another summary message
if unlink and not new_targets:
- self.comment = pywikibot.translate(self.mysite, msg_redir_unlink) %
disambPage.title()
+ self.comment = pywikibot.translate(self.mysite,
+ msg_redir_unlink) \
+ % disambPage.title()
+ elif dn and not new_targets:
+ self.comment = pywikibot.translate(self.mysite, msg_redir_dn) \
+ % disambPage.title()
else:
- self.comment = pywikibot.translate(self.mysite, msg_redir) %
(disambPage.title(), targets)
+ self.comment = pywikibot.translate(self.mysite, msg_redir) \
+ % (disambPage.title(), targets)
else:
if unlink and not new_targets:
- self.comment = pywikibot.translate(self.mysite, msg_unlink) %
disambPage.title()
+ self.comment = pywikibot.translate(self.mysite, msg_unlink) \
+ % disambPage.title()
+ elif dn and not new_targets:
+ self.comment = pywikibot.translate(self.mysite, msg_dn) \
+ % disambPage.title()
else:
- self.comment = pywikibot.translate(self.mysite, msg) %
(disambPage.title(), targets)
+ self.comment = pywikibot.translate(self.mysite, msg) \
+ % (disambPage.title(), targets)
def run(self):
if self.main_only:
@@ -920,7 +1050,8 @@
u'%s:' % namespace for namespace in self.mysite.namespaces()]
for disambPage in self.generator:
- self.primaryIgnoreManager = PrimaryIgnoreManager(disambPage,
enabled=self.primary)
+ self.primaryIgnoreManager = PrimaryIgnoreManager(
+ disambPage, enabled=self.primary)
if not self.findAlternatives(disambPage):
continue
@@ -933,7 +1064,8 @@
self.alternatives.sort()
self.listAlternatives()
- gen = ReferringPageGeneratorWithIgnore(disambPage, self.primary, minimum =
self.minimum)
+ gen = ReferringPageGeneratorWithIgnore(disambPage, self.primary,
+ minimum = self.minimum)
preloadingGen = pagegenerators.PreloadingGenerator(gen)
for refPage in preloadingGen:
if not self.primaryIgnoreManager.isIgnored(refPage):
@@ -944,12 +1076,13 @@
# clear alternatives before working on next disambiguation page
self.alternatives = []
-def main():
+def main(*args):
# the option that's always selected when the bot wonders what to do with
# a link. If it's None, the user is prompted (default behaviour).
always = None
alternatives = []
getAlternatives = True
+ dnSkip = False
# if the -file argument is used, page titles are dumped in this array.
# otherwise it will only contain one page.
generator = None
@@ -963,7 +1096,7 @@
ignoreCase = False
minimum = 0
- for arg in pywikibot.handleArgs():
+ for arg in pywikibot.handleArgs(*args):
if arg.startswith('-primary:'):
primary = True
getAlternatives = False
@@ -974,9 +1107,11 @@
always = arg[8:]
elif arg.startswith('-file'):
if len(arg) == 5:
- generator = pagegenerators.TextfilePageGenerator(filename = None)
+ generator = pagegenerators.TextfilePageGenerator(
+ filename=None)
else:
- generator = pagegenerators.TextfilePageGenerator(filename = arg[6:])
+ generator = pagegenerators.TextfilePageGenerator(
+ filename=arg[6:])
elif arg.startswith('-pos:'):
if arg[5]!=':':
mysite = pywikibot.getSite()
@@ -984,7 +1119,8 @@
if page.exists():
alternatives.append(page.title())
else:
- answer = pywikibot.inputChoice(u'Possibility %s does not actually exist. Use it anyway?'
+ answer = pywikibot.inputChoice(
+ u'Possibility %s does not actually exist. Use it anyway?'
% page.title(), ['yes', 'no'], ['y', 'N'], 'N')
if answer == 'y':
alternatives.append(page.title())
@@ -992,6 +1128,8 @@
alternatives.append(arg[5:])
elif arg == '-just':
getAlternatives = False
+ elif arg == '-dnskip':
+ dnSkip = True
elif arg == '-main':
main_only = True
elif arg.startswith('-min:'):
@@ -999,10 +1137,14 @@
elif arg.startswith('-start'):
try:
if len(arg) <= len('-start:'):
- generator =
pagegenerators.CategorizedPageGenerator(pywikibot.getSite().disambcategory())
+ generator = pagegenerators.CategorizedPageGenerator(
+ pywikibot.getSite().disambcategory())
else:
- generator =
pagegenerators.CategorizedPageGenerator(pywikibot.getSite().disambcategory(), start =
arg[7:])
- generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
+ generator = pagegenerators.CategorizedPageGenerator(
+ pywikibot.getSite().disambcategory(),
+ start=arg[7:])
+ generator = pagegenerators.NamespaceFilterPageGenerator(
+ generator, [0])
except pywikibot.NoPage:
print "Disambiguation category for your wiki is not known."
raise
@@ -1020,18 +1162,20 @@
page = pywikibot.Page(pywikibot.getSite(), pageTitle)
generator = iter([page])
- # if no disambiguation pages was given as an argument, and none was
+ # if no disambiguation page was given as an argument, and none was
# read from a file, query the user
if not generator:
- pageTitle = pywikibot.input(u'On which disambiguation page do you want to
work?')
+ pageTitle = pywikibot.input(
+ u'On which disambiguation page do you want to work?')
page = pywikibot.Page(pywikibot.getSite(), pageTitle)
generator = iter([page])
- bot = DisambiguationRobot(always, alternatives, getAlternatives, generator, primary,
main_only, minimum = minimum)
+ bot = DisambiguationRobot(always, alternatives, getAlternatives, dnSkip,
+ generator, primary, main_only,
+ minimum=minimum)
bot.run()
-
if __name__ == "__main__":
try:
main()