http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9883
Revision: 9883
Author: xqt
Date: 2012-02-11 14:03:09 +0000 (Sat, 11 Feb 2012)
Log Message:
-----------
i18n support for category.py, follow up for r9876
Modified Paths:
--------------
trunk/pywikipedia/category.py
Modified: trunk/pywikipedia/category.py
===================================================================
--- trunk/pywikipedia/category.py 2012-02-11 14:01:32 UTC (rev 9882)
+++ trunk/pywikipedia/category.py 2012-02-11 14:03:09 UTC (rev 9883)
@@ -377,12 +377,6 @@
class CategoryMoveRobot:
"""Robot to move pages from one category to another."""
- #Section title and edit summary for keeping page history:
- historySectionTitle = {
- 'en':u'Page history of former %s',
- 'hu':u'A megszűnt %s laptörténete',
- }
-
def __init__(self, oldCatTitle, newCatTitle, batchMode=False,
editSummary='', inPlace=False, moveCatPage=True,
deleteEmptySourceCat=True, titleRegex=None,
@@ -413,11 +407,6 @@
reason = i18n.twtranslate(site, deletion_reason_move) \
% {'newcat': self.newCatTitle, 'title': self.newCatTitle}
- # Set the section title for the old cat's history on the new cat's
- # talk page.
- sectionTitle = pywikibot.translate(site,
- self.historySectionTitle) % self.oldCat.title()
-
# Copy the category contents to the new category page
copied = False
oldMovedTalk = None
@@ -443,6 +432,10 @@
#Whether or not there was an old talk page, we write
#the page history to the new talk page
history = self.oldCat.getVersionHistoryTable()
+ # Set the section title for the old cat's history on the new cat's
+ # talk page.
+ sectionTitle = i18n.twtranslate(site, 'category-section-title',
+ self.oldCat.title()
#Should be OK, we are within if self.oldCat.exists()
historySection = u'\n== %s ==\n%s' % (sectionTitle, history)
try:
@@ -450,7 +443,9 @@
except pywikibot.NoPage:
text = historySection
try:
- newCat.toggleTalkPage().put(text,sectionTitle)
+ newCat.toggleTalkPage().put(
+ text, i18n.twtranslate(site, 'category-version-history',
+ self.oldCat.title())
except:
pywikibot.output(
'History of the category has not been saved to new talk page')
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9882
Revision: 9882
Author: xqt
Date: 2012-02-11 14:01:32 +0000 (Sat, 11 Feb 2012)
Log Message:
-----------
i18n support for category.py, follow up for r9876
Modified Paths:
--------------
branches/rewrite/scripts/i18n/category.py
Modified: branches/rewrite/scripts/i18n/category.py
===================================================================
--- branches/rewrite/scripts/i18n/category.py 2012-02-11 13:16:03 UTC (rev 9881)
+++ branches/rewrite/scripts/i18n/category.py 2012-02-11 14:01:32 UTC (rev 9882)
@@ -276,8 +276,10 @@
'category-listifying': u'Bot: Liste aus %(fromcat)s ({{PLURAL:num|1 Eintrag|%(num)d Einträge}})',
'category-removing': u'Bot: Entferne aus %(oldcat)s',
'category-replacing': u'Bot: Ersetze Kategorie %(oldcat)s durch %(newcat)s',
+ 'category-section-title': u'Versionsgeschichte der ursprünglichen %(oldcat)s',
'category-was-disbanded': u'Bot: Kategorie wurde aufgelöst',
'category-was-moved': u'Bot: Kategorie wurde nach [[:Kategorie:%(newcat)s|%(title)s]] verschoben',
+ 'category-version-history': u'Bot: Sichere Versionsgeschichte der alten %(oldcat)s',
},
# Author: Eruedin
'de-ch': {
@@ -314,8 +316,10 @@
'category-listifying': u'Robot: Listifying from %(fromcat)s ({{PLURAL:%(num)d|1 entry|%(num)d entries}})',
'category-removing': u'Robot: Removing from %(oldcat)s',
'category-replacing': u'Robot: Replacing category %(oldcat)s with %(newcat)s',
+ 'category-section-title': u'Page history of former %(oldcat)s',
'category-was-disbanded': u'Robot: Category was disbanded',
'category-was-moved': u'Robot: Category was moved to [[:Category:%(newcat)s|%(title)s]]',
+ 'category-version-history': u'Robot: Saving version history of former %(oldcat)s',
},
# Author: Mihxil
# Author: Airon90
@@ -520,6 +524,7 @@
'category-listifying': u'%(fromcat)s listázása bottal (%(num)d lap)',
'category-removing': u'Bot: eltávolítás [[%(oldcat)s]] kategóriából',
'category-replacing': u'Bot: következő kategória cseréje: %(oldcat)s erre: %(newcat)s',
+ 'category-section-title': u'A megszűnt %(oldcat)s laptörténete',
'category-was-disbanded': u'Bot: a kategória kiürítve',
'category-was-moved': u'A bot áthelyezte a kategória tartalmát ide: [[:Kategória:%(newcat)s|%(title)s]]',
},
@@ -1062,6 +1067,8 @@
'category-also-in': u'Translations to say that the current category is in more categories than the one we are coming from',
'category-changing': u'Edit summary when the bot moves pages from one category to another. <code>%(oldcat)s</code> is the source category, <code>%(newcat)s</code> the target.',
'category-listifying': u'Definition of [http://meta.wikimedia.org/wiki/Pywikipediabot/category.py#Syntax listify] - make a list of all of the articles that are in a category.\n\n*Variable "%(fromcat)s" = the category to make a list of in the listify option.\n*Variable "%(num)d" is probably a number.\n*You may use PLURAL tag like (<code><nowiki>{{PLURAL:%(num)d|1 entry|%(num)d entries}}</nowiki></code>)\nDo not translate the variables.',
+ 'category-section-title': u'Section title for keeping page history',
+ 'category-version-history': u'Edit summary when the bot saves a page\'s version history while moving a category',
},
# Author: Xqt
'rm': {
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9881
Revision: 9881
Author: xqt
Date: 2012-02-11 13:16:03 +0000 (Sat, 11 Feb 2012)
Log Message:
-----------
strip trailing whitespaces
Modified Paths:
--------------
trunk/pywikipedia/apispec.py
trunk/pywikipedia/category.py
trunk/pywikipedia/solve_disambiguation.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/apispec.py
===================================================================
--- trunk/pywikipedia/apispec.py 2012-02-10 10:12:04 UTC (rev 9880)
+++ trunk/pywikipedia/apispec.py 2012-02-11 13:16:03 UTC (rev 9881)
@@ -93,12 +93,12 @@
Each dictionary represents a block. Keys are shown at
http://www.mediawiki.org/wiki/API:Blocks
under 'bkprop'. Note that 'user' key is not present if it is an autoblock,
- and flags are not present if they are not valid for that block. It is your
+ and flags are not present if they are not valid for that block. It is your
task to handle this. Even numeric values such as id's come as strings!
The list is ordered by timestamp of applying the block unless noted.
Timestamps of beginning and expiry are in UTC and ISO 8601 format.
http://www.mediawiki.org/wiki/API:Data_formats#Timestamps
- Convert them to local time if necessary.
+ Convert them to local time if necessary.
See http://docs.python.org/library/datetime.html for help.
Methods:
@@ -134,7 +134,7 @@
beginning of the string.
2. Simplified lists of blocked users/IPs:
- These lists, unlike others, contain simple Unicode strings rather than
+ These lists, unlike others, contain simple Unicode strings rather than
block directories. The first one is ordered chronologically by date of
blocking, which is useful for hunting the reincarnations of a vandal,
while the others in alphanumerical order.
@@ -148,12 +148,12 @@
finiteblocks List of all finite blocks
infiniteblocks List of all infinite and indefinite blocks
expindays Blocks expiring within n days (from the second of calling)
- expnotindays Finite blocks not expiring within n days
+ expnotindays Finite blocks not expiring within n days
(use with infiniteblocks to get all the remaining blocks)
expuntil Blocks expiring by the given timestamp
expafter Finite blocks expiring after the given timestamp
(use with infiniteblocks to get all the remaining blocks)
- For valid timestamps see
+ For valid timestamps see
http://www.mediawiki.org/wiki/API:Data_formats#Timestamps
The next three are ordered by ascending duration:
@@ -167,8 +167,8 @@
human-readable details of the given dictionary that
represents a block (with English keywords at this time).
You may use it with pywikibot.output or insert into
- a wikipage between <pre> tags. This may be iterated or
- joined on a list of blocks, but don't blame me if your
+ a wikipage between <pre> tags. This may be iterated or
+ joined on a list of blocks, but don't blame me if your
monitor is not tall enough.
Should 'bot' be an instance of this class and b a block,
bot.display(b) returns the text in Unicode.
@@ -186,7 +186,7 @@
site site as usual, autodetected if missing
This has some bug at the moment, but works well in home wiki.
top 'new'/'old' (newest or oldest block on top; default='new')
- limit Maximum number of blocks to get in one query as integer or
+ limit Maximum number of blocks to get in one query as integer or
string. Defaults to 5000. That is the allowed maximum for bots
in Wikimedia wikis. You MUST set it to no more than 500 if your
bot does not have a flag.
@@ -196,16 +196,16 @@
!! Setting this value too low may result in an infinite loop
or duplicated results. Use as great limit as possible.
See https://bugzilla.wikimedia.org/show_bug.cgi?id=34029
- Additionally, decreasing this limit will cause a
+ Additionally, decreasing this limit will cause a
quasi-exponential increase of running time!
-
+
help: http://www.mediawiki.org/wiki/API:Blocks
TODO:
* Explore the bug of site parameter
* A function listing all blocked IPs, expanding ranges
* Some statistics from blocks
"""
-
+
#################################################
# Methods for internal use #
#################################################
@@ -213,7 +213,7 @@
self.site = site
self.bkdir = ['older','newer'][top=='old'] #a bit strange
# bkdir: Direction to list in.
- #older: List newest blocks first (default).
+ #older: List newest blocks first (default).
#Note: bkstart has to be later than bkend.
#newer: List oldest blocks first. Note: bkstart has to be before bkend.
self.bklimit = limit #Allowed maximum for bots=5000
@@ -228,7 +228,7 @@
'list': 'blocks',
'bklimit': self.bklimit,
'bkdir': self.bkdir,
- 'bkprop':
+ 'bkprop':
'id|user|userid|by|byid|timestamp|expiry|reason|range|flags',
}
@@ -246,8 +246,8 @@
break
result = query.GetData(self.params)
blocklist += result['query']['blocks']
- #Finally we remove possible duplicates. This piece of code may be
- #removed after successful closing of
+ #Finally we remove possible duplicates. This piece of code may be
+ #removed after successful closing of
#https://bugzilla.wikimedia.org/show_bug.cgi?id=34029
for b in blocklist:
if blocklist.count(b) > 1:
@@ -298,7 +298,7 @@
"""Returns anonblocks, excluding range blocks"""
self.empty()
try:
- return filter(lambda x: x['rangestart'] == x['rangeend'],
+ return filter(lambda x: x['rangestart'] == x['rangeend'],
self.anonblocks())
except KeyError:
return [errordic]
@@ -307,7 +307,7 @@
"""Returns range blocks"""
self.empty()
try:
- return filter(lambda x: x['rangestart'] != x['rangeend'],
+ return filter(lambda x: x['rangestart'] != x['rangeend'],
self.anonblocks())
except KeyError:
return [errordic]
@@ -321,68 +321,68 @@
"""Returns blocks raised by given admin"""
self.empty()
return filter(lambda x: x['by']==admin, self.query())
-
+
def user(self, user):
"""Returns blocks of the given user or single IP"""
self.empty()
self.params['bkusers'] = user
return self.query()
-
+
def userfragment(self, user):
"""Returns blocks of the given user or single IP (part of name)"""
self.empty()
return filter(lambda x: user in x['user'], self.notautoblocks())
-
+
def userregex(self, regex):
"""Returns blocks of the given user or single IP (regex)"""
self.empty()
return filter(
lambda x: re.search(regex, x['user']), self.notautoblocks())
-
+
def IP(self, IP):
"""Returns blocks of the given single IP or range (max. /16)"""
self.empty()
self.params['bkip'] = IP
return self.query()
-
+
def reason(self, reason):
"""Returns blocks raised with the given reason (exact text)"""
self.empty()
return filter(lambda x: x['reason'] == reason, self.query())
-
+
def reasonfragment(self, reason):
"""Returns blocks raised with the given reason (part of it)"""
self.empty()
return filter(lambda x: reason in x['reason'], self.query())
-
+
def reasonregex(self, regex):
"""Returns blocks raised with the given reason (regex)"""
self.empty()
return filter(
lambda x: re.search(regex, x['reason']), self.allblocks())
-
+
#################################################
# Lists of blocked users/IPs #
#################################################
#These methods return ordered list of Unicode strings
def blockedusernames_chrono(self):
return [b['user'] for b in self.reguserblocks()]
-
+
def blockedusernames(self):
return sorted(self.blockedusernames_chrono())
-
+
def blockedanons(self):
return sorted(
[b['user'] for b in self.anonblocks()], key=self.IPsortkey)
-
+
def blockedanons_norange(self):
return sorted(
[b['user'] for b in self.anonblocks_norange()], key=self.IPsortkey)
-
+
def blockedranges(self):
return sorted(
[b['user'] for b in self.rangeblocks()], key=self.IPsortkey)
-
+
#################################################
# Lists by expiry #
#################################################
@@ -395,7 +395,7 @@
"""Returns infinite and indefinite blocks"""
self.empty()
return filter(lambda x: x['expiry'].isalpha(), self.query())
-
+
def expindays(self, days):
"""Returns blocks expiring within days days"""
limit = iso(datetime.datetime.utcnow() + datetime.timedelta(days))
Modified: trunk/pywikipedia/category.py
===================================================================
--- trunk/pywikipedia/category.py 2012-02-10 10:12:04 UTC (rev 9880)
+++ trunk/pywikipedia/category.py 2012-02-11 13:16:03 UTC (rev 9881)
@@ -413,9 +413,9 @@
reason = i18n.twtranslate(site, deletion_reason_move) \
% {'newcat': self.newCatTitle, 'title': self.newCatTitle}
- # Set the section title for the old cat's history on the new cat's
+ # Set the section title for the old cat's history on the new cat's
# talk page.
- sectionTitle = pywikibot.translate(site,
+ sectionTitle = pywikibot.translate(site,
self.historySectionTitle) % self.oldCat.title()
# Copy the category contents to the new category page
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py 2012-02-10 10:12:04 UTC (rev 9880)
+++ trunk/pywikipedia/solve_disambiguation.py 2012-02-11 13:16:03 UTC (rev 9881)
@@ -57,10 +57,10 @@
-first Uses only the first link of every line on the disambiguation
page that begins with an asterisk. Useful if the page is full
of irrelevant links that are not subject to disambiguation.
- You won't get all of them as options, just the first on each
+ You won't get all af them as options, just the first on each
line. For a moderated example see
http://en.wikipedia.org/wiki/Szerdahely
- A really exotic one is
+ A really exotic one is
http://hu.wikipedia.org/wiki/Brabant_(egyértelműsítő lap)
-start:XY goes through all disambiguation pages in the category on your
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2012-02-10 10:12:04 UTC (rev 9880)
+++ trunk/pywikipedia/wikipedia.py 2012-02-11 13:16:03 UTC (rev 9881)
@@ -495,7 +495,7 @@
if underscore:
title = title.replace(' ', '_')
return title
-
+
#@deprecated("Page.title(withNamespace=False)")
def titleWithoutNamespace(self, underscore=False):
"""Return title of Page without namespace and without section."""
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9880
Revision: 9880
Author: alexsh
Date: 2012-02-10 10:12:04 +0000 (Fri, 10 Feb 2012)
Log Message:
-----------
config.py:revert(sry)
Modified Paths:
--------------
trunk/pywikipedia/config.py
Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2012-02-10 10:10:53 UTC (rev 9879)
+++ trunk/pywikipedia/config.py 2012-02-10 10:12:04 UTC (rev 9880)
@@ -194,7 +194,7 @@
# Per default, logging of interwiki.py is enabled because its logfiles can
# be used to generate so-called warnfiles.
# This setting can be overridden by the -log or -nolog command-line arguments.
-log = ['*']
+log = ['interwiki']
############## INTERWIKI SETTINGS ##############
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9876
Revision: 9876
Author: binbot
Date: 2012-02-08 21:07:16 +0000 (Wed, 08 Feb 2012)
Log Message:
-----------
With -hist, action "move" will write the edit history of old cat to the talk page of new cat for improved respect for copyright (not i18nized yet)
Modified Paths:
--------------
trunk/pywikipedia/category.py
Modified: trunk/pywikipedia/category.py
===================================================================
--- trunk/pywikipedia/category.py 2012-02-07 21:53:06 UTC (rev 9875)
+++ trunk/pywikipedia/category.py 2012-02-08 21:07:16 UTC (rev 9876)
@@ -35,6 +35,10 @@
deletion reason. Instead, it uses the default deletion reason
for the language, which is "Category was disbanded" in English.
+Options for "move" action:
+ * -hist - Creates a nice wikitable on the talk page of target category
+ that contains detailed page history of the source category.
+
Options for several actions:
* -rebuild - reset the database
* -from: - The category to move from (for the move option)
@@ -372,6 +376,13 @@
class CategoryMoveRobot:
"""Robot to move pages from one category to another."""
+
+ #Section title and edit summary for keeping page history:
+ historySectionTitle = {
+ 'en':u'Page history of former %s',
+ 'hu':u'A megszűnt %s laptörténete',
+ }
+
def __init__(self, oldCatTitle, newCatTitle, batchMode=False,
editSummary='', inPlace=False, moveCatPage=True,
deleteEmptySourceCat=True, titleRegex=None,
@@ -402,6 +413,10 @@
reason = i18n.twtranslate(site, deletion_reason_move) \
% {'newcat': self.newCatTitle, 'title': self.newCatTitle}
+ # Set the section title for the old cat's history on the new cat's
+ # talk page.
+ sectionTitle = pywikibot.translate(site,
+ self.historySectionTitle) % self.oldCat.title()
# Copy the category contents to the new category page
copied = False
@@ -425,6 +440,21 @@
else:
if talkMoved:
oldMovedTalk = oldTalk
+ #Whether or not there was an old talk page, we write
+ #the page history to the new talk page
+ history = self.oldCat.getVersionHistoryTable()
+ #Should be OK, we are within if self.oldCat.exists()
+ historySection = u'\n== %s ==\n%s' % (sectionTitle, history)
+ try:
+ text = newCat.toggleTalkPage().get() + historySection
+ except pywikibot.NoPage:
+ text = historySection
+ try:
+ newCat.toggleTalkPage().put(text,sectionTitle)
+ except:
+ pywikibot.output(
+ 'History of the category has not been saved to new talk page')
+ #TODO: some nicer exception handling (not too important)
# Move articles
gen = pagegenerators.CategorizedPageGenerator(self.oldCat,
@@ -822,6 +852,7 @@
showImages = False
talkPages = False
recurse = False
+ withHistory = False
titleRegex = None
# This factory is responsible for processing command line arguments
@@ -888,8 +919,10 @@
talkPages = True
elif arg == '-recurse':
recurse = True
+ elif arg == '-hist':
+ create_pages = True
elif arg == '-create':
- create_pages = True
+ withHistory = True
else:
genFactory.handleArg(arg)
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9875
Revision: 9875
Author: binbot
Date: 2012-02-07 21:53:06 +0000 (Tue, 07 Feb 2012)
Log Message:
-----------
A new parameter, -first is introduced to handle overcrowded disambpages.
Modified Paths:
--------------
trunk/pywikipedia/solve_disambiguation.py
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py 2012-02-07 20:45:24 UTC (rev 9874)
+++ trunk/pywikipedia/solve_disambiguation.py 2012-02-07 21:53:06 UTC (rev 9875)
@@ -54,6 +54,15 @@
-main only check pages in the main namespace, not in the talk,
wikipedia, user, etc. namespaces.
+ -first Uses only the first link of every line on the disambiguation
+ page that begins with an asterisk. Useful if the page is full
+ of irrelevant links that are not subject to disambiguation.
+ You won't get all of them as options, just the first on each
+ line. For a moderated example see
+ http://en.wikipedia.org/wiki/Szerdahely
+ A really exotic one is
+ http://hu.wikipedia.org/wiki/Brabant_(egyértelműsítő lap)
+
-start:XY goes through all disambiguation pages in the category on your
wiki that is defined (to the bot) as the category containing
disambiguation pages, starting at XY. If only '-start' or
@@ -73,7 +82,7 @@
# (C) Daniel Herding, 2004
# (C) Andre Engels, 2003-2004
# (C) WikiWichtel, 2004
-# (C) Pywikipedia team, 2003-2009
+# (C) Pywikipedia team, 2003-2012
#
__version__='$Id$'
#
@@ -364,6 +373,22 @@
else:
return linkupper
+def firstlinks(page):
+ #Returns a list of first links of every line beginning with *
+ #When a disambpage is full of unnecessary links, this may be useful
+ #to sort out the relevant links. E.g. from line
+ #*[[Jim Smith (smith)|Jim Smith]] ([[1832]]-[[1932]]) [[English]] [[smith]]
+ #it returns only 'Jim Smith (smith)'
+ #Lines without an asterisk at the beginning will be disregarded.
+ #No check for page existence, it has already been done.
+ list = []
+ reg = re.compile(r'\*.*?\[\[(.*?)(\||\]\])')
+ for line in page.get().splitlines():
+ found = reg.match(line)
+ if found:
+ list.append(found.group(1))
+ return list
+
class ReferringPageGeneratorWithIgnore:
def __init__(self, disambPage, primary=False, minimum = 0):
self.disambPage = disambPage
@@ -458,7 +483,7 @@
}
def __init__(self, always, alternatives, getAlternatives, dnSkip, generator,
- primary, main_only, minimum = 0):
+ primary, main_only, first_only, minimum = 0):
self.always = always
self.alternatives = alternatives
self.getAlternatives = getAlternatives
@@ -466,6 +491,7 @@
self.generator = generator
self.primary = primary
self.main_only = main_only
+ self.first_only = first_only
self.minimum = minimum
self.mysite = pywikibot.getSite()
@@ -523,6 +549,16 @@
# note that the definition of 'letter' varies from language to language.
self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
+ def firstize(self, page, links):
+ #This will remove a lot of silly redundant links from overdecorated
+ #disambiguation pages and leave the first link of each asterisked
+ #line only. This must be done if -first is used in command line.
+ titles = [firstcap(t) for t in firstlinks(page)]
+ for l in links[:]: #uses a copy because of remove!
+ if l.title() not in titles:
+ links.remove(l)
+ return links
+
def treat(self, refPage, disambPage):
"""
Parameters:
@@ -874,6 +910,8 @@
primary_topic_format[self.mylang]
% disambPage.title())
links = disambPage2.linkedPages()
+ if self.first_only:
+ links = self.firstize(disambPage2, links)
links = [correctcap(l, disambPage2.get())
for l in links]
except pywikibot.NoPage:
@@ -886,6 +924,8 @@
else:
try:
links = disambPage.linkedPages()
+ if self.first_only:
+ links = self.firstize(disambPage, links)
links = [correctcap(l, disambPage.get())
for l in links]
except pywikibot.NoPage:
@@ -1005,6 +1045,8 @@
pageTitle = []
primary = False
main_only = False
+ #Shall we use only the first link from each asterisked line?
+ first_only = False
# For sorting the linked pages, case can be ignored
ignoreCase = False
@@ -1046,6 +1088,8 @@
dnSkip = True
elif arg == '-main':
main_only = True
+ elif arg == '-first':
+ first_only = True
elif arg.startswith('-min:'):
minimum = int(arg[5:])
elif arg.startswith('-start'):
@@ -1085,7 +1129,7 @@
generator = iter([page])
bot = DisambiguationRobot(always, alternatives, getAlternatives, dnSkip,
- generator, primary, main_only,
+ generator, primary, main_only, first_only,
minimum=minimum)
bot.run()
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9874
Revision: 9874
Author: binbot
Date: 2012-02-07 20:45:24 +0000 (Tue, 07 Feb 2012)
Log Message:
-----------
Oops, that was a merged version, will come later, this is the right one.
Modified Paths:
--------------
trunk/pywikipedia/solve_disambiguation.py
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py 2012-02-07 20:40:12 UTC (rev 9873)
+++ trunk/pywikipedia/solve_disambiguation.py 2012-02-07 20:45:24 UTC (rev 9874)
@@ -364,21 +364,6 @@
else:
return linkupper
-def firstlinks(page):
- #Returns a list of first links of every line beginning with *
- #When a disambpage is full of unnecessary links, this may be useful
- #to sort out the relevant links. E.g. from line
- #*[[Jim Smith (smith)|Jim Smith]] ([[1832]]-[[1932]]) [[English]] [[smith]]
- #it returns only Jim Smith (smith)
- #No check for page existence, it has already been done.
- list = []
- reg = re.compile(r'\*.*?\[\[(.*?)(\||\]\])')
- for line in page.get().splitlines():
- found = reg.match(line)
- if found:
- list.append(found.group(1))
- return list
-
class ReferringPageGeneratorWithIgnore:
def __init__(self, disambPage, primary=False, minimum = 0):
self.disambPage = disambPage
@@ -538,20 +523,6 @@
# note that the definition of 'letter' varies from language to language.
self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
- def firstize(self, page, links):
- #duma
- #check param
- titles = [t.capitalize() for t in firstlinks(page)]
- pywikibot.output('\t'.join(titles))
- print len (titles), len(links)
- pywikibot.output('\t'.join(l.title() for l in links))
- for l in links[:]:
- pywikibot.output(l.title())
- if l.title() not in titles:
- links.remove(l)
- print 'meghalt'
- return links
-
def treat(self, refPage, disambPage):
"""
Parameters:
@@ -903,7 +874,6 @@
primary_topic_format[self.mylang]
% disambPage.title())
links = disambPage2.linkedPages()
- links = self.firstize(disambPage2, links)
links = [correctcap(l, disambPage2.get())
for l in links]
except pywikibot.NoPage: