Revision: 7882
Author: xqt
Date: 2010-01-15 12:20:27 +0000 (Fri, 15 Jan 2010)
Log Message:
-----------
summary message for de-wiki; summary shortened to 200 chars; don't ask for a choice if it's not needed
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2010-01-15 11:44:43 UTC (rev 7881)
+++ trunk/pywikipedia/add_text.py 2010-01-15 12:20:27 UTC (rev 7882)
@@ -83,6 +83,7 @@
msg = {
'ar': u'بوت: إضافة %s',
'cs': u'Robot přidal %s',
+ 'de': u'Bot: "%s" hinzugefügt',
'en': u'Bot: Adding %s',
'fr': u'Robot : Ajoute %s',
'he': u'בוט: מוסיף %s',
@@ -150,7 +151,7 @@
if not addText:
raise NoEnoughData('You have to specify what text you want to add!')
if not summary:
- summary = wikipedia.translate(wikipedia.getSite(), msg) % addText
+ summary = wikipedia.translate(wikipedia.getSite(), msg) % addText[:200]
# When a page is tagged as "really well written" it has a star in the interwiki links.
# This is a list of all the templates used (in regex format) to make the stars appear.
@@ -260,18 +261,18 @@
if putText and text != newtext:
wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
wikipedia.showDiff(text, newtext)
- choice = ''
# Let's put the changes.
- while 1:
+ while True:
# If someone load it as module, maybe it's not so useful to put the text in the page
if putText:
if not always:
- choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
- if choice == 'a':
- always = True
- if choice == 'n':
- return (False, False, always)
- if choice == 'y' or always:
+ choice = wikipedia.inputChoice(u'Do you want to accept these changes?',
+ ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
+ if choice == 'a':
+ always = True
+ elif choice == 'n':
+ return (False, False, always)
+ if always or choice == 'y':
try:
if always:
page.put(newtext, summary)
@@ -284,7 +285,7 @@
errorCount += 1
if errorCount < 5:
wikipedia.output(u'Server Error! Wait..')
- time.sleep(3)
+ time.sleep(5)
continue
else:
raise wikipedia.ServerError(u'Fifth Server Error!')
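The summary change in this revision is easy to check in isolation. The following is a minimal, stand-alone sketch (plain Python 2, no wiki access; the message and text values are invented for illustration) of how the edit summary is now built from at most the first 200 characters of the added text:

    # hypothetical sample values, mirroring the 'en' entry of the msg table above
    msg_en = u'Bot: Adding %s'
    addText = u'{{Some template}}\n' * 50      # pretend the text to add is very long
    summary = msg_en % addText[:200]           # only the first 200 chars reach the summary
    assert len(summary) <= len(u'Bot: Adding ') + 200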
Revision: 7881
Author: xqt
Date: 2010-01-15 11:44:43 +0000 (Fri, 15 Jan 2010)
Log Message:
-----------
Additional options:
* read addtext from a textfile
* put addtext on the corresponding talk page
* create it if necessary
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2010-01-15 08:23:54 UTC (rev 7880)
+++ trunk/pywikipedia/add_text.py 2010-01-15 11:44:43 UTC (rev 7881)
@@ -13,8 +13,13 @@
-page Use a page as generator
+-talkpage Put the text onto the talk page instead of the generated page
+-talk
+
-text Define which text to add. "\n" are interpreted as newlines.
+-textfile Define a textfile name which contains the text to add
+
-summary Define the summary to use
-except Use a regex to check if the text is already in the page
@@ -67,6 +72,7 @@
import re, pagegenerators, urllib2, urllib
import wikipedia
+import codecs, config
# This is required for the text that is shown when you run this script
# with the parameter -help.
@@ -140,7 +146,7 @@
yield wikipedia.Page(wikipedia.getSite(), result)
def add_text(page = None, addText = None, summary = None, regexSkip = None, regexSkipUrl = None,
- always = False, up = False, putText = True, oldTextGiven = None):
+ always = False, up = False, putText = True, oldTextGiven = None, create=False):
if not addText:
raise NoEnoughData('You have to specify what text you want to add!')
if not summary:
@@ -186,8 +192,12 @@
try:
text = page.get()
except wikipedia.NoPage:
- wikipedia.output(u"%s doesn't exist, skip!" % page.title())
- return (False, False, always) # continue
+ if create:
+ wikipedia.output(u"%s doesn't exist, creating it!" % page.title())
+ text = u''
+ else:
+ wikipedia.output(u"%s doesn't exist, skip!" % page.title())
+ return (False, False, always) # continue
except wikipedia.IsRedirectPage:
wikipedia.output(u"%s is a redirect, skip!" % page.title())
return (False, False, always) # continue
@@ -298,13 +308,21 @@
# If none, the var is setted only for check purpose.
summary = None; addText = None; regexSkip = None; regexSkipUrl = None;
generator = None; always = False
+ textfile=None
+ talkPage=False
+ namespaces=[]
# Load a lot of default generators
genFactory = pagegenerators.GeneratorFactory()
# Put the text above or below the text?
up = False
# Loading the arguments
for arg in wikipedia.handleArgs():
- if arg.startswith('-text'):
+ if arg.startswith('-textfile'):
+ if len(arg) == 9:
+ textfile = wikipedia.input(u'Which textfile do you want to add?')
+ else:
+ textfile = arg[10:]
+ elif arg.startswith('-text'):
if len(arg) == 5:
addText = wikipedia.input(u'What text do you want to add?')
else:
@@ -339,17 +357,30 @@
up = True
elif arg == '-always':
always = True
+ elif arg == '-talk' or arg == '-talkpage':
+ talkPage = True
else:
genFactory.handleArg(arg)
-
+ if textfile and not addText:
+ f = codecs.open(textfile, 'r', config.textfile_encoding)
+ addText = f.read()
+ f.close()
if not generator:
generator = genFactory.getCombinedGenerator()
# Check if there are the minimal settings
if not generator:
raise NoEnoughData('You have to specify the generator you want to use for the script!')
+ if talkPage:
+ generator = pagegenerators.PageWithTalkPageGenerator(generator)
+ site = wikipedia.getSite()
+ for namespace in site.namespaces():
+ index = site.getNamespaceIndex(namespace)
+ if index%2==1 and index>0:
+ namespaces += [index]
+ generator = pagegenerators.NamespaceFilterPageGenerator(generator, namespaces)
# Main Loop
for page in generator:
- (text, newtext, always) = add_text(page, addText, summary, regexSkip, regexSkipUrl, always, up, True)
+ (text, newtext, always) = add_text(page, addText, summary, regexSkip, regexSkipUrl, always, up, True, create=talkPage)
if __name__ == "__main__":
try:
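For callers that load add_text as a module rather than running it as a script, the new create keyword (and the talk-page handling added above) can be exercised directly. A hedged sketch, with an invented page title and text, of what such a call might look like against the signature introduced in this revision:

    import wikipedia
    import add_text

    # hypothetical target and text; create=True makes a missing page instead of skipping it
    page = wikipedia.Page(wikipedia.getSite(), u'Talk:Sandbox')
    (text, newtext, always) = add_text.add_text(
        page, addText=u'{{talk header}}', summary=u'Bot: Adding {{talk header}}',
        always=True, putText=True, create=True)

    # from the command line, roughly the same behaviour would be invoked as:
    #   python add_text.py -cat:SomeCategory -talk -textfile:header.txt -always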
Revision: 7876
Author: xqt
Date: 2010-01-14 09:22:39 +0000 (Thu, 14 Jan 2010)
Log Message:
-----------
Change wikipedia to pywikibot for easier comparison with the rewrite branch
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2010-01-14 08:53:49 UTC (rev 7875)
+++ trunk/pywikipedia/pagegenerators.py 2010-01-14 09:22:39 UTC (rev 7876)
@@ -152,17 +152,14 @@
docuReplacements = {'¶ms;': parameterHelp}
-
-
-
-
# Standard library imports
import re, codecs, sys
import threading, Queue, traceback
import urllib, urllib2, time
# Application specific imports
-import wikipedia, date, catlib, userlib, query
+import wikipedia as pywikibot
+import date, catlib, userlib, query
import config
# For python 2.4 compatibility
@@ -235,7 +232,7 @@
def stop(self):
"""Stop the background thread."""
## if not self.finished.isSet():
-## wikipedia.output("DEBUG: signalling %s to stop." % self)
+## pywikibot.output("DEBUG: signalling %s to stop." % self)
self.finished.set()
def run(self):
@@ -244,7 +241,7 @@
for result in self.__gen:
while True:
if self.finished.isSet():
-## wikipedia.output("DEBUG: %s received stop signal." % self)
+## pywikibot.output("DEBUG: %s received stop signal." % self)
return
try:
self.queue.put_nowait(result)
@@ -256,7 +253,7 @@
while not self.finished.isSet() and not self.queue.empty():
time.sleep(0.25)
self.stop()
-## wikipedia.output("DEBUG: %s stopped because generator exhausted." % self)
+## pywikibot.output("DEBUG: %s stopped because generator exhausted." % self)
def AllpagesPageGenerator(start ='!', namespace = None, includeredirects = True, site = None):
@@ -267,14 +264,14 @@
includeredirects equals the string 'only', only redirects are added.
"""
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.allpages(start = start, namespace = namespace, includeredirects = includeredirects):
yield page
def PrefixingPageGenerator(prefix, namespace = None, includeredirects = True, site = None):
if site is None:
- site = wikipedia.getSite()
- page = wikipedia.Page(site, prefix)
+ site = pywikibot.getSite()
+ page = pywikibot.Page(site, prefix)
if namespace is None:
namespace = page.namespace()
title = page.titleWithoutNamespace()
@@ -283,7 +280,7 @@
def NewpagesPageGenerator(number = 100, get_redirect = False, repeat = False, site = None, namespace = 0):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.newpages(number=number, get_redirect=get_redirect, repeat=repeat, namespace=namespace):
yield page[0]
@@ -297,13 +294,13 @@
def UnusedFilesGenerator(number = 100, repeat = False, site = None, extension = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.unusedfiles(number=number, repeat=repeat, extension=extension):
- yield wikipedia.ImagePage(page.site(), page.title())
+ yield pywikibot.ImagePage(page.site(), page.title())
def WithoutInterwikiPageGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.withoutinterwiki(number=number, repeat=repeat):
yield page
@@ -355,61 +352,61 @@
def UnCategorizedCategoryGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.uncategorizedcategories(number=number, repeat=repeat):
yield page
def UnCategorizedImageGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.uncategorizedimages(number=number, repeat=repeat):
yield page
def NewimagesPageGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.newimages(number, repeat=repeat):
yield page[0]
def UnCategorizedPageGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.uncategorizedpages(number=number, repeat=repeat):
yield page
def LonelyPagesPageGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.lonelypages(number=number, repeat=repeat):
yield page
def UnwatchedPagesPageGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.unwatchedpages(number=number, repeat=repeat):
yield page
def AncientPagesPageGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.ancientpages(number=number, repeat=repeat):
yield page[0]
def DeadendPagesPageGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.deadendpages(number=number, repeat=repeat):
yield page
def LongPagesPageGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.longpages(number=number, repeat=repeat):
yield page[0]
def ShortPagesPageGenerator(number = 100, repeat = False, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.shortpages(number=number, repeat=repeat):
yield page[0]
@@ -420,19 +417,19 @@
def RandomPageGenerator(number = 10, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for i in range(number):
yield site.randompage()
def RandomRedirectPageGenerator(number = 10, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for i in range(number):
yield site.randomredirectpage()
def RecentchangesPageGenerator(number = 100, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.recentchanges(number=number):
yield page[0]
@@ -444,9 +441,9 @@
name is given, the generator prompts the user.
'''
if filename is None:
- filename = wikipedia.input(u'Please enter the filename:')
+ filename = pywikibot.input(u'Please enter the filename:')
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
f = codecs.open(filename, 'r', config.textfile_encoding)
R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)') # title ends either before | or before ]]
pageTitle = None
@@ -456,30 +453,30 @@
# This makes it possible to work on different wikis using a single
# text file, but also could be dangerous because you might
# inadvertently change pages on another wiki!
- yield wikipedia.Page(site, pageTitle)
+ yield pywikibot.Page(site, pageTitle)
if pageTitle is None:
f.seek(0)
for title in f:
title = title.strip()
if title:
- yield wikipedia.Page(site, title)
+ yield pywikibot.Page(site, title)
f.close()
def PagesFromTitlesGenerator(iterable, site=None):
"""Generate pages from the titles (unicode strings) yielded by iterable."""
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for title in iterable:
if not isinstance(title, basestring):
break
- yield wikipedia.Page(site, title)
+ yield pywikibot.Page(site, title)
def LinksearchPageGenerator(link, step=500, site=None):
"""Yields all pages that include a specified link, according to
[[Special:Linksearch]].
"""
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.linksearch(link, limit=step):
yield page
@@ -490,7 +487,7 @@
"""
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
if number > 500:
# the api does not allow more than 500 results for anonymous users
number = 500
@@ -503,7 +500,7 @@
Provides a list of results using the internal MediaWiki search engine
"""
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for page in site.search(query, number=number, namespaces = namespaces):
yield page[0]
@@ -512,10 +509,10 @@
To use this generator, install pYsearch
'''
def __init__(self, query = None, count = 100, site = None): # values larger than 100 fail
- self.query = query or wikipedia.input(u'Please enter the search query:')
+ self.query = query or pywikibot.input(u'Please enter the search query:')
self.count = count
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
self.site = site
def queryYahoo(self, query):
@@ -535,7 +532,7 @@
for url in self.queryYahoo(localQuery):
if url[:len(base)] == base:
title = url[len(base):]
- page = wikipedia.Page(self.site, title)
+ page = pywikibot.Page(self.site, title)
yield page
class GoogleSearchPageGenerator:
@@ -546,9 +543,9 @@
license key in your configuration.
'''
def __init__(self, query = None, site = None):
- self.query = query or wikipedia.input(u'Please enter the search query:')
+ self.query = query or pywikibot.input(u'Please enter the search query:')
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
self.site = site
#########
@@ -581,27 +578,27 @@
while True:
try:
- wikipedia.output(u'Querying Google AJAX Search API...') #, offset %i' % offset)
+ pywikibot.output(u'Querying Google AJAX Search API...') #, offset %i' % offset)
result = json.loads(self.site.getUrl(url, refer = config.google_api_refer, no_hostname=True))
for res in result['responseData']['results']:
yield res['url']
except:
- wikipedia.output(u"An error occured. Retrying in 10 seconds...")
+ pywikibot.output(u"An error occured. Retrying in 10 seconds...")
time.sleep(10)
continue
def queryViaSoapApi(self, query):
import google
-
google.LICENSE_KEY = config.google_key
offset = 0
estimatedTotalResultsCount = None
- while not estimatedTotalResultsCount or offset < estimatedTotalResultsCount:
+ while not estimatedTotalResultsCount \
+ or offset < estimatedTotalResultsCount:
while (True):
# Google often yields 502 errors.
try:
- wikipedia.output(u'Querying Google, offset %i' % offset)
+ pywikibot.output(u'Querying Google, offset %i' % offset)
data = google.doGoogleSearch(query, start = offset, filter = False)
break
except KeyboardInterrupt:
@@ -611,7 +608,7 @@
# can happen here, depending on the module used. It's not easy
# to catch this properly because pygoogle decides which one of
# the soap modules to use.
- wikipedia.output(u"An error occured. Retrying in 10 seconds...")
+ pywikibot.output(u"An error occured. Retrying in 10 seconds...")
time.sleep(10)
continue
@@ -620,7 +617,7 @@
yield result.URL
# give an estimate of pages to work on, but only once.
if not estimatedTotalResultsCount:
- wikipedia.output(u'Estimated total result count: %i pages.' % data.meta.estimatedTotalResultsCount)
+ pywikibot.output(u'Estimated total result count: %i pages.' % data.meta.estimatedTotalResultsCount)
estimatedTotalResultsCount = data.meta.estimatedTotalResultsCount
#print 'estimatedTotalResultsCount: ', estimatedTotalResultsCount
offset += 10
@@ -640,7 +637,7 @@
#offset = 0
#while True:
- #wikipedia.output("Google: Querying page %d" % (offset / 100 + 1))
+ #pywikibot.output("Google: Querying page %d" % (offset / 100 + 1))
#address = "http://www.google.com/search?q=%s&num=100&hl=en&start=%d" % (urllib.quote_plus(query), offset)
## we fake being Firefox because Google blocks unknown browsers
#request = urllib2.Request(address, None, {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.8) Gecko/20051128 SUSE/1.5-0.1 Firefox/1.5'})
@@ -658,11 +655,12 @@
def __iter__(self):
# restrict query to local site
localQuery = '%s site:%s' % (self.query, self.site.hostname())
- base = 'http://%s%s' % (self.site.hostname(), self.site.nice_get_address(''))
+ base = 'http://%s%s' % (self.site.hostname(),
+ self.site.nice_get_address(''))
for url in self.queryGoogle(localQuery):
if url[:len(base)] == base:
title = url[len(base):]
- page = wikipedia.Page(self.site, title)
+ page = pywikibot.Page(self.site, title)
# Google contains links in the format http://de.wikipedia.org/wiki/en:Foobar
if page.site() == self.site:
yield page
@@ -670,12 +668,12 @@
def MySQLPageGenerator(query, site = None):
import MySQLdb as mysqldb
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
conn = mysqldb.connect(config.db_hostname, db = site.dbName(),
user = config.db_username,
passwd = config.db_password)
cursor = conn.cursor()
- wikipedia.output(u'Executing query:\n%s' % query)
+ pywikibot.output(u'Executing query:\n%s' % query)
query = query.encode(site.encoding())
cursor.execute(query)
while True:
@@ -693,30 +691,30 @@
pageTitle = '%s:%s' % (namespace, pageName)
else:
pageTitle = pageName
- page = wikipedia.Page(site, pageTitle)
+ page = pywikibot.Page(site, pageTitle)
yield page
def YearPageGenerator(start = 1, end = 2050, site = None):
if site is None:
- site = wikipedia.getSite()
- wikipedia.output(u"Starting with year %i" % start)
+ site = pywikibot.getSite()
+ pywikibot.output(u"Starting with year %i" % start)
for i in xrange(start, end + 1):
if i % 100 == 0:
- wikipedia.output(u'Preparing %i...' % i)
+ pywikibot.output(u'Preparing %i...' % i)
# There is no year 0
if i != 0:
current_year = date.formatYear(site.lang, i )
- yield wikipedia.Page(site, current_year)
+ yield pywikibot.Page(site, current_year)
def DayPageGenerator(startMonth = 1, endMonth = 12, site = None):
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
fd = date.FormatDate(site)
- firstPage = wikipedia.Page(site, fd(startMonth, 1))
- wikipedia.output(u"Starting with %s" % firstPage.aslink())
+ firstPage = pywikibot.Page(site, fd(startMonth, 1))
+ pywikibot.output(u"Starting with %s" % firstPage.aslink())
for month in xrange(startMonth, endMonth+1):
for day in xrange(1, date.getNumberOfDaysInMonth(month)+1):
- yield wikipedia.Page(site, fd(month, day))
+ yield pywikibot.Page(site, fd(month, day))
def NamespaceFilterPageGenerator(generator, namespaces, site = None):
"""
@@ -728,7 +726,7 @@
"""
# convert namespace names to namespace numbers
if site is None:
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
for i in xrange(len(namespaces)):
ns = namespaces[i]
if isinstance(ns, unicode) or isinstance(ns, str):
@@ -761,8 +759,8 @@
for page in generator:
if isIgnored(page):
- if wikipedia.verbose:
- wikipedia.output('Ignoring page %s' % page.title())
+ if pywikibot.verbose:
+ pywikibot.output('Ignoring page %s' % page.title())
else:
yield page
@@ -867,7 +865,7 @@
pass
except Exception, e:
traceback.print_exc()
- wikipedia.output(unicode(e))
+ pywikibot.output(unicode(e))
def preload(self, page_list, retry=False):
try:
@@ -880,13 +878,13 @@
if page.site() == site]
page_list = [page for page in page_list
if page.site() != site]
- wikipedia.getall(site, pagesThisSite)
+ pywikibot.getall(site, pagesThisSite)
for page in pagesThisSite:
yield page
except IndexError:
# Can happen if the pages list is empty. Don't care.
pass
- except wikipedia.SaxError:
+ except pywikibot.SaxError:
if not retry:
# Retry once.
self.preload(page_list, retry=True)
@@ -926,9 +924,9 @@
return genToReturn
def getCategoryGen(self, arg, length, recurse = False):
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
if len(arg) == length:
- categoryname = wikipedia.input(u'Please enter the category name:')
+ categoryname = pywikibot.input(u'Please enter the category name:')
else:
categoryname = arg[length + 1:]
categoryname = categoryname.replace('#', '|')
@@ -943,9 +941,9 @@
return CategorizedPageGenerator(cat, start=startfrom, recurse=recurse)
def setSubCategoriesGen(self, arg, length, recurse = False):
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
if len(arg) == length:
- categoryname = wikipedia.input(u'Please enter the category name:')
+ categoryname = pywikibot.input(u'Please enter the category name:')
else:
categoryname = arg[length + 1:]
@@ -970,19 +968,19 @@
arguments have been parsed to get the final output generator.
"""
- site = wikipedia.getSite()
+ site = pywikibot.getSite()
gen = None
if arg.startswith('-filelinks'):
fileLinksPageTitle = arg[11:]
if not fileLinksPageTitle:
- fileLinksPageTitle = wikipedia.input(
+ fileLinksPageTitle = pywikibot.input(
u'Links to which image page should be processed?')
if fileLinksPageTitle.startswith(site.namespace(6)
+ ":"):
- fileLinksPage = wikipedia.ImagePage(site,
+ fileLinksPage = pywikibot.ImagePage(site,
fileLinksPageTitle)
else:
- fileLinksPage = wikipedia.ImagePage(site,
+ fileLinksPage = pywikibot.ImagePage(site,
'Image:' + fileLinksPageTitle)
gen = FileLinksGenerator(fileLinksPage)
elif arg.startswith('-unusedfiles'):
@@ -1005,8 +1003,8 @@
elif arg.startswith('-interwiki'):
title = arg[11:]
if not title:
- title = wikipedia.input(u'Which page should be processed?')
- page = wikipedia.Page(site, title)
+ title = pywikibot.input(u'Which page should be processed?')
+ page = pywikibot.Page(site, title)
gen = InterwikiPageGenerator(page)
elif arg.startswith('-randomredirect'):
if len(arg) == 15:
@@ -1026,12 +1024,12 @@
elif arg.startswith('-file'):
textfilename = arg[6:]
if not textfilename:
- textfilename = wikipedia.input(
+ textfilename = pywikibot.input(
u'Please enter the local file name:')
gen = TextfilePageGenerator(textfilename)
elif arg.startswith('-namespace'):
if len(arg) == len('-namespace'):
- self.namespaces.append(wikipedia.input(u'What namespace are you filtering on?'))
+ self.namespaces.append(pywikibot.input(u'What namespace are you filtering on?'))
else:
self.namespaces.extend(arg[len('-namespace:'):].split(","))
return True
@@ -1050,11 +1048,11 @@
gen = self.getCategoryGen(arg, 7, recurse = True)
elif arg.startswith('-page'):
if len(arg) == len('-page'):
- gen = [wikipedia.Page(site,
- wikipedia.input(
+ gen = [pywikibot.Page(site,
+ pywikibot.input(
u'What page do you want to use?'))]
else:
- gen = [wikipedia.Page(site, arg[len('-page:'):])]
+ gen = [pywikibot.Page(site, arg[len('-page:'):])]
elif arg.startswith('-uncatfiles'):
gen = UnCategorizedImageGenerator()
elif arg.startswith('-uncatcat'):
@@ -1064,29 +1062,29 @@
elif arg.startswith('-ref'):
referredPageTitle = arg[5:]
if not referredPageTitle:
- referredPageTitle = wikipedia.input(
+ referredPageTitle = pywikibot.input(
u'Links to which page should be processed?')
- referredPage = wikipedia.Page(site, referredPageTitle)
+ referredPage = pywikibot.Page(site, referredPageTitle)
gen = ReferringPageGenerator(referredPage)
elif arg.startswith('-links'):
linkingPageTitle = arg[7:]
if not linkingPageTitle:
- linkingPageTitle = wikipedia.input(
+ linkingPageTitle = pywikibot.input(
u'Links from which page should be processed?')
- linkingPage = wikipedia.Page(site, linkingPageTitle)
+ linkingPage = pywikibot.Page(site, linkingPageTitle)
gen = LinkedPageGenerator(linkingPage)
elif arg.startswith('-weblink'):
url = arg[9:]
if not url:
- url = wikipedia.input(
+ url = pywikibot.input(
u'Pages with which weblink should be processed?')
gen = LinksearchPageGenerator(url)
elif arg.startswith('-transcludes'):
transclusionPageTitle = arg[len('-transcludes:'):]
if not transclusionPageTitle:
- transclusionPageTitle = wikipedia.input(
+ transclusionPageTitle = pywikibot.input(
u'Pages that transclude which page should be processed?')
- transclusionPage = wikipedia.Page(site,
+ transclusionPage = pywikibot.Page(site,
"%s:%s" % (site.namespace(10),
transclusionPageTitle))
gen = ReferringPageGenerator(transclusionPage,
@@ -1094,35 +1092,35 @@
elif arg.startswith('-gorandom'):
for firstPage in RandomPageGenerator(number = 1):
firstPageTitle = firstPage.title()
- namespace = wikipedia.Page(site, firstPageTitle).namespace()
- firstPageTitle = wikipedia.Page(site,
+ namespace = pywikibot.Page(site, firstPageTitle).namespace()
+ firstPageTitle = pywikibot.Page(site,
firstPageTitle).titleWithoutNamespace()
gen = AllpagesPageGenerator(firstPageTitle, namespace,
includeredirects=False)
elif arg.startswith('-start'):
if arg.startswith('-startxml'):
- wikipedia.output(u'-startxml : wrong parameter')
+ pywikibot.output(u'-startxml : wrong parameter')
sys.exit()
firstPageTitle = arg[7:]
if not firstPageTitle:
- firstPageTitle = wikipedia.input(
+ firstPageTitle = pywikibot.input(
u'At which page do you want to start?')
if self.namespaces != []:
namespace = self.namespaces[0]
else:
- namespace = wikipedia.Page(site, firstPageTitle).namespace()
+ namespace = pywikibot.Page(site, firstPageTitle).namespace()
- firstPageTitle = wikipedia.Page(site,
+ firstPageTitle = pywikibot.Page(site,
firstPageTitle).titleWithoutNamespace()
gen = AllpagesPageGenerator(firstPageTitle, namespace,
includeredirects=False)
elif arg.startswith('-redirectonly'):
firstPageTitle = arg[14:]
if not firstPageTitle:
- firstPageTitle = wikipedia.input(
+ firstPageTitle = pywikibot.input(
u'At which page do you want to start?')
- namespace = wikipedia.Page(site, firstPageTitle).namespace()
- firstPageTitle = wikipedia.Page(site,
+ namespace = pywikibot.Page(site, firstPageTitle).namespace()
+ firstPageTitle = pywikibot.Page(site,
firstPageTitle).titleWithoutNamespace()
gen = AllpagesPageGenerator(firstPageTitle, namespace,
includeredirects='only')
@@ -1130,11 +1128,11 @@
prefix = arg[13:]
namespace = None
if not prefix:
- prefix = wikipedia.input(
+ prefix = pywikibot.input(
u'What page names are you looking for?')
gen = PrefixingPageGenerator(prefix = prefix)
elif arg.startswith('-newimages'):
- limit = arg[11:] or wikipedia.input(
+ limit = arg[11:] or pywikibot.input(
u'How many images do you want to load?')
gen = NewimagesPageGenerator(number = int(limit))
elif arg.startswith('-new'):
@@ -1145,14 +1143,14 @@
elif arg.startswith('-imagelinks'):
imagelinkstitle = arg[len('-imagelinks:'):]
if not imagelinkstitle:
- imagelinkstitle = wikipedia.input(
+ imagelinkstitle = pywikibot.input(
u'Images on which page should be processed?')
- imagelinksPage = wikipedia.Page(site, imagelinkstitle)
+ imagelinksPage = pywikibot.Page(site, imagelinkstitle)
gen = ImagesPageGenerator(imagelinksPage)
elif arg.startswith('-search'):
mediawikiQuery = arg[8:]
if not mediawikiQuery:
- mediawikiQuery = wikipedia.input(
+ mediawikiQuery = pywikibot.input(
u'What do you want to search for?')
# In order to be useful, all namespaces are required
gen = SearchPageGenerator(mediawikiQuery, namespaces = [])
@@ -1160,7 +1158,7 @@
gen = GoogleSearchPageGenerator(arg[8:])
elif arg.startswith('-titleregex'):
if len(arg) == 11:
- regex = wikipedia.input(u'What page names are you looking for?')
+ regex = pywikibot.input(u'What page names are you looking for?')
else:
regex = arg[12:]
gen = RegexFilterPageGenerator(site.allpages(), regex)
@@ -1177,16 +1175,16 @@
if __name__ == "__main__":
try:
genFactory = GeneratorFactory()
- for arg in wikipedia.handleArgs():
+ for arg in pywikibot.handleArgs():
if not genFactory.handleArg(arg):
- wikipedia.showHelp('pagegenerators')
+ pywikibot.showHelp('pagegenerators')
break
else:
gen = genFactory.getCombinedGenerator()
if gen:
for page in gen:
- wikipedia.output(page.title(), toStdout = True)
+ pywikibot.output(page.title(), toStdout = True)
else:
- wikipedia.showHelp('pagegenerators')
+ pywikibot.showHelp('pagegenerators')
finally:
- wikipedia.stopme()
+ pywikibot.stopme()
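The whole revision applies one mechanical pattern: the trunk module keeps its file name, but every call site imports it under the rewrite-branch name, so the two codebases can be compared line by line. A minimal sketch (the page title is invented) of how code written against either branch now reads:

    import wikipedia as pywikibot   # trunk module, addressed by its rewrite name

    site = pywikibot.getSite()
    page = pywikibot.Page(site, u'Sandbox')        # hypothetical title
    pywikibot.output(page.title(), toStdout=True)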
Revision: 7874
Author: xqt
Date: 2010-01-13 07:15:42 +0000 (Wed, 13 Jan 2010)
Log Message:
-----------
Update from rewrite
Modified Paths:
--------------
trunk/pywikipedia/date.py
trunk/pywikipedia/families/mac_wikia_family.py
trunk/pywikipedia/families/meta_family.py
trunk/pywikipedia/families/strategy_family.py
trunk/pywikipedia/families/wikipedia_family.py
trunk/pywikipedia/families/wowwiki_family.py
trunk/pywikipedia/family.py
trunk/pywikipedia/login.py
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/userlib.py
Modified: trunk/pywikipedia/date.py
===================================================================
--- trunk/pywikipedia/date.py 2010-01-13 07:10:29 UTC (rev 7873)
+++ trunk/pywikipedia/date.py 2010-01-13 07:15:42 UTC (rev 7874)
@@ -19,7 +19,6 @@
import re
import wikipedia
-
#
# Different collections of well known formats
#
@@ -147,7 +146,6 @@
return dh_noConv( value, pattern, formatLimits['MillenniumBC'][0] )
-
def decSinglVal( v ):
return v[0]
@@ -199,34 +197,34 @@
to accept all other values"""
return True
-def monthName(lang,ind):
+def monthName(lang, ind):
return formats['MonthName'][lang](ind)
# Helper for KN: digits representation
-_knDigits=u'೦೧೨೩೪೫೬೭೮೯'
-_knDigitsToLocal=dict([(ord(unicode(i)), _knDigits[i]) for i in range(10)])
-_knLocalToDigits=dict([(ord(_knDigits[i]), unicode(i)) for i in range(10)])
+_knDigits = u'೦೧೨೩೪೫೬೭೮೯'
+_knDigitsToLocal = dict([(ord(unicode(i)), _knDigits[i]) for i in range(10)])
+_knLocalToDigits = dict([(ord(_knDigits[i]), unicode(i)) for i in range(10)])
# Helper for Urdu/Persian languages
-_faDigits=u'۰۱۲۳۴۵۶۷۸۹'
-_faDigitsToLocal=dict([(ord(unicode(i)), _faDigits[i]) for i in range(10)])
-_faLocalToDigits=dict([(ord(_faDigits[i]), unicode(i)) for i in range(10)])
+_faDigits = u'۰۱۲۳۴۵۶۷۸۹'
+_faDigitsToLocal = dict([(ord(unicode(i)), _faDigits[i]) for i in range(10)])
+_faLocalToDigits = dict([(ord(_faDigits[i]), unicode(i)) for i in range(10)])
# Helper for HI:, MR:
-_hiDigits=u'०१२३४५६७८९'
-_hiDigitsToLocal=dict([(ord(unicode(i)), _hiDigits[i]) for i in range(10)])
-_hiLocalToDigits=dict([(ord(_hiDigits[i]), unicode(i)) for i in range(10)])
+_hiDigits = u'०१२३४५६७८९'
+_hiDigitsToLocal = dict([(ord(unicode(i)), _hiDigits[i]) for i in range(10)])
+_hiLocalToDigits = dict([(ord(_hiDigits[i]), unicode(i)) for i in range(10)])
# Helper for BN:
-_bnDigits=u'০১২৩৪৫৬৭৮৯'
-_bnDigitsToLocal=dict([(ord(unicode(i)), _bnDigits[i]) for i in range(10)])
-_bnLocalToDigits=dict([(ord(_bnDigits[i]), unicode(i)) for i in range(10)])
+_bnDigits = u'০১২৩৪৫৬৭৮৯'
+_bnDigitsToLocal = dict([(ord(unicode(i)), _bnDigits[i]) for i in range(10)])
+_bnLocalToDigits = dict([(ord(_bnDigits[i]), unicode(i)) for i in range(10)])
# Helper for GU:
-_guDigits=u'૦૧૨૩૪૫૬૭૮૯'
-_guDigitsToLocal=dict([(ord(unicode(i)), _guDigits[i]) for i in range(10)])
-_guLocalToDigits=dict([(ord(_guDigits[i]), unicode(i)) for i in range(10)])
+_guDigits = u'૦૧૨૩૪૫૬૭૮૯'
+_guDigitsToLocal = dict([(ord(unicode(i)), _guDigits[i]) for i in range(10)])
+_guLocalToDigits = dict([(ord(_guDigits[i]), unicode(i)) for i in range(10)])
def intToLocalDigitsStr( value, digitsToLocalDict ):
# Encode an integer value into a textual form.
@@ -279,7 +277,7 @@
'T' : ( _decimalDigits, lambda v: unicode(v+543), lambda v: int(v)-543 ),
}
-# Allows to search for '(%%)|(%d)|(%R)|...", and allows one digit 1-9 too set the size of zero-padding for numbers
+# Allows to search for '(%%)|(%d)|(%R)|...", and allows one digit 1-9 to set the size of zero-padding for numbers
_reParameters = re.compile(u'|'.join([ u'(%%[1-9]?%s)' % s for s in _digitDecoders.keys() ]))
# A map of sitecode+pattern to (re matching object and corresponding decoders)
@@ -301,7 +299,7 @@
for s in _reParameters.split(pattern):
if s is None:
pass
- elif len(s) in [2,3] and s[0]=='%' and s[-1] in _digitDecoders and (len(s)==2 or s[1] in _decimalDigits):
+ elif len(s) in [2, 3] and s[0] == '%' and s[-1] in _digitDecoders and (len(s) == 2 or s[1] in _decimalDigits):
# Must match a "%2d" or "%d" style
dec = _digitDecoders[s[-1]]
if type(dec) in _stringTypes:
@@ -741,7 +739,7 @@
'ca' : lambda m: multi( m, [
(lambda v: dh_decAD( v, u'Dècada de %d' ), lambda p: p == 1970),
(lambda v: dh_decAD( v, u'Dècada del %d' ), alwaysTrue)]),
-
+
#1970s => '1970-1979'
'cs' : lambda m: multi( m, [
(lambda v: dh_constVal( v, 1, u'1-9'), lambda p: p == 1),
@@ -1033,29 +1031,29 @@
(lambda v: dh_centuryBC( v, u'%dde eeu v.C.' ), alwaysTrue)]),
'bg' : lambda v: dh_centuryBC( v, u'%d век пр.н.е.' ),
'br' : lambda m: multi( m, [
- (lambda v: dh_constVal( v, 1, u'Iañ kantved kt JK'), lambda p: p == 1),
- (lambda v: dh_constVal( v, 2, u'Eil kantved kt JK'), lambda p: p == 2),
- (lambda v: dh_centuryBC( v, u'%Re kantved kt JK'), lambda p: p in [2,3]),
- (lambda v: dh_centuryBC( v, u'%Rvet kantved kt JK'), alwaysTrue)]),
+ (lambda v: dh_constVal( v, 1, u'Iañ kantved kt JK'), lambda p: p == 1),
+ (lambda v: dh_constVal( v, 2, u'Eil kantved kt JK'), lambda p: p == 2),
+ (lambda v: dh_centuryBC( v, u'%Re kantved kt JK'), lambda p: p in [2,3]),
+ (lambda v: dh_centuryBC( v, u'%Rvet kantved kt JK'), alwaysTrue)]),
'ca' : lambda v: dh_centuryBC( v, u'Segle %R aC' ),
'cs' : lambda v: dh_centuryBC( v, u'%d. století př. n. l.' ),
'da' : lambda v: dh_centuryBC( v, u'%d. århundrede f.Kr.' ),
'de' : lambda v: dh_centuryBC( v, u'%d. Jahrhundert v. Chr.' ),
'el' : lambda v: dh_centuryBC( v, u'%dος αιώνας π.Χ.' ),
'en' : lambda m: multi( m, [
- (lambda v: dh_centuryBC( v, u'%dst century BC' ), lambda p: p == 1 or (p > 20 and p%10 == 1)),
- (lambda v: dh_centuryBC( v, u'%dnd century BC' ), lambda p: p == 2 or (p > 20 and p%10 == 2)),
- (lambda v: dh_centuryBC( v, u'%drd century BC' ), lambda p: p == 3 or (p > 20 and p%10 == 3)),
- (lambda v: dh_centuryBC( v, u'%dth century BC' ), alwaysTrue)]),
+ (lambda v: dh_centuryBC( v, u'%dst century BC' ), lambda p: p == 1 or (p > 20 and p%10 == 1)),
+ (lambda v: dh_centuryBC( v, u'%dnd century BC' ), lambda p: p == 2 or (p > 20 and p%10 == 2)),
+ (lambda v: dh_centuryBC( v, u'%drd century BC' ), lambda p: p == 3 or (p > 20 and p%10 == 3)),
+ (lambda v: dh_centuryBC( v, u'%dth century BC' ), alwaysTrue)]),
'eo' : lambda v: dh_centuryBC( v, u'%d-a jarcento a.K.' ),
'es' : lambda v: dh_centuryBC( v, u'Siglo %R adC' ),
'et' : lambda v: dh_centuryBC( v, u'%d. aastatuhat eKr' ),
'fi' : lambda m: multi( m, [
- (lambda v: dh_constVal( v, 1, u'Ensimmäinen vuosisata eaa.'), lambda p: p == 1),
- (lambda v: dh( v, u'%d00-luku eaa.', lambda i: i-1, lambda ii: ii[0]+1 ), alwaysTrue)]),
+ (lambda v: dh_constVal( v, 1, u'Ensimmäinen vuosisata eaa.'), lambda p: p == 1),
+ (lambda v: dh( v, u'%d00-luku eaa.', lambda i: i-1, lambda ii: ii[0]+1 ), alwaysTrue)]),
'fr' : lambda m: multi( m, [
- (lambda v: dh_centuryBC( v, u'%Rer siècle av. J.-C.' ), lambda p: p == 1),
- (lambda v: dh_centuryBC( v, u'%Re siècle av. J.-C.' ), alwaysTrue)]),
+ (lambda v: dh_centuryBC( v, u'%Rer siècle av. J.-C.' ), lambda p: p == 1),
+ (lambda v: dh_centuryBC( v, u'%Re siècle av. J.-C.' ), alwaysTrue)]),
'he' : lambda v: dh_centuryBC( v, u'המאה ה־%d לפני הספירה' ),
'hr' : lambda v: dh_centuryBC( v, u'%d. stoljeće p.n.e.' ),
'id' : lambda v: dh_centuryBC( v, u'Abad ke-%d SM' ),
@@ -1491,21 +1489,21 @@
# In addition, tuple contains start, end, and step values that will be used to test the formats table for internal consistency.
#
formatLimits = {
- 'MonthName' : (lambda v: 1<=v and v<13, 1,13),
- 'Number' : (lambda v: 0<=v and v<1000000, 0,1001),
+ 'MonthName' : (lambda v: 1 <=v and v < 13, 1, 13),
+ 'Number' : (lambda v: 0 <=v and v < 1000000, 0, 1001),
- 'YearAD' : (lambda v: 0<=v and v<2501, 0,2501),
- 'YearBC' : (lambda v: 0<=v and v<4001, 0,501), # zh: has years as old as 前1700年
- 'DecadeAD' : (lambda v: 0<=v and v<2501, 0,2501), # At some point need to re-add "and v%10==0" to the limitation
- 'DecadeBC' : (lambda v: 0<=v and v<4001, 0,501), # zh: has decades as old as 前1700年代
- 'CenturyAD' : (lambda v: 1<=v and v<41, 1,23), # Some centuries use Roman numerals or a given list - do not exceed them in testing
- 'CenturyBC' : (lambda v: 1<=v and v<91, 1,23), # Some centuries use Roman numerals or a given list - do not exceed them in testing
- 'MillenniumAD' : (lambda v: 1<=v and v<6, 1,4), # For milleniums, only test first 3 AD Milleniums,
- 'MillenniumBC' : (lambda v: 1<=v and v<20, 1,2), # And only 1 BC Millenium
- 'CenturyAD_Cat' : (lambda v: 1<=v and v<41, 1,23), # Some centuries use Roman numerals or a given list - do not exceed them in testing
- 'CenturyBC_Cat' : (lambda v: 1<=v and v<41, 1,23), # Some centuries use Roman numerals or a given list - do not exceed them in testing
- 'Cat_Year_MusicAlbums' : (lambda v: 1950<=v and v<2021, 1950,2021),
- 'CurrEvents' : (lambda v: 0<=v and v<1, 0,1),
+ 'YearAD' : (lambda v: 0 <=v and v < 2501, 0, 2501),
+ 'YearBC' : (lambda v: 0 <=v and v < 4001, 0, 501), # zh: has years as old as 前1700年
+ 'DecadeAD' : (lambda v: 0 <=v and v < 2501, 0, 2501), # At some point need to re-add "and v%10==0" to the limitation
+ 'DecadeBC' : (lambda v: 0 <=v and v < 4001, 0, 501), # zh: has decades as old as 前1700年代
+ 'CenturyAD' : (lambda v: 1 <=v and v < 41, 1, 23), # Some centuries use Roman numerals or a given list - do not exceed them in testing
+ 'CenturyBC' : (lambda v: 1 <=v and v < 91, 1, 23), # Some centuries use Roman numerals or a given list - do not exceed them in testing
+ 'MillenniumAD' : (lambda v: 1 <=v and v < 6, 1, 4), # For milleniums, only test first 3 AD Milleniums,
+ 'MillenniumBC' : (lambda v: 1 <=v and v < 20, 1, 2), # And only 1 BC Millenium
+ 'CenturyAD_Cat' : (lambda v: 1 <=v and v < 41, 1, 23), # Some centuries use Roman numerals or a given list - do not exceed them in testing
+ 'CenturyBC_Cat' : (lambda v: 1 <=v and v < 41, 1, 23), # Some centuries use Roman numerals or a given list - do not exceed them in testing
+ 'Cat_Year_MusicAlbums' : (lambda v: 1950 <= v and v < 2021, 1950, 2021),
+ 'CurrEvents' : (lambda v: 0 <= v and v < 1, 0, 1),
}
# All month of year articles are in the same format
@@ -1517,7 +1515,7 @@
_formatLimit_DayOfMonth30 = (lambda v: 1 <= v and v < 31, 1, 31)
_formatLimit_DayOfMonth29 = (lambda v: 1 <= v and v < 30, 1, 30)
for monthId in range(12):
- if (monthId+1) in [1,3,5,7,8,10,12]:
+ if (monthId + 1) in [1, 3, 5, 7, 8, 10, 12]:
formatLimits[dayMnthFmts[monthId]] = _formatLimit_DayOfMonth31 # 31 days a month
elif (monthId+1) == 2: # February
formatLimits[dayMnthFmts[monthId]] = _formatLimit_DayOfMonth29 # 29 days a month
@@ -1567,8 +1565,6 @@
else:
return formats['YearAD'][lang](year)
-
-
#
#
# Map testing methods
@@ -1603,7 +1599,7 @@
for code, convFunc in formats[formatName].iteritems():
# import time
# startClock = time.clock()
- for value in range(start,stop,step):
+ for value in range(start, stop, step):
try:
if not predicate(value):
raise AssertionError(" Not a valid value for this format.")
@@ -1618,11 +1614,14 @@
# wikipedia.output( u"%s\t%s\t%f" % (formatName, code, time.clock() - startClock) )
def test(quick = False, showAll = False):
- """This is a test function, to be used interactivelly to test entire format convesion map at once
+ """This is a test function, to be used interactively to test entire
+ format conversion map at once
+
Usage example:
run python interpreter
>>> import date
>>> date.test()
+
"""
for formatName in formats.keys():
Modified: trunk/pywikipedia/families/mac_wikia_family.py
===================================================================
--- trunk/pywikipedia/families/mac_wikia_family.py 2010-01-13 07:10:29 UTC (rev 7873)
+++ trunk/pywikipedia/families/mac_wikia_family.py 2010-01-13 07:15:42 UTC (rev 7874)
@@ -76,7 +76,7 @@
self.languages_by_size = ['en','de']
def version(self, code):
- return "1.10alpha"
+ return "1.14"
def scriptpath(self, code):
return ''
Modified: trunk/pywikipedia/families/meta_family.py
===================================================================
--- trunk/pywikipedia/families/meta_family.py 2010-01-13 07:10:29 UTC (rev 7873)
+++ trunk/pywikipedia/families/meta_family.py 2010-01-13 07:15:42 UTC (rev 7874)
@@ -133,7 +133,6 @@
'mediawiki', 'test', 'incubator', 'commons', 'species',
]
-
def version(self,code):
return '1.16alpha-wmf'
Modified: trunk/pywikipedia/families/strategy_family.py
===================================================================
--- trunk/pywikipedia/families/strategy_family.py 2010-01-13 07:10:29 UTC (rev 7873)
+++ trunk/pywikipedia/families/strategy_family.py 2010-01-13 07:15:42 UTC (rev 7874)
@@ -31,7 +31,6 @@
self.interwiki_forward = 'wikipedia'
-
def version(self, code):
return '1.16alpha-wmf'
Modified: trunk/pywikipedia/families/wikipedia_family.py
===================================================================
--- trunk/pywikipedia/families/wikipedia_family.py 2010-01-13 07:10:29 UTC (rev 7873)
+++ trunk/pywikipedia/families/wikipedia_family.py 2010-01-13 07:15:42 UTC (rev 7874)
@@ -1175,7 +1175,7 @@
def get_known_families(self, site):
# In Swedish Wikipedia 's:' is part of page title not a family
# prefix for 'wikisource'.
- if site.lang == 'sv':
+ if site.language() == 'sv':
d = self.known_families.copy()
d.pop('s') ; d['src'] = 'wikisource'
return d
Modified: trunk/pywikipedia/families/wowwiki_family.py
===================================================================
--- trunk/pywikipedia/families/wowwiki_family.py 2010-01-13 07:10:29 UTC (rev 7873)
+++ trunk/pywikipedia/families/wowwiki_family.py 2010-01-13 07:15:42 UTC (rev 7874)
@@ -28,8 +28,8 @@
'nl': 'nl.wow.wikia.com',
'no': 'no.wow.wikia.com',
'pl': 'pl.wow.wikia.com',
+ 'pt': 'pt.wow.wikia.com',
'pt-br': 'pt-br.wow.wikia.com',
- 'pt': 'pt.wow.wikia.com',
'ro': 'ro.wow.wikia.com',
'ru': 'ru.wow.wikia.com',
'sk': 'sk.wow.wikia.com',
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2010-01-13 07:10:29 UTC (rev 7873)
+++ trunk/pywikipedia/family.py 2010-01-13 07:15:42 UTC (rev 7874)
@@ -2677,7 +2677,8 @@
},
}
- # letters that can follow a wikilink and are regarded as part of this link
+ # letters that can follow a wikilink and are regarded as part
+ # of this link
# This depends on the linktrail setting in LanguageXx.php and on
# [[MediaWiki:Linktrail]].
# Note: this is a regular expression.
@@ -2710,6 +2711,8 @@
# for line in f.readlines():
# s = line[:line.index('\t')]
# print ((" '%s':" % s).ljust(20) + ("'%s'," % s))
+
+ # TODO: replace this with API interwikimap call
self.known_families = {
'abbenormal': 'abbenormal',
'aboutccc': 'aboutccc',
@@ -3157,7 +3160,7 @@
# a list of languages. If there are at least the number of interwiki
# links, all languages in the list should be placed at the front as
# well as in the normal list.
- self.interwiki_putfirst_doubled = {}
+ self.interwiki_putfirst_doubled = {} # THIS APPEARS TO BE UNUSED!
# Some families, e. g. commons and meta, are not multilingual and
# forward interlanguage links to another family (wikipedia).
@@ -3274,7 +3277,9 @@
elif fallback:
return self.linktrails[fallback]
else:
- raise KeyError('ERROR: linktrail in language %s unknown' % code)
+ raise KeyError(
+ "ERROR: linktrail in language %s unknown"
+ % code)
def namespace(self, code, ns_number, fallback = '_default', all = False):
if not self.isDefinedNS(ns_number):
@@ -3408,7 +3413,7 @@
return self.disambiguationTemplates[fallback]
else:
raise KeyError(
- 'ERROR: title for disambig template in language %s unknown'
+ "ERROR: title for disambig template in language %s unknown"
% code)
# Returns the title of the special namespace in language 'code', taken from
@@ -3912,6 +3917,16 @@
wiki"""
return self.code2encoding(code),
+ # aliases
+ def encoding(self, code):
+ """Return the encoding for a specific language wiki"""
+ return self.code2encoding(code)
+
+ def encodings(self, code):
+ """Return a list of historical encodings for a specific language
+ wiki"""
+ return self.code2encodings(code)
+
def __cmp__(self, otherfamily):
try:
return cmp(self.name, otherfamily.name)
@@ -3921,6 +3936,9 @@
def __hash__(self):
return hash(self.name)
+ def __repr__(self):
+ return 'Family("%s")' % self.name
+
def RversionTab(self, code):
"""Change this to some regular expression that shows the page we
found is an existing page, in case the normal regexp does not work."""
@@ -3940,7 +3958,7 @@
return datetime.utcnow() + self.servergmtoffset
def isPublic(self):
- """Does the wiki require logging in before viewing it ?"""
+ """Does the wiki require logging in before viewing it?"""
return True
def post_get_convert(self, site, getText):
Modified: trunk/pywikipedia/login.py
===================================================================
--- trunk/pywikipedia/login.py 2010-01-13 07:10:29 UTC (rev 7873)
+++ trunk/pywikipedia/login.py 2010-01-13 07:15:42 UTC (rev 7874)
@@ -1,11 +1,10 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
-
"""
Script to log the robot in to a wiki account.
Suggestion is to make a special account to use for robot use only. Make
-sure this robot account is well known on your home wikipedia before using.
+sure this robot account is well known on your home wiki before using.
Parameters:
@@ -37,19 +36,20 @@
check the output. Using -log is recommended: this will output a
lot of data
-If not given as parameter, the script will ask for your username and password
-(password entry will be hidden), log in to your home wiki using this
-combination, and store the resulting cookies (containing your password hash,
+If not given as parameter, the script will ask for your username and
+password (password entry will be hidden), log in to your home wiki using
+this combination, and store the resulting cookies (containing your password
so keep it secured!) in a file in the login-data subdirectory.
-All scripts in this library will be looking for this cookie file and will use
-the login information if it is present.
+All scripts in this library will be looking for this cookie file and will
+use the login information if it is present.
To log out, throw away the XX-login.data file that is created in the login-data
subdirectory.
"""
#
# (C) Rob W.W. Hooft, 2003
+# (C) Pywikipedia bot team, 2003-2010
#
# Distributed under the terms of the MIT license.
#
@@ -80,11 +80,11 @@
wikipedia.output(u"You are not logged in on %s." % repr(mysite))
class LoginManager:
- def __init__(self, password = None, sysop = False, site = None, username=None, verbose=False):
+ def __init__(self, password=None, sysop=False, site=None, username=None, verbose=False):
self.site = site or wikipedia.getSite()
self.sysop = sysop
if username:
- self.username=username
+ self.username = username
# perform writeback.
if site.family.name not in config.usernames:
config.usernames[site.family.name]={}
@@ -92,7 +92,8 @@
else:
if sysop:
try:
- self.username = config.sysopnames[self.site.family.name][self.site.lang]
+ self.username = config.sysopnames\
+ [self.site.family.name][self.site.lang]
except:
raise wikipedia.NoUsername(u'ERROR: Sysop username for %s:%s is undefined.\nIf you have a sysop account for that site, please add such a line to user-config.py:\n\nsysopnames[\'%s\'][\'%s\'] = \'myUsername\'' % (self.site.family.name, self.site.lang, self.site.family.name, self.site.lang))
else:
@@ -110,7 +111,8 @@
Checks whether the bot is listed on a specific page to comply with
the policy on the respective wiki.
"""
- if self.site.family.name in botList and self.site.language() in botList[self.site.family.name]:
+ if self.site.family.name in botList \
+ and self.site.language() in botList[self.site.family.name]:
botListPageTitle = wikipedia.translate(self.site.language(), botList)
botListPage = wikipedia.Page(self.site, botListPageTitle)
for linkedPage in botListPage.linkedPages():
@@ -225,11 +227,12 @@
def storecookiedata(self, filename, data):
"""
- Stores cookie data.
+ Store cookie data.
The argument data is the raw data, as returned by getCookie().
- Returns nothing."""
+ Returns nothing.
+ """
s = u''
for v, k in data.iteritems():
s += "%s=%s\n" % (v, k)
@@ -239,8 +242,10 @@
def readPassword(self):
"""
- Reads passwords from a file. DO NOT FORGET TO REMOVE READ
- ACCESS FOR OTHER USERS!!! Use chmod 600 password-file.
+ Read passwords from a file.
+
+ DO NOT FORGET TO REMOVE READ ACCESS FOR OTHER USERS!!!
+ Use chmod 600 password-file.
All lines below should be valid Python tuples in the form
(code, family, username, password) or (username, password)
to set a default password for an username. Default usernames
@@ -252,8 +257,8 @@
("my_sysop_user", "my_sysop_password")
("en", "wikipedia", "my_en_user", "my_en_pass")
"""
- file = open(wikipedia.config.datafilepath(config.password_file), 'r')
- for line in file:
+ password_f = open(wikipedia.config.datafilepath(config.password_file), 'r')
+ for line in password_f:
if not line.strip(): continue
entry = eval(line)
if len(entry) == 2: #for default userinfo
@@ -263,13 +268,16 @@
entry[1] == self.site.family.name and \
entry[2] == self.username:
self.password = entry[3]
- file.close()
+ password_f.close()
def login(self, api = config.use_api_login, retry = False):
if not self.password:
# As we don't want the password to appear on the screen, we set
# password = True
- self.password = wikipedia.input(u'Password for user %s on %s:' % (self.username, self.site), password = True)
+ self.password = wikipedia.input(
+ u'Password for user %s on %s:'
+ % (self.username, self.site),
+ password = True)
self.password = self.password.encode(self.site.encoding())
@@ -335,7 +343,8 @@
for arg in wikipedia.handleArgs():
if arg.startswith("-pass"):
if len(arg) == 5:
- password = wikipedia.input(u'Password for all accounts:', password = True)
+ password = wikipedia.input(u'Password for all accounts:',
+ password = True)
else:
password = arg[6:]
elif arg == "-clean":
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2010-01-13 07:10:29 UTC (rev 7873)
+++ trunk/pywikipedia/pagegenerators.py 2010-01-13 07:15:42 UTC (rev 7874)
@@ -15,26 +15,26 @@
"""
__version__='$Id$'
-parameterHelp = """\
+parameterHelp = u"""\
-cat Work on all pages which are in a specific category.
Argument can also be given as "-cat:categoryname" or
- as "-cat:categoryname#fromtitle" (using | instead of #
+ as "-cat:categoryname|fromtitle" (using # instead of |
is also allowed in this one and the following)
-catr Like -cat, but also recursively includes pages in
subcategories, sub-subcategories etc. of the
given category.
Argument can also be given as "-catr:categoryname" or
- as "-catr:categoryname#fromtitle".
+ as "-catr:categoryname|fromtitle".
-subcats Work on all subcategories of a specific category.
Argument can also be given as "-subcats:categoryname" or
- as "-subcats:categoryname#fromtitle".
+ as "-subcats:categoryname|fromtitle".
-subcatsr Like -subcats, but also includes sub-subcategories etc. of
the given category.
Argument can also be given as "-subcatsr:categoryname" or
- as "-subcatsr:categoryname#fromtitle".
+ as "-subcatsr:categoryname|fromtitle".
-uncat Work on all pages which are not categorised.
@@ -50,22 +50,12 @@
-filelinks Work on all pages that use a certain image/media file.
Argument can also be given as "-filelinks:filename".
--yahoo Work on all pages that are found in a Yahoo search.
- Depends on python module pYsearch. See yahoo_appid in
- config.py for instructions.
-
-search Work on all pages that are found in a MediaWiki search
across all namespaces.
--google Work on all pages that are found in a Google search.
- You need a Google Web API license key. Note that Google
- doesn't give out license keys anymore. See google_key in
- config.py for instructions.
- Argument can also be given as "-google:searchstring".
-
--namespace Filters the page generator to only yield pages in the
+-namespace Filter the page generator to only yield pages in the
specified namespaces. Separate multiple namespace
- numbers with commas.
+ numbers with commas. Example "-namespace:0,2,4"
-interwiki Work on the given page and all equivalent pages in other
languages. This can, for example, be used to fight
@@ -77,15 +67,20 @@
-links Work on all pages that are linked from a certain page.
Argument can also be given as "-links:linkingpagetitle".
--new Work on the 60 newest pages. If given as -new:x, will work
- on the x newest pages.
-
-imagelinks Work on all images that are linked from a certain page.
Argument can also be given as "-imagelinks:linkingpagetitle".
-newimages Work on the 100 newest images. If given as -newimages:x,
will work on the x newest images.
+-new Work on the 60 recent new pages. If given as -new:x,
+ will work on the x newest pages.
+
+-recentchanges Work on new and edited pages returned by
+ [[Special:Recentchanges]]. Can also be given as
+ "-recentchanges:n" where n is the number of pages to be
+ returned, else 100 pages are returned.
+
-ref Work on all pages that link to a certain page.
Argument can also be given as "-ref:referredpagetitle".
@@ -135,11 +130,6 @@
-gorandom Specifies that the robot should starting at the random pages
returned by [[Special:Random]].
--recentchanges Work on new and edited pages returned by
- [[Special:Recentchanges]]. Can also be given as
- "-recentchanges:n" where n is the number of pages to be
- returned, else 100 pages are returned.
-
-redirectonly Work on redirect pages only, not their target pages.
The robot goes alphabetically through all redirect pages
on the wiki, starting at the named page. The
@@ -148,15 +138,24 @@
You can also include a namespace. For example,
"-redirectonly:Template:!" will make the bot work on
all redirect pages in the template namespace.
+
+-google Work on all pages that are found in a Google search.
+ You need a Google Web API license key. Note that Google
+ doesn't give out license keys anymore. See google_key in
+ config.py for instructions.
+ Argument can also be given as "-google:searchstring".
+
+-yahoo Work on all pages that are found in a Yahoo search.
+ Depends on python module pYsearch. See yahoo_appid in
+ config.py for instructions.
"""
+docuReplacements = {'¶ms;': parameterHelp}
-docuReplacements = {
- '¶ms;': parameterHelp
-}
+
# Standard library imports
import re, codecs, sys
import threading, Queue, traceback
@@ -309,9 +308,10 @@
yield page
def InterwikiPageGenerator(page):
+ """Iterator over all interwiki (non-language) links on a page."""
yield page
- for iwPage in page.interwiki():
- yield iwPage
+ for link in page.interwiki():
+ yield link
def ReferringPageGenerator(referredPage, followRedirects=False,
withTemplateInclusion=True,
@@ -333,9 +333,10 @@
If start is a string value, only pages whose title comes after start
alphabetically are included.
'''
- for page in category.articles(recurse = recurse, startFrom = start):
- if page.title() >= start:
- yield page
+ # TODO: page generator could be modified to use cmstartsortkey ...
+ for a in category.articles(recurse = recurse, startFrom = start):
+ if start is None or a.title() >= start:
+ yield a
def SubCategoriesPageGenerator(category, recurse=False, start=None):
'''
@@ -345,9 +346,12 @@
recurse is an int, only subcategories to that depth will be included
(e.g., recurse=2 will get pages in subcats and sub-subcats, but will
not go any further).
+ If start is a string value, only categories whose sortkey comes after
+ start alphabetically are included.
'''
- for page in category.subcategories(recurse = recurse, startFrom = start):
- yield page
+ # TODO: page generator could be modified to use cmstartsortkey ...
+ for s in category.subcategories(recurse = recurse, startFrom = start):
+ yield s
def UnCategorizedCategoryGenerator(number = 100, repeat = False, site = None):
if site is None:
@@ -461,8 +465,8 @@
yield wikipedia.Page(site, title)
f.close()
-def PagesFromTitlesGenerator(iterable, site = None):
- """Generates pages from the titles (unicode strings) yielded by iterable"""
+def PagesFromTitlesGenerator(iterable, site=None):
+ """Generate pages from the titles (unicode strings) yielded by iterable."""
if site is None:
site = wikipedia.getSite()
for title in iterable:
Modified: trunk/pywikipedia/userlib.py
===================================================================
--- trunk/pywikipedia/userlib.py 2010-01-13 07:10:29 UTC (rev 7873)
+++ trunk/pywikipedia/userlib.py 2010-01-13 07:15:42 UTC (rev 7874)
@@ -2,6 +2,11 @@
"""
Library to work with users, their pages and talk pages.
"""
+#
+# (C) Pywikipedia bot team, 2008-2010
+#
+# Distributed under the terms of the MIT license.
+#
__version__ = '$Id$'
import re