Revision: 8801
Author: xqt
Date: 2010-12-26 14:14:49 +0000 (Sun, 26 Dec 2010)
Log Message:
-----------
Enable unlimited MediaWiki search results, but cap the total number of pages yielded by all generators via -limit:n
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2010-12-26 12:30:13 UTC (rev 8800)
+++ trunk/pywikipedia/pagegenerators.py 2010-12-26 14:14:49 UTC (rev 8801)
@@ -84,6 +84,9 @@
pages on several wiki sites, this is not well tested,
so check your edits!
+-limit:n When used with any other argument that specifies a set
+ of pages, work on no more than n pages in total
+
-links Work on all pages that are linked from a certain page.
Argument can also be given as "-links:linkingpagetitle".
@@ -298,6 +301,7 @@
def __init__(self):
self.gens = []
self.namespaces = []
+ self.limit = None
def getCombinedGenerator(self, gen=None):
"""Returns the combination of all accumulated generators,
@@ -314,7 +318,7 @@
gensList = self.gens[0]
else:
gensList = CombinedPageGenerator(self.gens)
- genToReturn = DuplicateFilterPageGenerator(gensList)
+ genToReturn = DuplicateFilterPageGenerator(gensList, total=self.limit)
if (self.namespaces):
genToReturn = NamespaceFilterPageGenerator(genToReturn, map(int,
self.namespaces))
return genToReturn
@@ -443,6 +447,12 @@
else:
self.namespaces.extend(arg[len('-ns:'):].split(","))
return True
+ elif arg.startswith('-limit'):
+ if len(arg) == len('-limit'):
+ self.limit = int(pywikibot.input("What is the limit value?"))
+ else:
+ self.limit = int(arg[len('-limit:'):])
+ return True
elif arg.startswith('-catr'):
gen = self.getCategoryGen(arg, len('-catr'), recurse = True)
elif arg.startswith('-category'):
@@ -565,7 +575,7 @@
mediawikiQuery = pywikibot.input(
u'What do you want to search for?')
# In order to be useful, all namespaces are required
- gen = SearchPageGenerator(mediawikiQuery, namespaces = [])
+ gen = SearchPageGenerator(mediawikiQuery, number=None, namespaces=[])
elif arg.startswith('-google'):
gen = GoogleSearchPageGenerator(arg[8:])
elif arg.startswith('-titleregex'):
@@ -1148,16 +1158,21 @@
if not page.isRedirectPage():
yield page
-def DuplicateFilterPageGenerator(generator):
+def DuplicateFilterPageGenerator(generator, total=None):
"""
Wraps around another generator. Yields all pages, but prevents
duplicates.
"""
seenPages = dict()
+ count = 0
for page in generator:
_page = u"%s:%s:%s" % (page._site.family.name, page._site.lang,
page._title)
if _page not in seenPages:
seenPages[_page] = True
+ if total:
+ count += 1
+ if count > total:
+ break
yield page
def RegexFilterPageGenerator(generator, regex, inverse=False, ignore_namespace=True):
@@ -1309,8 +1324,10 @@
else:
gen = genFactory.getCombinedGenerator()
if gen:
+ i = 0
for page in gen:
- pywikibot.output(page.title(), toStdout = True)
+ i+=1
+ pywikibot.output("%s: %s" % (repr(i).rjust(4),
page.title()), toStdout = True)
else:
pywikibot.showHelp('pagegenerators')
finally:
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-12-26 12:30:13 UTC (rev 8800)
+++ trunk/pywikipedia/wikipedia.py 2010-12-26 14:14:49 UTC (rev 8801)
@@ -5938,7 +5938,7 @@
self._getUserDataOld(text, sysop = sysop, force = force)
- def search(self, key, number = 10, namespaces = None):
+ def search(self, key, number=10, namespaces=None):
"""
Yield search results for query.
Use API when enabled use_api and version >= 1.11,
@@ -5950,13 +5950,14 @@
'action': 'query',
'list': 'search',
'srsearch': key,
- 'srlimit': number
}
+ if number:
+ params['srlimit'] = number
if namespaces:
params['srnamespace'] = namespaces
offset = 0
- while offset < number:
+ while offset < number or not number:
params['sroffset'] = offset
data = query.GetData(params, self)['query']
if 'error' in data: