Revision: 5137
Author: btongminh
Date: 2008-03-16 20:02:16 +0000 (Sun, 16 Mar 2008)
Log Message:
-----------
Don't iterate on TTP or TSP if they are None. (patch by NicDumZ)
Modified Paths:
--------------
trunk/pywikipedia/blockpageschecker.py
Modified: trunk/pywikipedia/blockpageschecker.py
===================================================================
--- trunk/pywikipedia/blockpageschecker.py 2008-03-16 12:30:29 UTC (rev 5136)
+++ trunk/pywikipedia/blockpageschecker.py 2008-03-16 20:02:16 UTC (rev 5137)
@@ -161,14 +161,16 @@
def understandBlock(text, TTP, TSP, TSMP, TTMP):
""" Understand if the page is blocked and if it has the right template """
- for catchRegex in TTP: # TTP = templateTotalProtection
- resultCatch = re.findall(catchRegex, text)
- if resultCatch != []:
- return ('sysop-total', catchRegex)
- for catchRegex in TSP:
- resultCatch = re.findall(catchRegex, text)
- if resultCatch != []:
- return ('autoconfirmed-total', catchRegex)
+ if TTP:
+ for catchRegex in TTP: # TTP = templateTotalProtection
+ resultCatch = re.findall(catchRegex, text)
+ if resultCatch != []:
+ return ('sysop-total', catchRegex)
+ if TSP:
+ for catchRegex in TSP:
+ resultCatch = re.findall(catchRegex, text)
+ if resultCatch != []:
+ return ('autoconfirmed-total', catchRegex)
if TSMP != None and TTMP != None and TTP != TTMP and TSP != TSMP:
for catchRegex in TSMP:
resultCatch = re.findall(catchRegex, text)
Revision: 5136
Author: filnik
Date: 2008-03-16 12:30:29 +0000 (Sun, 16 Mar 2008)
Log Message:
-----------
Adding the option to select the namespace of the protected pages, added an example
Modified Paths:
--------------
trunk/pywikipedia/blockpageschecker.py
Modified: trunk/pywikipedia/blockpageschecker.py
===================================================================
--- trunk/pywikipedia/blockpageschecker.py 2008-03-16 11:45:10 UTC (rev 5135)
+++ trunk/pywikipedia/blockpageschecker.py 2008-03-16 12:30:29 UTC (rev 5136)
@@ -20,8 +20,9 @@
Argument can also be given as "-page:pagetitle". You can
give this parameter multiple times to edit multiple pages.
--protectedpages Check all the blocked pages (useful when you have not categories
- or when you have problems with them.
+-protectedpages: Check all the blocked pages (useful when you have not categories
+ or when you have problems with them. (add the namespace after ":" where
+ you want to check - default: 0)
Furthermore, the following command line parameters are supported:
@@ -46,6 +47,8 @@
python blockpageschecker.py -cat:Geography -always
+python blockpageschecker.py -debug -protectedpages:4
+
"""
#
# (C) Monobi a.k.a. Wikihermit, 2007
@@ -177,9 +180,10 @@
return ('autoconfirmed-move', catchRegex)
return ('editable', r'\A\n')
-def ProtectedPagesData():
+def ProtectedPagesData(namespace = 0):
""" Yield all the pages blocked, using Special:ProtectedPages """
- url = '/w/index.php?title=Speciale%3AProtectedPages&namespace=0&type=edit&level=0&size='
+ # Avoid problems of encoding and stuff like that, let it divided please
+ url = '/w/index.php?title=Speciale%3AProtectedPages' + '&namespace=%s&type=edit&level=0&size=' % namespace
site = wikipedia.getSite()
parser_text = site.getUrl(url)
while 1:
@@ -225,9 +229,9 @@
status = '%s' % level
return status
-def ProtectedPages():
+def ProtectedPages(namespace = 0):
""" Return only the wiki page object and not the tuple with all the data as above """
- for data in ProtectedPagesData():
+ for data in ProtectedPagesData(namespace):
yield wikipedia.Page(wikipedia.getSite(), data[0])
def debugQuest(site, page):
@@ -269,8 +273,11 @@
moveBlockCheck = True
elif arg == '-debug':
debug = True
- elif arg == '-protectedpages':
- generator = ProtectedPages()
+ elif arg.startswith('-protectedpages'):
+ if len(arg) == 15:
+ generator = ProtectedPages(0)
+ else:
+ generator = ProtectedPages(int(arg[16:]))
elif arg.startswith('-page'):
if len(arg) == 5:
generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
Bugs item #1915362, was opened at 2008-03-16 10:54
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1915362&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Bernhard Mayr (falk_steinhauer)
Assigned to: Nobody/Anonymous (nobody)
Summary: IOError in "category.py tree"
Initial Comment:
Line 748 of category.py must be changed from
if not os.path.isabs(filename):
to
if not os.path.isabs(filename) and filename:
Otherwise, calling "category.py tree" would lead to an IOError when the tree is meant to be printed to stdout (see output below).
C:\Python\pywikipedia>category.py tree
Checked for running processes. 1 processes currently running, including the current process.
For which category do you want to create a tree view? inoffiziell
Please enter the name of the file where the tree should be saved, or press enter to simply show the tree:
Getting [[Kategorie:Inoffiziell]]...
Getting [[Kategorie:Alternatives Aventurien]]...
...and so on...
Getting [[Kategorie:Inoffizieller Index]]...
Saving results in C:\Python\pywikipedia
Dumping to category.dump.bz2, please wait...
Traceback (most recent call last):
File "C:\Python\pywikipedia\category.py", line 933, in <module>
bot.run()
File "C:\Python\pywikipedia\category.py", line 826, in run
f = codecs.open(self.filename, 'a', 'utf-8')
File "C:\Python\lib\codecs.py", line 817, in open
file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 13] Permission denied: 'C:\\Python\\pywikipedia'
Here you can see that "filename" is the name of a folder, not a file. Opening a folder for writing is not possible.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1915362&group_…
Revision: 5132
Author: filnik
Date: 2008-03-15 17:31:37 +0000 (Sat, 15 Mar 2008)
Log Message:
-----------
Adding pregenerator (pretty faster now! :-)), now checking the block of the page through APIs (more stable than HTML check) and adding the check in protectedpages special page so that you can check only that pages (also in order to find the pages without templates if you use the -debug option). So.. great update :-)
Modified Paths:
--------------
trunk/pywikipedia/blockpageschecker.py
Modified: trunk/pywikipedia/blockpageschecker.py
===================================================================
--- trunk/pywikipedia/blockpageschecker.py 2008-03-15 12:21:15 UTC (rev 5131)
+++ trunk/pywikipedia/blockpageschecker.py 2008-03-15 17:31:37 UTC (rev 5132)
@@ -182,19 +182,73 @@
url = '/w/index.php?title=Speciale%3AProtectedPages&namespace=0&type=edit&level=0&size='
site = wikipedia.getSite()
parser_text = site.getUrl(url)
- #<li><a href="/wiki/Pagina_principale" title="Pagina principale">Pagina principale</a> <small>(6.522 byte)</small> (protetta)</li>
- m = re.findall(r'<li><a href=".*?" title=".*?">(.*?)</a>.*?<small>\((.*?)\)</small>.*?\((.*?)\)</li>', parser_text)
- for data in m:
- title = data[0]
- size = data[1]
- status = data[2]
- yield (title, size, status)
+ while 1:
+ #<li><a href="/wiki/Pagina_principale" title="Pagina principale">Pagina principale</a> <small>(6.522 byte)</small> (protetta)</li>
+ m = re.findall(r'<li><a href=".*?" title=".*?">(.*?)</a>.*?<small>\((.*?)\)</small>.*?\((.*?)\)</li>', parser_text)
+ for data in m:
+ title = data[0]
+ size = data[1]
+ status = data[2]
+ yield (title, size, status)
+ nextpage = re.findall(r'<.ul>\(.*?\).*?\(.*?\).*?\(<a href="(.*?)".*?</a>\) +?\(<a href=', parser_text)
+ if nextpage != []:
+ parser_text = site.getUrl(nextpage[0].replace('&', '&'))
+ continue
+ else:
+ break
+def getRestrictions(page):
+ api_url = '/w/api.php?action=query&prop=info&inprop=protection&format=xml&titles=%s' % page.urlname()
+ text = wikipedia.getSite().getUrl(api_url)
+ if not 'pageid="' in text: # Avoid errors when you can't reach the APIs
+ raise wikipedia.Error("API problem, can't reach the APIs!")
+ match = re.findall(r'<protection>(.*?)</protection>', text)
+ status = 'editable'
+ if match != []:
+ text = match[0] # If there's the block "protection" take the settings inside it.
+ api_found = re.compile(r'<pr type="(.*?)" level="(.*?)" expiry="(.*?)" />')
+ results = api_found.findall(text)
+ if results != []:
+ if len(results) < 2:
+ result = results[0]
+ type_of_protection = result[0]; level = result[1]; expiry = result[2]
+ if type_of_protection == 'move':
+ status = '%s-%s' % (level, type_of_protection)
+ else:
+ status = '%s' % level
+ else:
+ for result in results:
+ # If blocked both move and edit, select edit.
+ if result[0] == 'move':
+ continue
+ type_of_protection = result[0]; level = result[1]; expiry = result[2]
+ status = '%s' % level
+ return status
+
def ProtectedPages():
""" Return only the wiki page object and not the tuple with all the data as above """
for data in ProtectedPagesData():
yield wikipedia.Page(wikipedia.getSite(), data[0])
+def debugQuest(site, page):
+ quest = wikipedia.input(u'Do you want to open the page on your [b]rowser, [g]ui or [n]othing?')
+ pathWiki = site.family.nicepath(site.lang)
+ url = 'http://%s%s%s?&redirect=no' % (wikipedia.getSite().hostname(), pathWiki, page.urlname())
+ while 1:
+ if quest.lower() in ['b', 'B']:
+ webbrowser.open(url)
+ break
+ elif quest.lower() in ['g', 'G']:
+ import editarticle
+ editor = editarticle.TextEditor()
+ text = editor.edit(page.get())
+ break
+ elif quest.lower() in ['n', 'N']:
+ break
+ else:
+ wikipedia.output(u'wrong entry, type "b", "g" or "n"')
+ continue
+
def main():
""" Main Function """
# Loading the comments
@@ -248,18 +302,20 @@
generator.append(pageCat)
wikipedia.output(u'Categories loaded, start!')
# Main Loop
- for page in generator:
+ preloadingGen = pagegenerators.PreloadingGenerator(generator, pageNumber = 60)
+ for page in preloadingGen:
pagename = page.title()
wikipedia.output('Loading %s...' % pagename)
try:
text = page.get()
- editRestriction = page.editRestriction
- moveRestriction = page.moveRestriction
+ editRestriction = getRestrictions(page)
except wikipedia.NoPage:
wikipedia.output("%s doesn't exist! Skipping..." % pagename)
continue
except wikipedia.IsRedirectPage:
wikipedia.output("%s is a redirect! Skipping..." % pagename)
+ if debug:
+ debugQuest(site, page)
continue
# Understand, according to the template in the page, what should be the protection
# and compare it with what there really is.
@@ -273,7 +329,7 @@
else:
wikipedia.output(u'The page is protected to the sysop, but the template seems not correct. Fixing...')
text = re.sub(TemplateInThePage[1], TNR[1], text)
- elif moveBlockCheck and moveRestriction == 'sysop':
+ elif moveBlockCheck and editRestriction == 'sysop-move':
if TemplateInThePage[0] == 'sysop-move' and TTMP != None:
wikipedia.output(u'The page is protected from moving to the sysop, skipping...')
continue
@@ -287,7 +343,7 @@
else:
wikipedia.output(u'The page is editable only for the autoconfirmed users, but the template seems not correct. Fixing...')
text = re.sub(TemplateInThePage[1], TNR[0], text)
- elif moveBlockCheck == True and moveRestriction == 'autoconfirmed' and TSMP != None:
+ elif moveBlockCheck == True and editRestriction == 'autoconfirmed-move' and TSMP != None:
if TemplateInThePage[0] == 'autoconfirmed-move':
wikipedia.output(u'The page is movable only for the autoconfirmed users, skipping...')
continue
@@ -344,23 +400,8 @@
else:
wikipedia.output(u'No changes! Strange! Check the regex!')
if debug == True:
- quest = wikipedia.input(u'Do you want to open the page on your [b]rowser, [g]ui or [n]othing?')
- pathWiki = site.family.nicepath(site.lang)
- url = 'http://%s%s%s' % (wikipedia.getSite().hostname(), pathWiki, page.urlname())
- while 1:
- if quest.lower() in ['b', 'B']:
- webbrowser.open(url)
- break
- elif quest.lower() in ['g', 'G']:
- import editarticle
- editor = editarticle.TextEditor()
- text = editor.edit(page.get())
- break
- elif quest.lower() in ['n', 'N']:
- break
- else:
- wikipedia.output(u'wrong entry, type "b", "g" or "n"')
- continue
+ debugQuest(site, page)
+
if __name__ == "__main__":
try:
Revision: 5131
Author: filnik
Date: 2008-03-15 12:21:15 +0000 (Sat, 15 Mar 2008)
Log Message:
-----------
adding preloading gen
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-03-14 19:48:52 UTC (rev 5130)
+++ trunk/pywikipedia/checkimages.py 2008-03-15 12:21:15 UTC (rev 5131)
@@ -839,7 +839,8 @@
# No settings found, No problem, continue.
else: wikipedia.output(u'\t >> No additional settings found! <<')
# Not the main, but the most important loop.
- for image in generator:
+ preloadingGen = pagegenerators.PreloadingGenerator(generator, pageNumber = 60)
+ for image in preloadingGen:
# If I don't inizialize the generator, wait part and skip part are useless
if wait:
printWithTimeZone(u'Waiting %s seconds before checking the images,' % wait_number)
Bugs item #1914786, was opened at 2008-03-15 12:13
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1914786&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 8
Private: No
Submitted By: Filnik (filnik)
Assigned to: Nobody/Anonymous (nobody)
Summary: Great Waste of CPU and/or RAM on interwiki.py and replace.py
Initial Comment:
Hello, I have a lot of regexes to run with replace.py; I've grouped them in the fixes.py file and made it run through the whole Italian Wikipedia. But, after 1-2 days of running, the program was using this amount of resources:
filnik 32709 8.9 11.1 1074936 903376 pts/113 Sl+ Mar12 357:09 python2.5 pynik <etc...>
Re-running the bot (from the page that it has reached) I have that:
filnik 31372 2.8 0.2 193564 21992 pts/113 Sl+ 11:58 0:17 python2.5 pynik <etc...>
That's A LOT less than before! So, what is happening? Why doesn't the bot release the resources that it uses when they aren't needed any more?
The same happens for interwiki.py. I have tried using python2.5 instead of 2.4, but without any results. I've also tried using "del" in Python, but no results with that either.
So, any Idea? Filnik
P.S. Fixing that bug would improve the use of interwiki.py on the toolserver and of replace.py — I think the most used scripts — so I've set the importance to 8.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1914786&group_…
Revision: 5130
Author: filnik
Date: 2008-03-14 19:48:52 +0000 (Fri, 14 Mar 2008)
Log Message:
-----------
Adding a generator in order to add new functions in future, no time right now :(
Modified Paths:
--------------
trunk/pywikipedia/blockpageschecker.py
Modified: trunk/pywikipedia/blockpageschecker.py
===================================================================
--- trunk/pywikipedia/blockpageschecker.py 2008-03-14 18:12:43 UTC (rev 5129)
+++ trunk/pywikipedia/blockpageschecker.py 2008-03-14 19:48:52 UTC (rev 5130)
@@ -20,6 +20,8 @@
Argument can also be given as "-page:pagetitle". You can
give this parameter multiple times to edit multiple pages.
+-protectedpages Check all the blocked pages (useful when you have not categories
+ or when you have problems with them.
Furthermore, the following command line parameters are supported:
@@ -46,7 +48,7 @@
"""
#
-# (C) Wikihermit a.k.a. Monobi, 2007
+# (C) Monobi a.k.a. Wikihermit, 2007
# (C) Filnik, 2007-2008
#
# Distributed under the terms of the MIT license.
@@ -155,6 +157,7 @@
################## -- Edit above! -- ##################
def understandBlock(text, TTP, TSP, TSMP, TTMP):
+ """ Understand if the page is blocked and if it has the right template """
for catchRegex in TTP: # TTP = templateTotalProtection
resultCatch = re.findall(catchRegex, text)
if resultCatch != []:
@@ -174,7 +177,26 @@
return ('autoconfirmed-move', catchRegex)
return ('editable', r'\A\n')
+def ProtectedPagesData():
+ """ Yield all the pages blocked, using Special:ProtectedPages """
+ url = '/w/index.php?title=Speciale%3AProtectedPages&namespace=0&type=edit&level=0&size='
+ site = wikipedia.getSite()
+ parser_text = site.getUrl(url)
+ #<li><a href="/wiki/Pagina_principale" title="Pagina principale">Pagina principale</a> <small>(6.522 byte)</small> (protetta)</li>
+ m = re.findall(r'<li><a href=".*?" title=".*?">(.*?)</a>.*?<small>\((.*?)\)</small>.*?\((.*?)\)</li>', parser_text)
+ for data in m:
+ title = data[0]
+ size = data[1]
+ status = data[2]
+ yield (title, size, status)
+
+def ProtectedPages():
+ """ Return only the wiki page object and not the tuple with all the data as above """
+ for data in ProtectedPagesData():
+ yield wikipedia.Page(wikipedia.getSite(), data[0])
+
def main():
+ """ Main Function """
# Loading the comments
global templateToRemove; global categoryToCheck; global comment; global project_inserted
if config.mylang not in project_inserted:
@@ -193,6 +215,8 @@
moveBlockCheck = True
elif arg == '-debug':
debug = True
+ elif arg == '-protectedpages':
+ generator = ProtectedPages()
elif arg.startswith('-page'):
if len(arg) == 5:
generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]