Revision: 6640
Author: russblau
Date: 2009-04-20 13:12:19 +0000 (Mon, 20 Apr 2009)
Log Message:
-----------
Patch #2776167 from liangent
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2009-04-19 17:25:22 UTC (rev 6639)
+++ branches/rewrite/pywikibot/site.py 2009-04-20 13:12:19 UTC (rev 6640)
@@ -173,7 +173,7 @@
"""Calls to methods not defined in this object are passed to Family."""
if hasattr(self.__class__, attr):
- return self.__class__.attr
+ return getattr(self.__class__, attr)
try:
method = getattr(self.family, attr)
f = lambda *args, **kwargs: \
@@ -617,7 +617,7 @@
self.nocapitalize = self.code in self.family.nocapitalize
return
-# ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__()
+ # ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__()
# Calculating valid languages took quite long, so we calculate it once
# in initialization instead of each time it is used.
self._validlanguages = []
Revision: 6639
Author: shizhao
Date: 2009-04-19 17:25:22 +0000 (Sun, 19 Apr 2009)
Log Message:
-----------
Add new parameter: "-day".
If a dead link was first found more than x days ago, it should probably be fixed or removed. If the parameter is not set, the default is 7 days.
Modified Paths:
--------------
trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2009-04-19 15:52:04 UTC (rev 6638)
+++ trunk/pywikipedia/weblinkchecker.py 2009-04-19 17:25:22 UTC (rev 6639)
@@ -46,7 +46,9 @@
-notalk Overrides the report_dead_links_on_talk config variable, disabling
the feature.
-
+-day the first time found dead link longer than x day ago, it should
+ probably be fixed or removed. if no set, default is 7 day.
+
All other parameters will be regarded as part of the title of a single page,
and the bot will only work on that single page.
@@ -522,7 +524,7 @@
wikipedia.output('*Link to %s in [[%s]] is back alive.' % (self.url, self.page.title()))
else:
wikipedia.output('*[[%s]] links to %s - %s.' % (self.page.title(), self.url, message))
- self.history.setLinkDead(self.url, message, self.page)
+ self.history.setLinkDead(self.url, message, self.page, day)
class History:
'''
@@ -591,7 +593,7 @@
self.reportThread.report(url, errorReport, containingPage, archiveURL)
- def setLinkDead(self, url, error, page):
+ def setLinkDead(self, url, error, page, day):
"""
Adds the fact that the link was found dead to the .dat file.
"""
@@ -604,10 +606,10 @@
# ago, we won't save it in the history this time.
if timeSinceLastFound > 60 * 60:
self.historyDict[url].append((page.title(), now, error))
- # if the first time we found this link longer than a week ago,
+ # if the first time we found this link longer than x day ago (default is a week),
# it should probably be fixed or removed. We'll list it in a file
# so that it can be removed manually.
- if timeSinceFirstFound > 60 * 60 * 24 * 7:
+ if timeSinceFirstFound > 60 * 60 * 24 * day:
# search for archived page
iac = InternetArchiveConsulter(url)
archiveURL = iac.getArchiveURL()
@@ -787,7 +789,7 @@
# that are also used by other scripts and that determine on which pages
# to work on.
genFactory = pagegenerators.GeneratorFactory()
-
+ day = 7
for arg in wikipedia.handleArgs():
if arg == '-talk':
config.report_dead_links_on_talk = True
@@ -802,6 +804,9 @@
gen = RepeatPageGenerator()
elif arg.startswith('-ignore:'):
HTTPignore.append(int(arg[8:]))
+ elif arg.startswith('-day:'):
+ global day
+ day = arg[5:]
else:
if not genFactory.handleArg(arg):
singlePageTitle.append(arg)
Revision: 6634
Author: filnik
Date: 2009-04-19 11:08:47 +0000 (Sun, 19 Apr 2009)
Log Message:
-----------
Bugfix: adding a dummy-edit feature in order to prevent errors in detection via APIs ;)
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2009-04-19 10:01:29 UTC (rev 6633)
+++ trunk/pywikipedia/checkimages.py 2009-04-19 11:08:47 UTC (rev 6634)
@@ -1116,23 +1116,38 @@
"""
self.seems_ok = False
self.license_found = None
- self.hiddentemplates = self.loadHiddenTemplates()
- self.licenses_found = self.image.getTemplates()
self.whiteTemplatesFound = False
regex_find_licenses = re.compile(r'(?<!\{)\{\{(?:[Tt]emplate:|)([^{]+?)[|\n<}]', re.DOTALL)
- templatesInTheImageRaw = regex_find_licenses.findall(self.imageCheckText)
- if self.licenses_found == [] and templatesInTheImageRaw != []:
- raise wikipedia.Error("APIs seems down. No templates found with them but actually there are templates used in the image's page!")
- self.allLicenses = list()
- if self.list_licenses == []:
- raise wikipedia.Error(u'No licenses allowed provided, add that option to the code to make the script working correctly')
- # Found the templates ONLY in the image's description
- for template_selected in templatesInTheImageRaw:
- for templateReal in self.licenses_found:
- if self.convert_to_url(template_selected).lower().replace('template%3a', '') == \
- self.convert_to_url(templateReal.title()).lower().replace('template%3a', ''):
- if templateReal not in self.allLicenses: # don't put the same template, twice.
- self.allLicenses.append(templateReal)
+ dummy_edit = False
+ while 1:
+ self.hiddentemplates = self.loadHiddenTemplates()
+ self.licenses_found = self.image.getTemplates()
+ templatesInTheImageRaw = regex_find_licenses.findall(self.imageCheckText)
+ if self.licenses_found == [] and templatesInTheImageRaw != []:
+ raise wikipedia.Error("APIs seems down. No templates found with them but actually there are templates used in the image's page!")
+ self.allLicenses = list()
+ if self.list_licenses == []:
+ raise wikipedia.Error(u'No licenses allowed provided, add that option to the code to make the script working correctly')
+ # Found the templates ONLY in the image's description
+ for template_selected in templatesInTheImageRaw:
+ for templateReal in self.licenses_found:
+ if self.convert_to_url(template_selected).lower().replace('template%3a', '') == \
+ self.convert_to_url(templateReal.title()).lower().replace('template%3a', ''):
+ if templateReal not in self.allLicenses: # don't put the same template, twice.
+ self.allLicenses.append(templateReal)
+ # perform a dummy edit, sometimes there are problems with the Job queue
+ if self.allLicenses == self.licenses_found and not dummy_edit and self.licenses_found != []:
+ wikipedia.output(u"Seems that there's a problem regarding the Job queue, trying with a dummy edit to solve the problem.")
+ try:
+
+ self.imageCheckText = self.image.get()
+ self.image.put(self.imageCheckText, 'Bot: Dummy edit,if you see this comment write [[User talk:%s|here]].' % self.botnick)
+ except (wikipedia.NoPage, wikipedia.IsRedirectPage):
+ return (None, list())
+ dummy_edit = True
+ else:
+ break
+
if self.licenses_found != []:
self.templateInList()
if self.license_found == None and self.allLicenses != list():
Revision: 6633
Author: filnik
Date: 2009-04-19 10:01:29 +0000 (Sun, 19 Apr 2009)
Log Message:
-----------
Bugfix: a double non-existing license wasn't detected correctly. Now it is. Thanks cosoleto :)
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2009-04-19 09:25:42 UTC (rev 6632)
+++ trunk/pywikipedia/checkimages.py 2009-04-19 10:01:29 UTC (rev 6633)
@@ -1136,17 +1136,18 @@
if self.licenses_found != []:
self.templateInList()
if self.license_found == None and self.allLicenses != list():
- iterLicenses = self.allLicenses
- for template in iterLicenses:
+ # If only iterlist = self.AllLicenses if I remove something
+ # from iterlist it will be remove from self.AllLicenses too
+ iterlist = list(self.allLicenses)
+ for template in iterlist:
try:
template.pageAPInfo()
except wikipedia.IsRedirectPage:
template = template.getRedirectTarget()
except wikipedia.NoPage:
- self.allLicenses.remove(template)
+ self.allLicenses.remove(template)
if self.allLicenses != list():
self.license_found = self.allLicenses[0].title()
-
self.some_problem = False # If it has "some_problem" it must check
# the additional settings.
# if self.settingsData, use addictional settings