Revision: 8090
Author: xqt
Date: 2010-04-14 13:18:54 +0000 (Wed, 14 Apr 2010)
Log Message:
-----------
weblinkchecker.py - stripping ")" (bug #2985621, fix #2985625 by masti)
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/textlib.py
Modified: trunk/pywikipedia/pywikibot/textlib.py
===================================================================
--- trunk/pywikipedia/pywikibot/textlib.py 2010-04-14 09:50:27 UTC (rev 8089)
+++ trunk/pywikipedia/pywikibot/textlib.py 2010-04-14 13:18:54 UTC (rev 8090)
@@ -677,10 +677,10 @@
# RFC 2396 says that URLs may only contain certain characters.
# For this regex we also accept non-allowed characters, so that the bot
# will later show these links as broken ('Non-ASCII Characters in URL').
- # Note: While allowing parenthesis inside URLs, MediaWiki will regard
- # right parenthesis at the end of the URL as not part of that URL.
- # The same applies to dot, comma, colon and some other characters.
- notAtEnd = '\]\s\)\.:;,<>"\|'
+ # Note: While allowing dots inside URLs, MediaWiki will regard
+ # dots at the end of the URL as not part of that URL.
+ # The same applies to comma, colon and some other characters.
+ notAtEnd = '\]\s\.:;,<>"\|'
# So characters inside the URL can be anything except whitespace,
# closing squared brackets, quotation marks, greater than and less
# than, and the last character also can't be parenthesis or another
Revision: 8089
Author: xqt
Date: 2010-04-14 09:50:27 +0000 (Wed, 14 Apr 2010)
Log Message:
-----------
Test whether a content is not None, enable empty contents.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-04-13 12:24:36 UTC (rev 8088)
+++ trunk/pywikipedia/wikipedia.py 2010-04-14 09:50:27 UTC (rev 8089)
@@ -688,7 +688,7 @@
raise BadTitle('BadTitle: %s' % self)
elif 'revisions' in pageInfo: #valid Title
lastRev = pageInfo['revisions'][0]
- if lastRev['*']:
+ if lastRev['*'] is not None:
textareaFound = True
# I got page date with 'revisions' in pageInfo but
# lastRev['*'] = False instead of the content. The Page itself was
Revision: 8081
Author: xqt
Date: 2010-04-12 15:46:32 +0000 (Mon, 12 Apr 2010)
Log Message:
-----------
Limit the queue size for put_async() by config.max_queue_size
Modified Paths:
--------------
trunk/pywikipedia/config.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2010-04-12 15:36:23 UTC (rev 8080)
+++ trunk/pywikipedia/config.py 2010-04-12 15:46:32 UTC (rev 8081)
@@ -466,6 +466,12 @@
# up to 30 minutes)
retry_on_fail = True
+# How many pages should be put to a queue in asynchronous mode.
+# If max_queue_size is <= 0, the queue size is infinite.
+# Increasing this value will use more memory but could speed up
+# processing. The higher the value, the smaller this effect becomes.
+max_queue_size = 64
+
# End of configuration section
# ============================
# System-level and User-level changes.
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-04-12 15:36:23 UTC (rev 8080)
+++ trunk/pywikipedia/wikipedia.py 2010-04-12 15:46:32 UTC (rev 8081)
@@ -7449,7 +7449,7 @@
raise
output(u'Sorry, no help available for %s' % moduleName)
-page_put_queue = Queue.Queue()
+page_put_queue = Queue.Queue(config.max_queue_size)
def async_put():
"""Daemon; take pages from the queue and try to save them on the wiki."""
while True: