http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11020
Revision: 11020
Author: drtrigon
Date: 2013-01-31 17:36:03 +0000 (Thu, 31 Jan 2013)
Log Message:
-----------
catch AssertionError (if charset mismatches) but print warning
suppress output (if back_response=True) but still print error in verbose mode
warning if/before attempt to download big content (>10MB)
PEP 8 and documentation
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/comms/http.py
Modified: trunk/pywikipedia/pywikibot/comms/http.py
===================================================================
--- trunk/pywikipedia/pywikibot/comms/http.py 2013-01-30 23:56:18 UTC (rev 11019)
+++ trunk/pywikipedia/pywikibot/comms/http.py 2013-01-31 17:36:03 UTC (rev 11020)
@@ -58,20 +58,25 @@
return self._buffer[name]
-def request(site, uri, retry = None, sysop = False, data = None, compress = True,
- no_hostname = False, cookie_only=False, refer=None, back_response=False):
+def request(site, uri, retry=None, sysop=False, data=None, compress=True,
+ no_hostname=False, cookie_only=False, refer=None,
+ back_response=False):
"""
Low-level routine to get a URL from any source (may be the wiki).
Parameters:
- @param site - The Site to connect to.
- @param uri - The absolute uri, without the hostname.
- @param retry - If True, retries loading the page when a network error
- occurs.
- @param sysop - If True, the sysop account's cookie will be used.
- @param data - An optional dict providing extra post request
- parameters.
- @param cookie_only - Only return the cookie the server sent us back
+ @param site - The Site to connect to.
+ @param uri - The absolute uri, without the hostname.
+ @param retry - If True, retries loading the page when a network
+ error occurs.
+ @param sysop - If True, the sysop account's cookie will be used.
+ @param data - An optional dict providing extra post request
+ parameters.
+ @param compress - Accept compressed page content transfer also.
+ @param no_hostname - Do query to foreign host (any kind of web-server).
+ @param cookie_only - Only return the cookie the server sent us back
+ @param refer - ...
+ @param back_response - Return the addinfourl object from request too.
@return: Returns the HTML text of the page converted to unicode.
"""
@@ -114,20 +119,24 @@
f = buffered_addinfourl(MyURLopener.open(req))
# read & info can raise socket.error
+ headers = f.info()
+ if (int(headers.get('content-length', '-1')) > 1E7):
+ pywikibot.output(u'WARNING: Target is of huge size (>10MB) is '
+ u'that correct? Downloading will take some '
+ u'time, please be patient.')
text = f.read()
- headers = f.info()
break
except KeyboardInterrupt:
raise
except urllib2.HTTPError, e:
if e.code in [401, 404]:
raise PageNotFound(
-u'Page %s could not be retrieved. Check your family file.'
- % url)
+ u'Page %s could not be retrieved. Check your family file.'
+ % url)
elif e.code in [403]:
raise PageNotFound(
-u'Page %s could not be retrieved. Check your virus wall.'
- % url)
+ u'Page %s could not be retrieved. Check your virus wall.'
+ % url)
elif e.code == 504:
pywikibot.output(u'HTTPError: %s %s' % (e.code, e.msg))
if retry:
@@ -135,8 +144,9 @@
if retry_attempt > config.maxretries:
raise MaxTriesExceededError()
pywikibot.output(
-u"WARNING: Could not open '%s'.Maybe the server or\n your connection is down. Retrying in %i minutes..."
- % (url, retry_idle_time))
+ u"WARNING: Could not open '%s'.Maybe the server or\n "
+ u"your connection is down. Retrying in %i minutes..."
+ % (url, retry_idle_time))
time.sleep(retry_idle_time * 60)
# Next time wait longer,
# but not longer than half an hour
@@ -155,8 +165,9 @@
if retry_attempt > config.maxretries:
raise MaxTriesExceededError()
pywikibot.output(
-u"WARNING: Could not open '%s'. Maybe the server or\n your connection is down. Retrying in %i minutes..."
- % (url, retry_idle_time))
+ u"WARNING: Could not open '%s'. Maybe the server or\n your "
+ u"connection is down. Retrying in %i minutes..."
+ % (url, retry_idle_time))
time.sleep(retry_idle_time * 60)
retry_idle_time *= 2
if retry_idle_time > 30:
@@ -206,17 +217,29 @@
# UTF-8 as default
charset = 'utf-8'
# Check if this is the charset we expected
- site.checkCharset(charset)
+ try:
+ site.checkCharset(charset)
+ except AssertionError, e:
+ if (not back_response) or verbose:
+ pywikibot.output(u'%s' %e)
+ if no_hostname:
+ pywikibot.output(u'ERROR: Invalid charset found on %s.' % uri)
+ else:
+ pywikibot.output(u'ERROR: Invalid charset found on %s://%s%s.'
+ % (site.protocol(), site.hostname(), uri))
# Convert HTML to Unicode
try:
text = unicode(text, charset, errors = 'strict')
except UnicodeDecodeError, e:
- if verbose:
+ if (not back_response) or verbose:
pywikibot.output(u'%s' %e)
- if no_hostname:
- pywikibot.output(u'ERROR: Invalid characters found on %s, replaced by \\ufffd.' % uri)
- else:
- pywikibot.output(u'ERROR: Invalid characters found on %s://%s%s, replaced by \\ufffd.' % (site.protocol(), site.hostname(), uri))
+ if no_hostname:
+ pywikibot.output(u'ERROR: Invalid characters found on %s, '
+ u'replaced by \\ufffd.' % uri)
+ else:
+ pywikibot.output(u'ERROR: Invalid characters found on %s://%s%s, '
+ u'replaced by \\ufffd.'
+ % (site.protocol(), site.hostname(), uri))
# We use error='replace' in case of bad encoding.
text = unicode(text, charset, errors = 'replace')