jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/329360 )
Change subject: Query string dictionary parameter for pwb.comms.http.fetch and friends
......................................................................
Query string dictionary parameter for pwb.comms.http.fetch and friends
This will break usages of the affected methods that rely on the order of
the parameters, rather than specifying the parameters dictionary-style.
* New "params" parameter to pass unencoded query string parameters as a
dictionary to pywikibot.comms.http.{fetch,request,_enqueue}
* PetScanPageGenerator has been updated to use this new parameter.
* "data" parameter added to fetch/request/_enqueue as an alias of "body"
  to make the method parameters correspond to requests.Session.request
* Unit tests for "params" and "data" parameters
Bug: T153559
Change-Id: I96da6d4c719aba24d35e58dd5f0694e628be86a3
---
M pywikibot/comms/http.py
M pywikibot/comms/threadedhttp.py
M pywikibot/pagegenerators.py
M tests/http_tests.py
4 files changed, 105 insertions(+), 27 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 7bf6235..3006dde 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -276,8 +276,8 @@
@deprecate_arg('ssl', None)
-def request(site=None, uri=None, method='GET', body=None, headers=None,
- **kwargs):
+def request(site=None, uri=None, method='GET', params=None, body=None,
+ headers=None, data=None, **kwargs):
"""
Request to Site with default error handling and response decoding.
@@ -299,12 +299,17 @@
@return: The received data
@rtype: a unicode string
"""
+ # body and data parameters both map to the data parameter of
+ # requests.Session.request.
+ if data:
+ body = data
+
assert(site or uri)
if not site:
# +1 because of @deprecate_arg
issue_deprecation_warning(
'Invoking http.request without argument site',
'http.fetch()', 3)
- r = fetch(uri, method, body, headers, **kwargs)
+ r = fetch(uri, method, params, body, headers, **kwargs)
return r.content
baseuri = site.base_url(uri)
@@ -320,7 +325,7 @@
headers['user-agent'] = user_agent(site, format_string)
- r = fetch(baseuri, method, body, headers, **kwargs)
+ r = fetch(baseuri, method, params, body, headers, **kwargs)
return r.content
@@ -350,6 +355,7 @@
def _http_process(session, http_request):
method = http_request.method
uri = http_request.uri
+ params = http_request.params
body = http_request.body
headers = http_request.headers
if PY2 and headers:
@@ -370,8 +376,8 @@
# Note that the connections are pooled which mean that a future
# HTTPS request can succeed even if the certificate is invalid and
# verify=True, when a request with verify=False happened before
- response = session.request(method, uri, data=body, headers=headers,
- auth=auth, timeout=timeout,
+ response = session.request(method, uri, params=params, data=body,
+ headers=headers, auth=auth, timeout=timeout,
verify=not ignore_validation)
except Exception as e:
http_request.data = e
@@ -407,7 +413,8 @@
warning('Http response status {0}'.format(request.data.status_code))
-def _enqueue(uri, method="GET", body=None, headers=None, **kwargs):
+def _enqueue(uri, method="GET", params=None, body=None, headers=None,
data=None,
+ **kwargs):
"""
Enqueue non-blocking threaded HTTP request with callback.
@@ -432,6 +439,11 @@
@type callbacks: list of callable
@rtype: L{threadedhttp.HttpRequest}
"""
+ # body and data parameters both map to the data parameter of
+ # requests.Session.request.
+ if data:
+ body = data
+
default_error_handling = kwargs.pop('default_error_handling', None)
callback = kwargs.pop('callback', None)
@@ -451,13 +463,14 @@
all_headers['user-agent'] = user_agent(None, user_agent_format_string)
request = threadedhttp.HttpRequest(
- uri, method, body, all_headers, callbacks, **kwargs)
+ uri, method, params, body, all_headers, callbacks, **kwargs)
_http_process(session, request)
return request
-def fetch(uri, method="GET", body=None, headers=None,
- default_error_handling=True, use_fake_user_agent=False, **kwargs):
+def fetch(uri, method="GET", params=None, body=None, headers=None,
+ default_error_handling=True, use_fake_user_agent=False, data=None,
+ **kwargs):
"""
Blocking HTTP request.
@@ -474,6 +487,11 @@
overridden by domain in config.
@rtype: L{threadedhttp.HttpRequest}
"""
+ # body and data parameters both map to the data parameter of
+ # requests.Session.request.
+ if data:
+ body = data
+
# Change user agent depending on fake UA settings.
# Set header to new UA if needed.
headers = headers or {}
@@ -489,7 +507,7 @@
elif use_fake_user_agent is True:
headers['user-agent'] = fake_user_agent()
- request = _enqueue(uri, method, body, headers, **kwargs)
+ request = _enqueue(uri, method, params, body, headers, **kwargs)
assert(request._data is not None) # if there's no data in the answer we're
in trouble
# Run the error handling callback in the callers thread so exceptions
# may be caught.
diff --git a/pywikibot/comms/threadedhttp.py b/pywikibot/comms/threadedhttp.py
index a166929..03386cf 100644
--- a/pywikibot/comms/threadedhttp.py
+++ b/pywikibot/comms/threadedhttp.py
@@ -31,7 +31,7 @@
* an exception
"""
- def __init__(self, uri, method="GET", body=None, headers=None,
+ def __init__(self, uri, method="GET", params=None, body=None,
headers=None,
callbacks=None, charset=None, **kwargs):
"""
Constructor.
@@ -40,6 +40,7 @@
"""
self.uri = uri
self.method = method
+ self.params = params
self.body = body
self.headers = headers
if isinstance(charset, codecs.CodecInfo):
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 3db16a7..61dacdb 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -47,7 +47,6 @@
intersect_generators,
IteratorNextMixin,
filter_unique,
- PY2,
)
from pywikibot import date, config, i18n, xmlreader
@@ -55,13 +54,6 @@
from pywikibot.exceptions import ArgumentDeprecationWarning, UnknownExtension
from pywikibot.logentries import LogEntryFactory
from pywikibot.proofreadpage import ProofreadPage
-
-if PY2:
- from urllib import urlencode
- import urlparse
-else:
- import urllib.parse as urlparse
- from urllib.parse import urlencode
if sys.version_info[0] > 2:
basestring = (str, )
@@ -2840,14 +2832,9 @@
def query(self):
"""Query PetScan."""
- url = urlparse.urlunparse(('https', # scheme
- 'petscan.wmflabs.org', # netloc
- '', # path
- '', # params
- urlencode(self.opts), # query
- '')) # fragment
+ url = 'https://petscan.wmflabs.org'
- req = http.fetch(url)
+ req = http.fetch(url, params=self.opts)
j = json.loads(req.content)
raw_pages = j['*'][0]['a']['*']
for raw_page in raw_pages:
diff --git a/tests/http_tests.py b/tests/http_tests.py
index 0cb28bd..26663f1 100644
--- a/tests/http_tests.py
+++ b/tests/http_tests.py
@@ -9,6 +9,7 @@
__version__ = '$Id$'
+import json
import re
import warnings
@@ -556,6 +557,77 @@
self.assertIs(main_module_cookie_jar, http.cookie_jar)
+class QueryStringParamsTestCase(TestCase):
+
+ """
+ Test the query string parameter of request methods.
+
+ The /get endpoint of httpbin returns JSON that can include an 'args' key
with
+ urldecoded query string parameters.
+ """
+
+ sites = {
+ 'httpbin': {
+ 'hostname': 'httpbin.org',
+ },
+ }
+
+ def test_no_params(self):
+ """Test fetch method with no parameters."""
+ r = http.fetch(uri='https://httpbin.org/get', params={})
+ self.assertEqual(r.status, 200)
+
+ content = json.loads(r.content)
+ self.assertDictEqual(content['args'], {})
+
+ def test_unencoded_params(self):
+ """
+ Test fetch method with unencoded parameters, which should be encoded internally.
+
+ HTTPBin returns the args in their urldecoded form, so what we put in should be
+ the same as what we get out.
+ """
+ r = http.fetch(uri='https://httpbin.org/get',
params={'fish&chips': 'delicious'})
+ self.assertEqual(r.status, 200)
+
+ content = json.loads(r.content)
+ self.assertDictEqual(content['args'], {'fish&chips':
'delicious'})
+
+ def test_encoded_params(self):
+ """
+ Test fetch method with encoded parameters, which should be re-encoded
internally.
+
+ HTTPBin returns the args in their urldecoded form, so what we put in should be
+ the same as what we get out.
+ """
+ r = http.fetch(uri='https://httpbin.org/get',
+ params={'fish%26chips': 'delicious'})
+ self.assertEqual(r.status, 200)
+
+ content = json.loads(r.content)
+ self.assertDictEqual(content['args'], {'fish%26chips':
'delicious'})
+
+
+class DataBodyParameterTestCase(TestCase):
+ """Test that the data and body parameters of fetch/request methods are
equivalent."""
+
+ sites = {
+ 'httpbin': {
+ 'hostname': 'httpbin.org',
+ },
+ }
+
+ def test_fetch(self):
+ """Test that using the data parameter and body parameter produce
same results."""
+ r_data = http.fetch(uri='https://httpbin.org/post',
method='POST',
+ data={'fish&chips': 'delicious'})
+ r_body = http.fetch(uri='https://httpbin.org/post',
method='POST',
+ body={'fish&chips': 'delicious'})
+
+ self.assertDictEqual(json.loads(r_data.content),
+ json.loads(r_body.content))
+
+
if __name__ == '__main__': # pragma: no cover
try:
unittest.main()
--
To view, visit
https://gerrit.wikimedia.org/r/329360
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I96da6d4c719aba24d35e58dd5f0694e628be86a3
Gerrit-PatchSet: 6
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Sn1per <geofbot(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: jenkins-bot <>