jenkins-bot merged this change.

View Change

Approvals: Framawiki: Looks good to me, approved jenkins-bot: Verified
[IMPR] use retry-after value

- 'retry-after' from response_header is a recommended minimum number of
seconds that the client should wait before retrying. As a first step
we use this value when retrying a request due to maxlag error.
- fallback to the lag value found by lagpattern in the api warning,
fallback to 5 seconds if both values are lost.
- Retrieve the 'retry-after' value via http.request.
- Throttle.lag should not shorten the lagtime anymore because we expect
the right value from upstream. But keep the max delay.

See https://www.mediawiki.org/wiki/Manual:Maxlag_parameter

Bug: T144023
Change-Id: Ie9666ba7654ab071bfeda2be53b29c3c568e508d
---
M pywikibot/comms/http.py
M pywikibot/data/api.py
M pywikibot/throttle.py
M tests/api_tests.py
4 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 0ac5187..995f7ff 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -305,8 +305,6 @@
r = fetch(uri, method, params, body, headers, **kwargs)
return r.text

- baseuri = site.base_url(uri)
-
kwargs.setdefault("disable_ssl_certificate_validation",
site.ignore_certificate_error())

@@ -314,11 +312,12 @@
headers = {}
format_string = None
else:
- format_string = headers.get('user-agent', None)
-
+ format_string = headers.get('user-agent')
headers['user-agent'] = user_agent(site, format_string)

+ baseuri = site.base_url(uri)
r = fetch(baseuri, method, params, body, headers, **kwargs)
+ site.throttle.retry_after = int(r.response_headers.get('retry-after', 0))
return r.text


diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index b9c54e0..274aba4 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -2077,11 +2077,12 @@

if code == "maxlag":
lag = lagpattern.search(info)
+ pywikibot.log('Pausing due to database lag: ' + info)
if lag:
- pywikibot.log(
- u"Pausing due to database lag: " + info)
- self.site.throttle.lag(int(lag.group("lag")))
- continue
+ lag = lag.group('lag')
+ self.site.throttle.lag(int(lag or 0))
+ continue
+
elif code == 'help' and self.action == 'help':
# The help module returns an error result with the complete
# API information. As this data was requested, return the
diff --git a/pywikibot/throttle.py b/pywikibot/throttle.py
index ebad441..ab56ac7 100644
--- a/pywikibot/throttle.py
+++ b/pywikibot/throttle.py
@@ -65,6 +65,7 @@
# Free the process id after this many seconds:
self.releasepid = 1200

+ self.retry_after = 0 # set by http.request
self.delay = 0
self.checktime = 0
self.multiplydelay = multiplydelay
@@ -274,18 +275,27 @@
else:
self.last_read = time.time()

- def lag(self, lagtime):
+ def lag(self, lagtime=None):
"""Seize the throttle lock due to server lag.

- This will prevent any thread from accessing this site.
+ Usually the self.retry-after value from response_header of the last
+ request if available which will be used for wait time. Default value
+ set by api and stored in self.retry_after is 5. If neither retry_after
+ nor lagtime is set, fallback to 5.

+ This method is used by api.request. It will prevent any thread from
+ accessing this site.
+
+ @param lagtime: The time to wait for the next request which is the
+ last maxlag time from api warning. This is only used as a fallback
+ if self.retry-after isn't set.
+ @type lagtime: int
"""
started = time.time()
with self.lock:
- # start at 1/2 the current server lag time
- # wait at least 5 seconds but not more than 120 seconds
- delay = min(max(5, lagtime // 2), 120)
+ waittime = self.retry_after or lagtime or 5
+ # wait not more than 120 seconds
+ delay = min(waittime, 120)
# account for any time we waited while acquiring the lock
wait = delay - (time.time() - started)
-
self.wait(wait)
diff --git a/tests/api_tests.py b/tests/api_tests.py
index 041af5f..c98b482 100644
--- a/tests/api_tests.py
+++ b/tests/api_tests.py
@@ -1084,9 +1084,10 @@
pywikibot.warning(
'Wrong api.lagpattern regex, cannot retrieve lag value')
raise e
- value = mysite.throttle._lagvalue
- self.assertIsInstance(value, int)
- self.assertGreaterEqual(value, 0)
+ self.assertIsInstance(mythrottle._lagvalue, int)
+ self.assertGreaterEqual(mythrottle._lagvalue, 0)
+ self.assertIsInstance(mythrottle.retry_after, int)
+ self.assertGreaterEqual(mythrottle.retry_after, 0)

def test_individual_patterns(self):
"""Test api.lagpattern with example patterns."""

To view, visit change 307127. To unsubscribe, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie9666ba7654ab071bfeda2be53b29c3c568e508d
Gerrit-Change-Number: 307127
Gerrit-PatchSet: 17
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Framawiki <framawiki@tools.wmflabs.org>
Gerrit-Reviewer: John Vandenberg <jayvdb@gmail.com>
Gerrit-Reviewer: Magul <tomasz.magulski@gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw@arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki@gmail.com>
Gerrit-Reviewer: Multichill <maarten@mdammers.nl>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot <>