jenkins-bot has submitted this change and it was merged.
Change subject: Make sure all variables passed to request are str
......................................................................
Make sure all variables passed to request are str
All parameters passed to request should be bytestrings (str). If any
of them is accidentally unicode, Python will try to convert all bytestrings
to unicode via str.decode(sys.getdefaultencoding()). This is problematic,
because non-ASCII data will then raise a UnicodeDecodeError, as shown in [1].
[1] http://lists.wikimedia.org/pipermail/pywikipedia-l/2013-August/008218.html
Change-Id: I1b66a31c8752dee1f950d425d532479f1b671926
---
M wikipedia.py
1 file changed, 4 insertions(+), 4 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/wikipedia.py b/wikipedia.py
index 976a310..4e3a543 100644
--- a/wikipedia.py
+++ b/wikipedia.py
@@ -6488,12 +6488,12 @@
address = address[:-1]
headers = {
- 'User-agent': useragent,
+ 'User-agent': str(useragent),
'Content-Length': str(len(data)),
- 'Content-type':contentType,
+ 'Content-type': str(contentType),
}
if cookies:
- headers['Cookie'] = cookies
+ headers['Cookie'] = str(cookies)
if compress:
headers['Accept-encoding'] = 'gzip'
@@ -6507,7 +6507,7 @@
retry_attempt = 0
while True:
try:
- request = urllib2.Request(url, data, headers)
+ request = urllib2.Request(str(url), str(data), headers)
f = MyURLopener.open(request)
# read & info can raise socket.error
--
To view, visit https://gerrit.wikimedia.org/r/80228
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I1b66a31c8752dee1f950d425d532479f1b671926
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: hostname and paths should be str in generated family files
......................................................................
hostname and paths should be str in generated family files
All parameters passed to request should be bytestrings (str). If any
of them is accidentally unicode, Python will try to convert all bytestrings
to unicode via str.decode(sys.getdefaultencoding()). This is problematic,
because non-ASCII data will then raise a UnicodeDecodeError, as shown in [1].
The url will be converted to str on the postData level, but it is also good
to prevent it from being unicode in the first place.
See also: I1b66a31c8752dee1f950d425d532479f1b671926
[1] http://lists.wikimedia.org/pipermail/pywikipedia-l/2013-August/008218.html
Change-Id: I5eb13d8853b6ad08b48f40b5ad881616a2f4fd2a
---
M generate_family_file.py
1 file changed, 2 insertions(+), 2 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/generate_family_file.py b/generate_family_file.py
index 06464c9..aef4d11 100644
--- a/generate_family_file.py
+++ b/generate_family_file.py
@@ -171,7 +171,7 @@
""".lstrip() % {'url': self.base_url, 'name': self.name})
for w in self.wikis.itervalues():
- f.write(" '%(lang)s': u'%(hostname)s',\n" % {'lang': w.lang, 'hostname': urlparse(w.server).netloc})
+ f.write(" '%(lang)s': '%(hostname)s',\n" % {'lang': w.lang, 'hostname': urlparse(w.server).netloc})
f.write(" }\n\n")
@@ -182,7 +182,7 @@
f.write(" return {\n")
for w in self.wikis.itervalues():
- f.write(" '%(lang)s': u'%(path)s',\n" % {'lang': w.lang, 'path': w.scriptpath})
+ f.write(" '%(lang)s': '%(path)s',\n" % {'lang': w.lang, 'path': w.scriptpath})
f.write(" }[code]\n")
f.write("\n")
--
To view, visit https://gerrit.wikimedia.org/r/80229
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5eb13d8853b6ad08b48f40b5ad881616a2f4fd2a
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: Start with a fresh list in html2unicode every time
......................................................................
Start with a fresh list in html2unicode every time
    def x(..., something=[]):
        something.extend([1,2,3])
means something becomes [1,2,3] on the first call, but
[1,2,3,1,2,3] on the *second* call. This meant html2unicode
got a longer ignore list every time it was called.
This commit changes it to the standard idiom
    def x(..., something=None):
        if something is None:
            something = []
which means it always starts as an empty list, instead of whatever was
left over from the last call.
Change-Id: Ie490b575a8a0cc4b5d45bbb97c0606e0fd64d4f9
---
M wikipedia.py
1 file changed, 5 insertions(+), 2 deletions(-)
Approvals:
Ladsgroup: Looks good to me, approved
Malafaya: Checked; Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/wikipedia.py b/wikipedia.py
index 976a310..f304932 100644
--- a/wikipedia.py
+++ b/wikipedia.py
@@ -5657,13 +5657,16 @@
# Utility functions for parsing page titles
-def html2unicode(text, ignore = []):
+def html2unicode(text, ignore = None):
"""Return text, replacing HTML entities by equivalent unicode characters."""
+
+ if ignore is None:
+ ignore = []
# This regular expression will match any decimal and hexadecimal entity and
# also entities that might be named entities.
entityR = re.compile(
r'&(?:amp;)?(#(?P<decimal>\d+)|#x(?P<hex>[0-9a-fA-F]+)|(?P<name>[A-Za-z]+));')
-
+
ignore.extend((38, # Ampersand (&)
39, # Bugzilla 24093
60, # Less than (<)
--
To view, visit https://gerrit.wikimedia.org/r/79811
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie490b575a8a0cc4b5d45bbb97c0606e0fd64d4f9
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: DrTrigon <dr.trigon(a)surfeu.ch>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Malafaya <malafaya(a)clix.pt>
Gerrit-Reviewer: Multichill <maarten(a)mdammers.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Xqt has submitted this change and it was merged.
Change subject: Make generate_user_files.py work for other directories
......................................................................
Make generate_user_files.py work for other directories
Use os.path.abspath(__file__) instead of os.path.abspath(sys.argv[0])
Change-Id: I3172c29352f9e7bddc8de173e6057fd9172e167a
---
M generate_user_files.py
1 file changed, 1 insertion(+), 4 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/generate_user_files.py b/generate_user_files.py
index b8a0064..8f4f7dd 100644
--- a/generate_user_files.py
+++ b/generate_user_files.py
@@ -176,11 +176,8 @@
if choice in "SE":
break
- #
- # I don't like this solution. Temporary for me.
- #
# determine what directory this script (generate_user_files.py) lives in
- install = os.path.dirname(os.path.abspath(sys.argv[0]))
+ install = os.path.dirname(os.path.abspath(__file__))
# config2.py will be in the pywikibot/ directory
f = codecs.open(os.path.join(install, "pywikibot", "config2.py"),
"r", "utf-8")
--
To view, visit https://gerrit.wikimedia.org/r/79577
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I3172c29352f9e7bddc8de173e6057fd9172e167a
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Xqt has submitted this change and it was merged.
Change subject: Change title whitelist to title blacklist
......................................................................
Change title whitelist to title blacklist
Titles with characters outside the BMP [1] (> \uFFFF) are no longer
detected as illegal. See this thread: [2]
[1] https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
[2] http://thread.gmane.org/gmane.comp.python.pywikipediabot.general/13197/
This list of characters was generated by taking the old regular expression
and enumerating the code points 0x00-0x7F that it matched:
import re
m = re.compile(u'''[^ %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\u0080-\uFFFF+]''')
for x in range(0,0x80):
    if m.match(unichr(x)):
        print "%x" % x,
0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 23 3c 3e 5b 5d 7b 7c 7d 7f
Change-Id: I02c26be9ad814ce11d9adf2f997d3d1e05764fd1
---
M pywikibot/page.py
1 file changed, 2 insertions(+), 2 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index e51977c..58debb7 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -2853,8 +2853,8 @@
"""
illegal_titles_pattern = re.compile(
- # Matching titles will be held as illegal.
- u'''[^ %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\u0080-\uFFFF+]'''
+ # Matching titles will be held as illegal.
+ ur'''[\x00-\x1f\x23\x3c\x3e\x5b\x5d\x7b\x7c\x7d\x7f]'''
# URL percent encoding sequences interfere with the ability
# to round-trip titles -- you can't link to them consistently.
u'|%[0-9A-Fa-f]{2}'
--
To view, visit https://gerrit.wikimedia.org/r/78525
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I02c26be9ad814ce11d9adf2f997d3d1e05764fd1
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: Revert "Finish implementation of must_be decorator"
......................................................................
Revert "Finish implementation of must_be decorator"
Russblau reported that something broke with this and
prevented his bot from editing. Reverting until we can
debug and figure out what went wrong.
This reverts commit 650ea9d2c319d5a58b708c87ad9048ad84491240.
Change-Id: I80dd97a75b909f9d2db2485f3681af444ebcd9b9
---
M pywikibot/site.py
1 file changed, 18 insertions(+), 38 deletions(-)
Approvals:
Legoktm: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 943e82b..bca3feb 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -637,43 +637,28 @@
def globalusers_address(self, target='', limit=500, offset='', group=''):
raise NotImplementedError
-
def must_be(group=None,right=None):
+ """ Decorator to require a certain user status. For now, only the values
+ group = 'user' and group = 'sysop' are supported. The right property
+ will be ignored for now.
+
+ @param group: the group the logged in user should belong to
+ legal values: 'user' and 'sysop'
+ @param right: the rights the logged in user hsould have
+ not supported yet and thus ignored.
+ @returns: a decorator to make sure the requirement is statisfied when
+ the decorated function is called.
"""
- Decorator to require a certain user status.
- You can use the group and right independently or together.
- Example:
- @must_be(group='user', right='edit)
- def edit_page(...):
-
- @param group: any arbitrary group the user should belong to
- @param right: the rights the logged in user should have.
- @return: a decorator to make sure the requirement is statisfied when
- the decorated function is called.
- """
-
- if group:
- if group == 'user':
- grp = lambda self: self.login(False)
- elif group == 'sysop':
- grp = lambda self: self.login(True)
- else:
- grp = lambda self: self.has_group(group)
+ if group == 'user':
+ run = lambda self: self.login(False)
+ elif group == 'sysop':
+ run = lambda self: self.login(True)
else:
- grp = lambda self: True # No group provided
-
- if right:
- rht = lambda self: self.has_right(group)
- else:
- rht = lambda self: True # No right provided
-
- run = lambda self: grp(self) and rht(self)
+ raise Exception("Not implemented")
def decorator(fn):
def callee(self, *args, **kwargs):
- ok = run(self)
- if not ok:
- raise NoUsername('') # FIXME: Pick a better error
+ run(self)
return fn(self, *args, **kwargs)
callee.__name__ = fn.__name__
callee.__doc__ = fn.__doc__
@@ -848,11 +833,6 @@
self._loginstatus = LoginStatus.NOT_LOGGED_IN # failure
if not hasattr(self, "_siteinfo"):
self._getsiteinfo()
-
- if sysop:
- return self._loginstatus == LoginStatus.AS_SYSOP
- else:
- return self._loginstatus == LoginStatus.AS_USER
forceLogin = login # alias for backward-compatibility
@@ -2563,7 +2543,7 @@
"editconflict": "Page %(title)s not saved due to edit conflict.",
}
- @must_be(group='user', right='edit')
+ @must_be(group='user')
def editpage(self, page, summary, minor=True, notminor=False,
bot=True, recreate=True, createonly=False, watch=None):
"""Submit an edited Page object to be saved to the wiki.
@@ -2921,7 +2901,7 @@
#TODO: implement patrol
- @must_be(right='block')
+ @must_be(group='sysop')
def blockuser(self, user, expiry, reason, anononly=True, nocreate=True, autoblock=True,
noemail=False, reblock=False):
--
To view, visit https://gerrit.wikimedia.org/r/80173
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I80dd97a75b909f9d2db2485f3681af444ebcd9b9
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: jenkins-bot