[Gerrit] Remove all ur's - change (pywikibot/core) - Pywikibot-commits

30 Nov 2013

jenkins-bot has submitted this change and it was merged.

Change subject: Remove all ur's
......................................................................


Remove all ur's

PEP 414 re-introduced the u'...' syntax, but not the ur'...' syntax. This
means we have to re-write all non-ASCII regexps in some way.

I used different methods, depending on the context.
 1) ASCII regexps, or regexps that get the unicode parts formatted (%s) in:
    --> replace ur'...' by r'...'
 2) Regexps that do not contain any backslashes
    --> replace ur'...' by u'...'
 3) Regexps with a single unicode part, e.g. ur'\{\{ä\}\}'
    --> replace by r'\{\{' + u'ä' + r'\}\}'
 4) Complicated regexps
    --> copy-paste into python 2, and copy-paste the repr() back

Change-Id: Ifd17b508baafcb30016d314852472371e47fa79c
---
M pywikibot/fixes.py
M pywikibot/page.py
M pywikibot/textlib.py
M pywikibot/titletranslate.py
M scripts/blockpageschecker.py
M scripts/category_redirect.py
M scripts/cosmetic_changes.py
M scripts/featured.py
M scripts/interwiki.py
M scripts/reflinks.py
10 files changed, 155 insertions(+), 146 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified

diff --git a/pywikibot/fixes.py b/pywikibot/fixes.py
index aafc6f0..02d6b31 100644
--- a/pywikibot/fixes.py
+++ b/pywikibot/fixes.py
@@ -122,7 +122,7 @@
             # weggelassen wegen vieler falsch Positiver: s, A, V, C, S, J, %
            
(u'(?<!\w)(\d+|\d+[\.,]\d+)(\$|€|DM|£|¥|mg|g|kg|ml|cl|l|t|ms|min|µm|mm|cm|dm|m|km|ha|°C|kB|MB|GB|TB|W|kW|MW|GW|PS|Nm|eV|kcal|mA|mV|kV|Ω|Hz|kHz|MHz|GHz|mol|Pa|Bq|Sv|mSv)(?=\W|²|³|$)',
         r'\1 \2'),
             # Temperaturangabe mit falsch gesetztem Leerzeichen
-            (u'(?<!\w)(\d+|\d+[\.,]\d+)° C(?=\W|²|³|$)',          ur'\1
°C'),
+            (u'(?<!\w)(\d+|\d+[\.,]\d+)° C(?=\W|²|³|$)',         
r'\1' + u' °C'),
             # Kein Leerzeichen nach Komma
             (u'([a-zäöüß](\]\])?,)((\[\[)?[a-zäöüA-ZÄÖÜ])',                      
                                                   r'\1 \3'),
             # Leerzeichen und Komma vertauscht
@@ -152,8 +152,8 @@
             'inside': [
                 r'<code>.*</code>',  # because of code examples
                 r'{{[Zz]itat\|.*?}}',
-                ur'{{§\|.*?}}',   # Gesetzesparagraph
-                ur'§ ?\d+[a-z]',  # Gesetzesparagraph
+                r'{{' + u'§' + r'\|.*?}}',   # Gesetzesparagraph
+                u'§' + r'?\d+[a-z]',  # Gesetzesparagraph
                 r'Ju 52/1m',  # Flugzeugbezeichnung
                 r'Ju 52/3m',  # Flugzeugbezeichnung
                 r'AH-1W',     # Hubschrauberbezeichnung
@@ -425,8 +425,17 @@
             # hyphen-minus as separator, or spaces between digits and separators.
             # Note that these regular expressions also match valid ISBNs, but
             # these won't be changed.
-            (ur'ISBN (978|979) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―]
*(\d+) *[\- −\.‐-―] *(\d)(?!\d)', r'ISBN \1-\2-\3-\4-\5'),  # ISBN-13
-            (ur'ISBN (\d+) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―]
*(\d|X|x)(?!\d)', r'ISBN \1-\2-\3-\4'),  # ISBN-10
+
+            # NOTE
+            # The following regexps are in u'...' format because Python 3.3 does
not support
+            # ur'...' strings. They have been converted by copy-pasting them to
Python 2.7
+            # and copying back the results.
+
+            # ur'ISBN (978|979) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―]
*(\d+) *[\- −\.‐-―] *(\d)(?!\d)'
+            (u'ISBN (978|979) *[\\- \u2212\\.\u2010-\u2015] *(\\d+) *[\\-
\u2212\\.\u2010-\u2015] *(\\d+) *[\\- \u2212\\.\u2010-\u2015] *(\\d+) *[\\-
\u2212\\.\u2010-\u2015] *(\\d)(?!\\d)', r'ISBN \1-\2-\3-\4-\5'),  # ISBN-13
+
+            # ur'ISBN (\d+) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―]
*(\d|X|x)(?!\d)'
+            (u'ISBN (\\d+) *[\\- \u2212\\.\u2010-\u2015] *(\\d+) *[\\-
\u2212\\.\u2010-\u2015] *(\\d+) *[\\- \u2212\\.\u2010-\u2015] *(\\d|X|x)(?!\\d)',
r'ISBN \1-\2-\3-\4'),  # ISBN-10
             # missing space before ISBN-10 or before ISBN-13,
             # or non-breaking space.
             (r'ISBN(|&nbsp;| )((\d(-?)){12}\d|(\d(-?)){9}[\dXx])', r'ISBN
\2'),
@@ -453,96 +462,96 @@
         },
         'replacements': [
             #(u' ,', u' ،'), #FIXME: Do not replace comma in non-Arabic
text, interwiki, image links or <math> syntax.
-            (ur'\bإمرأة\b', u'امرأة'),
-            (ur'\bالى\b', ur'إلى'),
-            (ur'\bإسم\b', u'اسم'),
-            (ur'\bالأن\b', u'الآن'),
-            (ur'\bالة\b', u'آلة'),
-            (ur'\bفى\b', u'في'),
-            (ur'\bإبن\b', u'ابن'),
-            (ur'\bإبنة\b', u'ابنة'),
-            (ur'\bإقتصاد\b', u'اقتصاد'),
-            (ur'\bإجتماع\b', u'اجتماع'),
-            (ur'\bانجيل\b', u'إنجيل'),
-            (ur'\bاجماع\b', u'إجماع'),
-            (ur'\bاكتوبر\b', u'أكتوبر'),
-            (ur'\bإستخراج\b', u'استخراج'),
-            (ur'\bإستعمال\b', u'استعمال'),
-            (ur'\bإستبدال\b', u'استبدال'),
-            (ur'\bإشتراك\b', u'اشتراك'),
-            (ur'\bإستعادة\b', u'استعادة'),
-            (ur'\bإستقلال\b', u'استقلال'),
-            (ur'\bإنتقال\b', u'انتقال'),
-            (ur'\bإتحاد\b', u'اتحاد'),
-            (ur'\bاملاء\b', u'إملاء'),
-            (ur'\bإستخدام\b', u'استخدام'),
-            (ur'\bأحدى\b', u'إحدى'),
-            (ur'\bلاكن\b', u'لكن'),
-            (ur'\bإثنان\b', u'اثنان'),
-            (ur'\bإحتياط\b', u'احتياط'),
-            (ur'\bإقتباس\b', u'اقتباس'),
-            (ur'\bادارة\b', u'إدارة'),
-            (ur'\bابناء\b', u'أبناء'),
-            (ur'\bالانصار\b', u'الأنصار'),
-            (ur'\bاشارة\b', u'إشارة'),
-            (ur'\bإقرأ\b', u'اقرأ'),
-            (ur'\bإمتياز\b', u'امتياز'),
-            (ur'\bارق\b', u'أرق'),
-            (ur'\bاللة\b', u'الله'),
-            (ur'\bإختبار\b', u'اختبار'),
-            (ur'==[ ]?روابط خارجية[ ]?==', u'== وصلات خارجية =='),
-            (ur'\bارسال\b', u'إرسال'),
-            (ur'\bإتصالات\b', u'اتصالات'),
-            (ur'\bابو\b', u'أبو'),
-            (ur'\bابا\b', u'أبا'),
-            (ur'\bاخو\b', u'أخو'),
-            (ur'\bاخا\b', u'أخا'),
-            (ur'\bاخي\b', u'أخي'),
-            (ur'\bاحد\b', u'أحد'),
-            (ur'\bاربعاء\b', u'أربعاء'),
-            (ur'\bاول\b', u'أول'),
-            (ur'\b(ال|)اهم\b', ur'\1أهم'),
-            (ur'\b(ال|)اثقل\b', ur'\1أثقل'),
-            (ur'\b(ال|)امجد\b', ur'\1أمجد'),
-            (ur'\b(ال|)اوسط\b', ur'\1أوسط'),
-            (ur'\b(ال|)اشقر\b', ur'\1أشقر'),
-            (ur'\b(ال|)انور\b', ur'\1أنور'),
-            (ur'\b(ال|)اصعب\b', ur'\1أصعب'),
-            (ur'\b(ال|)اسهل\b', ur'\1أسهل'),
-            (ur'\b(ال|)اجمل\b', ur'\1أجمل'),
-            (ur'\b(ال|)اقبح\b', ur'\1أقبح'),
-            (ur'\b(ال|)اطول\b', ur'\1أطول'),
-            (ur'\b(ال|)اقصر\b', ur'\1أقصر'),
-            (ur'\b(ال|)اسمن\b', ur'\1أسمن'),
-            (ur'\b(ال|)اذكى\b', ur'\1أذكى'),
-            (ur'\b(ال|)اكثر\b', ur'\1أكثر'),
-            (ur'\b(ال|)افضل\b', ur'\1أفضل'),
-            (ur'\b(ال|)اكبر\b', ur'\1أكبر'),
-            (ur'\b(ال|)اشهر\b', ur'\1أشهر'),
-            (ur'\b(ال|)ابطأ\b', ur'\1أبطأ'),
-            (ur'\b(ال|)اماني\b', ur'\1أماني'),
-            (ur'\b(ال|)احلام\b', ur'\1أحلام'),
-            (ur'\b(ال|)اسماء\b', ur'\1أسماء'),
-            (ur'\b(ال|)اسامة\b', ur'\1أسامة'),
-            (ur'\bابراهيم\b', u'إبراهيم'),
-            (ur'\bاسماعيل\b', u'إسماعيل'),
-            (ur'\bايوب\b', u'أيوب'),
-            (ur'\bايمن\b', u'أيمن'),
-            (ur'\bاوزبكستان\b', u'أوزبكستان'),
-            (ur'\bاذربيجان\b', u'أذربيجان'),
-            (ur'\bافغانستان\b', u'أفغانستان'),
-            (ur'\bانجلترا\b', u'إنجلترا'),
-            (ur'\bايطاليا\b', u'إيطاليا'),
-            (ur'\bاوربا\b', u'أوروبا'),
-            (ur'\bأوربا\b', u'أوروبا'),
-            (ur'\bاوغندة\b', u'أوغندة'),
-            (ur'\b(ال|)ا(لماني|فريقي|سترالي)(ا|ة|تان|ان|ين|ي|ون|و|ات|)\b',
ur'\1أ\2\3'),
-            (ur'\b(ال|)ا(وروب|مريك)(ا|ي|ية|يتان|يان|يين|يي|يون|يو|يات|)\b',
ur'\1أ\2\3'),
-           
(ur'\b(ال|)ا(ردن|رجنتين|وغند|سبان|وكران|فغان)(ي|ية|يتان|يان|يين|يي|يون|يو|يات|)\b',
ur'\1أ\2\3'),
-           
(ur'\b(ال|)ا(سرائيل|يران|مارات|نكليز|نجليز)(ي|ية|يتان|يان|يين|يي|يون|يو|يات|)\b',
ur'\1إ\2\3'),
-           
(ur'\b(ال|)(ا|أ)(رثوذكس|رثوذوكس)(ي|ية|يتان|يان|يين|يي|يون|يو|يات|)\b',
ur'\1أرثوذكس\4'),
-           
(ur'\bإست(عمل|خدم|مر|مد|مال|عاض|قام|حال|جاب|قال|زاد|عان|طال)(ت|ا|وا|)\b',
ur'است\1\2'),
-            (ur'\bإست(حال|قال|طال|زاد|عان|قام|راح|جاب|عاض|مال)ة\b',
ur'است\1ة'),
+            (r'\b' + u'إمرأة' + r'\b', u'امرأة'),
+            (r'\b' + u'الى' + r'\b', u'إلى'),
+            (r'\b' + u'إسم' + r'\b', u'اسم'),
+            (r'\b' + u'الأن' + r'\b', u'الآن'),
+            (r'\b' + u'الة' + r'\b', u'آلة'),
+            (r'\b' + u'فى' + r'\b', u'في'),
+            (r'\b' + u'إبن' + r'\b', u'ابن'),
+            (r'\b' + u'إبنة' + r'\b', u'ابنة'),
+            (r'\b' + u'إقتصاد' + r'\b', u'اقتصاد'),
+            (r'\b' + u'إجتماع' + r'\b', u'اجتماع'),
+            (r'\b' + u'انجيل' + r'\b', u'إنجيل'),
+            (r'\b' + u'اجماع' + r'\b', u'إجماع'),
+            (r'\b' + u'اكتوبر' + r'\b', u'أكتوبر'),
+            (r'\b' + u'إستخراج' + r'\b', u'استخراج'),
+            (r'\b' + u'إستعمال' + r'\b', u'استعمال'),
+            (r'\b' + u'إستبدال' + r'\b', u'استبدال'),
+            (r'\b' + u'إشتراك' + r'\b', u'اشتراك'),
+            (r'\b' + u'إستعادة' + r'\b', u'استعادة'),
+            (r'\b' + u'إستقلال' + r'\b', u'استقلال'),
+            (r'\b' + u'إنتقال' + r'\b', u'انتقال'),
+            (r'\b' + u'إتحاد' + r'\b', u'اتحاد'),
+            (r'\b' + u'املاء' + r'\b', u'إملاء'),
+            (r'\b' + u'إستخدام' + r'\b', u'استخدام'),
+            (r'\b' + u'أحدى' + r'\b', u'إحدى'),
+            (r'\b' + u'لاكن' + r'\b', u'لكن'),
+            (r'\b' + u'إثنان' + r'\b', u'اثنان'),
+            (r'\b' + u'إحتياط' + r'\b', u'احتياط'),
+            (r'\b' + u'إقتباس' + r'\b', u'اقتباس'),
+            (r'\b' + u'ادارة' + r'\b', u'إدارة'),
+            (r'\b' + u'ابناء' + r'\b', u'أبناء'),
+            (r'\b' + u'الانصار' + r'\b', u'الأنصار'),
+            (r'\b' + u'اشارة' + r'\b', u'إشارة'),
+            (r'\b' + u'إقرأ' + r'\b', u'اقرأ'),
+            (r'\b' + u'إمتياز' + r'\b', u'امتياز'),
+            (r'\b' + u'ارق' + r'\b', u'أرق'),
+            (r'\b' + u'اللة' + r'\b', u'الله'),
+            (r'\b' + u'إختبار' + r'\b', u'اختبار'),
+            (u'==[ ]?روابط خارجية[ ]?==', u'== وصلات خارجية =='),
+            (r'\b' + u'ارسال' + r'\b', u'إرسال'),
+            (r'\b' + u'إتصالات' + r'\b', u'اتصالات'),
+            (r'\b' + u'ابو' + r'\b', u'أبو'),
+            (r'\b' + u'ابا' + r'\b', u'أبا'),
+            (r'\b' + u'اخو' + r'\b', u'أخو'),
+            (r'\b' + u'اخا' + r'\b', u'أخا'),
+            (r'\b' + u'اخي' + r'\b', u'أخي'),
+            (r'\b' + u'احد' + r'\b', u'أحد'),
+            (r'\b' + u'اربعاء' + r'\b', u'أربعاء'),
+            (r'\b' + u'اول' + r'\b', u'أول'),
+            (r'\b' + u'(ال|)اهم' + r'\b', u'\\1أهم'),
+            (r'\b' + u'(ال|)اثقل' + r'\b', u'\\1أثقل'),
+            (r'\b' + u'(ال|)امجد' + r'\b', u'\\1أمجد'),
+            (r'\b' + u'(ال|)اوسط' + r'\b', u'\\1أوسط'),
+            (r'\b' + u'(ال|)اشقر' + r'\b', u'\\1أشقر'),
+            (r'\b' + u'(ال|)انور' + r'\b', u'\\1أنور'),
+            (r'\b' + u'(ال|)اصعب' + r'\b', u'\\1أصعب'),
+            (r'\b' + u'(ال|)اسهل' + r'\b', u'\\1أسهل'),
+            (r'\b' + u'(ال|)اجمل' + r'\b', u'\\1أجمل'),
+            (r'\b' + u'(ال|)اقبح' + r'\b', u'\\1أقبح'),
+            (r'\b' + u'(ال|)اطول' + r'\b', u'\\1أطول'),
+            (r'\b' + u'(ال|)اقصر' + r'\b', u'\\1أقصر'),
+            (r'\b' + u'(ال|)اسمن' + r'\b', u'\\1أسمن'),
+            (r'\b' + u'(ال|)اذكى' + r'\b', u'\\1أذكى'),
+            (r'\b' + u'(ال|)اكثر' + r'\b', u'\\1أكثر'),
+            (r'\b' + u'(ال|)افضل' + r'\b', u'\\1أفضل'),
+            (r'\b' + u'(ال|)اكبر' + r'\b', u'\\1أكبر'),
+            (r'\b' + u'(ال|)اشهر' + r'\b', u'\\1أشهر'),
+            (r'\b' + u'(ال|)ابطأ' + r'\b', u'\\1أبطأ'),
+            (r'\b' + u'(ال|)اماني' + r'\b',
u'\\1أماني'),
+            (r'\b' + u'(ال|)احلام' + r'\b',
u'\\1أحلام'),
+            (r'\b' + u'(ال|)اسماء' + r'\b',
u'\\1أسماء'),
+            (r'\b' + u'(ال|)اسامة' + r'\b',
u'\\1أسامة'),
+            (r'\b' + u'ابراهيم' + r'\b', u'إبراهيم'),
+            (r'\b' + u'اسماعيل' + r'\b', u'إسماعيل'),
+            (r'\b' + u'ايوب' + r'\b', u'أيوب'),
+            (r'\b' + u'ايمن' + r'\b', u'أيمن'),
+            (r'\b' + u'اوزبكستان' + r'\b',
u'أوزبكستان'),
+            (r'\b' + u'اذربيجان' + r'\b', u'أذربيجان'),
+            (r'\b' + u'افغانستان' + r'\b',
u'أفغانستان'),
+            (r'\b' + u'انجلترا' + r'\b', u'إنجلترا'),
+            (r'\b' + u'ايطاليا' + r'\b', u'إيطاليا'),
+            (r'\b' + u'اوربا' + r'\b', u'أوروبا'),
+            (r'\b' + u'أوربا' + r'\b', u'أوروبا'),
+            (r'\b' + u'اوغندة' + r'\b', u'أوغندة'),
+            (r'\b' +
u'(ال|)ا(لماني|فريقي|سترالي)(ا|ة|تان|ان|ين|ي|ون|و|ات|)' + r'\b',
u'\\1أ\\2\\3'),
+            (r'\b' +
u'(ال|)ا(وروب|مريك)(ا|ي|ية|يتان|يان|يين|يي|يون|يو|يات|)' + r'\b',
u'\\1أ\\2\\3'),
+            (r'\b' +
u'(ال|)ا(ردن|رجنتين|وغند|سبان|وكران|فغان)(ي|ية|يتان|يان|يين|يي|يون|يو|يات|)' +
r'\b', u'\\1أ\\2\\3'),
+            (r'\b' +
u'(ال|)ا(سرائيل|يران|مارات|نكليز|نجليز)(ي|ية|يتان|يان|يين|يي|يون|يو|يات|)' +
r'\b', u'\\1إ\\2\\3'),
+            (r'\b' +
u'(ال|)(ا|أ)(رثوذكس|رثوذوكس)(ي|ية|يتان|يان|يين|يي|يون|يو|يات|)' + r'\b',
u'\\1أرثوذكس\\4'),
+            (r'\b' +
u'إست(عمل|خدم|مر|مد|مال|عاض|قام|حال|جاب|قال|زاد|عان|طال)(ت|ا|وا|)' +
r'\b', u'است\\1\\2'),
+            (r'\b' + u'إست(حال|قال|طال|زاد|عان|قام|راح|جاب|عاض|مال)ة' +
r'\b', u'است\\1ة'),
         ],
         'exceptions': {
             'inside-tags': [
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 1db43d3..9b6a6db 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -3046,7 +3046,7 @@
     """
     illegal_titles_pattern = re.compile(
         # Matching titles will be held as illegal.
-        ur'''[\x00-\x1f\x23\x3c\x3e\x5b\x5d\x7b\x7c\x7d\x7f]'''
+        r'''[\x00-\x1f\x23\x3c\x3e\x5b\x5d\x7b\x7c\x7d\x7f]'''
         # URL percent encoding sequences interfere with the ability
         # to round-trip titles -- you can't link to them consistently.
         u'|%[0-9A-Fa-f]{2}'
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index b4eb950..80ddf86 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -96,9 +96,9 @@
         # Module invocations (currently only Lua)
         'invoke':       re.compile(r'(?i)\{\{\s*#invoke:.*?}\}'),
         # categories
-        'category':     re.compile(ur'\[\[ *(?:%s)\s*:.*?\]\]' %
ur'|'.join(site.namespace(14, all=True))),
+        'category':     re.compile(u'\[\[ *(?:%s)\s*:.*?\]\]' %
u'|'.join(site.namespace(14, all=True))),
         #files
-        'file':         re.compile(ur'\[\[ *(?:%s)\s*:.*?\]\]' %
ur'|'.join(site.namespace(6, all=True))),
+        'file':         re.compile(u'\[\[ *(?:%s)\s*:.*?\]\]' %
u'|'.join(site.namespace(6, all=True))),
 
     }
 
@@ -947,12 +947,12 @@
     marker4 = findmarker(thistxt, u'§§', u'§')
 
     result = []
-    Rmath = re.compile(ur'<math>[^<]+</math>')
+    Rmath = re.compile(r'<math>[^<]+</math>')
     Rvalue = re.compile(r'{{{.+?}}}')
-    Rmarker1 = re.compile(ur'%s(\d+)%s' % (marker1, marker1))
-    Rmarker2 = re.compile(ur'%s(\d+)%s' % (marker2, marker2))
-    Rmarker3 = re.compile(ur'%s(\d+)%s' % (marker3, marker3))
-    Rmarker4 = re.compile(ur'%s(\d+)%s' % (marker4, marker4))
+    Rmarker1 = re.compile(r'%s(\d+)%s' % (marker1, marker1))
+    Rmarker2 = re.compile(r'%s(\d+)%s' % (marker2, marker2))
+    Rmarker3 = re.compile(r'%s(\d+)%s' % (marker3, marker3))
+    Rmarker4 = re.compile(r'%s(\d+)%s' % (marker4, marker4))
 
     # Replace math with markers
     maths = {}
diff --git a/pywikibot/titletranslate.py b/pywikibot/titletranslate.py
index db05833..9e1d15d 100644
--- a/pywikibot/titletranslate.py
+++ b/pywikibot/titletranslate.py
@@ -56,7 +56,7 @@
                     newname = page.title()
                 # ... unless we do want brackets
                 if removebrackets:
-                    newname = re.sub(re.compile(ur"\W*?\(.*?\)\W*?",
re.UNICODE), u" ", newname)
+                    newname = re.sub(re.compile(r"\W*?\(.*?\)\W*?",
re.UNICODE), u" ", newname)
             try:
                 number = int(codes)
                 codes = site.family.languages_by_size[:number]
diff --git a/scripts/blockpageschecker.py b/scripts/blockpageschecker.py
index d6a5677..fa0a65c 100755
--- a/scripts/blockpageschecker.py
+++ b/scripts/blockpageschecker.py
@@ -85,8 +85,8 @@
     'en': None,
     'it': [r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccoparziale(?:|[
_]scad\|.*?|\|.*?)\}\}',
            r'\{\{(?:[Tt]emplate:|)[Aa]bp(?:|[ _]scad\|(?:.*?))\}\}'],
-    'fr': [ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)[Ss]emi[-
]?protection(|[^\}]*)\}\}'],
-    'ja':
[ur'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)半保護(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
+    'fr': [r'\{\{(?:[Tt]emplate:|' + u'[Mm]odèle:' +
r'|)[Ss]emi[- ]?protection(|[^\}]*)\}\}'],
+    'ja': [r'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)' +
u'半保護' + r'(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
 }
 # Regex to get the total-protection template
 templateTotalProtection = {
@@ -94,23 +94,23 @@
     'it': [r'\{\{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[
_]scad\|(?:.*?)|minaccia|cancellata)\}\}',
            r'\{\{(?:[Tt]emplate:|)(?:[Cc][Tt]|[Cc]anc fatte|[Cc][Ee])\}\}',
            r'<div class="toccolours[ _]itwiki[ _]template[
_]avviso">(?:\s|\n)*?[Qq]uesta pagina'],
-    'fr':
[ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)[Pp]rotection(|[^\}]*)\}\}',
-           ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)(?:[Pp]age|[Aa]rchive|[Mm]odèle)
protégée?(|[^\}]*)\}\}'],
-    'ja':
[ur'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)保護(?:性急|)(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
+    'fr': [r'\{\{(?:[Tt]emplate:|' + u'[Mm]odèle:' +
r'|)[Pp]rotection(|[^\}]*)\}\}',
+           r'\{\{(?:[Tt]emplate:|' + u'[Mm]odèle:' +
r'|)(?:[Pp]age|[Aa]rchive|' + u'[Mm]odèle) protégée' +
r'?(|[^\}]*)\}\}'],
+    'ja': [r'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)' +
u'保護(?:性急|)' +
r'(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
 }
 
 # Regex to get the semi-protection move template
 templateSemiMoveProtection = {
     'en': None,
     'it': [r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[
_]scad\|.*?|\|.*?)\}\}'],
-    'ja':
[ur'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)移動半保護(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
+    'ja': [r'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)' +
u'移動半保護' + r'(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
 }
 
 # Regex to get the total-protection move template
 templateTotalMoveProtection = {
     'en': None,
     'it': [r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[
_]scad\|.*?|\|.*?)\}\}'],
-    'ja':
[ur'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)移動保護(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
+    'ja': [r'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)' +
u'移動保護' + r'(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
 }
 
 # If you use only one template for all the type of protection, put it here.
diff --git a/scripts/category_redirect.py b/scripts/category_redirect.py
index a6f54c3..5f8a5bd 100755
--- a/scripts/category_redirect.py
+++ b/scripts/category_redirect.py
@@ -257,15 +257,15 @@
         #  note that any templates containing optional "category:" are
         #  incorrect and will be fixed by the bot
         template_regex = re.compile(
-            ur"""{{\s*(?:%(prefix)s\s*:\s*)?  # optional
"template:"
-                      (?:%(template)s)\s*\|   # catredir template name
-                      (\s*%(catns)s\s*:\s*)?  # optional "category:"
-                      ([^|}]+)                # redirect target cat
-                      (?:\|[^|}]*)*}}         # optional arguments 2+, ignored
-              """ % {'prefix': self.site.namespace(10).lower(),
-                     'template': "|".join(item.replace(" ",
"[ _]+")
-                                          for item in template_list),
-                     'catns': self.site.namespace(14)},
+            r"""{{\s*(?:%(prefix)s\s*:\s*)?  # optional
"template:"
+                     (?:%(template)s)\s*\|   # catredir template name
+                     (\s*%(catns)s\s*:\s*)?  # optional "category:"
+                     ([^|}]+)                # redirect target cat
+                     (?:\|[^|}]*)*}}         # optional arguments 2+, ignored
+             """ % {'prefix': self.site.namespace(10).lower(),
+                    'template': "|".join(item.replace(" ",
"[ _]+")
+                                         for item in template_list),
+                    'catns': self.site.namespace(14)},
             re.I | re.X)
 
         # check for hard-redirected categories that are not already marked
diff --git a/scripts/cosmetic_changes.py b/scripts/cosmetic_changes.py
index 557873e..a99b8db 100755
--- a/scripts/cosmetic_changes.py
+++ b/scripts/cosmetic_changes.py
@@ -697,27 +697,27 @@
         # convert prettytable to wikitable class
         if self.site.language in ('de', 'en'):
             text = pywikibot.replaceExcept(text,
-                                          
ur'(class="[^"]*)prettytable([^"]*")',
-                                           ur'\1wikitable\2', exceptions)
+                                          
r'(class="[^"]*)prettytable([^"]*")',
+                                           r'\1wikitable\2', exceptions)
         return text
 
     def fixTypo(self, text):
         exceptions = ['nowiki', 'comment', 'math', 'pre',
'source',
                       'startspace', 'gallery', 'hyperlink',
'interwiki', 'link']
         # change <number> ccm -> <number> cm³
-        text = pywikibot.replaceExcept(text, ur'(\d)\s*&nbsp;ccm',
-                                       ur'\1&nbsp;cm³', exceptions)
-        text = pywikibot.replaceExcept(text, ur'(\d)\s*ccm',
ur'\1&nbsp;cm³',
+        text = pywikibot.replaceExcept(text, r'(\d)\s*&nbsp;ccm',
+                                       r'\1&nbsp;' + u'cm³',
exceptions)
+        text = pywikibot.replaceExcept(text, r'(\d)\s*ccm',
r'\1&nbsp;' + u'cm³',
                                        exceptions)
         # Solve wrong Nº sign with °C or °F
         # additional exception requested on fr-wiki for this stuff
         pattern = re.compile(u'«.*?»', re.UNICODE)
         exceptions.append(pattern)
-        text = pywikibot.replaceExcept(text, ur'(\d)\s*&nbsp;[º°]([CF])',
-                                       ur'\1&nbsp;°\2', exceptions)
-        text = pywikibot.replaceExcept(text, ur'(\d)\s*[º°]([CF])',
-                                       ur'\1&nbsp;°\2', exceptions)
-        text = pywikibot.replaceExcept(text, ur'º([CF])', ur'°\1',
exceptions)
+        text = pywikibot.replaceExcept(text, r'(\d)\s*&nbsp;' +
u'[º°]([CF])',
+                                       r'\1&nbsp;' + u'°' +
r'\2', exceptions)
+        text = pywikibot.replaceExcept(text, r'(\d)\s*' + u'[º°]([CF])',
+                                       r'\1&nbsp;' + u'°' +
r'\2', exceptions)
+        text = pywikibot.replaceExcept(text, u'º([CF])', u'°' +
r'\1', exceptions)
         return text
 
     def fixArabicLetters(self, text):
@@ -756,12 +756,12 @@
         text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
         if self.site.lang == 'ckb':
             text = pywikibot.replaceExcept(text,
-                                           ur'ه([.،_<\]\s])',
-                                           ur'ە\1', exceptions)
+                                           u'\u0647([.\u060c_<\\]\\s])',
+                                           u'\u06d5\\1', exceptions)
             text = pywikibot.replaceExcept(text, u'ه‌', u'ە',
exceptions)
             text = pywikibot.replaceExcept(text, u'ه', u'ھ', exceptions)
         text = pywikibot.replaceExcept(text, u'ك', u'ک', exceptions)
-        text = pywikibot.replaceExcept(text, ur'[ىي]', u'ی', exceptions)
+        text = pywikibot.replaceExcept(text, u'[ىي]', u'ی', exceptions)
         return text
         # replace persian/arabic digits
         ## deactivated due to bug #3539407
diff --git a/scripts/featured.py b/scripts/featured.py
index bffea39..b81bbb6 100644
--- a/scripts/featured.py
+++ b/scripts/featured.py
@@ -519,7 +519,7 @@
         def compile_link(site, templates):
             """compile one link template list"""
             findtemplate = '(%s)' % '|'.join(templates)
-            return re.compile(ur"\{\{%s\|%s\}\}"
+            return re.compile(r"\{\{%s\|%s\}\}"
                               % (findtemplate.replace(u' ', u'[ _]'),
                                  site.code), re.IGNORECASE)
 
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index 74fd13b..9a91417 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -516,7 +516,7 @@
             f = codecs.open(hintfilename, 'r', config.textfile_encoding)
 
             # hint or title ends either before | or before ]]
-            R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)')
+            R = re.compile(r'\[\[(.+?)(?:\]\]|\|)')
             for pageTitle in R.findall(f.read()):
                 self.hints.append(pageTitle)
             f.close()
diff --git a/scripts/reflinks.py b/scripts/reflinks.py
index 6a024cd..f95c4ef 100644
--- a/scripts/reflinks.py
+++ b/scripts/reflinks.py
@@ -103,14 +103,14 @@
 
 
 soft404 = re.compile(
-    ur'\D404(\D|\Z)|error|errdoc|Not.{0,3}Found|sitedown|eventlog',
+    r'\D404(\D|\Z)|error|errdoc|Not.{0,3}Found|sitedown|eventlog',
     re.IGNORECASE)
 # matches an URL at the index of a website
 dirIndex = re.compile(
-   
ur'^\w+://[^/]+/((default|index)\.(asp|aspx|cgi|htm|html|phtml|mpx|mspx|php|shtml|var))?$',
+   
r'^\w+://[^/]+/((default|index)\.(asp|aspx|cgi|htm|html|phtml|mpx|mspx|php|shtml|var))?$',
     re.IGNORECASE)
 # Extracts the domain name
-domain = re.compile(ur'^(\w+)://(?:www.|)([^/]+)')
+domain = re.compile(r'^(\w+)://(?:www.|)([^/]+)')
 
 globalbadtitles = """
 # is
@@ -162,11 +162,11 @@
 # Regex that match bare references
 linksInRef = re.compile(
     # bracketed URLs
-   
ur'(?i)<ref(?P<name>[^>]*)>\s*\[?(?P<url>(?:http|https|ftp)://(?:'
+
+   
r'(?i)<ref(?P<name>[^>]*)>\s*\[?(?P<url>(?:http|https|ftp)://(?:'
+
     # unbracketed with()
-   
ur'^\[\]\s<>"]+\([^\[\]\s<>"]+[^\[\]\s\.:;\\,<>\?"]+|'
+
+   
r'^\[\]\s<>"]+\([^\[\]\s<>"]+[^\[\]\s\.:;\\,<>\?"]+|'
+
     # unbracketed without ()
-   
ur'[^\[\]\s<>"]+[^\[\]\s\)\.:;\\,<>\?"]+|[^\[\]\s<>"]+))[!?,\s]*\]?\s*</ref>')
+   
r'[^\[\]\s<>"]+[^\[\]\s\)\.:;\\,<>\?"]+|[^\[\]\s<>"]+))[!?,\s]*\]?\s*</ref>')
 
 # Download this file :
 # http://www.twoevils.org/files/wikipedia/404-links.txt.gz
@@ -436,18 +436,18 @@
             raise
 
         # Regex to grasp content-type meta HTML tag in HTML source
-        self.META_CONTENT =
re.compile(ur'(?i)<meta[^>]*content\-type[^>]*>')
+        self.META_CONTENT =
re.compile(r'(?i)<meta[^>]*content\-type[^>]*>')
         # Extract the encoding from a charset property (from content-type !)
-        self.CHARSET =
re.compile(ur'(?i)charset\s*=\s*(?P<enc>[^\'";>/]*)')
+        self.CHARSET =
re.compile(r'(?i)charset\s*=\s*(?P<enc>[^\'";>/]*)')
         # Extract html title from page
-        self.TITLE =
re.compile(ur'(?is)(?<=<title>).*?(?=</title>)')
+        self.TITLE =
re.compile(r'(?is)(?<=<title>).*?(?=</title>)')
         # Matches content inside <script>/<style>/HTML comments
         self.NON_HTML = re.compile(
-           
ur'(?is)<script[^>]*>.*?</script>|<style[^>]*>.*?</style>|<!--.*?-->|<!\[CDATA\[.*?\]\]>')
+           
r'(?is)<script[^>]*>.*?</script>|<style[^>]*>.*?</style>|<!--.*?-->|<!\[CDATA\[.*?\]\]>')
 
         # Authorized mime types for HTML pages
         self.MIME = re.compile(
-            ur'application/(?:xhtml\+xml|xml)|text/(?:ht|x)ml')
+            r'application/(?:xhtml\+xml|xml)|text/(?:ht|x)ml')
 
     def put_page(self, page, new):
         """ Prints diffs between orginal and new (text), puts new text for
page

-- 
To view, visit https://gerrit.wikimedia.org/r/98277
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ifd17b508baafcb30016d314852472371e47fa79c
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot