Xqt submitted this change.

View Change


Approvals: Xqt: Verified; Looks good to me, approved
Use re.fullmatch instead of re.match with a trailing $ in the regex

Change-Id: I4da10e242518c69d86416e4682de260be905f5d9
---
M pywikibot/tools/__init__.py
M tests/uploadscript_tests.py
M pywikibot/family.py
M scripts/upload.py
M pywikibot/time.py
M pywikibot/site/_siteinfo.py
M scripts/dataextend.py
M scripts/reflinks.py
8 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/pywikibot/family.py b/pywikibot/family.py
index 7f57190..981250b 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -6,7 +6,6 @@
#
import collections
import logging
-import re
import string
import sys
import types
@@ -555,7 +554,7 @@
which would work with the given URL.
"""
parsed = urlparse.urlparse(url)
- if not re.match('(https?)?$', parsed.scheme):
+ if parsed.scheme not in {'http', 'https', ''}:
return None

path = parsed.path
diff --git a/pywikibot/site/_siteinfo.py b/pywikibot/site/_siteinfo.py
index 3e3afac..4eb1293 100644
--- a/pywikibot/site/_siteinfo.py
+++ b/pywikibot/site/_siteinfo.py
@@ -29,7 +29,7 @@
"""

WARNING_REGEX = re.compile(r'Unrecognized values? for parameter '
- r'["\']siprop["\']: (.+?)\.?$')
+ r'["\']siprop["\']: (.+?)\.?')

# Until we get formatversion=2, we have to convert empty-string properties
# into booleans so they are easier to use.
@@ -139,7 +139,7 @@
"""
def warn_handler(mod, message) -> bool:
"""Return True if the warning is handled."""
- matched = Siteinfo.WARNING_REGEX.match(message)
+ matched = Siteinfo.WARNING_REGEX.fullmatch(message)
if mod == 'siteinfo' and matched:
invalid_properties.extend(
prop.strip() for prop in matched[1].split(','))
diff --git a/pywikibot/time.py b/pywikibot/time.py
index e96a5f7..f760456 100644
--- a/pywikibot/time.py
+++ b/pywikibot/time.py
@@ -114,8 +114,8 @@

.. versionadded:: 7.5
"""
- RE_MW = r'\d{14}$' # noqa: N806
- m = re.match(RE_MW, timestr)
+ RE_MW = r'\d{14}' # noqa: N806
+ m = re.fullmatch(RE_MW, timestr)

if not m:
msg = "time data '{timestr}' does not match MW format."
@@ -134,9 +134,9 @@
"""
RE_ISO8601 = (r'(?:\d{4}-\d{2}-\d{2})(?P<sep>[T ])' # noqa: N806
r'(?:\d{2}:\d{2}:\d{2})(?P<u>[.,]\d{1,6})?'
- r'(?P<tz>Z|[+\-]\d{2}:?\d{,2})?$'
+ r'(?P<tz>Z|[+\-]\d{2}:?\d{,2})?'
)
- m = re.match(RE_ISO8601, timestr)
+ m = re.fullmatch(RE_ISO8601, timestr)

if not m:
msg = "time data '{timestr}' does not match ISO8601 format."
@@ -174,8 +174,8 @@

.. versionadded:: 7.5
"""
- RE_POSIX = r'(?P<S>-?\d{1,13})(?:\.(?P<u>\d{1,6}))?$' # noqa: N806
- m = re.match(RE_POSIX, timestr)
+ RE_POSIX = r'(?P<S>-?\d{1,13})(?:\.(?P<u>\d{1,6}))?' # noqa: N806
+ m = re.fullmatch(RE_POSIX, timestr)

if not m:
msg = "time data '{timestr}' does not match POSIX format."
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 902653e..1c06290 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -438,7 +438,7 @@
"""

MEDIAWIKI_VERSION = re.compile(
- r'(\d+(?:\.\d+)+)(-?wmf\.?(\d+)|alpha|beta(\d+)|-?rc\.?(\d+)|.*)?$')
+ r'(\d+(?:\.\d+)+)(-?wmf\.?(\d+)|alpha|beta(\d+)|-?rc\.?(\d+)|.*)?')

def __init__(self, version_str: str) -> None:
"""
@@ -449,7 +449,8 @@
self._parse(version_str)

def _parse(self, version_str: str) -> None:
- version_match = MediaWikiVersion.MEDIAWIKI_VERSION.match(version_str)
+ version_match = MediaWikiVersion.MEDIAWIKI_VERSION.fullmatch(
+ version_str)

if not version_match:
raise ValueError(f'Invalid version number "{version_str}"')
diff --git a/scripts/dataextend.py b/scripts/dataextend.py
index fbde614..7f326bc 100644
--- a/scripts/dataextend.py
+++ b/scripts/dataextend.py
@@ -92,8 +92,8 @@

"""The Bot."""

- QRE = re.compile(r'Q\d+$')
- PQRE = re.compile(r'[PQ]\d+$')
+ QRE = re.compile(r'Q\d+')
+ PQRE = re.compile(r'[PQ]\d+')

def __init__(self, **kwargs):
"""Initializer."""
@@ -358,7 +358,7 @@
return self.showtime(self.createdateclaim(title[6:]))
if title.startswith('!q!'):
return title[3:]
- if not self.PQRE.match(title):
+ if not self.PQRE.fullmatch(title):
return title

if title in self.labels:
@@ -593,33 +593,34 @@
year = int(m[1])
month = int(m[2])
day = int(m[3])
- if re.match(r'\d{,4}(?:年頃|\.)?$', text):
- year = int(text)
+ m = re.fullmatch(r'(\d{1,4})(?:年頃|\.)?', text)
+ if m:
+ year = int(m[1])
month = None
day = None
- if re.match(r'(?:1\d{3}|20[01]\d)[01]\d[0123]\d$', text):
+ if re.fullmatch(r'(?:1\d{3}|20[01]\d)[01]\d[0123]\d', text):
year = int(text[:4])
month = int(text[4:6])
day = int(text[6:])
- if re.match(r'\d{4}-\d{2}$', text):
+ if re.fullmatch(r'\d{4}-\d{2}', text):
year = int(text[:4])
month = int(text[-2:])
m = re.match(r'(\d{1,2})[-/](\d{4})', text)
if m:
year = int(m[2])
month = int(m[1])
- m = re.match(r'(\d+)[-./|](\d{1,2})[-./|](\d{1,2})$', text)
+ m = re.fullmatch(r'(\d+)[-./|](\d{1,2})[-./|](\d{1,2})', text)
if m:
year = int(m[1])
month = int(m[2])
day = int(m[3])
- m = re.match(
- r'(\d{1,2})[-./|]\s*(\d{1,2})[-./|]\s*(\d{3,4})\.?$', text)
+ m = re.fullmatch(
+ r'(\d{1,2})[-./|]\s*(\d{1,2})[-./|]\s*(\d{3,4})\.?', text)
if m:
year = int(m[3])
month = int(m[2])
day = int(m[1])
- m = re.match(r'(\d{1,2})[-./\s]([iIvVxX]+)[-./\s](\d{4})$', text)
+ m = re.fullmatch(r'(\d{1,2})[-./\s]([iIvVxX]+)[-./\s](\d{4})', text)
if m:
year = int(m[3])
try:
@@ -627,8 +628,8 @@
except KeyError:
raise ValueError(f"Don't know month {m[2]}")
day = int(m[1])
- m = re.match(r"(\d+)(?:\.|er|eme|ème)?[\s.]\s*(?:d'|d[aei] )?"
- r'([^\s.]{2,})\.?[\s.]\s*(\d+)$', text)
+ m = re.fullmatch(r"(\d+)(?:\.|er|eme|ème)?[\s.]\s*(?:d'|d[aei] )?"
+ r'([^\s.]{2,})\.?[\s.]\s*(\d+)', text)
if m:
year = int(m[3])
try:
@@ -636,7 +637,8 @@
except KeyError:
raise ValueError(f"Don't know month {m[2]}")
day = int(m[1])
- m = re.match(r'(\d{4})\.?[\s.]\s*([^\s.]{3,})\.?[\s.]\s*(\d+)$', text)
+ m = re.fullmatch(
+ r'(\d{4})\.?[\s.]\s*([^\s.]{3,})\.?[\s.]\s*(\d+)', text)
if m:
year = int(m[1])
try:
@@ -652,15 +654,15 @@
except KeyError:
raise ValueError(f"Don't know month {m[2]}")
day = int(m[1])
- m = re.match(r'(\w*[a-zA-Z]\w*)\.? (\d+)$', text)
+ m = re.fullmatch(r'(\w*[a-zA-Z]\w*)\.? (\d+)', text)
if m:
year = int(m[2])
try:
month = self.MONTHNUMBER[m[1].lower()]
except KeyError:
raise ValueError(f"Don't know month {m[1]}")
- m = re.match(r'(\w+)\.? (\d{1,2})(?:st|nd|rd|th)?\.?\s*,\s*(\d{3,4})$',
- text)
+ m = re.fullmatch(
+ r'(\w+)\.? (\d{1,2})(?:st|nd|rd|th)?\.?\s*,\s*(\d{3,4})', text)
if m:
year = int(m[3])
try:
@@ -681,7 +683,7 @@
year = int(m[1])
month = int(m[2])
day = int(m[3])
- m = re.match(r'(\d+)年$', text)
+ m = re.fullmatch(r'(\d+)年', text)
if m:
year = int(m[1])
if day == 0:
@@ -868,7 +870,7 @@

createdclaim = pywikibot.Claim(self.site, claim[0])

- if self.QRE.match(claim[1]):
+ if self.QRE.fullmatch(claim[1]):
createdclaim.setTarget(pywikibot.ItemPage(
self.site, claim[1]))

diff --git a/scripts/reflinks.py b/scripts/reflinks.py
index eb673b2..2647f67 100755
--- a/scripts/reflinks.py
+++ b/scripts/reflinks.py
@@ -124,10 +124,10 @@
# matches an URL at the index of a website
dirIndex = re.compile(
r'\w+://[^/]+/((default|index)\.'
- r'(asp|aspx|cgi|htm|html|phtml|mpx|mspx|php|shtml|var))?$',
+ r'(asp|aspx|cgi|htm|html|phtml|mpx|mspx|php|shtml|var))?',
re.IGNORECASE)
# Extracts the domain name
-domain = re.compile(r'^(\w+)://(?:www.|)([^/]+)')
+domain = re.compile(r'^(\w+)://(?:www\.|)([^/]+)')

globalbadtitles = r"""
# is
@@ -601,8 +601,8 @@
f'Redirect 404 : {ref.link} ')
continue

- if dirIndex.match(redir) \
- and not dirIndex.match(ref.link):
+ if dirIndex.fullmatch(redir) \
+ and not dirIndex.fullmatch(ref.link):
pywikibot.info(f'<<lightyellow>>WARNING<<default>> : '
f'Redirect to root : {ref.link} ')
continue
diff --git a/scripts/upload.py b/scripts/upload.py
index 1812b4a..fd1944b 100755
--- a/scripts/upload.py
+++ b/scripts/upload.py
@@ -67,7 +67,7 @@


CHUNK_SIZE_REGEX = re.compile(
- r'-chunked(?::(\d+(?:\.\d+)?)[ \t]*(k|ki|m|mi)?b?)?$', re.I)
+ r'-chunked(?::(\d+(?:\.\d+)?)[ \t]*(k|ki|m|mi)?b?)?', re.I)


def get_chunk_size(match) -> int:
@@ -150,7 +150,7 @@
else:
ignorewarn = True
elif arg == '-chunked':
- match = CHUNK_SIZE_REGEX.match(option)
+ match = CHUNK_SIZE_REGEX.fullmatch(option)
chunk_size = get_chunk_size(match)
elif arg == '-async':
asynchronous = True
diff --git a/tests/uploadscript_tests.py b/tests/uploadscript_tests.py
index 89c7309..aa0441e 100755
--- a/tests/uploadscript_tests.py
+++ b/tests/uploadscript_tests.py
@@ -21,7 +21,7 @@
option = '-chunked'
if value:
option += ':' + value
- match = CHUNK_SIZE_REGEX.match(option)
+ match = CHUNK_SIZE_REGEX.fullmatch(option)
return get_chunk_size(match)



To view, visit change 871293. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I4da10e242518c69d86416e4682de260be905f5d9
Gerrit-Change-Number: 871293
Gerrit-PatchSet: 5
Gerrit-Owner: Matěj Suchánek <matejsuchanek97@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged