jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1007387?usp=email )
Change subject: [bugfix] remove content parameter of ItemPage.page_gen method
......................................................................
[bugfix] remove content parameter of ItemPage.page_gen method
The filter needs the page.text, so the text is loaded page by page
anyway. Now always use the site.preloadpages method to bulk load the content.
Bug: T358635
Change-Id: I62dca5845492a636cf556761d95bde5f5210d9f8
---
M pywikibot/proofreadpage.py
1 file changed, 22 insertions(+), 10 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index 2df5eb1..e5dc94d 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -47,7 +47,7 @@
from pywikibot.data.api import ListGenerator, Request
from pywikibot.exceptions import Error, InvalidTitleError, OtherPageSaveError
from pywikibot.page import PageSourceType
-from pywikibot.tools import MediaWikiVersion, cached
+from pywikibot.tools import MediaWikiVersion, cached, remove_last_args
try:
@@ -1305,12 +1305,13 @@
"""
return len(self._page_from_numbers)
- def page_gen(self, start: int = 1,
- end: int | None = None,
- filter_ql: Sequence[int] | None = None,
- only_existing: bool = False,
- content: bool = True
- ) -> Iterable[pywikibot.page.Page]:
+ @remove_last_args(['content']) # since 9.0.0
+ def page_gen(
+ self, start: int = 1,
+ end: int | None = None,
+ filter_ql: Sequence[int] | None = None,
+ only_existing: bool = False
+ ) -> Iterable[pywikibot.page.Page]:
"""Return a page generator which yields pages contained in Index page.
Range is [start ... end], extremes included.
@@ -1324,7 +1325,6 @@
:param filter_ql: filters quality levels
if None: all but 'Without Text'.
:param only_existing: yields only existing pages.
- :param content: preload content.
"""
if end is None:
end = self.num_pages
@@ -1346,8 +1346,7 @@
gen = [(self.get_number(p), p) for p in gen]
gen = [p for n, p in sorted(gen)]
- if content:
- gen = self.site.preloadpages(gen)
+ gen = self.site.preloadpages(gen)
# Filter by QL.
gen = (p for p in gen if p.ql in filter_ql)
# Yield only existing.
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1007387?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I62dca5845492a636cf556761d95bde5f5210d9f8
Gerrit-Change-Number: 1007387
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1005523?usp=email )
Change subject: [bugfix] Use site.code instead of site.lang in interwiki.py
......................................................................
[bugfix] Use site.code instead of site.lang in interwiki.py
The site.code specifies the site. This may be different from the language
specifier 'lang'. Therefore use site.code instead of site.lang.
Also add a new method get_alternative() to ask for an alternative page
hint to prevent code duplication.
Change-Id: I15d66b29d13bbf60c31c5a1cb0003d53601110ae
---
M scripts/interwiki.py
1 file changed, 42 insertions(+), 21 deletions(-)
Approvals:
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index ead72d0..33102ef 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -821,6 +821,21 @@
or self.namespaceMismatch(page, target, counter) \
or self.wiktionaryMismatch(target)
+ def get_alternative(
+ self,
+ site: pywikibot.site.BaseSite
+ ) -> pywikibot.Page | None:
+ """Ask for an alternative Page for a given site.
+
+ :param site: a BaseSite
+ """
+ title = pywikibot.input(f'Give the alternative page for code '
+ f'{site.code!r} (without site code)')
+ if title:
+ return pywikibot.Page(site, title)
+
+ return None
+
def namespaceMismatch(self, linkingPage, linkedPage, counter) -> bool:
"""
Check whether or not the given page has a different namespace.
@@ -836,9 +851,9 @@
# Allow for a mapping between different namespaces
crossFrom = self.origin.site.family.crossnamespace.get(
self.origin.namespace(), {})
- crossTo = crossFrom.get(self.origin.site.lang,
+ crossTo = crossFrom.get(self.origin.site.code,
crossFrom.get('_default', {}))
- nsmatch = crossTo.get(linkedPage.site.lang,
+ nsmatch = crossTo.get(linkedPage.site.code,
crossTo.get('_default', []))
if linkedPage.namespace() in nsmatch:
return False
@@ -879,15 +894,10 @@
if choice == 'g':
self.makeForcedStop(counter)
elif choice == 'a':
- newHint = pywikibot.input(
- 'Give the alternative for language {}, not '
- 'using a language code:'
- .format(linkedPage.site.lang))
- if newHint:
- alternativePage = pywikibot.Page(
- linkedPage.site, newHint)
+ alternative_page = self.get_alternative(linkedPage.site)
+ if alternative_page:
# add the page that was entered by the user
- self.addIfNew(alternativePage, counter, None)
+ self.addIfNew(alternative_page, counter, None)
else:
pywikibot.info(
f'NOTE: ignoring {linkedPage} and its interwiki links')
@@ -986,11 +996,8 @@
return (True, None)
if choice == 'a':
- newHint = pywikibot.input(
- f'Give the alternative for language {page.site.lang}, '
- f'not using a language code:')
- alternativePage = pywikibot.Page(page.site, newHint)
- return (True, alternativePage)
+ alternative_page = self.get_alternative(page.site)
+ return (True, alternative_page)
if choice == 'g':
self.makeForcedStop(counter)
@@ -1001,7 +1008,7 @@
def isIgnored(self, page) -> bool:
"""Return True if pages is to be ignored."""
- if page.site.lang in self.conf.neverlink:
+ if page.site.code in self.conf.neverlink:
pywikibot.info(f'Skipping link {page} to an ignored language')
return True
@@ -1183,7 +1190,7 @@
# Ignore the interwiki links.
iw = ()
if self.conf.lacklanguage \
- and self.conf.lacklanguage in (link.site.lang for link in iw):
+ and self.conf.lacklanguage in (link.site.code for link in iw):
iw = ()
self.workonme = False
if len(iw) < self.conf.minlinks:
@@ -1269,10 +1276,9 @@
if dictName is not None:
if self.origin:
pywikibot.warning(
- '{}:{} relates to {}:{}, which is an '
- 'auto entry {}({})'
- .format(self.origin.site.lang, self.origin,
- page.site.lang, page, dictName, year))
+ f'{self.origin.site.code}:{self.origin} relates '
+ f'to {page.site.code}:{page}, which is an auto '
+ f'entry {dictName}({year})')
# Abort processing if the bot is running in autonomous mode
if self.conf.autonomous:
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1005523?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I15d66b29d13bbf60c31c5a1cb0003d53601110ae
Gerrit-Change-Number: 1005523
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <dalangi-ctr(a)wikimedia.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1005504?usp=email )
Change subject: [cleanup] use self.origin attribute instead of property
......................................................................
[cleanup] use self.origin attribute instead of property
There is no reason to have properties here instead of an attribute.
Change-Id: I8ed6bed0dd7bc28a5855893b2b0c1d0d49f4712a
---
M pywikibot/interwiki_graph.py
1 file changed, 12 insertions(+), 10 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/interwiki_graph.py b/pywikibot/interwiki_graph.py
index 66eb8be..40d67c4 100644
--- a/pywikibot/interwiki_graph.py
+++ b/pywikibot/interwiki_graph.py
@@ -62,7 +62,7 @@
:param origin: the page on the 'origin' wiki
"""
# Remember the "origin page"
- self._origin = origin
+ self.origin = origin
# found_in is a dictionary where pages are keys and lists of
# pages are values. It stores where we found each page.
@@ -72,15 +72,6 @@
if origin:
self.found_in = {origin: []}
- @property
- def origin(self) -> pywikibot.page.Page | None:
- """Page on the origin wiki."""
- return self._origin
-
- @origin.setter
- def origin(self, value: pywikibot.page.Page | None) -> None:
- self._origin = value
-
class GraphDrawer:
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1005504?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I8ed6bed0dd7bc28a5855893b2b0c1d0d49f4712a
Gerrit-Change-Number: 1005504
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1006184?usp=email )
Change subject: Populate MediaInfo._content with expected attributes when loaded
......................................................................
Populate MediaInfo._content with expected attributes when loaded
Added pageid, ns, title, lastrevid, modified, id values to _content
attribute when it is loaded using mediainfo.get() so the format
is identical with values returned by wbgetentities.
Bug: T357608
Change-Id: I9178e3fe5a3a1ccba439864891bb1834fb28b050
---
M pywikibot/page/_wikibase.py
M tests/file_tests.py
2 files changed, 56 insertions(+), 27 deletions(-)
Approvals:
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/pywikibot/page/_wikibase.py b/pywikibot/page/_wikibase.py
index b5de1b3..143e323 100644
--- a/pywikibot/page/_wikibase.py
+++ b/pywikibot/page/_wikibase.py
@@ -451,11 +451,6 @@
def get(self, force: bool = False) -> dict:
"""Fetch all MediaInfo entity data and cache it.
- .. note:: This method may raise exception even if the associated file
- exists because the mediainfo may not have been initialized yet.
- :attr:`labels` and :attr:`statements` can still be accessed and
- modified. :meth:`exists` suppresses the exception.
-
.. note:: dicts returned by this method are references to content
of this entity and their modifying may indirectly cause
unwanted change to the live content
@@ -464,23 +459,51 @@
:raise NoWikibaseEntityError: if this entity doesn't exist
:return: actual data which entity holds
"""
- if self.id == '-1':
- if not force:
- try:
- data = self.file.latest_revision.slots['mediainfo']['*']
- except NoPageError as exc:
+ if force or not hasattr(self, '_content'):
+ if force:
+ self.file.clear_cache()
+
+ # accessing latest_revision loads the file data
+ try:
+ latest_revision = self.file.latest_revision
+ except NoPageError as exc:
+ raise NoWikibaseEntityError(self) from exc
+ except Error as exc:
+ error_message = str(exc)
+ if 'is not a file' in error_message:
raise NoWikibaseEntityError(self) from exc
- except KeyError:
- # reuse the reserved ID for better message
- self.id = 'M' + str(self.file.pageid)
- raise NoWikibaseEntityError(self) from None
+ else:
+ raise Error(self) from exc
- self._content = jsonlib.loads(data)
- self.id = self._content['id']
+ # Create _content. Format is same as with wbgetentities
+ # https://commons.wikimedia.org/w/api.php?action=wbgetentities&ids=M20985340
+ data = {
+ 'title': self.file.title,
+ 'lastrevid': latest_revision['revid'],
+ 'modified': str(latest_revision['timestamp']),
+ 'type': 'mediainfo',
+ 'pageid': self.file.pageid,
+ 'ns': self.file.namespace,
+ 'id': 'M' + str(self.file.pageid),
+ 'labels': {},
+ 'statements': {}
+ }
- self._assert_has_id()
+ # Update 'id', 'labels' and 'statements' if mediainfo is available.
+ # MediaInfo is returned only when it has values.
+ if 'mediainfo' in latest_revision.slots:
+ mediainfo_json = latest_revision.slots['mediainfo']['*']
+ mediainfo_data = jsonlib.loads(mediainfo_json)
+ data.update(mediainfo_data)
- return super().get(force=force)
+ self._content = data
+ self.id = self._content['id']
+
+ self._assert_has_id()
+
+ # Do not pass the force parameter to the upper level because
+ # reloading files without MediaInfo will fail.
+ return super().get()
def getID(self, numeric: bool = False):
"""
@@ -526,10 +549,6 @@
'The provided Claim instance is already used in an entity')
self._assert_has_id()
- if not hasattr(self, '_revid'):
- # workaround for uninitialized mediainfo's
- self._revid = self.file.latest_revision_id
-
self.repo.addClaim(self, claim, bot=bot, **kwargs)
claim.on_item = self
diff --git a/tests/file_tests.py b/tests/file_tests.py
index 40cc008..744d45c 100755
--- a/tests/file_tests.py
+++ b/tests/file_tests.py
@@ -430,11 +430,7 @@
item = page.data_item()
self.assertIsInstance(item, pywikibot.MediaInfo)
- # Get fails as there is no mediainfo.
- with self.assertRaises(NoWikibaseEntityError):
- item.get()
-
- self.assertFalse(item.exists())
+ self.assertTrue(item.exists())
self.assertEqual(f'M{page.pageid}', item.id)
self.assertIsInstance(
item.labels, pywikibot.page._collections.LanguageDict)
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1006184?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I9178e3fe5a3a1ccba439864891bb1834fb28b050
Gerrit-Change-Number: 1006184
Gerrit-PatchSet: 3
Gerrit-Owner: Zache-tool <kimmo.virtanen(a)gmail.com>
Gerrit-Reviewer: Ipr1 <ilkka.prusi(a)gmail.com>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged