jenkins-bot has submitted this change and it was merged.
Change subject: Implement wbsearchentities
......................................................................
Implement wbsearchentities
The 'wbsearchentities' API request is implemented as the search_entities
method of the DataSite class. A new WikibaseSearchItemPageGenerator yields
the pages returned by that method, searching in the language code
specified.
Bug: T68949
Change-Id: Ib7459a4b7c6bafe04d56dcd09ee0f8386711b4cf
---
M pywikibot/data/api.py
M pywikibot/pagegenerators.py
M pywikibot/site.py
M tests/pagegenerators_tests.py
M tests/site_tests.py
5 files changed, 230 insertions(+), 0 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index f979b81..d5100ad 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -1464,6 +1464,110 @@
return self._data
+class APIGenerator(object):
+
+    """Iterator that handles API responses containing lists.
+
+    Every pass over the iterator submits the request, yields each entry
+    found under C{data_name} in the response and then resubmits with the
+    continue parameter advanced by the number of entries just received.
+    Iteration ends when a response contains an empty list or, if a limit
+    was set with set_maximum_items, once that many entries were yielded.
+    """
+
+    def __init__(self, action, continue_name='continue',
+                 limit_name='limit', data_name='data', **kwargs):
+        """
+        Construct an APIGenerator object.
+
+        kwargs are used to create a Request object; see that object's
+        documentation for values. A keyword argument named like
+        C{continue_name} is not passed to the Request; it is removed and
+        used as the starting offset instead (0 if absent).
+
+        @param action: API action name.
+        @type action: str
+        @param continue_name: Name of the continue API parameter.
+        @type continue_name: str
+        @param limit_name: Name of the limit API parameter.
+        @type limit_name: str
+        @param data_name: Name of the data in API response.
+        @type data_name: str
+        """
+        self.continue_name = continue_name
+        self.limit_name = limit_name
+        self.data_name = data_name
+        self.limit = None
+
+        # Fall back to the default site when the caller did not name one,
+        # and make sure the Request is built for the very same site.
+        if 'site' not in kwargs:
+            kwargs['site'] = pywikibot.Site()
+        self.site = kwargs['site']
+        kwargs['action'] = action
+
+        self.starting_offset = kwargs.pop(continue_name, 0)
+        self.request = Request(**kwargs)
+        # Default number of items asked for per API query.
+        self.request[limit_name] = 50
+
+    def set_query_increment(self, value):
+        """
+        Set the maximum number of items to be retrieved per API query.
+
+        If not called, the default is 50.
+
+        @param value: The value of maximum number of items to be retrieved
+            per API request to set.
+        @type value: int
+        """
+        increment = int(value)
+        self.request[self.limit_name] = increment
+        pywikibot.debug(u"%s: Set query_limit to %i."
+                        % (self.__class__.__name__, increment), _logger)
+
+    def set_maximum_items(self, value):
+        """
+        Set the maximum number of items to be retrieved from the wiki.
+
+        If not called, most queries will continue as long as there is
+        more data to be retrieved from the API.
+
+        @param value: The value of maximum number of items to be retrieved
+            in total to set.
+        @type value: int
+        """
+        maximum = int(value)
+        self.limit = maximum
+        # Never ask the API for more items per query than wanted in total.
+        if maximum < self.request[self.limit_name]:
+            self.request[self.limit_name] = maximum
+
+    def __iter__(self):
+        """Submit request and iterate the response.
+
+        Continues response as needed until limit (if defined) is reached.
+        """
+        cls_name = self.__class__.__name__
+        offset = self.starting_offset
+        yielded = 0
+        while True:
+            self.request[self.continue_name] = offset
+            pywikibot.debug(u"%s: Request: %s" % (cls_name, self.request),
+                            _logger)
+            response = self.request.submit()
+
+            received = len(response[self.data_name])
+            pywikibot.debug(u"%s: Retrieved %d items"
+                            % (cls_name, received), _logger)
+            if not received:
+                # An empty list is taken as "nothing more to fetch".
+                pywikibot.debug(u"%s: Stopped iterating due to empty list "
+                                u"in response." % cls_name, _logger)
+                return
+
+            for entry in response[self.data_name]:
+                yield entry
+                yielded += 1
+                if self.limit is not None and yielded >= self.limit:
+                    pywikibot.debug(u"%s: Stopped iterating due to "
+                                    u"exceeding item limit." % cls_name,
+                                    _logger)
+                    return
+            # Next query starts right after the items already received.
+            offset += received
+
+
class QueryGenerator(object):
"""Base class for iterators that handle responses to API
action=query.
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 6f01605..792e092 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -199,6 +199,12 @@
-wikidataquery Takes a WikidataQuery query string like claim[31:12280]
and works on the resulting pages.
+-searchitem Takes a search string and works on Wikibase pages that
+ contain it.
+ Argument can be given as "-searchitem:text", where text
+ is the string to look for, or "-searchitem:lang:text", where
+ lang is the language to search items in.
+
-random Work on random pages returned by [[Special:Random]].
Can also be given as "-random:n" where n is the number
of pages to be returned, otherwise the default is 10 pages.
@@ -668,6 +674,15 @@
imagelinksPage = pywikibot.Page(pywikibot.Link(imagelinkstitle,
self.site))
gen = ImagesPageGenerator(imagelinksPage)
+ elif arg.startswith('-searchitem'):
+ text = arg[len('-searchitem:'):]
+ if not text:
+ text = pywikibot.input(u'Text to look for:')
+ params = text.split(':')
+ text = params[-1]
+ lang = params[0] if len(params) == 2 else None
+ gen = WikibaseSearchItemPageGenerator(text, language=lang,
+ site=self.site)
elif arg.startswith('-search'):
mediawikiQuery = arg[8:]
if not mediawikiQuery:
@@ -2258,6 +2273,33 @@
yield pywikibot.Page(pywikibot.Link(link, site))
+def WikibaseSearchItemPageGenerator(text, language=None, total=None, site=None):
+    """
+    Generate Wikibase item pages that contain the provided text.
+
+    The search is run on the data repository of C{site}. The results are
+    fetched once, their number is reported through pywikibot.output, and
+    an ItemPage of the repository is then yielded for each result.
+
+    @param text: Text to look for.
+    @type text: str
+    @param language: Code of the language to search in. If not specified,
+        the language of C{site} (site.lang) is used.
+    @type language: str
+    @param total: Maximum number of pages to retrieve in total, or None in
+        case of no limit.
+    @type total: int or None
+    @param site: Site for generator results. If not specified,
+        pywikibot.Site() is used.
+    @type site: L{pywikibot.site.BaseSite}
+    """
+    if site is None:
+        site = pywikibot.Site()
+    if language is None:
+        language = site.lang
+    repo = site.data_repository()
+
+    # search_entities is a method of the repository, so the API request is
+    # directed at the repository and not at the (possibly client) site.
+    data = repo.search_entities(text, language, limit=total, site=repo)
+    # Each iteration over the returned generator submits the API requests
+    # again, so materialise the results once and reuse them for both the
+    # count and the yielded pages.
+    items = list(data)
+    pywikibot.output(u'retrieved %d items' % len(items))
+    for item in items:
+        yield pywikibot.ItemPage(repo, item['id'])
+
+
if __name__ == "__main__":
pywikibot.output(u'Pagegenerators cannot be run as script - are you '
u'looking for listpages.py?')
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 5454267..0e2009e 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -5676,6 +5676,31 @@
result = self.editEntity({}, data, bot=bot, **kwargs)
return pywikibot.ItemPage(self, result['entity']['id'])
+    def search_entities(self, search, language, limit=None, **kwargs):
+        """
+        Search for pages or properties that contain the given text.
+
+        @param search: Text to find.
+        @type search: str
+        @param language: Language to search in.
+        @type language: str
+        @param limit: Maximum number of pages to retrieve in total, or None in
+            case of no limit.
+        @type limit: int or None
+        @param kwargs: Additional parameters passed on to the APIGenerator
+            (and from there to the API request), e.g. C{type} or
+            C{continue}. If no C{site} is given, this site is used.
+        @return: Generator iterating the 'search' list from API output.
+        @rtype: L{api.APIGenerator}
+        @raise ValueError: The language is not among the language codes
+            listed in this site's siteinfo.
+        """
+        lang_codes = [lang['code'] for lang in
+                      self._siteinfo.get('languages')]
+        if language not in lang_codes:
+            raise ValueError(u'Data site used does not support provided '
+                             u'language.')
+
+        # Without an explicit site the APIGenerator would fall back to
+        # pywikibot.Site(), which need not be this data site.
+        kwargs.setdefault('site', self)
+
+        gen = api.APIGenerator('wbsearchentities', data_name='search',
+                               search=search, language=language, **kwargs)
+        gen.set_query_increment(50)
+        if limit is not None:
+            gen.set_maximum_items(limit)
+        return gen
+
# deprecated BaseSite methods
def fam(self):
raise NotImplementedError
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index ae7e2d8..2dec8dd 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -657,6 +657,25 @@
gen = gf.getCombinedGenerator()
self.assertEqual(len(set(gen)), 1)
+    def test_searchitem(self):
+        """Test that -searchitem yields at least one page."""
+        factory = pagegenerators.GeneratorFactory(site=self.site)
+        factory.handleArg('-searchitem:abc')
+        found = set(factory.getCombinedGenerator())
+        self.assertGreater(len(found), 0)
+
+    def test_searchitem_language(self):
+        """Test -searchitem with custom language specified."""
+        # Run the same search in two languages, one after the other, and
+        # expect the two result sets to differ.
+        results = []
+        for arg in ('-searchitem:pl:abc', '-searchitem:en:abc'):
+            factory = pagegenerators.GeneratorFactory(site=self.site)
+            factory.handleArg(arg)
+            results.append(set(factory.getCombinedGenerator()))
+        self.assertNotEqual(results[0], results[1])
+
class TestLogeventsFactoryGenerator(DefaultSiteTestCase):
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 55e3d3f..760c965 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -2103,6 +2103,46 @@
self.assertEqual(item.id, 'Q5296')
+class TestDataSiteSearchEntities(WikidataTestCase):
+
+    """Test DataSite.search_entities."""
+
+    def test_general(self):
+        """Test basic search_entities functionality."""
+        datasite = self.get_repo()
+        # Iterating the returned generator submits the API requests, so the
+        # results are collected once and reused for both assertions.
+        pages = list(datasite.search_entities('abc', 'en', limit=50,
+                                              site=self.get_site()))
+        self.assertGreater(len(pages), 0)
+        self.assertLessEqual(len(pages), 50)
+        pages = list(datasite.search_entities('alphabet', 'en',
+                                              type='property', limit=50,
+                                              site=self.get_site()))
+        self.assertGreater(len(pages), 0)
+        self.assertLessEqual(len(pages), 50)
+
+    def test_continue(self):
+        """Test that continue parameter in search_entities works."""
+        datasite = self.get_repo()
+        kwargs = {'limit': 50, 'site': self.get_site()}
+        pages = datasite.search_entities('Rembrandt', 'en', **kwargs)
+        # Start the second search one item further into the result list.
+        kwargs['continue'] = 1
+        pages_continue = datasite.search_entities('Rembrandt', 'en',
+                                                  **kwargs)
+        self.assertNotEqual(list(pages), list(pages_continue))
+
+    def test_language_lists(self):
+        """Test that languages returned by paraminfo and MW are the same."""
+        site = self.get_site()
+        lang_codes = site._paraminfo.parameter('wbsearchentities',
+                                               'language')['type']
+        lang_codes2 = [lang['code'] for lang in
+                       site._siteinfo.get('languages')]
+        self.assertEqual(lang_codes, lang_codes2)
+
+    def test_invalid_language(self):
+        """Test behavior of search_entities with invalid language provided."""
+        datasite = self.get_repo()
+        self.assertRaises(ValueError, datasite.search_entities, 'abc',
+                          'invalidlanguage')
+
+
class TestSametitleSite(TestCase):
"""Test APISite.sametitle on sites with known
behaviour."""
--
To view, visit
https://gerrit.wikimedia.org/r/179586
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ib7459a4b7c6bafe04d56dcd09ee0f8386711b4cf
Gerrit-PatchSet: 9
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: M4tx <m4tx(a)m4tx.pl>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: M4tx <m4tx(a)m4tx.pl>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>