jenkins-bot has submitted this change and it was merged.
Change subject: Add custom formatter to listpages.py
......................................................................
Add custom formatter to listpages.py
Custom format can be applied to the following items extrapolated from a
page object:
site: obtained from page._link._site
title: obtained from page._link._title
loc_title: obtained from page._link.canonical_title()
can_title: obtained from page._link.ns_title()
based either on the canonical ns name or on the ns name in the
language specified by the -outputlang param
onsite: obtained from pywikibot.Site(outputlang, self.site.family)
trs_title: obtained from page._link.ns_title(onsite=onsite)
Added tests for Link.ns_title().
Change-Id: Ia911be7fb45a1e29515208b1b54ee6a213ffc29e
---
M pywikibot/page.py
M scripts/listpages.py
M tests/page_tests.py
3 files changed, 184 insertions(+), 6 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
XZise: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 9a865a8..a6fe416 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -3840,6 +3840,41 @@
else:
return self.title
+ def ns_title(self, onsite=None):
+ """Return full page title, including namespace.
+
+ @param onsite: site object
+ if specified, present title using onsite local namespace,
+ otherwise use self canonical namespace.
+
+ if no corresponding namespace is found in onsite,
+ pywikibot.Error is raised.
+
+ """
+
+ ns_id = self.namespace
+ ns = self.site.namespaces()[ns_id]
+ ns_names = list(self.site.namespaces()[ns_id])
+
+ if onsite is None:
+ namespace = ns.canonical_name
+ else:
+ # look for corresponding ns in onsite by name comparison
+ for name in ns_names:
+ onsite_ns = ns.lookup_name(name, namespaces=onsite.namespaces())
+ # not found
+ if onsite_ns is None:
+ raise pywikibot.Error(
+ u'No corresponding namespace found for namespace %s on %s.'
+ % (self.site.namespaces()[ns_id], onsite))
+ else:
+ namespace = onsite_ns.custom_name
+
+ if namespace:
+ return u'%s:%s' % (namespace, self.title)
+ else:
+ return self.title
+
def astext(self, onsite=None):
"""Return a text representation of the link.
diff --git a/scripts/listpages.py b/scripts/listpages.py
index 3557566..aa10f47 100644
--- a/scripts/listpages.py
+++ b/scripts/listpages.py
@@ -5,12 +5,61 @@
These parameters are supported to specify which pages titles to print:
+-format Defines the output format.
+
+ Can be a custom string according to python string.format() notation or
+ can be selected by a number from following list (1 is default format):
+ 1 - u'{num:4d} {page.title}'
+ --> 10 PageTitle
+
+ 2 - u'{num:4d} [[{page.title}]]'
+ --> 10 [[PageTitle]]
+
+ 3 - u'{page.title}'
+ --> PageTitle
+
+ 4 - u'[[{page.title}]]'
+ --> [[PageTitle]]
+
+ 5 - u'{num:4d} \03{{lightred}}{page.loc_title:<40}\03{{default}}'
+ --> 10 PageTitle (colorised in lightred)
+
+ 6 - u'{num:4d} {page.loc_title:<40} {page.can_title:<40}'
+ --> 10 localised_Namespace:PageTitle canonical_Namespace:PageTitle
+
+ 7 - u'{num:4d} {page.loc_title:<40} {page.trs_title:<40}'
+ --> 10 localised_Namespace:PageTitle outputlang_Namespace:PageTitle
+ (*) requires "-outputlang:lang" to be set.
+
+ num is the sequential number of the listed page.
+
+-outputlang Language for translation of namespaces
+
+-notitle Page title is not printed.
+
+-get Page content is printed.
+
+
+Custom format can be applied to the following items extrapolated from a
+ page object:
+
+ site: obtained from page._link._site
+
+ title: obtained from page._link._title
+
+ loc_title: obtained from page._link.canonical_title()
+
+ can_title: obtained from page._link.ns_title()
+ based either on the canonical namespace name or on the namespace name
+ in the language specified by the -outputlang param;
+ a default value '******' will be used if no ns is found.
+
+ onsite: obtained from pywikibot.Site(outputlang, self.site.family)
+
+ trs_title: obtained from page._link.ns_title(onsite=onsite)
+
+
&params;
-
--notitle Page title is not printed.
-
--get Page content is printed.
-
"""
#
# (C) Pywikibot team, 2008-2014
@@ -20,15 +69,83 @@
__version__ = '$Id$'
#
+
import pywikibot
from pywikibot.pagegenerators import GeneratorFactory, parameterHelp
docuReplacements = {'&params;': parameterHelp}
+class Formatter(object):
+
+ """Structure with Page attributes exposed for formatting from cmd line."""
+
+ fmt_options = {
+ '1': u"{num:4d} {page.title}",
+ '2': u"{num:4d} [[{page.title}]]",
+ '3': u"{page.title}",
+ '4': u"[[{page.title}]]",
+ '5': u"{num:4d}
\03{{lightred}}{page.loc_title:<40}\03{{default}}",
+ '6': u"{num:4d} {page.loc_title:<40}
{page.can_title:<40}",
+ '7': u"{num:4d} {page.loc_title:<40}
{page.trs_title:<40}",
+ }
+
+ # Identify which formats need outputlang
+ fmt_need_lang = [k for k, v in fmt_options.items() if 'trs_title' in v]
+
+ def __init__(self, page, outputlang=None, default='******'):
+ """
+ Constructor.
+
+ @param page: the page to be formatted.
+ @type page: Page object.
+ @param outputlang: language code in which namespace before title should
+ be translated.
+
+ Page namespace will be searched in Site(outputlang, page.site.family)
+ and, if found, its custom name will be used in page.title().
+
+ @type outputlang: str or None, if no translation is wanted.
+ @param default: default string to be used if no corresponding namespace
+ is found when outputlang is not None.
+
+ """
+
+ self.site = page._link._site
+ self.title = page._link.title
+ self.loc_title = page._link.canonical_title()
+ self.can_title = page._link.ns_title()
+ self.outputlang = outputlang
+ if outputlang is not None:
+ # Cache onsite in case of translations.
+ if not hasattr(self, "onsite"):
+ self.onsite = pywikibot.Site(outputlang, self.site.family)
+ try:
+ self.trs_title = page._link.ns_title(onsite=self.onsite)
+ # Fallback if no corresponding namespace is found in onsite.
+ except pywikibot.Error:
+ self.trs_title = u'%s:%s' % (default, page._link.title)
+
+ def output(self, num=None, fmt=1):
+ """Output formatted string."""
+ fmt = self.fmt_options.get(fmt, fmt)
+ # If selected format requires trs_title, outputlang must be set.
+ if (fmt in self.fmt_need_lang or
+ 'trs_title' in fmt and
+ self.outputlang is None):
+ raise ValueError(
+ u"Required format code needs 'outputlang' parameter
set.")
+ if num is None:
+ return fmt.format(page=self)
+ else:
+ return fmt.format(num=num, page=self)
+
+
def main(*args):
gen = None
notitle = False
+ fmt = '1'
+ outputlang = None
page_get = False
# Process global args and prepare generator args parser
@@ -38,6 +155,11 @@
for arg in local_args:
if arg == '-notitle':
notitle = True
+ elif arg.startswith("-format:"):
+ fmt = arg[len("-format:"):]
+ fmt = fmt.replace(u'\\03{{', u'\03{{')
+ elif arg.startswith("-outputlang:"):
+ outputlang = arg[len("-outputlang:"):]
elif arg == '-get':
page_get = True
else:
@@ -47,12 +169,14 @@
if gen:
for i, page in enumerate(gen, start=1):
if not notitle:
- pywikibot.stdout("%4d: %s" % (i, page.title()))
+ page_fmt = Formatter(page, outputlang)
+ pywikibot.stdout(page_fmt.output(num=i, fmt=fmt))
if page_get:
# TODO: catch exceptions
pywikibot.output(page.text, toStdout=True)
else:
pywikibot.showHelp()
+
if __name__ == "__main__":
main()
diff --git a/tests/page_tests.py b/tests/page_tests.py
index 4383cb1..b112ee3 100644
--- a/tests/page_tests.py
+++ b/tests/page_tests.py
@@ -27,6 +27,8 @@
enwiki = pywikibot.Site("en", "wikipedia")
frwiki = pywikibot.Site("fr", "wikipedia")
itwikt = pywikibot.Site("it", "wiktionary")
+ enws = pywikibot.Site("en", "wikisource")
+ itws = pywikibot.Site("it", "wikisource")
namespaces = {0: [u""], #
en.wikipedia.org namespaces for testing
1: [u"Talk:"], # canonical form first, then others
@@ -91,6 +93,7 @@
self.assertEqual(m.title, self.titles[title])
def testHashCmp(self):
+ """Test hash comparison."""
# All links point to en:wikipedia:Test
l1 = pywikibot.page.Link('Test', source=self.enwiki)
l2 = pywikibot.page.Link('en:Test', source=self.frwiki)
@@ -110,6 +113,22 @@
self.assertNotEqual(l1, other)
self.assertNotEqual(hash(l1), hash(other))
+ def test_ns_title(self):
+ """Test that title is returned with correct namespace."""
+ l1 = pywikibot.page.Link('Indice:Test', source=self.itws)
+ self.assertEqual(l1.ns_title(), 'Index:Test')
+ self.assertEqual(l1.ns_title(onsite=self.enws), 'Index:Test')
+
+ # wikisource:it kept Autore as canonical name
+ l2 = pywikibot.page.Link('Autore:Albert Einstein', source=self.itws)
+ self.assertEqual(l2.ns_title(), 'Autore:Albert Einstein')
+ self.assertEqual(l2.ns_title(onsite=self.enws), 'Author:Albert
Einstein')
+
+ # Translation namespace does not exist on wikisource:it
+ l3 = pywikibot.page.Link('Translation:Albert Einstein',
source=self.enws)
+ self.assertEqual(l3.ns_title(), 'Translation:Albert Einstein')
+ self.assertRaises(pywikibot.Error, l3.ns_title, onsite=self.itws)
+
class TestPageObject(PywikibotTestCase):
--
To view, visit
https://gerrit.wikimedia.org/r/125501
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ia911be7fb45a1e29515208b1b54ee6a213ffc29e
Gerrit-PatchSet: 14
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>