Revision: 7819
Author: xqt
Date: 2009-12-23 10:34:31 +0000 (Wed, 23 Dec 2009)
Log Message:
-----------
handle iw-message for nn-wiki
Modified Paths:
--------------
trunk/pywikipedia/cosmetic_changes.py
Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py 2009-12-23 07:30:29 UTC (rev 7818)
+++ trunk/pywikipedia/cosmetic_changes.py 2009-12-23 10:34:31 UTC (rev 7819)
@@ -184,6 +184,8 @@
'zh': u'; 細部更改',
}
+nn_iw_msg = u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->'
+
class CosmeticChangesToolkit:
def __init__(self, site, debug=False, redirect=False, namespace=None):
self.site = site
@@ -226,7 +228,7 @@
Interwiki links to the site itself are displayed like local links.
Remove their language code prefix.
"""
- if not self.talkpage:
+ if not self.talkpage and pywikibot.calledModuleName() <> 'interwiki':
interwikiR = re.compile(r'\[\[%s\s?:([^\[\]\n]*)\]\]' % self.site.lang)
text = interwikiR.sub(r'[[\1]]', text)
return text
@@ -236,20 +238,25 @@
Makes sure that interwiki links, categories and star templates are
put to the correct position and into the right order.
This combines the old instances standardizeInterwiki and standardizeCategories
+ The page footer has the following section in that sequence:
+ 1. categories
+ 2. additional information depending on local site policy
+ 3. stars templates for featured and good articles
+ 4. interwiki links
"""
- starsList = ['Link[ _][AFG][LA]', 'link[ _]adq', 'enllaç[ _]ad',
- 'link[ _]ua', 'legătură[ _]af', 'destacado',
+ starsList = ['link[ _][afgu]a', 'link[ _]adq', 'enllaç[ _]ad',
+ 'link[ _]fl', 'legătură[ _]af', 'destacado',
'ua', 'liên k[ _]t[ _]chọn[ _]lọc']
categories = None
interwikiLinks = None
allstars = []
+ hasCommentLine = False
# The PyWikipediaBot is no longer allowed to touch categories on the German Wikipedia.
# See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…
# ignoring nn-wiki because of the comment line above iw section
- if not self.template and not '{{Personendaten' in text and \
- self.site.sitename() != 'wikipedia:nn':
+ if not self.template and not '{{Personendaten' in text:
categories = pywikibot.getCategoryLinks(text, site = self.site)
if not self.talkpage:# and pywikibot.calledModuleName() <> 'interwiki':
@@ -257,20 +264,33 @@
# Removing the interwiki
text = pywikibot.removeLanguageLinks(text, site = self.site)
- # Dealing the stars' issue
- if self.site.sitename() != 'wikipedia:nn':
- for star in starsList:
- regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
- found = regex.findall(text)
- if found != []:
- if pywikibot.verbose:
- print found
- text = regex.sub('', text)
- allstars += found
+ # Removing the stars' issue
+ for star in starsList:
+ regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
+ found = regex.findall(text)
+ if found != []:
+ if pywikibot.verbose:
+ print found
+ text = regex.sub('', text)
+ allstars += found
+ # nn got a message between the categories and the iw's
+ # and they want to keep it there, first remove it
+ if self.site.language()=='nn':
+ regex = re.compile('(<!-- ?interwiki \(no(/nv)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)')
+ found = regex.findall(text)
+ if found != []:
+ if pywikibot.verbose:
+ print found
+ hasCommentLine = True
+ text = regex.sub('', text)
+
# Adding categories
if categories != None:
text = pywikibot.replaceCategoryLinks(text, categories, site = self.site)
+ # Put the nn iw message back
+ if self.site.language()=='nn' and not self.talkpage and (interwikiLinks != None or hasCommentLine):
+ text = text + '\r\n\r\n' + nn_iw_msg
# Adding stars templates
if allstars != []:
text = text.strip()+'\r\n\r\n'
Revision: 7815
Author: xqt
Date: 2009-12-22 14:39:37 +0000 (Tue, 22 Dec 2009)
Log Message:
-----------
retrieving movelog pages via API
Modified Paths:
--------------
trunk/pywikipedia/redirect.py
Modified: trunk/pywikipedia/redirect.py
===================================================================
--- trunk/pywikipedia/redirect.py 2009-12-22 10:07:33 UTC (rev 7814)
+++ trunk/pywikipedia/redirect.py 2009-12-22 14:39:37 UTC (rev 7815)
@@ -19,17 +19,15 @@
-xml Retrieve information from a local XML dump
(http://download.wikimedia.org). Argument can also be given as
"-xml:filename.xml". Cannot be used with -api or -moves.
- If neither of -xml -api -moves is given, info will be loaded
- from a special page of the live wiki.
+-moves Use the page move log to find double-redirect candidates. Only
+ works with action "double", does not work with -xml. You may
+ use -api option for retrieving pages via API
+
-api Retrieve information from the wiki via MediaWikis application
- program interface (API). Cannot be used with -xml or -moves.
- If neither of -xml -api -moves is given, info will be loaded
- from a special page of the live wiki.
+ program interface (API). Cannot be used with -xml.
--moves Use the page move log to find double-redirect candidates. Only
- works with action "double", does not work with either -xml, or
- -api. If neither of -xml -api -moves is given, info will be
+ NOTE: If neither of -xml -api -moves is given, info will be
loaded from a special page of the live wiki.
-namespace:n Namespace to process. Works only with an XML dump, or the API
@@ -63,7 +61,7 @@
from __future__ import generators
import wikipedia, config, query
import xmlreader
-import re, sys
+import re, sys, datetime
__version__='$Id$'
@@ -411,7 +409,7 @@
yield key
def retrieve_double_redirects(self):
- if self.use_api:
+ if self.use_api and not self.use_move_log:
count = 0
for (pagetitle, type, target, final) \
in self.get_redirects_via_api(maxlen=2):
@@ -424,7 +422,11 @@
elif self.xmlFilename == None:
if self.use_move_log:
- for redir_page in self.get_moved_pages_redirects():
+ if config.use_api:
+ gen = self.get_moved_pages_redirects_via_api()
+ else:
+ gen = self.get_moved_pages_redirects()
+ for redir_page in gen:
yield redir_page.title()
return
# retrieve information from the live wiki's maintenance page
@@ -454,10 +456,46 @@
wikipedia.output(u'\nChecking redirect %i of %i...'
% (num + 1, len(redict)))
+ def get_moved_pages_redirects_via_api(self):
+ if self.offset <= 0:
+ self.offset = 1
+ start = datetime.datetime.utcnow() \
+ - datetime.timedelta(0, self.offset*3600)
+ offset_time = start.strftime("%Y%m%d%H%M%S")
+ params = {
+ 'action' :'query',
+ 'list' :'logevents',
+ 'letype' :'move',
+ 'leprop' :'title|details',
+ 'lelimit' : '500',
+ 'lestart' : offset_time,
+ }
+ data = query.GetData(params, encodeTitle = False)#['query']['logevents']
+ if 'warnings' in data:
+ raise
+ allmoves = data['query']['logevents']
+ wikipedia.output(u'Retrieving %d moved pages via API...' % len(allmoves))
+ if wikipedia.verbose:
+ wikipedia.output(u"[%s]" % offset_time)
+ for moved in allmoves:
+ moved_page = wikipedia.Page(self.site, moved['title'])
+ try:
+ if not moved_page.isRedirectPage():
+ continue
+ except wikipedia.BadTitle:
+ continue
+ except wikipedia.ServerError:
+ continue
+ try:
+ for page in moved_page.getReferences(follow_redirects=True, redirectsOnly=True):
+ yield page
+ except wikipedia.NoPage:
+ # original title must have been deleted after move
+ continue
+
def get_moved_pages_redirects(self):
'''generate redirects to recently-moved pages'''
# this will run forever, until user interrupts it
- import datetime
move_regex = re.compile(
r'moved <a href.*?>(.*?)</a> to <a href=.*?>.*?</a>.*?</li>')
@@ -823,7 +861,7 @@
else:
wikipedia.output(u'Unknown argument: %s' % arg)
- if not action or (api and moved_pages) or (xmlFilename and moved_pages)\
+ if not action or (xmlFilename and moved_pages)\
or (api and xmlFilename):
wikipedia.showHelp('redirect')
else:
Revision: 7810
Author: xqt
Date: 2009-12-22 05:16:45 +0000 (Tue, 22 Dec 2009)
Log Message:
-----------
Ignoring standardizePageFooter() for nn-wiki (comment line above iw section)
Modified Paths:
--------------
trunk/pywikipedia/cosmetic_changes.py
Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py 2009-12-21 08:28:27 UTC (rev 7809)
+++ trunk/pywikipedia/cosmetic_changes.py 2009-12-22 05:16:45 UTC (rev 7810)
@@ -235,7 +235,7 @@
put to the correct position and into the right order.
This combines the old instances standardizeInterwiki and standardizeCategories
"""
- starsList = ['Link[ _][FG][LA]', 'link[ _]adq', 'enllaç[ _]ad',
+ starsList = ['Link[ _][AFG][LA]', 'link[ _]adq', 'enllaç[ _]ad',
'link[ _]ua', 'legătură[ _]af', 'destacado',
'ua', 'liên k[ _]t[ _]chọn[ _]lọc']
@@ -245,7 +245,9 @@
# The PyWikipediaBot is no longer allowed to touch categories on the German Wikipedia.
# See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…
- if not self.template and not '{{Personendaten' in text:
+ # ignoring nn-wiki because of the comment line above iw section
+ if not self.template and not '{{Personendaten' in text and \
+ self.site.sitename() != 'wikipedia:nn':
categories = pywikibot.getCategoryLinks(text, site = self.site)
if not self.talkpage:# and pywikibot.calledModuleName() <> 'interwiki':
@@ -254,16 +256,15 @@
# Removing the interwiki
text = pywikibot.removeLanguageLinks(text, site = self.site)
# Dealing the stars' issue
- #starsList = list()
- allstars = []
- for star in starsList:
- regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
- found = regex.findall(text)
- if found != []:
- if pywikibot.verbose:
- print found
- text = regex.sub('', text)
- allstars += found
+ if self.site.sitename() != 'wikipedia:nn'
+ for star in starsList:
+ regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
+ found = regex.findall(text)
+ if found != []:
+ if pywikibot.verbose:
+ print found
+ text = regex.sub('', text)
+ allstars += found
# Adding categories
if categories != None: